Example #1
    def encode(self, x_input, x_query, answer):
        m = self.encode_input(x_input)
        u = self.encode_query(x_query)
        mu = functions.matmul(m, u, transb=True)
        p = functions.softmax(mu)
        c = self.encode_output(x_input)
        # transpose c, then take the inner product with p   -> (2, 50, 1)
        o = functions.matmul(functions.swapaxes(c, 1, 0), p)
        o = functions.swapaxes(o, 1, 0)  # (2, 50)
        predict = self.W(u + o)
        loss = functions.softmax_cross_entropy(predict, answer)
        return loss
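
A minimal, self-contained sketch (independent of the model above) of what F.swapaxes does; it mirrors numpy.swapaxes, which is the behaviour every example on this page relies on.

import numpy as np
import chainer.functions as F

x = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
y = F.swapaxes(x, 0, 1)                     # exchange axes 0 and 1
assert y.shape == (3, 2, 4)
assert np.array_equal(y.data, np.swapaxes(x, 0, 1))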
Example #2
    def propdown(self, hid):
        """ This function propagates the hidden units activation downwords to the visible units
        :param hid: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out)  - given h_sample
        :return: Variable Matrix(batch_size, in_channels, image_height, image_width) - probability for each visible unit to be v_j = 1
        """
        batch_size = hid.data.shape[0]
        if self.real == 0:
            W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
            pre_sigmoid_activation = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize-1)
                # F.matmul(hid, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible))
            v_mean = F.sigmoid(pre_sigmoid_activation)
        else:
            # TODO: check
            W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
            v_mean = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize-1)
        return v_mean
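
A minimal sketch (toy sizes, no trained weights) of the flipped-kernel trick used in propdown above: reversing the two spatial axes of the convolution kernel and swapping its in/out channel axes with F.swapaxes lets a plain convolution with pad = ksize - 1 propagate activations back toward the visible layer. CF.flip is replaced here by NumPy slicing, since only the axis reversal matters for this shape check.

import numpy as np
import chainer.functions as F

out_ch, in_ch, ksize = 4, 1, 3
W = np.random.randn(out_ch, in_ch, ksize, ksize).astype(np.float32)
hid = np.random.randn(2, out_ch, 8, 8).astype(np.float32)

# reverse both spatial axes, then swap the channel axes: (out, in, k, k) -> (in, out, k, k)
W_flipped = F.swapaxes(np.ascontiguousarray(W[:, :, ::-1, ::-1]), axis1=0, axis2=1)
vis = F.convolution_2d(hid, W_flipped, b=None, pad=ksize - 1)
assert vis.shape == (2, in_ch, 8 + ksize - 1, 8 + ksize - 1)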
Example #3
 def __call__(self, src, is_train=False, xp=np):
     # Some namings
     B  = len(src)      # Batch Size
     N  = len(src[0])   # length of source
     H  = self.H
     src_col = lambda x: Variable(self.xp.array([src[i][x] for i in range(B)], dtype=np.int32))
     embed   = lambda e, x: e(self.IE(x), is_train=is_train)
     bi_rnn  = lambda x, y: self.AE(F.concat((x[0], y[1]), axis=1))
     concat_source = lambda S, s: s if S is None else F.concat((S, s), axis=2)
     # State Reset
     self.EF.reset_state()
     self.EB.reset_state()
    
     # Forward + backward encoding
     s = []
     for j in range(N):
         s.append((
             embed(self.EF, src_col(j)),
             embed(self.EB, src_col(-j-1))
         ))
     
     # Joining the encoding data together
     S = None
     for j in range(N):
         s_j = bi_rnn(s[j], s[-j-1])
         S = concat_source(S, F.reshape(s_j, (B, H, 1)))
     S = F.swapaxes(S, 1, 2)
     return S, s_j
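
A toy sketch (illustrative sizes; cols stands in for the per-position bi-RNN outputs above) of the join step: concatenating (B, H, 1) column vectors along axis 2 and then swapping axes 1 and 2 gives a (batch, length, hidden) encoding.

import numpy as np
import chainer.functions as F

B, H, N = 2, 4, 5
cols = [np.zeros((B, H, 1), dtype=np.float32) for _ in range(N)]
S = None
for s_j in cols:
    S = s_j if S is None else F.concat((S, s_j), axis=2)   # (B, H, j+1)
S = F.swapaxes(S, 1, 2)                                     # (B, N, H)
assert S.shape == (B, N, H)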
Example #4
 def check_forward(self, x_data):
     axis1, axis2 = self.axis1, self.axis2
     x = chainer.Variable(x_data)
     y = functions.swapaxes(x, axis1, axis2)
     self.assertEqual(y.data.dtype, self.dtype)
     self.assertTrue((self.x.swapaxes(axis1, axis2) ==
                      cuda.to_cpu(y.data)).all())
Example #5
    def check_backward(self, x_data, y_grad):
        x = chainer.Variable(x_data)
        y = functions.swapaxes(x, self.axis1, self.axis2)
        y.grad = y_grad
        y.backward()

        func = y.creator
        f = lambda: func.forward((x.data.copy(),))

        gx, = gradient_check.numerical_grad(f, (x.data,), (y.grad,), eps=1e-5)
        gradient_check.assert_allclose(gx, x.grad, rtol=1e-5)
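
For reference, a hedged sketch (not part of the original test; concrete shapes and axes filled in for illustration) of the same check written with chainer.gradient_check.check_backward, which wraps the numerical/analytical gradient comparison done by hand above.

import numpy as np
from chainer import gradient_check
import chainer.functions as functions

x_data = np.random.randn(2, 3, 4).astype(np.float32)
y_grad = np.random.randn(3, 2, 4).astype(np.float32)
gradient_check.check_backward(
    lambda x: functions.swapaxes(x, 0, 1), x_data, y_grad,
    eps=1e-5, rtol=1e-5, atol=1e-5)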
Example #6
    def __call__(self, xs, ilens):
        '''VGG2L forward

        :param xs:
        :param ilens:
        :return:
        '''
        logging.info(self.__class__.__name__ + ' input lengths: ' + str(ilens))

        # x: utt x frame x dim
        xs = F.pad_sequence(xs)

        # x: utt x 1 (input channel num) x frame x dim
        xs = F.swapaxes(
            F.reshape(xs, (xs.shape[0], xs.shape[1], self.in_channel,
                           xs.shape[2] // self.in_channel)), 1, 2)

        xs = F.relu(self.conv1_1(xs))
        xs = F.relu(self.conv1_2(xs))
        xs = F.max_pooling_2d(xs, 2, stride=2)

        xs = F.relu(self.conv2_1(xs))
        xs = F.relu(self.conv2_2(xs))
        xs = F.max_pooling_2d(xs, 2, stride=2)

        # change ilens accordingly
        ilens = self.xp.array(self.xp.ceil(
            self.xp.array(ilens, dtype=np.float32) / 2),
                              dtype=np.int32)
        ilens = self.xp.array(self.xp.ceil(
            self.xp.array(ilens, dtype=np.float32) / 2),
                              dtype=np.int32)

        # x: utt list of frame (zero-padded frames removed) x (input channel num x dim)
        xs = F.swapaxes(xs, 1, 2)
        xs = F.reshape(xs,
                       (xs.shape[0], xs.shape[1], xs.shape[2] * xs.shape[3]))
        xs = [xs[i, :ilens[i], :] for i in range(len(ilens))]

        return xs, ilens
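
A toy shape check (illustrative sizes only) for the reshape + swapaxes pair above, which folds the feature dimension into input channels before the VGG stack: (utt, frame, dim) becomes (utt, in_channel, frame, dim // in_channel).

import numpy as np
import chainer.functions as F

utt, frame, dim, in_channel = 2, 10, 6, 3
xs = np.zeros((utt, frame, dim), dtype=np.float32)
xs = F.swapaxes(
    F.reshape(xs, (utt, frame, in_channel, dim // in_channel)), 1, 2)
assert xs.shape == (utt, in_channel, frame, dim // in_channel)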
Example #7
 def query(self, u):
     xp = cuda.get_array_module(u)
     size = self.m.shape[1]
     inds = xp.arange(size - 1, -1, -1, dtype=numpy.int32)
     tm = self.TA(inds)
     tc = self.TC(inds)
     tm = F.broadcast_to(tm, self.m.shape)
     tc = F.broadcast_to(tc, self.c.shape)
     p = F.softmax(F.batch_matmul(self.m + tm, u))
     o = F.batch_matmul(F.swapaxes(self.c + tc, 2, 1), p)
     o = F.squeeze(o, -1)
     u = o + u
     return u
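
A small shape sketch (toy sizes; c and p stand in for the memory and attention weights above) of the readout step: batch_matmul of the axis-swapped memory against the attention weights yields one context vector per batch element.

import numpy as np
import chainer.functions as F

batch, size, dim = 2, 6, 4
c = np.random.randn(batch, size, dim).astype(np.float32)
p = np.random.randn(batch, size, 1).astype(np.float32)   # attention weights

o = F.batch_matmul(F.swapaxes(c, 2, 1), p)                # (batch, dim, 1)
o = F.squeeze(o, -1)                                      # (batch, dim)
assert o.shape == (batch, dim)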
Example #8
 def forward_cnn(self, h):
     # Check and prepare for 2d convolutions
     h = F.expand_dims(h, 2)
     h = F.swapaxes(h, 1, 2)
     # Apply each CNN layer
     for i, cnn_layer in enumerate(self.cnns):
         # cnn pass
         h = self[cnn_layer](h)
         # Apply batch normalization
         if self.cnn_bn:
             bn_lname = '{0:s}_bn'.format(cnn_layer)
             h = self[bn_lname](h)
         # Apply non-linearity
         h = F.relu(h)
     """
     Prepare return
     batch size * num time frames after pooling * cnn out dim
     """
     h = F.swapaxes(h, 1, 2)
     h = F.reshape(h, h.shape[:2] + tuple([-1]))
     h = F.rollaxis(h, 1)
     return h
Example #9
 def maxpooling(self, xs, neighbor):
     sources = defaultdict(list)
     for ee in neighbor:
         for i in neighbor[ee]:
             sources[i].append(xs[ee])
     # sources: keys are entity ids i; values are the embeddings (after the transform) of all entities related to that entity
     # sort sources by entity id
     # in the end, len(result) == len(entities)
     result = []
     for i, xxs in sorted(sources.items(), key=lambda x: x[0]):
         if len(xxs) == 1:
             x = xxs[0]
             x = self.forwardAA(x, xxs)  # attention
             result.append(x)
         else:
             x = F.concat(xxs, axis=0)  # -> (b,d)
             x = F.swapaxes(x, 0, 1)  # -> (d,b)
             x = F.maxout(x, len(xxs))  # -> (d,1)
             x = F.swapaxes(x, 0, 1)  # -> (1,d)
             x = self.forwardAA(x, xxs)  # attention
             result.append(x)
     return result
Example #10
 def __call__(self, x, pid):
     x = self.bn(x)
     x = F.swapaxes(x, axis1=1, axis2=3)
     y = F.expand_dims(F.expand_dims(pid, axis=-1), axis=-1)
     y = F.tile(y, reps=(1, 1, self.audio_window_size, 1))
     x = F.concat((x, y), axis=1)
     x = self.branch(x)
     x = F.reshape(x, shape=(x.shape[0], -1))
     x = F.concat((x, pid), axis=1)
     x = self.fc1(x)
     x = F.tanh(x)
     x = self.fc2(x)
     return x
Example #11
 def predict(self, combined_x):
     """Forward pass for combined input."""
     # combined_x (..., W, H, E+T)
     in_x = F.reshape(combined_x,
                      (-1, ) + combined_x.shape[-3:])  # (N, W, H, E+T)
     in_x = F.swapaxes(in_x, -1, -3)  # (N, E+T, H, W)
     out = F.relu(self.conv1(in_x))  # (N, E, H, W)
     out = F.relu(self.conv2(out))  # (N, E, W', H')
     out = F.max_pooling_2d(out, tuple(GRID))  # (N, E, W', H')
     out = self.fc1(out)  # (N, V)
     out = F.squeeze(out) @ self.embed.W.T  # (N, V)
     out = F.reshape(out, combined_x.shape[:-3] + (VOCAB, ))  # (..., V)
     return out
Example #12
	def __call__(self, X, split_into_variables=True, add_noise_to_input=True):
		xp = self.xp
		batchsize = X.shape[0]
		seq_length = X.shape[1]
		enmbedding = self.embed(X)

		# insert noise at <BLANK> (optional)
		if add_noise_to_input:
			noise = xp.random.normal(0, 1, enmbedding.shape)
			mask = X == BLANK
			mask = xp.broadcast_to(xp.expand_dims(mask, 2), noise.shape)
			enmbedding += noise * mask

		enmbedding = F.swapaxes(enmbedding, 1, 2)
		in_data = []
		if self.ndim_embedding == self.ndim_h:
			in_data.append(enmbedding)

		out_data = self._forward_layer(0, enmbedding)
		in_data.append(out_data)
		for layer_index in xrange(1, self.num_layers):
			out_data = self._forward_layer(layer_index, sum(in_data) if self.densely_connected else in_data[-1])	# dense conv
			in_data.append(out_data)

		out_data = sum(in_data) if self.densely_connected else out_data	# dense conv

		if self.dropout:
			out_data = F.dropout(out_data, ratio=self.dropout)

		out_data = self.dense(out_data)

		if split_into_variables:
			out_data = F.swapaxes(out_data, 1, 2)
			out_data = F.reshape(out_data, (batchsize, -1))
			out_data = F.split_axis(out_data, seq_length, axis=1)
		else:
			out_data = F.swapaxes(out_data, 1, 2)

		return out_data
Example #13
    def attention(self, hs_padded, ht_padded):
        ht_padded_W = self.W(F.concat(ht_padded, axis=0)).reshape(
            ht_padded.shape)  # bt * maxlen_t * demb
        hs_swap = F.swapaxes(hs_padded, 1, 2)  # bt * demb * maxlen_s
        attn_matrix = F.matmul(ht_padded_W,
                               hs_swap)  # bt * maxlen_t * maxlen_s
        attn_matrix_sm = F.softmax(attn_matrix,
                                   axis=2)  # bt * maxlen_t * maxlen_s
        context_vector = F.matmul(
            attn_matrix_sm, hs_padded
        )  # (bt * maxlen_t * maxlen_s) * (bt * maxlen_s * demb) = bt * maxlen_t * demb

        return context_vector, attn_matrix_sm
Example #14
 def seq_encode(self,xs):
     embed_xs = self.embed(xs)
     embed_xs.unchain_backward()
     batchsize, seq_length, dim = embed_xs.shape
     sum_embed_xs = F.sum(embed_xs,axis=1)
     embed_xs_reshape = F.reshape(embed_xs, (batchsize, 1, seq_length, dim))
     # embed_avg = F.average_pooling_2d(embed_xs_reshape, ksize=(embed_xs.shape[2], 1))
     # 1. wide_convolution
     # does the author use narrow convolution?
     xs_conv1 = F.tanh(self.conv1(embed_xs_reshape))
     # xs_conv1_swap = F.reshape(F.swapaxes(xs_conv1, 1, 3),(batchsize, seq_length+3, 50))
     xs_conv1_swap = F.swapaxes(xs_conv1, 1, 3) # (batchsize, 50, seqlen, 1) --> (batchsize, 1, seqlen, 50)
     return sum_embed_xs, xs_conv1, xs_conv1_swap
Example #15
 def __call__(self, xs):
     """
     Forward pass of a sentence.
     :param xs: a batch of sentences
     :return h: final hidden states
     """
     xs = self.embed(xs)
     xs = F.swapaxes(xs, 0, 1) # time, batch, embed
     self.rnn.reset_state()
     for x in xs:
         h = self.rnn(x)
     h = F.tanh(self.linear(h))
     return h
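
A toy illustration (shapes only) of the swap above: moving the time axis to the front lets the loop feed one (batch, embed) slice into the RNN per step.

import numpy as np
import chainer.functions as F

batch, time, embed = 4, 7, 5
xs = np.zeros((batch, time, embed), dtype=np.float32)
xs = F.swapaxes(xs, 0, 1)          # (time, batch, embed)
assert xs.shape == (time, batch, embed)
assert xs[0].shape == (batch, embed)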
Example #17
    def forward_rnn_encode_proj(self, X):
        # Reset rnn state
        self.reset_rnn_state()
        # Get input shape
        in_size, batch_size, in_dim = X.shape
        enc_states = X
        for currL in range(len(self.rnn_enc)):
            for i in range(in_size):
                temp_f = F.expand_dims(
                    F.dropout(self[self.rnn_enc[currL]](enc_states[i]),
                              ratio=self.cfg["dropout"]["rnn"]), 0)
                # if bi-directional
                if self.bi_rnn:
                    temp_r = F.expand_dims(
                        F.dropout(self[self.rnn_rev_enc[currL]](
                            enc_states[-1]),
                                  ratio=self.cfg["dropout"]["rnn"]), 0)

                if i > 0:
                    h_fwd = F.concat((h_fwd, temp_f), axis=0)
                    if self.bi_rnn:
                        h_rev = F.concat((h_rev, temp_r), axis=0)
                else:
                    h_fwd = temp_f
                    if self.bi_rnn:
                        h_rev = temp_r
            # end current rnn layer
            if self.bi_rnn:
                h_rev = F.flipud(h_rev)
                rnn_states = F.concat((h_fwd, h_rev), axis=2)
            else:
                rnn_states = h_fwd
            """
            Apply linear projection
            """
            # print(f"Applying rnn {currL}")
            if currL < (len(self.rnn_enc) - 1):
                # print(f"Applying linear linear_proj {currL}")
                for i in range(0, in_size):
                    currH = F.relu(self[f"enc_proj{currL}_bn"](
                        self[f"enc_proj{currL}"](rnn_states[i])))
                    if i > 0:
                        enc_states = F.concat(
                            (enc_states, F.expand_dims(currH, 0)), axis=0)
                    else:
                        enc_states = F.expand_dims(currH, 0)
                # end for all hidden states
        # end all layers

        # Make the batch size as the first dimension
        self.enc_states = F.swapaxes(enc_states, 0, 1)
Example #18
    def forward_one_step(self, X, ht_enc, H_enc, skip_mask):
        pad = self._kernel_size - 1
        WX = self.W(X)[:, :, -pad - 1, None]
        Vh = self.V(ht_enc)

        Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

        # f-pooling
        Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
        Z = functions.tanh(Z)
        F = self.zoneout(F)
        O = functions.sigmoid(O)
        T = Z.shape[2]

        # compute ungated hidden states
        for t in xrange(T):
            z = Z[..., t]
            f = F[..., t]
            if self.contexts is None:
                ct = (1 - f) * z
                self.contexts = [ct]
            else:
                ct = f * self.contexts[-1] + (1 - f) * z
                self.contexts.append(ct)

        if skip_mask is not None:
            assert skip_mask.shape[1] == H_enc.shape[2]
            softmax_bias = (skip_mask == 0) * -1e6

        # compute attention weights (eq.8)
        H_enc = functions.swapaxes(H_enc, 1, 2)
        for t in xrange(T):
            ct = self.contexts[t - T]
            bias = 0 if skip_mask is None else softmax_bias[
                ..., None]  # to skip PAD
            mask = 1 if skip_mask is None else skip_mask[...,
                                                         None]  # to skip PAD
            alpha = functions.batch_matmul(H_enc, ct) + bias
            alpha = functions.softmax(alpha) * mask
            alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
            kt = functions.sum(alpha * H_enc, axis=1)
            ot = O[..., t]
            self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

            if self.H is None:
                self.H = functions.expand_dims(self.ht, 2)
            else:
                self.H = functions.concat(
                    (self.H, functions.expand_dims(self.ht, 2)), axis=2)

        return self.H
Example #19
    def __call__(self, h1s, h2s):
        # messy, but this will do for now
        seq_len, _ = h1s[0].shape
        h2s_len = [x.shape[0] for x in h2s]

        h1s_stack = F.stack(h1s, axis=0)
        h2s_stack = F.pad_sequence(h2s, padding=-1)

        h2s_mask = self.xp.swapaxes((h2s_stack.data != -1)[:, :, :seq_len], 1,
                                    2)
        minfs = self.xp.full(h2s_mask.shape, -np.inf, dtype=np.float32)
        raw_attn_mat = F.batch_matmul(h1s_stack, F.swapaxes(h2s_stack, 1, 2))
        masked_attn_mat = F.where(h2s_mask, raw_attn_mat, minfs)

        # compute the weighted sum along the h1s direction
        # normalizing here should also be fine
        h1s_attn = F.batch_matmul(F.softmax(masked_attn_mat, axis=2),
                                  h2s_stack)
        m1 = calc_vector_interactions(h1s_stack, h1s_attn)
        if self.drop_local_inference:
            m1 = F.dropout(m1, 0.5)
        m1s = F.separate(m1, axis=0)

        # compute the weighted sum along the h2s direction
        # normalizing here should also be fine
        h2s_attn_mat = F.softmax(masked_attn_mat, axis=1)
        # in this direction the softmax produces NaNs, so fill them with zeros
        # without the zero fill, NaN * real products appear and the whole computation breaks down
        masked_h2s_attn_mat = F.where(h2s_mask, h2s_attn_mat,
                                      self.xp.zeros(h2s_mask.shape, dtype='f'))
        h2s_attn = F.swapaxes(
            F.batch_matmul(F.swapaxes(h1s_stack, 1, 2), masked_h2s_attn_mat),
            1, 2)
        m2 = calc_vector_interactions(h2s_stack, h2s_attn)
        if self.drop_local_inference:
            m2 = F.dropout(m2, 0.5)
        m2s = [h[:l, :] for h, l in zip(F.separate(m2, axis=0), h2s_len)]
        return m1s, m2s
Example #20
    def __call__(self, x):
        """ call
        Args:
            x: [batch, n_global_capsule, caps_dim, n_local_grid, n_local_grid]
                ex) [?, 32, 8, 6, 6]
                    -> [?, 32, 6, 6, 8]
                    -> [?, 10, 1152, 8, 1]
        """
        # calculating x_hat
        x = F.swapaxes(x, self.caps_dim, -1)
        x = F.reshape(x, (-1, self.in_caps, self.in_dims))
        x = F.expand_dims(x, -1)
        x = F.expand_dims(x, 1)
        x = F.tile(x, (1, self.out_caps, 1, 1, 1))
        Ws = F.tile(self.W, (x.shape[0], 1, 1, 1, 1))
        x_hats = F.matmul(Ws, x)

        # dynamic routing
        x_hats = F.swapaxes(x_hats, 2, 3)
        x_hats = F.reshape(x_hats, x_hats.shape[:-1])
        v_j = routing(x_hats, self.n_iters)

        return v_j
Example #21
 def query(self, u):
     m = self.m
     c = self.c
     batch, size = m.data.shape[:2]
     inds = chainer.Variable(xp.arange(size, dtype=numpy.int32)[::-1])
     tm = self.TA(inds)
     tc = self.TC(inds)
     tm = F.broadcast_to(tm, (batch,) + tm.data.shape)
     tc = F.broadcast_to(tc, (batch,) + tc.data.shape)
     p = F.softmax(F.batch_matmul(m + tm, u))
     o = F.batch_matmul(F.swapaxes(c + tc, 2, 1), p)
     o = F.reshape(o, (batch, m.data.shape[2]))
     u = o + u
     return u
Example #22
 def query(self, u):
     m = self.m
     c = self.c
     batch, size = m.data.shape[:2]
     inds = chainer.Variable(xp.arange(size, dtype=numpy.int32)[::-1])
     tm = self.TA(inds)
     tc = self.TC(inds)
     tm = F.broadcast_to(tm, (batch, ) + tm.data.shape)
     tc = F.broadcast_to(tc, (batch, ) + tc.data.shape)
     p = F.softmax(F.batch_matmul(m + tm, u))
     o = F.batch_matmul(F.swapaxes(c + tc, 2, 1), p)
     o = F.reshape(o, (batch, m.data.shape[2]))
     u = o + u
     return u
Example #23
 def __call__(self, x):
     z = F.relu(self[1](self[0](x)))
     z = F.dropout(
         F.max_pooling_2d(z, ksize=(1, 5), stride=(1, 5), pad=(0, 0)), .1)
     z = F.relu(self[3](self[2](z)))
     z = F.dropout(
         F.max_pooling_2d(z, ksize=(1, 5), stride=(1, 5), pad=(0, 0)), .1)
     z = F.relu(self[5](self[4](z)))
     z = F.dropout(
         F.max_pooling_2d(z, ksize=(1, 7), stride=(1, 7), pad=(0, 0)), .1)
     z = self[6](z)
     z = F.squeeze(z)
     z = F.swapaxes(z, 1, 2)
     return z
Example #24
    def decode_one_step(self, X, encoder_last_hidden_states, test=False):
        assert len(encoder_last_hidden_states) == self.num_layers
        batchsize = X.shape[0]
        seq_length = X.shape[1]

        xt = X[:, -1, None]
        enmbedding = self.decoder_embed(xt)
        enmbedding = F.swapaxes(enmbedding, 1, 2)

        out_data = self._forward_decoder_layer_one_step(
            0, enmbedding, encoder_last_hidden_states[0], test=test)
        in_data = [out_data]

        for layer_index in xrange(1, self.num_layers):
            out_data = self._forward_decoder_layer_one_step(
                layer_index,
                sum(in_data) if self.densely_connected else in_data[-1],
                encoder_last_hidden_states[layer_index],
                test=test)
            in_data.append(out_data)

        out_data = sum(
            in_data) if self.densely_connected else out_data  # dense conv
        out_data = out_data[:, :, -1, None]

        if self.dropout:
            out_data = F.dropout(out_data,
                                 ratio=self.dropout_ratio,
                                 train=not test)

        out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.ndim_h))
        Y = self.dense(out_data)

        if test:
            Y.unchain_backward()

        return Y
Example #25
    def decode_one_step(self, X, encoder_last_hidden_states):
        assert len(encoder_last_hidden_states) == self.num_layers
        batchsize = X.shape[0]
        seq_length = X.shape[1]
        ksize = self.decoder_kernel_size

        if seq_length < ksize:
            self.reset_state()
            return self.decode(X, encoder_last_hidden_states, return_last=True)

        xt = X[:, -ksize:]
        enmbedding = self.decoder_embed(xt)
        enmbedding = F.swapaxes(enmbedding, 1, 2)

        out_data = self._forward_decoder_layer_one_step(
            0, enmbedding, encoder_last_hidden_states[0])
        in_data = [out_data]

        for layer_index in range(1, self.num_layers):
            out_data = self._forward_decoder_layer_one_step(
                layer_index,
                F.concat(in_data) if self.densely_connected else in_data[-1],
                encoder_last_hidden_states[layer_index])
            in_data.append(out_data)

        out_data = F.concat(in_data) if self.densely_connected else in_data[
            -1]  # dense conv
        out_data = out_data[:, :, -1, None]

        if self.using_dropout:
            out_data = F.dropout(out_data, ratio=self.dropout)

        out_data = self.fc(out_data)
        out_data = F.reshape(F.swapaxes(out_data, 1, 2),
                             (-1, self.vocab_size_dec))

        return out_data
Example #26
 def query(self, u):
     xp = cuda.get_array_module(u.data)
     m = self.m
     c = self.c
     batch, size = m.data.shape[:2]
     inds = xp.arange(size - 1, -1, -1, dtype=numpy.int32)
     tm = self.TA(inds)
     tc = self.TC(inds)
     tm = F.broadcast_to(tm, (batch,) + tm.data.shape)
     tc = F.broadcast_to(tc, (batch,) + tc.data.shape)
     p = F.softmax(F.batch_matmul(m + tm, u))
     o = F.batch_matmul(F.swapaxes(c + tc, 2, 1), p)
     o = o[:, :, 0]
     u = o + u
     return u
Example #27
    def __call__(self, x):
        batch = x.shape[0]
        batch_seq_len = batch * self.seq_len

        x = F.reshape(x,
                      shape=(batch_seq_len, 1, self.audio_window_size,
                             self.audio_features))
        x = F.swapaxes(x, axis1=1, axis2=3)
        x = self.conv_branch(x)
        x = F.reshape(x, shape=(batch_seq_len, 1, -1))
        x = self.fc_branch(x)
        x = F.reshape(x, shape=(batch, self.seq_len, -1))
        x = F.swapaxes(x, axis1=1, axis2=2)

        y = x[:, :, (self.seq_len // 2)]

        w = self.att_conv_branch(x)
        w = F.reshape(w, shape=(batch, self.seq_len))
        w = self.att_fc(w)
        w = F.expand_dims(w, axis=-1)
        x = F.batch_matmul(x, w)
        x = F.squeeze(x, axis=-1)

        return x, y
Example #28
 def __call__(self, x, dur=1):
     x = F.pad(x, [(0, 0), (0, 0), (125 * dur, 125 * dur), (0, 0)],
               'constant')
     z = F.relu(self[1](self[0](x)))
     z = F.dropout(
         F.max_pooling_2d(z, ksize=(15, 1), stride=(15, 1), pad=(0, 0)), .1)
     z = F.relu(self[3](self[2](z)))
     z = F.dropout(
         F.max_pooling_2d(z, ksize=(11, 1), stride=(11, 1), pad=(0, 0)), .1)
     z = F.relu(self[5](self[4](z)))
     z = F.relu(self[7](self[6](z)))
     z = self[8](z)
     z = F.squeeze(z)
     z = F.swapaxes(z, 1, 2)
     return z
Example #29
    def __call__(self, x):
        h = self.conv1(x, self.train)
        h = self.conv2(h, self.train)
        h = F.max_pooling_2d(h, (1, 160))
        h = F.swapaxes(h, 1, 2)

        h = self.conv3(h, self.train)
        h = F.max_pooling_2d(h, 3)
        h = self.conv4(h, self.train)
        h = F.max_pooling_2d(h, (1, 3))

        h = F.dropout(F.relu(self.fc5(h)), train=self.train)
        h = F.dropout(F.relu(self.fc6(h)), train=self.train)

        return self.fc7(h)
Example #30
def benchmark_cuda_ctc(batchsize,
                       label_length,
                       seq_length,
                       vocab_size,
                       repeat=50):
    label_unigram = xp.random.randint(1,
                                      vocab_size,
                                      size=(batchsize,
                                            label_length)).astype(xp.int32)
    length_unigram = xp.full((batchsize, ), label_length, dtype=np.int32)
    blank_symbol = 0

    x = xp.random.normal(0, 1,
                         size=batchsize * vocab_size * seq_length).reshape(
                             (batchsize, vocab_size,
                              seq_length)).astype(xp.float32)

    in_data = Variable(x)
    in_data = F.swapaxes(in_data, 1, 2)
    in_data = F.reshape(in_data, (batchsize, -1))
    in_data = F.split_axis(in_data, seq_length, axis=1)

    x_length = Variable(xp.full((batchsize, ), seq_length, dtype=np.int32))

    start_time = time.time()
    for i in range(repeat):
        loss_ctc = cuda_ctc.connectionist_temporal_classification(
            in_data,
            label_unigram,
            blank_symbol,
            x_length,
            Variable(length_unigram),
            reduce="mean")
    forward_time_mean = (time.time() - start_time) / repeat

    start_time = time.time()
    for i in range(repeat):
        loss_ctc = cuda_ctc.connectionist_temporal_classification(
            in_data,
            label_unigram,
            blank_symbol,
            x_length,
            Variable(length_unigram),
            reduce="mean")
        loss_ctc.backward()
    backward_time_mean = (time.time() - start_time) / repeat

    return forward_time_mean, backward_time_mean
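
A toy check (arbitrary sizes) of the input preparation above: swapaxes + reshape + split_axis converts a (batch, vocab, time) score tensor into the per-timestep list of (batch, vocab) arrays that the CTC loss expects.

import numpy as np
import chainer.functions as F
from chainer import Variable

batchsize, vocab_size, seq_length = 2, 5, 4
x = Variable(np.zeros((batchsize, vocab_size, seq_length), dtype=np.float32))
x = F.swapaxes(x, 1, 2)                     # (batch, time, vocab)
x = F.reshape(x, (batchsize, -1))           # (batch, time * vocab)
xs = F.split_axis(x, seq_length, axis=1)    # seq_length pieces of (batch, vocab)
assert len(xs) == seq_length
assert all(x_t.shape == (batchsize, vocab_size) for x_t in xs)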
Example #31
    def wsd_with_tc(self, sent, trf_encoded_matrix, labels):

        ### WSD ###

        if self.model_type == "TRF-Multi" or self.model_type == "TRF-Delay-Multi":
            y_wsd = self.wsd_only(trf_encoded_matrix, labels)
        elif self.model_type == "TRF-Sequential":
            y_wsd, task_type = self.wsd_model(sent, None, None,
                                              True)  ## sequential loading

        y_wsd_soft = F.softmax(y_wsd)  ## apply softmax to the predictions
        argmax_wsd = F.argmax(y_wsd_soft, axis=1)  ## take the index of the maximum value
        cond = chainer.Variable(
            self.xp.array([
                True if i != "<PAD>" else False for i in list(chain(*labels))
            ]))  ## condition for ignoring words without a sense label
        pad_array = chainer.Variable(
            -1 * self.xp.ones(argmax_wsd.shape, dtype=argmax_wsd.dtype))
        pad_array_argmax_wsd = F.where(cond, argmax_wsd, pad_array)

        sense_label_embed = F.embed_id(x=pad_array_argmax_wsd,
                                       W=self.xp.array(
                                           self.lookup_table_sense_fixed),
                                       ignore_label=-1)  ## fixed (not updated).

        sense_label_embed = sense_label_embed.reshape(
            trf_encoded_matrix.shape[0], trf_encoded_matrix.shape[-1], -1)
        origin_shape = sense_label_embed.shape
        sense_label_embed = F.moveaxis(sense_label_embed, 1, 2)

        ## replacement ##
        cond_reshape = cond.reshape(cond.shape[0], -1)
        cond_reshape = F.broadcast_to(
            cond_reshape, (cond_reshape.shape[0], trf_encoded_matrix.shape[1]))
        cond_reshape = cond_reshape.reshape(origin_shape)
        cond_reshape = F.swapaxes(cond_reshape, 1, 2)
        replaced_trf_matrix = F.where(cond_reshape, sense_label_embed,
                                      trf_encoded_matrix)

        ### feed the WSD predictions into TC ###
        tc = replaced_trf_matrix  ## document matrix after replacement

        ### TC ###
        tc_features = F.sum(tc, axis=2)  ## TC features
        y_tc = self.fc2(tc_features)  ### TC predictions

        return (y_tc, y_wsd) if (self.model_type == "TRF-Multi") or (
            self.model_type == "TRF-Delay-Multi") else y_tc
Example #32
    def eval(self, **dataset):
        """Calculate loss function from given datasets and model.

        Args:
            **dataset (~numpy.ndarray):
                Datasets passed as kwargs. Name of each key is in the
                format 'inputs/N' or 'labels/N'. 'N' is the order of
                the dataset.

        Returns:
            ~chainer.Variable:
            A scalar value calculated with loss function.
        """
        inputs = [dataset[f'inputs/{i}'] for i
                  in range(self.order['descriptor'] + 1)]
        labels = [dataset[f'labels/{i}'] for i
                  in range(self.order['property'] + 1)]
        predictions = self._model.predict(inputs, self.order['descriptor'])

        loss0 = F.mean_squared_error(predictions[0], labels[0])
        loss1 = F.mean_squared_error(predictions[1], labels[1])
        loss_sum1 = F.mean(predictions[1])
        transverse = F.swapaxes(predictions[2], 2, 3)
        loss_rot = F.mean(F.square((predictions[2] - transverse)
                                   / (predictions[2] + transverse)))
        total_loss = ((1.0 - self._mixing_beta) * loss0
                      + self._mixing_beta * loss1
                      + self._summation * loss_sum1
                      + self._rotation * loss_rot)

        RMSE0 = F.sqrt(loss0)
        RMSE1 = F.sqrt(loss1)
        AbsMean1 = F.absolute(loss_sum1)
        RMS_rot = F.sqrt(loss_rot)
        total = ((1.0 - self._mixing_beta) * RMSE0
                 + self._mixing_beta * RMSE1
                 + self._summation * AbsMean1
                 + self._rotation * RMS_rot)

        observation = {
            self._observation_keys[0]: RMSE0,
            self._observation_keys[1]: RMSE1,
            self._observation_keys[2]: AbsMean1,
            self._observation_keys[3]: RMS_rot,
            self._observation_keys[4]: total,
            }
        chainer.report(observation, observer=self._model)
        return total_loss
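
A toy sketch (random values, assumed shapes) of the rotation term above: F.swapaxes(..., 2, 3) transposes each trailing 2-D block, so loss_rot measures how far the predicted matrices are from being symmetric.

import numpy as np
import chainer.functions as F

pred = np.random.randn(4, 1, 3, 3).astype(np.float32)
transverse = F.swapaxes(pred, 2, 3)
loss_rot = F.mean(F.square((pred - transverse) / (pred + transverse)))
assert loss_rot.shape == ()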
Example #33
    def inverse(self, y):
        scale_sqr = self.scale * self.scale
        batch, y_channels, y_height, y_width = y.shape
        assert (y_channels % scale_sqr == 0)
        x_channels = y_channels // scale_sqr
        x_height = y_height * self.scale
        x_width = y_width * self.scale

        x = F.transpose(y, axes=(0, 2, 3, 1))
        x = x.reshape(batch, y_height, y_width, scale_sqr, x_channels)
        d3_split_seq = F.split_axis(x, indices_or_sections=(x.shape[3] // self.scale), axis=3)
        d3_split_seq = [t.reshape(batch, y_height, x_width, x_channels) for t in d3_split_seq]
        x = F.stack(d3_split_seq, axis=0)
        x = F.transpose(F.swapaxes(x, axis1=0, axis2=1), axes=(0, 2, 1, 3, 4)).reshape(
            batch, x_height, x_width, x_channels)
        x = F.transpose(x, axes=(0, 3, 1, 2))
        return x
Example #34
    def forward(self, xs, ilens):
        """Subsample x.

        :param chainer.Variable x: input tensor
        :return: subsampled x and mask

        """
        xs = self.xp.array(xs[:, None])
        xs = F.relu(self.conv1(xs))
        xs = F.relu(self.conv2(xs))
        batch, _, length, _ = xs.shape
        xs = self.out(F.swapaxes(xs, 1, 2).reshape(batch * length, -1))
        xs = self.pe(xs.reshape(batch, length, -1))
        # change ilens accordingly
        ilens = np.ceil(np.array(ilens, dtype=np.float32) / 2).astype(np.int)
        ilens = np.ceil(np.array(ilens, dtype=np.float32) / 2).astype(np.int)
        return xs, ilens
Example #35
    def __call__(self, inputs):
        pos_x, pos_y, offset_x, ego_x, ego_y, pose_x, pose_y = self._prepare_input(
            inputs)
        batch_size, past_len, _ = pos_x.shape

        h = self.pos_encoder(pos_x)
        h = self.inter(h)
        h = self.pos_decoder(h)
        pred_y = self.last(h)
        pred_y = F.swapaxes(pred_y, 1, 2)
        pred_y = pred_y[:, :pos_y.shape[1], :]
        loss = F.mean_squared_error(pred_y, pos_y)

        pred_y = pred_y + F.broadcast_to(F.expand_dims(offset_x, 1),
                                         pred_y.shape)
        pred_y = cuda.to_cpu(pred_y.data) * self._std + self._mean
        return loss, pred_y, None
Example #36
    def reconstruct(self, v):
        """

        :param v: Variable Matrix(batch_size, in_channels, image_height, image_width)
        :return: reconstructed_v, Variable Matrix(batch_size, in_channels, image_height, image_width)
        """
        batch_size = v.data.shape[0]
        xp = cuda.get_array_module(v.data)
        if self.real == 0:
            h = F.sigmoid(self.conv(v))
        else:
            std_ch = xp.reshape(self.std, (1, self.in_channels, 1, 1))
            h = F.sigmoid(self.conv(v / std_ch))
        # F.sigmoid(F.matmul(v, self.l.W, transb=True) + F.broadcast_to(self.l.b, (batch_size, self.n_hidden)))
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        reconstructed_v = F.sigmoid(F.convolution_2d(h, W_flipped, self.conv.a, pad=self.ksize-1))
            # = F.sigmoid(F.matmul(h, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible)))
        return reconstructed_v
Example #38
def c():
    label_unigram = np.asarray([
        [1, 2, 4, 3, 5],
        [2, 4, 3, 0, 0],
    ],
                               dtype=np.int32)
    label_bigram = np.asarray([
        [-1, 6, -1, 7, 8],
        [-1, 6, 9, 0, 0],
    ],
                              dtype=np.int32)
    blank_symbol = 0
    path = gram_ctc._label_to_path(label_unigram, label_bigram, blank_symbol,
                                   np)

    length_unigram = np.asarray([5, 3])
    length_bigram = length_unigram - 1
    path_length = length_unigram * 2 + 1 + length_bigram
    print("path_length", path_length)

    vocab_size = 10
    seq_length = 5
    batchsize = 2
    xs = np.random.normal(0, 1,
                          size=batchsize * vocab_size * seq_length).reshape(
                              (batchsize, vocab_size, 1,
                               seq_length)).astype(np.float32)
    xs = Variable(xs)
    xs = functions.swapaxes(xs, 1, 3)
    xs = functions.reshape(xs, (batchsize, -1))
    xs = functions.split_axis(xs, seq_length, axis=1)
    xs = [x.data for x in xs]

    x_length = np.asarray([seq_length, seq_length // 2], dtype=np.int32)
    yseq_shape = (len(xs), ) + xs[0].shape
    print(yseq_shape)
    yseq = gram_ctc._softmax(np.vstack(xs).reshape(yseq_shape), np)
    print(yseq)

    zero_padding = -100
    log_yseq = gram_ctc._log_matrix(yseq, np, zero_padding)
    prob_trans = gram_ctc._compute_transition_probability(
        log_yseq, x_length, label_unigram, length_unigram, label_bigram,
        length_bigram, path, path_length, np, zero_padding)
Example #39
	def __call__(self, x):
		return functions.swapaxes(x, self.axis1, self.axis2)
Example #40
 def check_backward(self, x_data):
     x = chainer.Variable(x_data)
     y = functions.swapaxes(x, self.axis1, self.axis2)
     y.grad = y.data
     y.backward()
     gradient_check.assert_allclose(x.data, x.grad, atol=0, rtol=0)
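
Why the zero-tolerance comparison above holds (a small standalone sketch): the backward pass of swapaxes applies the same axis swap to the incoming gradient, so setting y.grad = y.data makes x.grad an exact copy of x.data.

import numpy as np
import chainer
from chainer import functions

x = chainer.Variable(np.arange(6, dtype=np.float32).reshape(2, 3))
y = functions.swapaxes(x, 0, 1)
y.grad = y.data
y.backward()
assert np.array_equal(x.grad, x.data)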
Example #41
 def f(x):
     return functions.swapaxes(x, self.axis1, self.axis2)
Example #42
 def f(x):
     y = functions.swapaxes(x, self.axis1, self.axis2)
     return y * y
Example #43
 def forward(self, inputs, devices):
     x, = inputs
     y = functions.swapaxes(x, self.axis1, self.axis2)
     return y,