Example #1
    def check_backward(self, x_data, W_data, b_data, y_grad):
        x = chainer.Variable(x_data)
        W = chainer.Variable(W_data)
        if b_data is None:
            y = functions.maxout(x, W)
        else:
            b = chainer.Variable(b_data)
            y = functions.maxout(x, W, b)

        y.grad = y_grad
        y.backward()
        func = y.creator  # function node that produced y, re-run below for numerical gradients

        if b_data is None:
            f = lambda: func.forward((x.data, W.data))
            gx, gW = gradient_check.numerical_grad(f, (x.data, W.data),
                                                   (y.grad, ),
                                                   eps=1e-2)
        else:
            f = lambda: func.forward((x.data, W.data, b.data))
            gx, gW, gb = gradient_check.numerical_grad(
                f, (x.data, W.data, b.data), (y.grad, ), eps=1e-2)

        gradient_check.assert_allclose(gx, x.grad, atol=1e-2)
        gradient_check.assert_allclose(gW, W.grad, atol=1e-2)
        if b_data is not None:
            gradient_check.assert_allclose(gb, b.grad, atol=1e-2)
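For reference, a minimal sketch of what functions.maxout computes (the array values here are illustrative, not taken from the test above): the pooled axis is split into contiguous groups of pool_size and the maximum is taken within each group.

    import numpy
    from chainer import functions

    x = numpy.arange(12, dtype=numpy.float32).reshape(2, 6)
    y = functions.maxout(x, 3)  # axis 1 is split into two groups of three
    print(y.data)  # [[ 2.  5.]
                   #  [ 8. 11.]]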
Example #2
 def check_forward(self, x_data, W_data, b_data, y_expect):
     x = chainer.Variable(x_data)
     W = chainer.Variable(W_data)
     if b_data is None:
         y = functions.maxout(x, W)
     else:
         b = chainer.Variable(b_data)
         y = functions.maxout(x, W, b)
     gradient_check.assert_allclose(y_expect, y.data)
Example #3
 def check_forward(self, x_data, W_data, b_data, y_expect):
     x = chainer.Variable(x_data)
     W = chainer.Variable(W_data)
     if b_data is None:
         y = functions.maxout(x, W)
     else:
         b = chainer.Variable(b_data)
         y = functions.maxout(x, W, b)
     gradient_check.assert_allclose(y_expect, y.data)
Example #4
    def __call__(self, s, q, s_mask, q_mask):
        """
        s_bar, _, _ = self.pred_bilstm(None, None, s)

        s_bar_new = F.concat(s_bar, axis=1)

        q_bar, _, _ = self.pred_bilstm(None, None, q)

        q_bar_new = F.concat(q_bar, axis=1)
        """

        _, _, s_bar = self.pred_bilstm(None, None, s)  # get list of [seq, dim]
        s_bar_new = F.stack(s_bar, axis=0)  # turn list to 3d tensor

        _, _, q_bar = self.pred_bilstm(None, None, q)  # get list of [seq, dim]
        q_bar_new = F.stack(q_bar, axis=0)  # turn list to 3d tensor
        # mean-max pooling

        s_sum = F.sum(s_mask, axis=-1)  # number of unmasked positions per sequence
        q_sum = F.sum(q_mask, axis=-1)  # number of unmasked positions per sequence

        s_batch, s_seq = s_mask.shape
        s_mask_broad = F.broadcast_to(F.reshape(s_mask, (s_batch, s_seq, 1)),
                                      (s_batch, s_seq, s_bar_new.shape[-1]))
        s_broad = s_bar_new * s_mask_broad
        """
        s_infinit_matrix = self.xp.ones((s_batch, s_seq, s_bar_new.shape[-1]), dtype=self.xp.float32) * -1 * self.xp.inf
        s_cond = s_mask_broad.data.astype(self.xp.bool)

        s_broad_max = F.where(s_cond, s_bar_new, s_infinit_matrix)
        """
        # masked mean: divide by the number of unmasked positions (s_sum),
        # not the padded sequence length
        s_mean = F.sum(s_broad, axis=1) / F.broadcast_to(
            F.reshape(s_sum, (s_batch, 1)),
            (s_batch, s_bar_new.shape[-1]))  # [batch_size, dim]
        # max over time: put the feature axis before the time axis so that
        # each contiguous pool_size group is one feature's time series
        s_max = F.maxout(
            F.reshape(
                F.transpose(s_bar_new, (0, 2, 1)),
                (s_bar_new.shape[0], s_bar_new.shape[1] * s_bar_new.shape[2])),
            s_bar_new.shape[1])  # [batch_size, dim]

        q_batch, q_seq = q_mask.shape
        q_broad = q_bar_new * F.broadcast_to(
            F.reshape(q_mask, (q_batch, q_seq, 1)),
            (q_batch, q_seq, q_bar_new.shape[-1]))
        q_mean = F.sum(q_broad, axis=1) / F.broadcast_to(
            F.reshape(q_sum, (q_batch, 1)),
            (q_batch, q_bar_new.shape[-1]))  # masked mean: [batch_size, dim]
        q_max = F.maxout(
            F.reshape(
                F.transpose(q_bar_new, (0, 2, 1)),
                (q_bar_new.shape[0], q_bar_new.shape[1] * q_bar_new.shape[2])),
            q_bar_new.shape[1])  # max over time: [batch_size, dim]

        summarized_vector = F.concat([s_mean, s_max, q_mean, q_max], axis=1)

        s_linear_output = self.gelu(self.L(summarized_vector))

        y = F.softmax(s_linear_output)

        return y
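A standalone sketch of the max-over-time trick used above (the sizes are assumptions): transposing so that time is the last axis makes each contiguous pool_size group one feature's time series, so maxout pools over time.

    import numpy
    import chainer.functions as F

    h = numpy.random.randn(2, 5, 4).astype(numpy.float32)  # (batch, seq, dim)
    flat = F.reshape(F.transpose(h, (0, 2, 1)), (2, 4 * 5))  # (batch, dim * seq)
    h_max = F.maxout(flat, 5)  # (batch, dim): max over the 5 time steps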
Example #5
    def check_backward(self, x_data, W_data, b_data, y_grad):
        x = chainer.Variable(x_data)
        W = chainer.Variable(W_data)
        if b_data is None:
            y = functions.maxout(x, W)
        else:
            b = chainer.Variable(b_data)
            y = functions.maxout(x, W, b)

        y.grad = y_grad
        y.backward()
        func = y.creator

        if b_data is None:
            f = lambda: func.forward((x.data, W.data))
            gx, gW = gradient_check.numerical_grad(f, (x.data, W.data), (y.grad,), eps=1e-2)
        else:
            f = lambda: func.forward((x.data, W.data, b.data))
            gx, gW, gb = gradient_check.numerical_grad(f, (x.data, W.data, b.data), (y.grad,), eps=1e-2)

        gradient_check.assert_allclose(gx, x.grad, atol=1e-2)
        gradient_check.assert_allclose(gW, W.grad, atol=1e-2)
        if b_data is not None:
            gradient_check.assert_allclose(gb, b.grad, atol=1e-2)
Example #6
	def maxpooling(self,xs,neighbor):
		sources = defaultdict(list)
		for ee in neighbor:
			for i in neighbor[ee]:
				sources[i].append(xs[ee])
		result = []
		for i,xxs in sorted(sources.items(),key=lambda x:x[0]):
			if len(xxs)==1: result.append(xxs[0])
			else:
				x = F.concat(xxs,axis=0)					# -> (b,d)
				x = F.swapaxes(x,0,1)						# -> (d,b)
				x = F.maxout(x,len(xxs))					# -> (d,1)
				x = F.swapaxes(x,0,1)						# -> (1,d)
				result.append(x)
		return result
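A minimal sketch of the swapaxes/maxout trick above (sizes are assumptions): the n neighbor vectors become the pooled axis, so maxout computes an element-wise maximum over the neighbors.

    import numpy
    import chainer.functions as F

    xs = [numpy.random.randn(1, 4).astype(numpy.float32) for _ in range(3)]
    x = F.concat(xs, axis=0)   # (3, 4)
    x = F.swapaxes(x, 0, 1)    # (4, 3)
    x = F.maxout(x, 3)         # (4, 1): max over the 3 neighbor vectors
    x = F.swapaxes(x, 0, 1)    # (1, 4)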
Example #7
    def __call__(self, y, cv, c, h):
        """ @param:
                y: y_{i-1}, last generated word
                cv: context vector c_{i}
                c: LSTM memory cell
                h: LSTM hidden state
            @return:
                y: the weight of y_{i}
                c: Updated LSTM memory cell
                h: Updated LSTM hidden state
        """
        e = self.ye(y)
        c, h = F.lstm(c, self.eh(e) + self.hh(h) + self.ch(cv))
        t = F.maxout(self.sm(h) + self.em(e) + self.cm(cv), self.POOL_SIZE)
        y = self.my(t)

        return y, c, h
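A self-contained sketch of the maxout readout above (the layer sizes and POOL_SIZE = 2 are assumptions): three linear projections are summed, and maxout halves the feature dimension, as in Bahdanau-style decoders.

    import numpy
    import chainer.functions as F
    import chainer.links as L

    out_units, pool = 4, 2
    sm = L.Linear(8, out_units * pool)  # stands in for self.sm
    em = L.Linear(8, out_units * pool)  # stands in for self.em
    cm = L.Linear(8, out_units * pool)  # stands in for self.cm

    h = numpy.random.randn(1, 8).astype(numpy.float32)
    e = numpy.random.randn(1, 8).astype(numpy.float32)
    cv = numpy.random.randn(1, 8).astype(numpy.float32)

    t = F.maxout(sm(h) + em(e) + cm(cv), pool)  # -> (1, out_units)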
Example #8
    def generateHyp(self, enc_states):
        bosID = 1
        bos = self.xp.array([bosID], dtype=self.xp.int32)
        predicts = [bos]
        while len(predicts) - 1 < self.gen_limit:
            embedding = self.word2embedding(predicts[-1])
            previous_hidden = self.gru.h
            context = self.attention(previous_hidden, enc_states)
            hidden = self.gru(embedding, context)
            t = self.U_o(previous_hidden) + self.V_o(embedding) + self.C_o(
                context)
            t = chainFunc.maxout(t, 2)
            score = self.W_o(t)
            predict = chainFunc.argmax(score, axis=1)

            predicts.append(predict)
        del predicts[0]
        return predicts
Example #9
 def __call__(self, x):
     V = self.embed(x)  # token embeddings: (batch, seq, dim)
     V_norm = F.normalize(V.transpose(0, 2, 1), axis=1)  # (batch, dim, seq), unit norm along dim
     C = self.embed_class.W  # class embeddings: (n_class, dim)
     C_norm = F.normalize(C, axis=1)
     # cosine similarity between every class and every token: (batch, n_class, seq)
     G = F.matmul(
         F.broadcast_to(
             C_norm, (V_norm.shape[0], C_norm.shape[0], C_norm.shape[1])),
         V_norm)
     u = F.relu(self.conv1(G))
     m = F.maxout(u, pool_size=self.n_class, axis=1)  # max over groups of n_class channels
     beta = F.softmax(m, axis=2)  # attention weights over token positions
     z = F.sum((V * F.broadcast_to(beta.transpose(0, 2, 1), V.shape)),
               axis=1)  # attention-weighted sum of token embeddings
     z = self.fc2(F.dropout(z))
     z = F.sigmoid(z)
     z_class = self.fc2(F.dropout(C))
     out = F.concat([z, z_class], axis=0)
     return out
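The axis keyword used above chooses which dimension maxout pools; a minimal sketch with assumed sizes:

    import numpy
    import chainer.functions as F

    u = numpy.random.randn(2, 6, 7).astype(numpy.float32)  # (batch, channel, seq)
    m = F.maxout(u, pool_size=3, axis=1)  # -> (2, 2, 7): pools the channel axis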
Example #10
    def __call__(self, enc_states, batch_tgt):
        loss = chainer.Variable(self.xp.zeros((), dtype=self.xp.float32))
        predicts = list()
        for previous_wordID, word in enumerate(batch_tgt[1:]):
            previous_hidden = self.gru.h
            embedding = self.word2embedding(batch_tgt[previous_wordID])
            #util.trace('embedding size decside: {}'.format(embedding.shape))
            context = self.attention(previous_hidden, enc_states)
            hidden = chainFunc.dropout(self.gru(embedding, context),
                                       self.dropoutr)
            t = self.U_o(previous_hidden) + self.V_o(embedding) + self.C_o(
                context)
            t = chainFunc.maxout(t, 2)
            score = self.W_o(t)
            predict = chainFunc.argmax(score, axis=1)
            loss += chainFunc.softmax_cross_entropy(score,
                                                    word,
                                                    ignore_label=-1)

            predicts.append(predict.data)
        return loss, predicts
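The ignore_label=-1 argument above makes padded target positions contribute nothing to the loss; a minimal sketch with assumed shapes:

    import numpy
    import chainer.functions as F

    score = numpy.random.randn(3, 10).astype(numpy.float32)
    word = numpy.array([4, -1, 7], dtype=numpy.int32)  # -1 marks padding
    loss = F.softmax_cross_entropy(score, word, ignore_label=-1)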
Example #11
 def maxpooling(self, xs, neighbor):
     sources = defaultdict(list)
     for ee in neighbor:
         for i in neighbor[ee]:
             sources[i].append(xs[ee])
     # sources: key = entity index i; value = the embeddings (after the
     # transform) of all entities related to entity i.
     # Iterating below over sources sorted by entity index, so that in the
     # end len(result) == len(entities).
     result = []
     for i, xxs in sorted(sources.items(), key=lambda x: x[0]):
         if len(xxs) == 1:
             x = xxs[0]
             x = self.forwardAA(x, xxs)  # attention
             result.append(x)
         else:
             x = F.concat(xxs, axis=0)  # -> (b,d)
             x = F.swapaxes(x, 0, 1)  # -> (d,b)
             x = F.maxout(x, len(xxs))  # -> (d,1)
             x = F.swapaxes(x, 0, 1)  # -> (1,d)
             x = self.forwardAA(x, xxs)  # attention
             result.append(x)
     return result
Example #12
 def check_backward(self, x_data, y_grad):
     gradient_check.check_backward(
         lambda x: functions.maxout(x, self.pool_size, self.axis),
         x_data,
         y_grad,
         dtype=numpy.float64)
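A self-contained version of the same check (shapes, pool size, and tolerance are assumptions): gradient_check.check_backward compares the analytic backward pass of maxout against numerical gradients.

    import numpy
    from chainer import functions, gradient_check

    x = numpy.random.randn(3, 6).astype(numpy.float64)
    gy = numpy.random.randn(3, 2).astype(numpy.float64)  # matches the maxout output shape
    gradient_check.check_backward(
        lambda v: functions.maxout(v, 3), x, gy, atol=1e-4)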
Example #13
 def __call__(self, x):
     return F.maxout(x, self.pool_size, self.axis)
Example #14
 def check_forward(self, x_data):
     x = chainer.Variable(x_data)
     y = functions.maxout(x, self.pool_size, self.axis)
     gradient_check.assert_allclose(self.y, y.data)
Example #15
 def test_invalid_shape_gpu(self):
     self.x.to_gpu()
     with self.assertRaises(self.error):
         functions.maxout(self.x, self.pool_size)
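functions.maxout rejects inputs whose pooled axis is not divisible by pool_size, which is what this test exercises on the GPU; a CPU sketch (the exact error type is an assumption, so it is caught broadly here):

    import numpy
    from chainer import functions

    x = numpy.zeros((2, 7), dtype=numpy.float32)
    try:
        functions.maxout(x, 3)  # 7 % 3 != 0 -> shape check fails
    except Exception:
        pass  # chainer raises a type-check error here (assumption: type_check.InvalidType)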
Example #16
 def forward(self, inputs, device):
     x, = inputs
     return functions.maxout(x, self.pool_size, self.axis),
Example #17
	def __call__(self, x):
		return F.maxout(x, self.pool_size, self.axis)
Example #18
 def check_forward(self, x_data):
     x = chainer.Variable(x_data)
     y = functions.maxout(x, self.pool_size, self.axis)
     self.assertEqual(y.data.dtype, self.dtype)
     testing.assert_allclose(self.y, y.data)
Example #19
 def check_forward(self, x_data):
     x = chainer.Variable(x_data)
     y = functions.maxout(x, self.pool_size, self.axis)
     gradient_check.assert_allclose(self.y, y.data)
Example #20
 def check_backward(self, x_data, y_grad):
     gradient_check.check_backward(
         lambda x: functions.maxout(x, self.pool_size, self.axis),
         x_data, y_grad, dtype=numpy.float64)
Example #21
 def check_backward(self, x_data, y_grad):
     gradient_check.check_backward(
         lambda x: functions.maxout(x, self.pool_size, self.axis),
         x_data, y_grad, atol=1e-2)
Example #22
 def check_backward(self, x_data, y_grad):
     gradient_check.check_backward(
         lambda x: functions.maxout(x, self.pool_size, self.axis),
         x_data,
         y_grad,
         eps=0.125)
Example #23
 def test_invalid_shape_gpu(self):
     self.x.to_gpu()
     with self.assertRaises(self.error):
         functions.maxout(self.x, self.pool_size)
Example #24
 def check_forward(self, x_data):
     x = chainer.Variable(x_data)
     y = functions.maxout(x, self.pool_size, self.axis)
     self.assertEqual(y.data.dtype, self.dtype)
     testing.assert_allclose(self.y, y.data)
Example #25
	def __call__(self, x):
		return functions.maxout(x, self.pool_size)
Example #26
	def __call__(self, x):
		return functions.maxout(x, self.pool_size)