def check_backward(self, x_data, W_data, b_data, y_grad):
    x = chainer.Variable(x_data)
    W = chainer.Variable(W_data)
    if b_data is None:
        y = functions.maxout(x, W)
    else:
        b = chainer.Variable(b_data)
        y = functions.maxout(x, W, b)
    y.grad = y_grad
    y.backward()

    func = y.creator
    if b_data is None:
        f = lambda: func.forward((x.data, W.data))
        gx, gW = gradient_check.numerical_grad(
            f, (x.data, W.data), (y.grad,), eps=1e-2)
    else:
        f = lambda: func.forward((x.data, W.data, b.data))
        gx, gW, gb = gradient_check.numerical_grad(
            f, (x.data, W.data, b.data), (y.grad,), eps=1e-2)

    gradient_check.assert_allclose(gx, x.grad, atol=1e-2)
    gradient_check.assert_allclose(gW, W.grad, atol=1e-2)
    if b_data is not None:
        gradient_check.assert_allclose(gb, b.grad, atol=1e-2)
def check_forward(self, x_data, W_data, b_data, y_expect):
    x = chainer.Variable(x_data)
    W = chainer.Variable(W_data)
    if b_data is None:
        y = functions.maxout(x, W)
    else:
        b = chainer.Variable(b_data)
        y = functions.maxout(x, W, b)
    gradient_check.assert_allclose(y_expect, y.data)
def __call__(self, s, q, s_mask, q_mask):
    """
    s_bar, _, _ = self.pred_bilstm(None, None, s)
    s_bar_new = F.concat(s_bar, axis=1)
    q_bar, _, _ = self.pred_bilstm(None, None, q)
    q_bar_new = F.concat(q_bar, axis=1)
    """
    _, _, s_bar = self.pred_bilstm(None, None, s)  # get list of [seq, dim]
    s_bar_new = F.stack(s_bar, axis=0)              # turn list into a 3d tensor
    _, _, q_bar = self.pred_bilstm(None, None, q)  # get list of [seq, dim]
    q_bar_new = F.stack(q_bar, axis=0)              # turn list into a 3d tensor

    # mean-max pooling
    s_sum = F.sum(s_mask, axis=-1)
    q_sum = F.sum(q_mask, axis=-1)
    s_batch, s_seq = s_mask.shape
    s_mask_broad = F.broadcast_to(
        F.reshape(s_mask, (s_batch, s_seq, 1)),
        (s_batch, s_seq, s_bar_new.shape[-1]))
    s_broad = s_bar_new * s_mask_broad
    """
    s_infinit_matrix = self.xp.ones((s_batch, s_seq, s_bar_new.shape[-1]),
                                    dtype=self.xp.float32) * -1 * self.xp.inf
    s_cond = s_mask_broad.data.astype(self.xp.bool)
    s_broad_max = F.where(s_cond, s_bar_new, s_infinit_matrix)
    """
    s_mean = F.average(s_broad, axis=1)  # [batch_size, dim]
    # max pooling over the sequence axis: transpose to (batch, dim, seq)
    # before flattening so each pool_size = seq group covers one feature
    s_max = F.maxout(
        F.reshape(
            F.swapaxes(s_bar_new, 1, 2),
            (s_bar_new.shape[0], s_bar_new.shape[1] * s_bar_new.shape[2])),
        s_bar_new.shape[1])  # [batch_size, dim]

    q_batch, q_seq = q_mask.shape
    q_broad = q_bar_new * F.broadcast_to(
        F.reshape(q_mask, (q_batch, q_seq, 1)),
        (q_batch, q_seq, q_bar_new.shape[-1]))
    q_mean = F.average(q_broad, axis=1)  # [batch_size, dim]
    q_max = F.maxout(
        F.reshape(
            F.swapaxes(q_bar_new, 1, 2),
            (q_bar_new.shape[0], q_bar_new.shape[1] * q_bar_new.shape[2])),
        q_bar_new.shape[1])  # [batch_size, dim]

    summarized_vector = F.concat([s_mean, s_max, q_mean, q_max], axis=1)
    s_linear_output = self.gelu(self.L(summarized_vector))
    y = F.softmax(s_linear_output)
    return y
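# --- Illustrative sketch (toy shapes, not part of the model above) ---
# The max pooling above relies on transposing to (batch, dim, seq) before
# flattening, so that F.maxout with pool_size = seq takes the max over the
# sequence axis for every feature. A quick check against a plain max:
import numpy as np
import chainer.functions as F

batch, seq, dim = 2, 4, 3
h = np.random.rand(batch, seq, dim).astype(np.float32)
pooled = F.maxout(
    F.reshape(F.swapaxes(h, 1, 2), (batch, dim * seq)), seq)  # (batch, dim)
print(np.allclose(pooled.data, h.max(axis=1)))  # True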
def check_backward(self, x_data, W_data, b_data, y_grad):
    x = chainer.Variable(x_data)
    W = chainer.Variable(W_data)
    if b_data is None:
        y = functions.maxout(x, W)
    else:
        b = chainer.Variable(b_data)
        y = functions.maxout(x, W, b)
    y.grad = y_grad
    y.backward()

    func = y.creator
    if b_data is None:
        f = lambda: func.forward((x.data, W.data))
        gx, gW = gradient_check.numerical_grad(
            f, (x.data, W.data), (y.grad,), eps=1e-2)
    else:
        f = lambda: func.forward((x.data, W.data, b.data))
        gx, gW, gb = gradient_check.numerical_grad(
            f, (x.data, W.data, b.data), (y.grad,), eps=1e-2)

    gradient_check.assert_allclose(gx, x.grad, atol=1e-2)
    gradient_check.assert_allclose(gW, W.grad, atol=1e-2)
    if b_data is not None:
        gradient_check.assert_allclose(gb, b.grad, atol=1e-2)
def maxpooling(self, xs, neighbor):
    sources = defaultdict(list)
    for ee in neighbor:
        for i in neighbor[ee]:
            sources[i].append(xs[ee])

    result = []
    for i, xxs in sorted(sources.items(), key=lambda x: x[0]):
        if len(xxs) == 1:
            result.append(xxs[0])
        else:
            x = F.concat(xxs, axis=0)    # -> (b, d)
            x = F.swapaxes(x, 0, 1)      # -> (d, b)
            x = F.maxout(x, len(xxs))    # -> (d, 1)
            x = F.swapaxes(x, 0, 1)      # -> (1, d)
            result.append(x)
    return result
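# --- Illustrative sketch (toy values, not from the model above) ---
# The concat / swapaxes / maxout idiom in maxpooling() reduces a list of
# (1, d) neighbor vectors to their elementwise max:
import numpy as np
import chainer.functions as F

xxs = [np.array([[1., 7., 3.]], dtype=np.float32),
       np.array([[4., 2., 9.]], dtype=np.float32),
       np.array([[0., 5., 6.]], dtype=np.float32)]
x = F.swapaxes(F.concat(xxs, axis=0), 0, 1)   # (d, b) = (3, 3)
x = F.swapaxes(F.maxout(x, len(xxs)), 0, 1)   # (1, d)
print(x.data)                                 # [[4. 7. 9.]]
print(np.max(np.concatenate(xxs), axis=0))    # [4. 7. 9.]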
def __call__(self, y, cv, c, h):
    """
    @param:
        y: y_{i-1}, the last generated word
        cv: context vector c_{i}
        c: LSTM memory cell
        h: LSTM hidden state
    @return:
        y: the output scores for y_{i}
        c: updated LSTM memory cell
        h: updated LSTM hidden state
    """
    e = self.ye(y)
    c, h = F.lstm(c, self.eh(e) + self.hh(h) + self.ch(cv))
    t = F.maxout(self.sm(h) + self.em(e) + self.cm(cv), self.POOL_SIZE)
    y = self.my(t)
    return y, c, h
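# --- Illustrative sketch (made-up values; sizes are assumptions) ---
# The deep-output layer above shrinks the feature dimension by POOL_SIZE:
# with pool_size=2, F.maxout keeps the larger of every pair of adjacent units.
import numpy as np
import chainer.functions as F

t = np.array([[1., 5., 2., 3.],
              [4., 0., 7., 6.]], dtype=np.float32)  # (batch=2, 4 units)
print(F.maxout(t, 2).data)  # [[5. 3.]
                            #  [4. 7.]]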
def generateHyp(self, enc_states):
    bosID = 1
    bos = self.xp.array([bosID], dtype=self.xp.int32)
    predicts = [bos]
    while len(predicts) - 1 < self.gen_limit:
        embedding = self.word2embedding(predicts[-1])
        previous_hidden = self.gru.h
        context = self.attention(previous_hidden, enc_states)
        hidden = self.gru(embedding, context)
        t = self.U_o(previous_hidden) + self.V_o(embedding) + self.C_o(context)
        t = chainFunc.maxout(t, 2)
        score = self.W_o(t)
        predict = chainFunc.argmax(score, axis=1)
        predicts.append(predict)
    del predicts[0]
    return predicts
def __call__(self, x):
    V = self.embed(x)
    V_norm = F.normalize(V.transpose(0, 2, 1), axis=1)
    C = self.embed_class.W
    C_norm = F.normalize(C, axis=1)
    G = F.matmul(
        F.broadcast_to(
            C_norm, (V_norm.shape[0], C_norm.shape[0], C_norm.shape[1])),
        V_norm)
    u = F.relu(self.conv1(G))
    m = F.maxout(u, pool_size=self.n_class, axis=1)
    beta = F.softmax(m, axis=2)
    z = F.sum(V * F.broadcast_to(beta.transpose(0, 2, 1), V.shape), axis=1)
    z = self.fc2(F.dropout(z))
    z = F.sigmoid(z)
    z_class = self.fc2(F.dropout(C))
    out = F.concat([z, z_class], axis=0)
    return out
def __call__(self, enc_states, batch_tgt):
    loss = chainer.Variable(self.xp.zeros((), dtype=self.xp.float32))
    predicts = list()
    for previous_wordID, word in enumerate(batch_tgt[1:]):
        previous_hidden = self.gru.h
        embedding = self.word2embedding(batch_tgt[previous_wordID])
        # util.trace('embedding size decided: {}'.format(embedding.shape))
        context = self.attention(previous_hidden, enc_states)
        hidden = chainFunc.dropout(self.gru(embedding, context), self.dropoutr)
        t = self.U_o(previous_hidden) + self.V_o(embedding) + self.C_o(context)
        t = chainFunc.maxout(t, 2)
        score = self.W_o(t)
        predict = chainFunc.argmax(score, axis=1)
        loss += chainFunc.softmax_cross_entropy(score, word, ignore_label=-1)
        predicts.append(predict.data)
    return loss, predicts
def maxpooling(self, xs, neighbor):
    sources = defaultdict(list)
    for ee in neighbor:
        for i in neighbor[ee]:
            sources[i].append(xs[ee])
    # sources: keys are entity indices i; values are the embeddings (after the
    # transformation function) of all entities related to entity i.
    # Iterate over sources sorted by entity index, so that in the end
    # len(result) == len(entities).
    result = []
    for i, xxs in sorted(sources.items(), key=lambda x: x[0]):
        if len(xxs) == 1:
            x = xxs[0]
            x = self.forwardAA(x, xxs)  # attention
            result.append(x)
        else:
            x = F.concat(xxs, axis=0)    # -> (b, d)
            x = F.swapaxes(x, 0, 1)      # -> (d, b)
            x = F.maxout(x, len(xxs))    # -> (d, 1)
            x = F.swapaxes(x, 0, 1)      # -> (1, d)
            x = self.forwardAA(x, xxs)   # attention
            result.append(x)
    return result
def check_backward(self, x_data, y_grad):
    gradient_check.check_backward(
        lambda x: functions.maxout(x, self.pool_size, self.axis),
        x_data, y_grad, dtype=numpy.float64)
def __call__(self, x):
    return F.maxout(x, self.pool_size, self.axis)
def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.maxout(x, self.pool_size, self.axis)
    gradient_check.assert_allclose(self.y, y.data)
def test_invalid_shape_gpu(self):
    self.x.to_gpu()
    with self.assertRaises(self.error):
        functions.maxout(self.x, self.pool_size)
def forward(self, inputs, device):
    x, = inputs
    return functions.maxout(x, self.pool_size, self.axis),
def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.maxout(x, self.pool_size, self.axis)
    self.assertEqual(y.data.dtype, self.dtype)
    testing.assert_allclose(self.y, y.data)
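# --- Illustrative sketch (shapes and attribute names are assumptions) ---
# The expected output self.y for such a forward test can be computed with a
# plain NumPy reference of maxout: reshape the pooled axis into
# (size // pool_size, pool_size) and take the max over the pool_size groups.
import numpy

x = numpy.random.rand(5, 6, 7).astype(numpy.float32)
pool_size, axis = 3, 1
shape = (x.shape[:axis]
         + (x.shape[axis] // pool_size, pool_size)
         + x.shape[axis + 1:])
y_expect = x.reshape(shape).max(axis=axis + 1)  # (5, 2, 7)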
def check_backward(self, x_data, y_grad):
    gradient_check.check_backward(
        lambda x: functions.maxout(x, self.pool_size, self.axis),
        x_data, y_grad, atol=1e-2)
def check_backward(self, x_data, y_grad):
    gradient_check.check_backward(
        lambda x: functions.maxout(x, self.pool_size, self.axis),
        x_data, y_grad, eps=0.125)
def __call__(self, x):
    return functions.maxout(x, self.pool_size)
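# --- Illustrative sketch (names and sizes are assumptions, not the original) ---
# A __call__ like the one above is typically paired with a Linear link that
# produces out_size * pool_size units; functions.maxout then keeps the max of
# every pool_size-sized group, which gives a classic maxout layer.
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L

class MaxoutLayer(chainer.Chain):
    def __init__(self, in_size, out_size, pool_size):
        super(MaxoutLayer, self).__init__()
        with self.init_scope():
            self.linear = L.Linear(in_size, out_size * pool_size)
        self.pool_size = pool_size

    def __call__(self, x):
        return F.maxout(self.linear(x), self.pool_size)

layer = MaxoutLayer(8, 4, pool_size=3)
x = np.random.rand(2, 8).astype(np.float32)
print(layer(x).shape)  # (2, 4)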