def compute_loss(self, input_vocab, output_vocab, window_words,
                 hidden_states):
    g, rnn_distribution, a = self.decode_one_step(
        input_vocab, window_words, hidden_states)
    # define p_vocab as 0 if the output word is not in the vocabulary
    p_vocab = (
        F.select_item(
            rnn_distribution,
            xp.array([self.vocab[output_vocab]], dtype=xp.int32))
        if output_vocab in self.vocab
        else Variable(xp.array([0.0], dtype=xp.float32)))
    # p_ptr: sum of the attention weights at every window position
    # that holds the output word
    indexes = [i for i, x in enumerate(window_words) if x == output_vocab]
    exist_var = Variable(xp.array([0], dtype=xp.float32))
    for idx in indexes:
        exist_var += F.select_item(a, xp.array([idx], dtype=xp.int32))
    p_ptr = (F.cast(exist_var, xp.float32) if indexes
             else Variable(xp.array([0.0], dtype=xp.float32)))
    # cross entropy of the mixture g * p_vocab + (1 - g) * p_ptr;
    # the 0.01 constant keeps the log away from zero
    cross_entropy = -F.log(
        F.linear_interpolate(g, p_vocab, p_ptr)
        + Variable(xp.array([0.01], dtype=xp.float32)))
    # attention loss: encourages g + p_ptr to be large when the
    # output word appears in the window
    attention_loss = (
        F.cast(-F.log(g + exist_var), xp.float32) if indexes
        else Variable(xp.array([0.0], dtype=xp.float32)))
    return cross_entropy + attention_loss
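# For reference, F.linear_interpolate(g, p_vocab, p_ptr) above evaluates to
# g * p_vocab + (1 - g) * p_ptr, the pointer-generator style mixture of the
# vocabulary distribution and the copy (attention) probability. A minimal
# sketch with toy values; the numbers are illustrative, not from the model.
import numpy as np
import chainer.functions as F

g = np.array([0.7], dtype=np.float32)        # mixture gate
p_vocab = np.array([0.2], dtype=np.float32)  # softmax prob. of the gold word
p_ptr = np.array([0.5], dtype=np.float32)    # summed attention on gold word

mix = F.linear_interpolate(g, p_vocab, p_ptr)
print(mix.array)  # [0.29] == 0.7 * 0.2 + 0.3 * 0.5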
def _call_1step(net: NStepRNNBase, hidden: ArrayLike, input: ArrayLike):
    if hidden is None:
        hidden = net.init_hx(input)[0]
    x = input
    h = hidden
    # an n-step GRU stores six matrices per layer:
    # w[0:3] act on the input, w[3:6] on the hidden state
    w = net.ws[0]
    b = net.bs[0]
    xw = F.concat([w[0], w[1], w[2]], axis=0)
    hw = F.concat([w[3], w[4], w[5]], axis=0)
    xb = F.concat([b[0], b[1], b[2]], axis=0)
    hb = F.concat([b[3], b[4], b[5]], axis=0)
    gru_x = F.linear(x, xw, xb)
    gru_h = F.linear(h, hw, hb)
    W_r_x, W_z_x, W_x = F.split_axis(gru_x, 3, axis=1)
    U_r_h, U_z_h, U_x = F.split_axis(gru_h, 3, axis=1)
    r = F.sigmoid(W_r_x + U_r_h)                # reset gate
    z = F.sigmoid(W_z_x + U_z_h)                # update gate
    h_bar = F.tanh(W_x + r * U_x)               # candidate state
    h = F.linear_interpolate(z, hidden, h_bar)  # z * h_prev + (1 - z) * h_bar
    return h
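# A possible way to drive _call_1step (a sketch only; it assumes a
# single-layer unidirectional chainer.links.NStepGRU, and the sizes
# here are made up).
import numpy as np
import chainer.links as L

net = L.NStepGRU(1, 3, 4, 0.0)  # n_layers=1, in_size=3, out_size=4, dropout=0

x = np.random.rand(2, 3).astype(np.float32)  # a batch of two input vectors
h0 = _call_1step(net, None, x)               # hidden initialized via init_hx
h1 = _call_1step(net, h0, x)                 # feed the state back for step 2
print(h1.shape)  # (2, 4)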
def check_forward(self, p_data, x_data, y_data):
    p = chainer.Variable(p_data)
    x = chainer.Variable(x_data)
    y = chainer.Variable(y_data)
    z = functions.linear_interpolate(p, x, y)
    self.assertEqual(z.data.dtype.type, self.dtype)
    expect = self.p * self.x + (1 - self.p) * self.y
    testing.assert_allclose(z.data, expect, **self.check_forward_options)
def __call__(self, x, h):
    z = self.W_z(x)
    h_bar = self.W(x)
    if h is not None:
        r = F.sigmoid(self.W_r(x) + self.U_r(h))
        z += self.U_z(h)
        h_bar += self.U(r * h)
    z = F.sigmoid(z)
    h_bar = F.tanh(h_bar)
    if h is not None:
        h_new = F.linear_interpolate(z, h_bar, h)
    else:
        h_new = z * h_bar
    return h_new
def __call__(self, x):
    z = self.W_z(x)
    h_bar = self.W(x)
    if self.h is not None:
        r = F.sigmoid(self.W_r(x) + self.U_r(self.h))
        z += self.U_z(self.h)
        h_bar += self.U(r * self.h)
    z = F.sigmoid(z)
    h_bar = F.tanh(h_bar)
    if self.h is not None:
        h_new = F.linear_interpolate(z, h_bar, self.h)
    else:
        h_new = z * h_bar
    self.h = h_new  # save the state
    return h_new
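# This is the stateful pattern: the link keeps its own hidden state between
# calls. A minimal driver sketch, assuming chainer.links.StatefulGRU (whose
# update has the same form) and made-up sizes.
import numpy as np
import chainer.links as L

gru = L.StatefulGRU(3, 4)  # in_size=3, out_size=4

gru.reset_state()          # sets the stored h back to None
for _ in range(5):
    x = np.random.rand(1, 3).astype(np.float32)
    h = gru(x)             # each call reads and overwrites the stored state
print(h.shape)  # (1, 4)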
def __call__(self, word, context):
    # like StatefulGRU, but conditioned on a context vector
    z = self.W_z(word)
    h_bar = self.W(word)
    if self.h is not None:
        r = chainFunc.sigmoid(
            self.W_r(word) + self.U_r(self.h) + self.C_r(context))
        z += self.U_z(self.h) + self.C_z(context)
        h_bar += self.U(r * self.h) + self.C(context)
    z = chainFunc.sigmoid(z)
    h_bar = chainFunc.tanh(h_bar)
    if self.h is not None:
        h_new = chainFunc.linear_interpolate(z, h_bar, self.h)
    else:
        h_new = z * h_bar
    self.h = h_new
    return self.h
def linear_interpolate():
    x0 = rand((1, 2, 3, 4))
    x1 = rand((1, 2, 3, 4))
    x2 = rand((1, 2, 3, 4))
    y = F.linear_interpolate(x0, x1, x2)
    return {'input-0': x0, 'input-1': x1, 'input-2': x2}, {'out': y}
def forward(self, inputs, device):
    p, x, y = inputs
    ret = functions.linear_interpolate(p, x, y)
    ret = functions.cast(ret, numpy.float64)
    return ret,