Exemple #1
0
    def compute_loss(self, input_vocab, output_vocab, window_words,
                     hidden_states):
        """Return the loss of predicting ``output_vocab`` for one step.

        ``self.decode_one_step`` is called once and its three results
        ``(g, rnn_distribution, a)`` are combined into two terms that are
        added together:

        * cross entropy:
          ``-log(linear_interpolate(g, p_vocab, p_ptr) + 0.01)`` where
          ``p_vocab`` is the item of ``rnn_distribution`` selected by
          ``self.vocab[output_vocab]`` (zero when the word is not in
          ``self.vocab``) and ``p_ptr`` is the sum of the items of ``a``
          at every position of ``window_words`` equal to ``output_vocab``
          (zero when there is no such position);
        * attention loss: ``-log(g + p_ptr)`` when ``output_vocab`` occurs
          in ``window_words``, zero otherwise.

        Args:
            input_vocab: Forwarded unchanged to ``decode_one_step``.
            output_vocab: Target word; looked up in ``self.vocab`` and
                compared against the entries of ``window_words``.
            window_words: Sequence of words scanned for ``output_vocab``.
            hidden_states: Forwarded unchanged to ``decode_one_step``.

        Returns:
            The sum of the cross-entropy term and the attention-loss term.
        """
        def _scalar(value):
            # One-element float32 Variable holding ``value``.
            return Variable(xp.array([value], dtype=xp.float32))

        def _index(position):
            # One-element int32 index array for ``F.select_item``.
            return xp.array([position], dtype=xp.int32)

        # presumably g is the mixing gate and a the attention over the
        # window -- TODO confirm against decode_one_step
        g, rnn_distribution, a = self.decode_one_step(
            input_vocab, window_words, hidden_states)

        # Vocabulary probability of the target; zero for unknown words.
        if output_vocab in self.vocab:
            p_vocab = F.select_item(rnn_distribution,
                                    _index(self.vocab[output_vocab]))
        else:
            p_vocab = _scalar(0.0)

        # Accumulate the entries of ``a`` wherever the target appears in
        # the window.
        positions = [
            pos for pos, word in enumerate(window_words)
            if word == output_vocab
        ]
        pointer_mass = _scalar(0)
        for pos in positions:
            pointer_mass += F.select_item(a, _index(pos))

        if positions:
            p_ptr = F.cast(pointer_mass, xp.float32)
        else:
            p_ptr = _scalar(0.0)

        # The 0.01 offset keeps the argument of log away from zero.
        mixture = F.linear_interpolate(g, p_vocab, p_ptr)
        cross_entropy = -F.log(mixture + _scalar(0.01))

        # The attention term only applies when the target is in the window.
        if positions:
            attention_loss = F.cast(-F.log(g + pointer_mass), xp.float32)
        else:
            attention_loss = _scalar(0.0)

        return cross_entropy + attention_loss
def _call_1step(net: NStepRNNBase, hidden: ArrayLike, input: ArrayLike):
    """Run a single GRU step using the parameters in ``net.ws[0]``/``net.bs[0]``.

    Args:
        net: Provides ``ws``, ``bs`` and ``init_hx``. Only the first entry
            of ``ws`` and ``bs`` is used.
        hidden: Previous hidden state, or ``None`` to start from
            ``net.init_hx(input)[0]``.
        input: Input for this step.

    Returns:
        The new hidden state ``z * hidden + (1 - z) * h_bar``.
    """
    if hidden is None:
        hidden = net.init_hx(input)[0]

    weights = net.ws[0]
    biases = net.bs[0]

    # Entries 0-2 are applied to the input and 3-5 to the hidden state;
    # within each group the order is presumably reset, update, candidate
    # (matches how the split results are used below).
    input_weight = F.concat([weights[i] for i in range(3)], axis=0)
    hidden_weight = F.concat([weights[i] for i in range(3, 6)], axis=0)
    input_bias = F.concat([biases[i] for i in range(3)], axis=0)
    hidden_bias = F.concat([biases[i] for i in range(3, 6)], axis=0)

    # One fused affine map per side, then split back into the three parts.
    from_input = F.linear(input, input_weight, input_bias)
    from_hidden = F.linear(hidden, hidden_weight, hidden_bias)

    in_reset, in_update, in_cand = F.split_axis(from_input, 3, axis=1)
    hid_reset, hid_update, hid_cand = F.split_axis(from_hidden, 3, axis=1)

    reset_gate = F.sigmoid(in_reset + hid_reset)
    update_gate = F.sigmoid(in_update + hid_update)
    candidate = F.tanh(in_cand + reset_gate * hid_cand)

    # linear_interpolate(p, x, y) computes p * x + (1 - p) * y.
    return F.linear_interpolate(update_gate, hidden, candidate)
    def check_forward(self, p_data, x_data, y_data):
        """Check the output of ``linear_interpolate`` for the given arrays.

        The three arrays are wrapped in ``chainer.Variable`` and passed to
        ``functions.linear_interpolate``. The result must have dtype
        ``self.dtype`` and be close to ``p * x + (1 - p) * y`` computed
        from the fixture arrays ``self.p``, ``self.x`` and ``self.y``.
        """
        variables = [
            chainer.Variable(data) for data in (p_data, x_data, y_data)
        ]

        z = functions.linear_interpolate(*variables)
        self.assertEqual(z.data.dtype.type, self.dtype)

        # Reference value is built from the fixtures, not the arguments.
        weight = self.p
        expect = weight * self.x + (1 - weight) * self.y
        testing.assert_allclose(z.data, expect, **self.check_forward_options)
    def check_forward(self, p_data, x_data, y_data):
        """Verify dtype and values of ``functions.linear_interpolate``.

        Args:
            p_data: Array used as the interpolation weight.
            x_data: Array weighted by ``p``.
            y_data: Array weighted by ``1 - p``.

        The expected value ``self.p * self.x + (1 - self.p) * self.y`` is
        computed from the fixture arrays stored on the test case and is
        compared with the tolerances in ``self.check_forward_options``.
        """
        z = functions.linear_interpolate(
            chainer.Variable(p_data),
            chainer.Variable(x_data),
            chainer.Variable(y_data))

        actual = z.data
        self.assertEqual(actual.dtype.type, self.dtype)

        expect = self.p * self.x + (1 - self.p) * self.y
        tolerances = self.check_forward_options
        testing.assert_allclose(actual, expect, **tolerances)
    def __call__(self, x, h):
        """Compute the next hidden state from input ``x`` and state ``h``.

        Args:
            x: Input for this step.
            h: Previous hidden state, or ``None`` when there is none.

        Returns:
            ``z * h_bar + (1 - z) * h`` when ``h`` is given, otherwise
            ``z * h_bar``, where ``z`` is the sigmoid-activated update
            gate and ``h_bar`` the tanh-activated candidate state.
        """
        update = self.W_z(x)
        candidate = self.W(x)

        if h is None:
            # Without a previous state only the input terms contribute.
            return F.sigmoid(update) * F.tanh(candidate)

        reset = F.sigmoid(self.W_r(x) + self.U_r(h))
        update += self.U_z(h)
        candidate += self.U(reset * h)

        # linear_interpolate(p, a, b) computes p * a + (1 - p) * b.
        return F.linear_interpolate(F.sigmoid(update), F.tanh(candidate), h)
    def __call__(self, x):
        """Advance the stored state ``self.h`` by one step and return it.

        Args:
            x: Input for this step.

        Returns:
            The new hidden state, which is also written to ``self.h``.
            It is ``z * h_bar + (1 - z) * self.h`` when a previous state
            exists and ``z * h_bar`` otherwise.
        """
        update = self.W_z(x)
        candidate = self.W(x)
        previous = self.h

        if previous is None:
            # First step: there is no recurrent contribution yet.
            new_state = F.sigmoid(update) * F.tanh(candidate)
        else:
            reset = F.sigmoid(self.W_r(x) + self.U_r(previous))
            update += self.U_z(previous)
            candidate += self.U(reset * previous)
            new_state = F.linear_interpolate(
                F.sigmoid(update), F.tanh(candidate), previous)

        # Remember the state for the next call.
        self.h = new_state
        return new_state
    def __call__(self, word, context):
        """Advance the stored state using ``word`` and ``context``.

        Follows the same structure as a stateful GRU step, with extra
        ``C_r``/``C_z``/``C`` terms computed from ``context``.

        Args:
            word: Input for this step.
            context: Extra input added to the reset gate, update gate and
                candidate state when a previous state exists.

        Returns:
            The new hidden state, also stored in ``self.h``.
        """
        update = self.W_z(word)
        candidate = self.W(word)
        previous = self.h

        if previous is None:
            # NOTE(review): ``context`` is not used on this path -- confirm
            # that ignoring it on the first step is intended.
            new_state = chainFunc.sigmoid(update) * chainFunc.tanh(candidate)
        else:
            reset = chainFunc.sigmoid(
                self.W_r(word) + self.U_r(previous) + self.C_r(context))
            update += self.U_z(previous) + self.C_z(context)
            candidate += self.U(reset * previous) + self.C(context)
            new_state = chainFunc.linear_interpolate(
                chainFunc.sigmoid(update),
                chainFunc.tanh(candidate),
                previous)

        self.h = new_state
        return self.h
Exemple #8
0
def linear_interpolate():
    """Build one input/output sample for ``F.linear_interpolate``.

    Three arrays of shape ``(1, 2, 3, 4)`` are created with ``rand`` and
    passed to ``F.linear_interpolate``.

    Returns:
        A pair of dicts: the inputs keyed ``'input-0'`` .. ``'input-2'``
        and the result keyed ``'out'``.
    """
    shape = (1, 2, 3, 4)
    operands = [rand(shape) for _ in range(3)]
    result = F.linear_interpolate(*operands)

    inputs = {
        'input-0': operands[0],
        'input-1': operands[1],
        'input-2': operands[2],
    }
    outputs = {'out': result}
    return inputs, outputs
 def forward(self, inputs, device):
     """Apply ``linear_interpolate`` to ``inputs`` and cast to float64.

     Args:
         inputs: Three values unpacked as ``(p, x, y)``.
         device: Not used by this method.

     Returns:
         A one-element tuple holding the float64 result.
     """
     p, x, y = inputs
     return (
         functions.cast(
             functions.linear_interpolate(p, x, y), numpy.float64),
     )
 def forward(self, inputs, device):
     """Interpolate the three inputs and return the result as float64.

     ``inputs`` is unpacked into the weight ``p`` and the operands ``x``
     and ``y``; ``device`` is accepted but not used here. The result is
     returned as a one-element tuple.
     """
     weight, first, second = inputs
     interpolated = functions.linear_interpolate(weight, first, second)
     as_float64 = functions.cast(interpolated, numpy.float64)
     return (as_float64,)