Example 1
    def test_Parameter_argument(self):
        # shape w/o data
        p = Parameter(Shape([2, 3]))
        self.assertEqual(p.shape(), Shape([2, 3]))

        # shape w/ Initializer
        p = Parameter(Shape([4, 3]), I.Constant(1))
        self.assertEqual(p.shape(), Shape([4, 3]))
        self.assertEqual(p.value.to_list(), [1] * 12)

        # shape w/ list[float]
        p = Parameter(Shape([4, 3]), self.list_data[:12])
        self.assertEqual(p.shape(), Shape([4, 3]))
        self.assertEqual(p.value.to_list(), self.list_data[:12])

        # ndarray w/o shape
        p = Parameter(init=self.ndarray_data[0])
        self.assertEqual(p.shape(), Shape([4, 3]))
        self.assertEqual(p.value.to_list(), self.list_data[:12])

        # ndarray w/ shape
        p = Parameter(Shape([2, 6]), init=self.ndarray_data[0])
        self.assertEqual(p.shape(), Shape([2, 6]))
        self.assertEqual(p.value.to_list(), self.list_data[:12])

        # list[float] w/o shape
        self.assertRaises(TypeError, lambda: Parameter(init=self.list_data[:12]))
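
A plausible reconstruction of the imports and fixtures these test snippets rely on (the fixture values and the ndarray layout are illustrative assumptions, not taken from the original test file):

import tempfile
import unittest

import numpy as np

from primitiv import Device, Model, Parameter, Shape
from primitiv import devices as D
from primitiv import initializers as I
from primitiv import tensor_functions as tF


class ParameterTestBase(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # primitiv needs a default device before any Parameter is created.
        cls.device = D.Naive()
        Device.set_default(cls.device)

    def setUp(self):
        # Twelve floats shared across the Parameter tests.
        self.list_data = [float(i) for i in range(1, 13)]
        # A 4x3 ndarray intended to round-trip to list_data[:12]; the exact
        # memory order expected by Parameter(init=...) is an assumption here.
        self.ndarray_data = [
            np.array(self.list_data, dtype=np.float32).reshape(3, 4).T,
        ]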
Example 2
    def test_ModelTest_CheckSaveLoad_Same(self):
        shape = Shape([2, 2])
        values1 = [1, 2, 3, 4]
        values2 = [5, 6, 7, 8]
        tmp = tempfile.NamedTemporaryFile()

        m1 = Model()
        m2 = Model()
        p1 = Parameter(shape, I.Constant(0))
        p1.value += tF.raw_input(shape, values1)
        p2 = Parameter(shape, I.Constant(0))
        p2.value += tF.raw_input(shape, values2)
        m1.add("p", p1)
        m2.add("p", p2)
        m1.add("sm", m2)

        m1.save(tmp.name)

        m1 = Model()
        m2 = Model()
        p1 = Parameter()
        p2 = Parameter()
        m1.add("p", p1)
        m2.add("p", p2)
        m1.add("sm", m2)

        m1.load(tmp.name)

        self.assertTrue(p1.valid())
        self.assertTrue(p2.valid())
        self.assertEqual(shape, p1.shape())
        self.assertEqual(shape, p2.shape())
        self.assertEqual(values1, p1.value.to_list())
        self.assertEqual(values2, p2.value.to_list())
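
Outside of unittest, the same save/load round trip looks roughly like this (a minimal sketch assuming a default device, as in primitiv's own examples; the file name is arbitrary):

from primitiv import Device, Model, Parameter, Shape
from primitiv import devices as D
from primitiv import initializers as I

Device.set_default(D.Naive())

m = Model()
p = Parameter(Shape([2, 2]), I.Constant(42))
m.add("p", p)
m.save("model.bin")

m2 = Model()
p2 = Parameter()   # left uninitialized; shape and values come from the file
m2.add("p", p2)    # the name layout must match what was saved
m2.load("model.bin")
assert p2.value.to_list() == [42.0] * 4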
Example 3
    def test_Parameter_argument(self):
        # no argument
        p = Parameter()
        self.assertFalse(p.valid())

        # shape w/ Initializer
        p = Parameter(Shape([4, 3]), I.Constant(1))
        self.assertEqual(p.shape(), Shape([4, 3]))
        self.assertEqual(p.value.to_list(), [1] * 12)
Example 4
    def test_ModelTest_CheckSaveLoadWithStats(self):
        shape = Shape([2, 2])
        values1 = [1, 2, 3, 4]
        values2 = [5, 6, 7, 8]
        stats1 = [10, 20, 30, 40]
        stats2 = [50, 60, 70, 80]
        tmp = tempfile.NamedTemporaryFile()

        m1 = Model()
        m2 = Model()
        p1 = Parameter(shape, I.Constant(0))
        p1.value += tF.raw_input(shape, values1)
        p2 = Parameter(shape, I.Constant(0))
        p2.value += tF.raw_input(shape, values2)
        p1.add_stats("a", shape)
        p2.add_stats("b", shape)
        p1.stats["a"].reset_by_vector(stats1)
        p2.stats["b"].reset_by_vector(stats2)
        m1.add("p", p1)
        m2.add("p", p2)
        m1.add("sm", m2)

        m1.save(tmp.name)

        m1 = Model()
        m2 = Model()
        p1 = Parameter()
        p2 = Parameter()
        m1.add("p", p1)
        m2.add("p", p2)
        m1.add("sm", m2)

        m1.load(tmp.name)

        self.assertTrue(p1.valid())
        self.assertTrue(p2.valid())
        self.assertEqual(shape, p1.shape())
        self.assertEqual(shape, p2.shape())
        self.assertEqual(values1, p1.value.to_list())
        self.assertEqual(values2, p2.value.to_list())
        self.assertTrue("a" in p1.stats)
        self.assertTrue("b" in p2.stats)
        self.assertEqual(stats1, p1.stats["a"].to_list())
        self.assertEqual(stats2, p2.stats["b"].to_list())
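
Parameter statistics are named auxiliary tensors stored next to the parameter values (primitiv's optimizers can keep per-parameter state, e.g. moments, in them), and as the test shows they are serialized together with the model. A minimal standalone sketch, assuming a default device:

from primitiv import Device, Parameter, Shape
from primitiv import devices as D
from primitiv import initializers as I

Device.set_default(D.Naive())

p = Parameter(Shape([2]), I.Constant(0))
p.add_stats("m", Shape([2]))          # register an auxiliary tensor
p.stats["m"].reset_by_vector([1, 2])  # overwrite its contents
print(p.stats["m"].to_list())         # [1.0, 2.0]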
Example 5
# Imports this snippet relies on (python-primitiv):
from primitiv import Model, Node, Parameter
from primitiv import functions as F
from primitiv import initializers as I


class LSTM(Model):
    """LSTM cell."""

    def __init__(self):
        self._pwxh = Parameter()
        self._pwhh = Parameter()
        self._pbh = Parameter()
        self.scan_attributes()

    def init(self, in_size, out_size):
        """Creates a new LSTM."""
        self._pwxh.init([4 * out_size, in_size], I.XavierUniform())
        self._pwhh.init([4 * out_size, out_size], I.XavierUniform())
        self._pbh.init([4 * out_size], I.Constant(0))

    def reset(self, init_c=Node(), init_h=Node()):
        """Initializes internal states."""
        out_size = self._pwhh.shape()[1]
        self._wxh = F.parameter(self._pwxh)
        self._whh = F.parameter(self._pwhh)
        self._bh = F.parameter(self._pbh)
        self._c = init_c if init_c.valid() else F.zeros([out_size])
        self._h = init_h if init_h.valid() else F.zeros([out_size])

    def forward(self, x):
        """One step forwarding."""
        out_size = self._pwhh.shape()[1]
        u = self._wxh @ x + self._whh @ self._h + self._bh
        i = F.sigmoid(F.slice(u, 0, 0, out_size))
        f = F.sigmoid(F.slice(u, 0, out_size, 2 * out_size))
        o = F.sigmoid(F.slice(u, 0, 2 * out_size, 3 * out_size))
        j = F.tanh(F.slice(u, 0, 3 * out_size, 4 * out_size))
        self._c = i * j + f * self._c
        self._h = o * F.tanh(self._c)
        return self._h

    def get_c(self):
        """Retrieves current internal cell state."""
        return self._c

    def get_h(self):
        """Retrieves current hidden value."""
        return self._h
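
A hypothetical one-step usage of this cell, assuming the standard primitiv setup with a default Device and Graph:

from primitiv import Device, Graph
from primitiv import devices as D
from primitiv import functions as F

Device.set_default(D.Naive())
g = Graph()
Graph.set_default(g)

lstm = LSTM()
lstm.init(8, 16)      # in_size=8, out_size=16
lstm.reset()          # zero-initialized cell and hidden states
x = F.zeros([8])      # dummy input vector
h = lstm.forward(x)   # hidden state for this step, shape [16]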
Example 6
# Imports this snippet relies on (python-primitiv); `function_type` and the
# `self.F` function namespace are helpers defined elsewhere in the
# surrounding project.
import math

import numpy as np

from primitiv import Model, Parameter
from primitiv import initializers as I


class TransformerEmbeddings(Model):
    def __init__(self, max_len, dropout):
        self.max_len = max_len
        self.dropout = dropout
        self.pe = None

        self.plookup = Parameter()
        self.pby = Parameter()
        self.scan_attributes()

    def init(self, vocab, d_model):
        self.plookup.init([d_model, vocab], I.XavierUniform())
        self.pby.init([1, vocab], I.XavierUniform())

    @function_type
    def encode(self, seq, train):
        lookup = self.F.parameter(self.plookup)
        d_model = lookup.shape()[0]
        if self.pe is None:
            self.pe = self.positional_encoding()

        embed = []
        for w in seq:
            e = self.F.pick(lookup, w, 1)
            embed.append(e)
        embed_tensor = self.F.transpose(self.F.concat(embed, 1))

        embed_tensor *= math.sqrt(d_model)
        pos = self.F.input(self.pe[:len(seq)])
        pe = self.F.dropout(embed_tensor + pos, self.dropout, train)
        return pe

    @function_type
    def decode(self, x, train):  # x: [seq_len, d_model]
        w = self.F.parameter(self.plookup)  # [d_model, vocab]
        by = self.F.broadcast(self.F.parameter(self.pby), 0,
                              x.shape()[0])  # [seq_len, vocab]
        return x @ w + by  # [seq_len, vocab]

    def positional_encoding(self):
        d_model = self.plookup.shape()[0]
        pe = np.zeros((self.max_len, d_model))
        position = np.expand_dims(np.arange(0, self.max_len), axis=1)
        div_term = np.exp(
            np.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))
        div_term = np.expand_dims(div_term, axis=0)
        pe[:, 0::2] = np.sin(position * div_term)
        pe[:, 1::2] = np.cos(position * div_term)
        return pe
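
positional_encoding builds the sinusoidal table from "Attention Is All You Need": even columns hold sin(pos / 10000^(2i/d_model)) and odd columns the matching cosine. A standalone NumPy check of the same computation, with illustrative sizes:

import math

import numpy as np

max_len, d_model = 4, 6
position = np.expand_dims(np.arange(0, max_len), axis=1)  # shape (4, 1)
div_term = np.exp(np.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))
pe = np.zeros((max_len, d_model))
pe[:, 0::2] = np.sin(position * div_term)
pe[:, 1::2] = np.cos(position * div_term)
assert np.isclose(pe[2, 0], math.sin(2.0))  # pos=2, i=0: sin(2 / 10000**0)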
Example 7
# Imports this snippet relies on (python-primitiv); LSTM (note: exposing a
# restart() method) and DROPOUT_RATE are defined elsewhere in the
# surrounding example.
from primitiv import Model, Parameter
from primitiv import functions as F
from primitiv import initializers as I


class AttentionalEncoderDecoder(Model):
    """Encoder-decoder translation model with dot-attention."""
    def __init__(self):
        self.dropout_rate = DROPOUT_RATE
        self.psrc_lookup = Parameter()
        self.ptrg_lookup = Parameter()
        self.pwhj = Parameter()
        self.pbj = Parameter()
        self.pwjy = Parameter()
        self.pby = Parameter()
        self.src_fw_lstm = LSTM()
        self.src_bw_lstm = LSTM()
        self.trg_lstm = LSTM()
        self.add_all_parameters()
        self.add_all_submodels()

    def init(self, src_vocab_size, trg_vocab_size, embed_size, hidden_size):
        """Creates a new AttentionalEncoderDecoder object."""
        self.psrc_lookup.init([embed_size, src_vocab_size], I.XavierUniform())
        self.ptrg_lookup.init([embed_size, trg_vocab_size], I.XavierUniform())
        self.pwhj.init([embed_size, 2 * hidden_size], I.XavierUniform())
        self.pbj.init([embed_size], I.Constant(0))
        self.pwjy.init([trg_vocab_size, embed_size], I.XavierUniform())
        self.pby.init([trg_vocab_size], I.Constant(0))
        self.src_fw_lstm.init(embed_size, hidden_size)
        self.src_bw_lstm.init(embed_size, hidden_size)
        self.trg_lstm.init(2 * embed_size, hidden_size)

    def encode(self, src_batch, train):
        """Encodes source sentences and prepares internal states."""
        # Embedding lookup.
        src_lookup = F.parameter(self.psrc_lookup)
        e_list = []
        for x in src_batch:
            e = F.pick(src_lookup, x, 1)
            e = F.dropout(e, self.dropout_rate, train)
            e_list.append(e)

        # Forward encoding.
        self.src_fw_lstm.restart()
        f_list = []
        for e in e_list:
            f = self.src_fw_lstm.forward(e)
            f = F.dropout(f, self.dropout_rate, train)
            f_list.append(f)

        # Backward encoding.
        self.src_bw_lstm.restart()
        b_list = []
        for e in reversed(e_list):
            b = self.src_bw_lstm.forward(e)
            b = F.dropout(b, self.dropout_rate, train)
            b_list.append(b)

        b_list.reverse()

        # Sums forward/backward states, then concatenates them over time.
        fb_list = [f_list[i] + b_list[i] for i in range(len(src_batch))]
        self.concat_fb = F.concat(fb_list, 1)
        self.t_concat_fb = F.transpose(self.concat_fb)

        # Initializes decode states.
        embed_size = self.psrc_lookup.shape()[0]
        self.trg_lookup = F.parameter(self.ptrg_lookup)
        self.whj = F.parameter(self.pwhj)
        self.bj = F.parameter(self.pbj)
        self.wjy = F.parameter(self.pwjy)
        self.by = F.parameter(self.pby)
        self.feed = F.zeros([embed_size])
        self.trg_lstm.restart(
            self.src_fw_lstm.get_c() + self.src_bw_lstm.get_c(),
            self.src_fw_lstm.get_h() + self.src_bw_lstm.get_h())

    def decode_step(self, trg_words, train):
        """One step decoding."""
        e = F.pick(self.trg_lookup, trg_words, 1)
        e = F.dropout(e, self.dropout_rate, train)
        h = self.trg_lstm.forward(F.concat([e, self.feed], 0))
        h = F.dropout(h, self.dropout_rate, train)
        atten_probs = F.softmax(self.t_concat_fb @ h, 0)
        c = self.concat_fb @ atten_probs
        self.feed = F.tanh(self.whj @ F.concat([h, c], 0) + self.bj)
        return self.wjy @ self.feed + self.by

    def loss(self, trg_batch, train):
        """Calculates loss values."""
        losses = []
        for i in range(len(trg_batch) - 1):
            y = self.decode_step(trg_batch[i], train)
            loss = F.softmax_cross_entropy(y, trg_batch[i + 1], 0)
            losses.append(loss)
        return F.batch.mean(F.sum(losses))
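
A hypothetical single training step for this model (the optimizer registration call and the batch layout follow primitiv's encoder-decoder example and are assumptions; src_batch/trg_batch are lists of per-timestep word-ID batches, and DROPOUT_RATE is assumed to be defined in the module):

from primitiv import Device, Graph
from primitiv import devices as D
from primitiv import optimizers as O

Device.set_default(D.Naive())
g = Graph()
Graph.set_default(g)

model = AttentionalEncoderDecoder()
model.init(src_vocab_size=100, trg_vocab_size=100, embed_size=64,
           hidden_size=64)
optimizer = O.SGD(0.1)
optimizer.add(model)  # assumption: Optimizer.add accepts a Model

src_batch = [[1], [5], [2]]  # <bos> w <eos> for a batch of one sentence
trg_batch = [[1], [7], [2]]

g.clear()
model.encode(src_batch, True)
loss = model.loss(trg_batch, True)
optimizer.reset_gradients()
loss.backward()
optimizer.update()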