def test_Parameter_argument(self):
    # no argument
    p = Parameter()
    self.assertFalse(p.valid())

    # shape w/o data
    p = Parameter(Shape([2, 3]))
    self.assertEqual(p.shape(), Shape([2, 3]))

    # shape w/ Initializer
    p = Parameter(Shape([4, 3]), I.Constant(1))
    self.assertEqual(p.shape(), Shape([4, 3]))
    self.assertEqual(p.value.to_list(), [1] * 12)

    # shape w/ list[float]
    p = Parameter(Shape([4, 3]), self.list_data[:12])
    self.assertEqual(p.shape(), Shape([4, 3]))
    self.assertEqual(p.value.to_list(), self.list_data[:12])

    # ndarray w/o shape
    p = Parameter(init=self.ndarray_data[0])
    self.assertEqual(p.shape(), Shape([4, 3]))
    self.assertEqual(p.value.to_list(), self.list_data[:12])

    # ndarray w/ shape
    p = Parameter(Shape([2, 6]), init=self.ndarray_data[0])
    self.assertEqual(p.shape(), Shape([2, 6]))
    self.assertEqual(p.value.to_list(), self.list_data[:12])

    # list[float] w/o shape
    self.assertRaises(TypeError,
                      lambda: Parameter(init=self.list_data[:12]))
def test_ModelTest_CheckSaveLoad_Same(self):
    shape = Shape([2, 2])
    values1 = [1, 2, 3, 4]
    values2 = [5, 6, 7, 8]
    tmp = tempfile.NamedTemporaryFile()

    m1 = Model()
    m2 = Model()
    p1 = Parameter(shape, I.Constant(0))
    p1.value += tF.raw_input(shape, values1)
    p2 = Parameter(shape, I.Constant(0))
    p2.value += tF.raw_input(shape, values2)
    m1.add("p", p1)
    m2.add("p", p2)
    m1.add("sm", m2)
    m1.save(tmp.name)

    m1 = Model()
    m2 = Model()
    p1 = Parameter()
    p2 = Parameter()
    m1.add("p", p1)
    m2.add("p", p2)
    m1.add("sm", m2)
    m1.load(tmp.name)

    self.assertTrue(p1.valid())
    self.assertTrue(p2.valid())
    self.assertEqual(shape, p1.shape())
    self.assertEqual(shape, p2.shape())
    self.assertEqual(values1, p1.value.to_list())
    self.assertEqual(values2, p2.value.to_list())
def test_ModelTest_CheckSaveLoadWithStats(self):
    shape = Shape([2, 2])
    values1 = [1, 2, 3, 4]
    values2 = [5, 6, 7, 8]
    stats1 = [10, 20, 30, 40]
    stats2 = [50, 60, 70, 80]
    tmp = tempfile.NamedTemporaryFile()

    m1 = Model()
    m2 = Model()
    p1 = Parameter(shape, I.Constant(0))
    p1.value += tF.raw_input(shape, values1)
    p2 = Parameter(shape, I.Constant(0))
    p2.value += tF.raw_input(shape, values2)
    p1.add_stats("a", shape)
    p2.add_stats("b", shape)
    p1.stats["a"].reset_by_vector(stats1)
    p2.stats["b"].reset_by_vector(stats2)
    m1.add("p", p1)
    m2.add("p", p2)
    m1.add("sm", m2)
    m1.save(tmp.name)

    m1 = Model()
    m2 = Model()
    p1 = Parameter()
    p2 = Parameter()
    m1.add("p", p1)
    m2.add("p", p2)
    m1.add("sm", m2)
    m1.load(tmp.name)

    self.assertTrue(p1.valid())
    self.assertTrue(p2.valid())
    self.assertEqual(shape, p1.shape())
    self.assertEqual(shape, p2.shape())
    self.assertEqual(values1, p1.value.to_list())
    self.assertEqual(values2, p2.value.to_list())
    self.assertTrue("a" in p1.stats)
    self.assertTrue("b" in p2.stats)
    self.assertEqual(stats1, p1.stats["a"].to_list())
    self.assertEqual(stats2, p2.stats["b"].to_list())
class LSTM(Model):
    """LSTM cell."""

    def __init__(self):
        self._pwxh = Parameter()
        self._pwhh = Parameter()
        self._pbh = Parameter()
        self.scan_attributes()

    def init(self, in_size, out_size):
        """Creates a new LSTM."""
        self._pwxh.init([4 * out_size, in_size], I.XavierUniform())
        self._pwhh.init([4 * out_size, out_size], I.XavierUniform())
        self._pbh.init([4 * out_size], I.Constant(0))

    # Named `restart` to match the call sites in AttentionalEncoderDecoder.
    def restart(self, init_c=Node(), init_h=Node()):
        """Initializes internal states."""
        out_size = self._pwhh.shape()[1]
        self._wxh = F.parameter(self._pwxh)
        self._whh = F.parameter(self._pwhh)
        self._bh = F.parameter(self._pbh)
        self._c = init_c if init_c.valid() else F.zeros([out_size])
        self._h = init_h if init_h.valid() else F.zeros([out_size])

    def forward(self, x):
        """One step forwarding."""
        out_size = self._pwhh.shape()[1]
        u = self._wxh @ x + self._whh @ self._h + self._bh
        i = F.sigmoid(F.slice(u, 0, 0, out_size))
        f = F.sigmoid(F.slice(u, 0, out_size, 2 * out_size))
        o = F.sigmoid(F.slice(u, 0, 2 * out_size, 3 * out_size))
        j = F.tanh(F.slice(u, 0, 3 * out_size, 4 * out_size))
        self._c = i * j + f * self._c
        self._h = o * F.tanh(self._c)
        return self._h

    def get_c(self):
        """Retrieves current internal cell state."""
        return self._c

    def get_h(self):
        """Retrieves current hidden value."""
        return self._h
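A minimal usage sketch for the cell above, assuming primitiv's standard Python imports (`functions as F`, `devices as D`) as used in the library's examples; the sizes and input values are hypothetical, not part of the original code:

# Sketch: drives one LSTM step on dummy data.
from primitiv import Device, Graph
from primitiv import devices as D
from primitiv import functions as F

Device.set_default(D.Naive())      # CPU backend
g = Graph()
Graph.set_default(g)

lstm = LSTM()
lstm.init(8, 16)                   # in_size=8, out_size=16
lstm.restart()                     # zero-initialized cell/hidden states
x = F.raw_input([8], [0.1] * 8)    # dummy 8-dim input vector
h = lstm.forward(x)                # hidden state, shape [16]
c = lstm.get_c()                   # cell state, shape [16]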
class TransformerEmbeddings(Model):
    # `function_type` and `self.F` are provided by the surrounding example
    # (not shown here).

    def __init__(self, max_len, dropout):
        self.max_len = max_len
        self.dropout = dropout
        self.pe = None
        self.plookup = Parameter()
        self.pby = Parameter()
        self.scan_attributes()

    def init(self, vocab, d_model):
        self.plookup.init([d_model, vocab], I.XavierUniform())
        self.pby.init([1, vocab], I.XavierUniform())

    @function_type
    def encode(self, seq, train):
        lookup = self.F.parameter(self.plookup)
        d_model = lookup.shape()[0]
        if self.pe is None:
            self.pe = self.positional_encoding()
        embed = []
        for w in seq:
            e = self.F.pick(lookup, w, 1)
            embed.append(e)
        embed_tensor = self.F.transpose(self.F.concat(embed, 1))
        embed_tensor *= math.sqrt(d_model)
        pos = self.F.input(self.pe[:len(seq)])
        pe = self.F.dropout(embed_tensor + pos, self.dropout, train)
        return pe

    @function_type
    def decode(self, x, train):
        # x: [seq_len, d_model]
        w = self.F.parameter(self.plookup)  # [d_model, vocab]
        by = self.F.broadcast(
            self.F.parameter(self.pby), 0, x.shape()[0])  # [seq_len, vocab]
        return x @ w + by  # [seq_len, vocab]

    def positional_encoding(self):
        d_model = self.plookup.shape()[0]
        pe = np.zeros((self.max_len, d_model))
        position = np.expand_dims(np.arange(0, self.max_len), axis=1)
        div_term = np.exp(
            np.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))
        div_term = np.expand_dims(div_term, axis=0)
        pe[:, 0::2] = np.sin(position * div_term)
        pe[:, 1::2] = np.cos(position * div_term)
        return pe
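For reference, the sinusoidal table that positional_encoding builds can be reproduced standalone with numpy; a minimal sketch with hypothetical sizes (max_len=4, d_model=6):

import math
import numpy as np

max_len, d_model = 4, 6        # hypothetical sizes
pe = np.zeros((max_len, d_model))
position = np.expand_dims(np.arange(0, max_len), axis=1)   # (4, 1)
div_term = np.exp(np.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))
pe[:, 0::2] = np.sin(position * div_term)   # even columns: sine
pe[:, 1::2] = np.cos(position * div_term)   # odd columns: cosine
print(pe.shape)                # (4, 6): one d_model-wide row per position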
class AttentionalEncoderDecoder(Model):
    """Encoder-decoder translation model with dot-attention."""

    def __init__(self):
        self.dropout_rate = DROPOUT_RATE
        self.psrc_lookup = Parameter()
        self.ptrg_lookup = Parameter()
        self.pwhj = Parameter()
        self.pbj = Parameter()
        self.pwjy = Parameter()
        self.pby = Parameter()
        self.src_fw_lstm = LSTM()
        self.src_bw_lstm = LSTM()
        self.trg_lstm = LSTM()
        self.add_all_parameters()
        self.add_all_submodels()

    def init(self, src_vocab_size, trg_vocab_size, embed_size, hidden_size):
        """Creates a new AttentionalEncoderDecoder object."""
        self.psrc_lookup.init([embed_size, src_vocab_size], I.XavierUniform())
        self.ptrg_lookup.init([embed_size, trg_vocab_size], I.XavierUniform())
        self.pwhj.init([embed_size, 2 * hidden_size], I.XavierUniform())
        self.pbj.init([embed_size], I.Constant(0))
        self.pwjy.init([trg_vocab_size, embed_size], I.XavierUniform())
        self.pby.init([trg_vocab_size], I.Constant(0))
        self.src_fw_lstm.init(embed_size, hidden_size)
        self.src_bw_lstm.init(embed_size, hidden_size)
        self.trg_lstm.init(2 * embed_size, hidden_size)

    def encode(self, src_batch, train):
        """Encodes source sentences and prepares internal states."""
        # Embedding lookup.
        src_lookup = F.parameter(self.psrc_lookup)
        e_list = []
        for x in src_batch:
            e = F.pick(src_lookup, x, 1)
            e = F.dropout(e, self.dropout_rate, train)
            e_list.append(e)

        # Forward encoding.
        self.src_fw_lstm.restart()
        f_list = []
        for e in e_list:
            f = self.src_fw_lstm.forward(e)
            f = F.dropout(f, self.dropout_rate, train)
            f_list.append(f)

        # Backward encoding.
        self.src_bw_lstm.restart()
        b_list = []
        for e in reversed(e_list):
            b = self.src_bw_lstm.forward(e)
            b = F.dropout(b, self.dropout_rate, train)
            b_list.append(b)
        b_list.reverse()

        # Sums forward/backward states, then concatenates them over time steps.
        fb_list = [f_list[i] + b_list[i] for i in range(len(src_batch))]
        self.concat_fb = F.concat(fb_list, 1)
        self.t_concat_fb = F.transpose(self.concat_fb)

        # Initializes decoder states.
        embed_size = self.psrc_lookup.shape()[0]
        self.trg_lookup = F.parameter(self.ptrg_lookup)
        self.whj = F.parameter(self.pwhj)
        self.bj = F.parameter(self.pbj)
        self.wjy = F.parameter(self.pwjy)
        self.by = F.parameter(self.pby)
        self.feed = F.zeros([embed_size])
        self.trg_lstm.restart(
            self.src_fw_lstm.get_c() + self.src_bw_lstm.get_c(),
            self.src_fw_lstm.get_h() + self.src_bw_lstm.get_h())

    def decode_step(self, trg_words, train):
        """One step decoding."""
        e = F.pick(self.trg_lookup, trg_words, 1)
        e = F.dropout(e, self.dropout_rate, train)
        h = self.trg_lstm.forward(F.concat([e, self.feed], 0))
        h = F.dropout(h, self.dropout_rate, train)
        atten_probs = F.softmax(self.t_concat_fb @ h, 0)
        c = self.concat_fb @ atten_probs
        self.feed = F.tanh(self.whj @ F.concat([h, c], 0) + self.bj)
        return self.wjy @ self.feed + self.by

    def loss(self, trg_batch, train):
        """Calculates loss values."""
        losses = []
        for i in range(len(trg_batch) - 1):
            y = self.decode_step(trg_batch[i], train)
            loss = F.softmax_cross_entropy(y, trg_batch[i + 1], 0)
            losses.append(loss)
        return F.batch.mean(F.sum(losses))
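A hedged sketch of one training step driving the model above. It assumes a default device is already set (as in the LSTM sketch earlier), primitiv's `optimizers as O` module, and a v0.4-style unified `Optimizer.add`; the vocabulary sizes, layer sizes, and word-ID batches are all hypothetical:

from primitiv import Graph
from primitiv import optimizers as O

model = AttentionalEncoderDecoder()
model.init(src_vocab_size=1000, trg_vocab_size=1000,
           embed_size=64, hidden_size=64)
optimizer = O.Adam()
optimizer.add(model)            # assumption: unified add() registers the model

g = Graph()
Graph.set_default(g)

# Dummy batches: one list of word IDs per position, batch size 2.
src_batch = [[1, 2], [3, 4], [0, 0]]
trg_batch = [[1, 1], [5, 6], [0, 0]]

g.clear()
model.encode(src_batch, train=True)
loss = model.loss(trg_batch, train=True)
optimizer.reset_gradients()
loss.backward()
optimizer.update()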