def test_append_word(self):
    """Check that ``LatticeDecoder._append_word()`` extends every token's
    history with the new word, advances the recurrent state, and
    accumulates the NNLM log-probability; finally verify
    ``recompute_total()`` applies the LM scale and per-word penalty.
    """
    options = {
        'nnlm_weight': 1.0,
        'lm_scale': 1.0,
        'wi_penalty': 0.0,
        'ignore_unk': False,
        'unk_penalty': 0.0,
        'linear_interpolation': False,
        'max_tokens_per_node': 10,
        'beam': None,
        'recombination_order': None
    }
    zero_state = RecurrentState(self.network.recurrent_state_size)
    tokens = [
        LatticeDecoder.Token(history=[self.sos_id], state=zero_state),
        LatticeDecoder.Token(history=[self.sos_id, self.yksi_id],
                             state=zero_state)
    ]
    decoder = LatticeDecoder(self.network, options)

    # Initial conditions: histories as constructed, all-zero recurrent
    # state, and no accumulated log-probability yet.
    self.assertSequenceEqual(tokens[0].history, [self.sos_id])
    self.assertSequenceEqual(tokens[1].history, [self.sos_id, self.yksi_id])
    expected_state = numpy.zeros(shape=(1, 1, 3)).astype(theano.config.floatX)
    for token in tokens:
        assert_equal(token.state.get(0), expected_state)
        self.assertEqual(token.nn_lm_logprob, 0.0)

    # First append: the dummy network is expected to move the state to
    # all ones and add log(P(prev) + P(kaksi)) to each token.
    decoder._append_word(tokens, self.kaksi_id)
    self.assertSequenceEqual(tokens[0].history, [self.sos_id, self.kaksi_id])
    self.assertSequenceEqual(tokens[1].history,
                             [self.sos_id, self.yksi_id, self.kaksi_id])
    expected_state = numpy.ones(shape=(1, 1, 3)).astype(theano.config.floatX)
    assert_equal(tokens[0].state.get(0), expected_state)
    assert_equal(tokens[1].state.get(0), expected_state)
    logprob1 = math.log(self.sos_prob + self.kaksi_prob)
    logprob2 = math.log(self.yksi_prob + self.kaksi_prob)
    self.assertAlmostEqual(tokens[0].nn_lm_logprob, logprob1)
    self.assertAlmostEqual(tokens[1].nn_lm_logprob, logprob2)

    # Second append (end of sentence): state becomes all twos and both
    # tokens gain the same increment, log(P(kaksi) + P(eos)).
    decoder._append_word(tokens, self.eos_id)
    self.assertSequenceEqual(tokens[0].history,
                             [self.sos_id, self.kaksi_id, self.eos_id])
    self.assertSequenceEqual(
        tokens[1].history,
        [self.sos_id, self.yksi_id, self.kaksi_id, self.eos_id])
    expected_state = \
        numpy.ones(shape=(1, 1, 3)).astype(theano.config.floatX) * 2
    assert_equal(tokens[0].state.get(0), expected_state)
    assert_equal(tokens[1].state.get(0), expected_state)
    logprob1 += math.log(self.kaksi_prob + self.eos_prob)
    logprob2 += math.log(self.kaksi_prob + self.eos_prob)
    self.assertAlmostEqual(tokens[0].nn_lm_logprob, logprob1)
    self.assertAlmostEqual(tokens[1].nn_lm_logprob, logprob2)

    # Total logprob = nn_lm_logprob * lm_scale + wi_penalty * num_words:
    # token 1 has 3 words (-0.03), token 2 has 4 words (-0.04).
    lm_scale = 2.0
    tokens[0].recompute_total(1.0, lm_scale, -0.01)
    tokens[1].recompute_total(1.0, lm_scale, -0.01)
    self.assertAlmostEqual(tokens[0].total_logprob,
                           logprob1 * lm_scale - 0.03)
    self.assertAlmostEqual(tokens[1].total_logprob,
                           logprob2 * lm_scale - 0.04)
def test_append_word(self):
    """Exercise ``LatticeDecoder._append_word()`` on two tokens.

    After each call the token histories must grow by the appended word,
    the recurrent state must advance (zeros -> ones -> twos in the dummy
    network), and ``nn_lm_logprob`` must accumulate the word
    log-probabilities. ``recompute_total()`` is then checked against a
    hand-computed scaled total.

    NOTE(review): this appears to be a duplicate definition of
    ``test_append_word``; if both copies live in the same class, only
    the later one runs — confirm and drop the redundant copy.
    """
    decoding_options = {
        'nnlm_weight': 1.0,
        'lm_scale': 1.0,
        'wi_penalty': 0.0,
        'ignore_unk': False,
        'unk_penalty': 0.0,
        'linear_interpolation': False,
        'max_tokens_per_node': 10,
        'beam': None,
        'recombination_order': None
    }

    def state_filled_with(value):
        # Expected (1, 1, 3) recurrent-state array filled with `value`,
        # in the Theano float type used by the network.
        return numpy.full((1, 1, 3), value).astype(theano.config.floatX)

    initial_state = RecurrentState(self.network.recurrent_state_size)
    token1 = LatticeDecoder.Token(history=[self.sos_id],
                                  state=initial_state)
    token2 = LatticeDecoder.Token(history=[self.sos_id, self.yksi_id],
                                  state=initial_state)
    decoder = LatticeDecoder(self.network, decoding_options)

    # Before any append: given histories, zero state, zero logprob.
    self.assertSequenceEqual(token1.history, [self.sos_id])
    self.assertSequenceEqual(token2.history, [self.sos_id, self.yksi_id])
    assert_equal(token1.state.get(0), state_filled_with(0.0))
    assert_equal(token2.state.get(0), state_filled_with(0.0))
    self.assertEqual(token1.nn_lm_logprob, 0.0)
    self.assertEqual(token2.nn_lm_logprob, 0.0)

    # Append "kaksi" to both tokens.
    decoder._append_word([token1, token2], self.kaksi_id)
    self.assertSequenceEqual(token1.history, [self.sos_id, self.kaksi_id])
    self.assertSequenceEqual(token2.history,
                             [self.sos_id, self.yksi_id, self.kaksi_id])
    assert_equal(token1.state.get(0), state_filled_with(1.0))
    assert_equal(token2.state.get(0), state_filled_with(1.0))
    token1_nn_lm_logprob = math.log(self.sos_prob + self.kaksi_prob)
    token2_nn_lm_logprob = math.log(self.yksi_prob + self.kaksi_prob)
    self.assertAlmostEqual(token1.nn_lm_logprob, token1_nn_lm_logprob)
    self.assertAlmostEqual(token2.nn_lm_logprob, token2_nn_lm_logprob)

    # Append the sentence-end word to both tokens.
    decoder._append_word([token1, token2], self.eos_id)
    self.assertSequenceEqual(token1.history,
                             [self.sos_id, self.kaksi_id, self.eos_id])
    self.assertSequenceEqual(
        token2.history,
        [self.sos_id, self.yksi_id, self.kaksi_id, self.eos_id])
    assert_equal(token1.state.get(0), state_filled_with(2.0))
    assert_equal(token2.state.get(0), state_filled_with(2.0))
    token1_nn_lm_logprob += math.log(self.kaksi_prob + self.eos_prob)
    token2_nn_lm_logprob += math.log(self.kaksi_prob + self.eos_prob)
    self.assertAlmostEqual(token1.nn_lm_logprob, token1_nn_lm_logprob)
    self.assertAlmostEqual(token2.nn_lm_logprob, token2_nn_lm_logprob)

    # Recompute totals with lm_scale=2 and wi_penalty=-0.01 per word:
    # 3 words in token1's history (-0.03), 4 in token2's (-0.04).
    lm_scale = 2.0
    token1.recompute_total(1.0, lm_scale, -0.01)
    token2.recompute_total(1.0, lm_scale, -0.01)
    self.assertAlmostEqual(token1.total_logprob,
                           token1_nn_lm_logprob * lm_scale - 0.03)
    self.assertAlmostEqual(token2.total_logprob,
                           token2_nn_lm_logprob * lm_scale - 0.04)