def test_crossentropy_softmax_and_gradient_z_sparse():
    """Compare the native sparse CE-softmax (+ grad wrt z) against the slow reference.

    Builds a small sparse target (time indices s0, dim indices s1, weights w,
    mask m), a random input z, and checks that the fast and slow implementations
    agree on both the cross-entropy and the gradient wrt z.
    """
    n_time, n_batch, n_dim = 3, 2, 5
    # Sparse target representation; shapes are (n_sparse, n_batch).
    s0 = np.array([[0, 0], [0, 1], [1, 1], [1, 2], [1, 2], [2, 2], [2, 2]], dtype=f32)
    s1 = np.array([[1, 2], [2, 3], [1, 1], [2, 0], [4, 1], [3, 3], [4, 4]], dtype=f32)
    w = np.array(
        [[.3, 1], [.7, .4], [.1, .6], [.3, .2], [.6, .3], [.4, .5], [.6, 9]],
        dtype=f32)
    m = np.array([[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 0]], dtype=f32)
    print("y_target:\n%r" % NativeOp.sparse_to_dense(s0, s1, w, m, n_time, n_dim).eval())
    np.random.seed(123)
    z = np.random.randn(n_time, n_batch, n_dim).astype(f32)
    print("z:\n%r" % z)
    print("y (softmax(z)):\n%r" % TheanoUtil.softmax(z).eval())
    z_mask = np.array([[1, 1], [1, 1], [1, 1]], dtype=f32)
    args = (z, z_mask, s0, s1, w, m)
    # Fast (native) vs. slow (reference) implementation.
    fast = NativeOp.crossentropy_softmax_and_gradient_z_sparse(*args)
    slow = NativeOp.crossentropy_softmax_and_gradient_z_sparse__slow(*args)
    ce1, gradz1 = (out.eval() for out in fast)
    ce2, gradz2 = (out.eval() for out in slow)
    print("ce1:\n%r" % ce1)
    print("ce2:\n%r" % ce2)
    print("gradz1:\n%r" % gradz1)
    print("gradz2:\n%r" % gradz2)
    assert_almost_equal(ce1, ce2)
    assert_almost_equal(gradz1, gradz2)
def test_crossentropy_softmax_and_gradient_z_sparse():
    """Native vs. slow-reference check for sparse crossentropy-softmax and its z-gradient.

    The sparse target is given as (s0=time idx, s1=dim idx, w=weight, m=mask),
    each of shape (n_sparse, n_batch); both implementations must produce
    (almost) equal cross-entropy values and gradients wrt z.
    """
    n_time = 3
    n_batch = 2
    n_dim = 5
    s0 = np.array([[0, 0], [0, 1], [1, 1], [1, 2], [1, 2], [2, 2], [2, 2]], dtype=f32)
    s1 = np.array([[1, 2], [2, 3], [1, 1], [2, 0], [4, 1], [3, 3], [4, 4]], dtype=f32)
    w = np.array(
        [[.3, 1], [.7, .4], [.1, .6], [.3, .2], [.6, .3], [.4, .5], [.6, 9]],
        dtype=f32)
    m = np.array([[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 0]], dtype=f32)
    print("y_target:\n%r" % NativeOp.sparse_to_dense(s0, s1, w, m, n_time, n_dim).eval())
    np.random.seed(123)
    z = np.random.randn(n_time, n_batch, n_dim).astype(f32)
    print("z:\n%r" % z)
    print("y (softmax(z)):\n%r" % TheanoUtil.softmax(z).eval())
    z_mask = np.array([[1, 1], [1, 1], [1, 1]], dtype=f32)
    args = (z, z_mask, s0, s1, w, m)
    ce1, gradz1 = NativeOp.crossentropy_softmax_and_gradient_z_sparse(*args)
    ce2, gradz2 = NativeOp.crossentropy_softmax_and_gradient_z_sparse__slow(*args)
    # Evaluate the symbolic outputs to numpy arrays before comparing.
    ce1, ce2 = ce1.eval(), ce2.eval()
    gradz1, gradz2 = gradz1.eval(), gradz2.eval()
    print("ce1:\n%r" % ce1)
    print("ce2:\n%r" % ce2)
    print("gradz1:\n%r" % gradz1)
    print("gradz2:\n%r" % gradz2)
    assert_almost_equal(ce1, ce2)
    assert_almost_equal(gradz1, gradz2)
def test_crossentropy_softmax_and_gradient_z_sparse_viterbi():
    """Sparse CE-softmax on a one-hot (Viterbi-style) alignment vs. Theano's 1hot CE.

    Converts a dense alignment to the sparse target format via
    ``onehot_to_sparse`` and computes the NLL with both the native sparse op and
    ``T.nnet.crossentropy_softmax_1hot``.
    NOTE(review): only prints both NLLs; no assertion compares them — confirm
    whether an equality check was intended here.
    """
    n_time, n_batch, n_dim = 3, 2, 5
    # Dense alignment (one target class per (time, batch)) plus its mask.
    alignment = np.array([[0, 1], [1, 2], [2, 3]], dtype="int32")
    mask = np.array([[1, 1], [1, 1], [1, 1]], dtype=f32)
    y_t, y_i, y_w, y_mask = NativeOp.onehot_to_sparse(alignment, mask)
    np.random.seed(123)
    z = np.random.randn(n_time, n_batch, n_dim).astype(f32)
    z_mask = np.array([[1, 1], [1, 1], [1, 1]], dtype=f32)
    # Reference: Theano's built-in one-hot crossentropy over flattened (time*batch).
    flat_z = T.as_tensor_variable(z).reshape((n_time * n_batch, n_dim))
    flat_y = T.as_tensor_variable(alignment).reshape((n_time * n_batch,))
    nll1, _pcx1 = T.nnet.crossentropy_softmax_1hot(x=flat_z, y_idx=flat_y)
    nll2, _gradz2 = NativeOp.crossentropy_softmax_and_gradient_z_sparse(
        z, z_mask, y_t, y_i, y_w, y_mask)
    nll1, nll2 = nll1.eval(), nll2.eval()
    print("nll1:\n%r" % nll1)
    print("nll2:\n%r" % nll2)
def test_crossentropy_softmax_and_gradient_z_sparse_viterbi():
    """Check the native sparse CE-softmax against Theano's 1hot crossentropy.

    A dense one-hot alignment is turned into the sparse (t, i, w, mask) target
    format, then the NLL is computed both ways and printed.
    NOTE(review): the test prints nll1/nll2 without asserting they agree —
    verify whether that is intentional.
    """
    n_time = 3
    n_batch = 2
    n_dim = 5
    alignment = np.array([[0, 1], [1, 2], [2, 3]], dtype="int32")
    mask = np.array([[1, 1], [1, 1], [1, 1]], dtype=f32)
    # Sparse view of the one-hot alignment.
    y_t, y_i, y_w, y_mask = NativeOp.onehot_to_sparse(alignment, mask)
    np.random.seed(123)
    z = np.random.randn(n_time, n_batch, n_dim).astype(f32)
    z_mask = np.array([[1, 1], [1, 1], [1, 1]], dtype=f32)
    nll1, _pcx1 = T.nnet.crossentropy_softmax_1hot(
        x=T.as_tensor_variable(z).reshape((n_time * n_batch, n_dim)),
        y_idx=T.as_tensor_variable(alignment).reshape((n_time * n_batch,)))
    nll2, _gradz2 = NativeOp.crossentropy_softmax_and_gradient_z_sparse(
        z, z_mask, y_t, y_i, y_w, y_mask)
    print("nll1:\n%r" % nll1.eval())
    print("nll2:\n%r" % nll2.eval())