def test_op_cross_entropy_with_soft_max(output_vector, target_vector, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    o = AA(output_vector, dtype=dt)
    t = AA(target_vector, dtype=dt)

    ox = o - o.max()  # subtract max to avoid overflow
    exp_x = np.exp(ox)
    s_max = exp_x / np.sum(exp_x)  # softmax function

    expected_forward = np.asarray(-np.sum(t * np.log(s_max, dtype=dt), dtype=dt))
    expected_forward.shape = (1, 1, 1) + expected_forward.shape

    # gradient w.r.t. the output: softmax(o) * sum(t) - t
    s = np.sum(t, dtype=dt)
    backward = np.subtract(s_max * s, t)
    backward.shape = (1,) + backward.shape

    expected_backward = {
        'left_arg':  backward,
        'right_arg': [-1 * o]
    }

    from cntk.losses import cross_entropy_with_softmax
    _test_binary_op(precision, device_id, cross_entropy_with_softmax,
                    output_vector, target_vector,
                    expected_forward, expected_backward)

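# Illustration (not part of the original suite): a minimal numpy sketch of the
# closed forms the test above checks: forward = -sum(t * log(softmax(o))) and,
# w.r.t. o, backward = softmax(o) * sum(t) - t, which reduces to softmax(o) - t
# for a one-hot target. The helper name is hypothetical.
def _reference_cross_entropy_with_softmax(o, t):
    o = np.asarray(o, dtype=np.float64)
    t = np.asarray(t, dtype=np.float64)
    ox = o - o.max()                         # stabilize exp against overflow
    s_max = np.exp(ox) / np.sum(np.exp(ox))  # softmax
    forward = -np.sum(t * np.log(s_max))
    backward = s_max * np.sum(t) - t         # gradient w.r.t. o
    return forward, backward
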
def test_op_cross_entropy_with_soft_max_and_axis(output_vector, target_vector, axis, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    x = AA(output_vector, dtype=dt)
    t = AA(target_vector, dtype=dt)

    expected_forward = []
    expected_backward_left = []
    expected_backward_right = []

    for sample, target in zip(x, t):
        ox = sample - sample.max()  # subtract max to avoid overflow
        exp_x = np.exp(ox)
        s_max = exp_x / np.sum(exp_x)  # softmax function

        forward = np.asarray(-np.sum(target * np.log(s_max, dtype=dt), dtype=dt))
        expected_forward.append(forward.tolist())

        s = np.sum(target, dtype=dt)
        backward = np.subtract(s_max * s, target)
        expected_backward_left.append(backward.tolist())
        expected_backward_right.append(-1 * sample)

    expected_forward = [np.reshape(AA(expected_forward, dtype=dt), (x.shape[0], 1))]
    expected_backward_left = AA(expected_backward_left, dtype=dt)

    expected_backward = {
        'left_arg':  [expected_backward_left],
        'right_arg': [expected_backward_right]
    }

    from cntk.losses import cross_entropy_with_softmax
    _test_binary_op(precision, device_id, cross_entropy_with_softmax,
                    output_vector, target_vector,
                    expected_forward, expected_backward,
                    op_param_dict={'axis': axis})

def test_op_classification_error(output_vector, target_vector, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    o = AA(output_vector, dtype=dt)
    t = AA(target_vector, dtype=dt)

    # the error is 1 when the predicted class (argmax of the output) differs
    # from the labeled class (argmax of the target), 0 otherwise
    different_position = np.argmax(t) != np.argmax(o)
    expected_forward = [AA([[int(different_position)]], dtype=dt)]

    left_backward = np.zeros_like(t, dtype=dt)
    right_backward = np.zeros_like(t, dtype=dt)
    right_backward[..., np.argmax(o)] = -1.

    expected_backward = {
        'left_arg':  [left_backward],
        'right_arg': [right_backward]
    }

    from cntk.metrics import classification_error
    _test_binary_op(precision, device_id, classification_error,
                    output_vector, target_vector,
                    expected_forward, expected_backward)

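# Illustration (not part of the original suite): on a single sample,
# classification_error reduces to an argmax comparison: 0.0 when the
# prediction and the target agree on the top class, 1.0 otherwise. The
# helper name is hypothetical.
def _reference_classification_error(o, t):
    return float(np.argmax(np.asarray(o)) != np.argmax(np.asarray(t)))
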
def test_op_classification_error_with_axis(output_vector, target_vector, axis, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    x = AA(output_vector, dtype=dt)
    t = AA(target_vector, dtype=dt)

    forward = []
    expected_backward_left = []
    expected_backward_right = []

    for sample, target in zip(x, t):
        different_position = np.argmax(target) != np.argmax(sample)
        forward.append([int(different_position)])

        # with an explicit axis, the expected gradients are all zeros
        zero_backward = np.zeros_like(target, dtype=dt)
        expected_backward_left.append(zero_backward)
        expected_backward_right.append(zero_backward)

    # the result is the error averaged over the samples
    forward = np.mean(forward)

    expected_forward = AA([forward], dtype=dt)
    expected_backward_left = AA([expected_backward_left], dtype=dt)
    expected_backward_right = AA([expected_backward_right], dtype=dt)

    expected_backward = {
        'left_arg':  expected_backward_left,
        'right_arg': expected_backward_right
    }

    from cntk.metrics import classification_error
    _test_binary_op(precision, device_id, classification_error,
                    output_vector, target_vector,
                    expected_forward, expected_backward,
                    op_param_dict={'axis': axis})

def test_lambda_rank(grad, value, output, gain, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]

    score = AA(output, dtype=dt).reshape(-1, 1, 1)
    gain = AA(gain, dtype=dt).reshape(-1, 1, 1)
    group = np.ones_like(score).reshape(-1, 1, 1)  # all samples in one query group

    expected_value = AA(value, dtype=dt)
    expected_grad = AA(grad, dtype=dt)

    from cntk.losses import lambda_rank

    g = C.input_variable((1,))
    s = C.input_variable((1,), needs_gradient=True)
    n = C.input_variable((1,))
    f = lambda_rank(s, n, g)

    # grad() with outputs specified returns both the gradients and the outputs
    actual_grad, actual_value = f.grad({s: score, n: gain, g: group},
                                       [s], [f.output])

    assert np.allclose(actual_value, expected_value)
    assert np.allclose(actual_grad, expected_grad)

def test_op_squared_error(output_vector, target_vector, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    o = AA(output_vector, dtype=dt)
    t = AA(target_vector, dtype=dt)

    expected_forward = AA([np.sum((t - o)**2)])

    backward = 2 * np.subtract(o, t)  # gradient w.r.t. the output
    expected_backward = {
        'left_arg':  [backward],
        'right_arg': [-1 * backward]
    }

    from cntk.losses import squared_error
    _test_binary_op(precision, device_id, squared_error,
                    output_vector, target_vector,
                    expected_forward, expected_backward)

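# Illustration (not part of the original suite): the closed forms the
# squared_error test checks, as a standalone numpy helper. The loss is
# sum((o - t)^2); its gradient w.r.t. the output is 2 * (o - t), and w.r.t.
# the target it is the negative of that. The helper name is hypothetical.
def _reference_squared_error(o, t):
    o = np.asarray(o, dtype=np.float64)
    t = np.asarray(t, dtype=np.float64)
    forward = np.sum((o - t) ** 2)
    d_output = 2.0 * (o - t)
    d_target = -d_output
    return forward, d_output, d_target
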
def test_nce_loss(classes, xdim, batch, expected_value, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    from cntk.losses import nce_loss
    import scipy.sparse

    x = C.input_variable(xdim, needs_gradient=True)
    y = C.input_variable(classes, is_sparse=True)

    x0 = np.arange(batch * xdim, dtype=dt).reshape((batch, xdim)) / (batch * xdim)

    # sparse one-hot labels: sample i is hot at column 10 * (i + 1)
    data = np.ones(batch, dtype=dt)
    indices = list(range(10, 10 * batch + 1, 10))
    indptr = list(range(batch + 1))
    y0 = scipy.sparse.csr_matrix((data, indices, indptr), shape=(batch, classes))

    q = np.arange(classes, dtype=dt) + 1  # noise distribution over the classes

    b = C.parameter((classes, 1), init=-np.log(classes))
    W = C.parameter((classes, C.InferredDimension),
                    init=C.glorot_uniform(seed=98052))

    loss = nce_loss(W, b, x, y, q, seed=98052)
    v = loss.grad({x: x0, y: y0}, wrt=loss.parameters, as_numpy=False)
    for key in v:
        assert v[key].is_sparse, "gradient of nce_loss with respect to %s is not sparse" % key

    # nce_loss draws random noise samples, so average many evaluations
    # before comparing against the expected value
    losses = np.zeros((100, batch))
    for i in range(100):
        losses[i, :] = loss.eval({x: x0, y: y0})
    assert np.allclose(np.mean(losses, axis=0), AA(expected_value))

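# Illustration (not part of the original suite): a concrete view of the CSR
# triple built in test_nce_loss. For batch=2 and classes=100, the
# (data, indices, indptr) construction places a single 1.0 in row 0 at
# column 10 and in row 1 at column 20, i.e. one one-hot label per sample.
# The helper name is hypothetical and it is not collected by pytest.
def _illustrate_csr_labels():
    import scipy.sparse
    y0 = scipy.sparse.csr_matrix((np.ones(2), [10, 20], [0, 1, 2]),
                                 shape=(2, 100))
    assert (y0.toarray().nonzero()[1] == np.array([10, 20])).all()
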
def test_ndcg(value, output, gain, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]

    score = AA(output, dtype=dt).reshape(-1, 1, 1)
    gain = AA(gain, dtype=dt).reshape(-1, 1, 1)
    group = np.ones_like(score).reshape(-1, 1, 1)  # all samples in one query group

    expected_value = AA(value, dtype=dt)

    from cntk.metrics import ndcg_at_1

    g = C.input_variable((1,))
    s = C.input_variable((1,))
    n = C.input_variable((1,))
    f = ndcg_at_1(s, n, g)

    actual_value = f.eval({s: score, n: gain, g: group})
    assert np.allclose(actual_value, expected_value)

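# Illustration (not part of the original suite): the textbook NDCG@1 for a
# single query group, for intuition only. Whether cntk's ndcg_at_1 applies an
# extra scaling (e.g. reporting a percentage) is not asserted here; the
# helper name is hypothetical.
def _reference_ndcg_at_1(score, gain):
    score = np.asarray(score, dtype=np.float64).ravel()
    gain = np.asarray(gain, dtype=np.float64).ravel()
    dcg_at_1 = gain[np.argmax(score)]  # gain of the item ranked first
    ideal_at_1 = gain.max()            # best achievable gain at rank 1
    return dcg_at_1 / ideal_at_1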