def test_cast_axes(transformer_factory):
    C = ng.make_axis(name='C')
    D = ng.make_axis(name='D')

    ex = ExecutorFactory()

    C.length = 2
    D.length = 3

    x = ng.placeholder((C, D))
    x_slice = x[1, :]

    # Cast back to known axes
    x_cast = ng.cast_axes(x_slice, [D])

    # Verify that the D-axis tensor broadcasts back along C when added to x
    y = x + x_cast
    y_fun = ex.executor(y, x)
    num_deriv_fun = ex.numeric_derivative(y, x, delta)
    sym_deriv_fun = ex.derivative(y, x)

    x_np = np.array([[10, 20, 30], [1, 2, 3]], dtype='float32')
    assert np.allclose(y_fun(x_np),
                       np.array([[11, 22, 33], [2, 4, 6]], dtype='float32'))
    assert np.allclose(num_deriv_fun(x_np), sym_deriv_fun(x_np),
                       rtol=rtol, atol=atol)
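

# A minimal numpy sketch of the broadcast exercised by test_cast_axes above
# (illustrative only; the helper name is ours, not part of the test suite):
# after cast_axes relabels x[1, :] with axis D, adding it to x repeats that
# row across the C axis.
def _cast_axes_numpy_reference():
    x_np = np.array([[10, 20, 30], [1, 2, 3]], dtype='float32')
    # row 1 broadcasts over both rows, matching the expected y_fun output
    return x_np + x_np[1, :]  # -> [[11, 22, 33], [2, 4, 6]]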


def test_cross_entropy_binary(transformer_factory):
    """TODO."""
    N = ng.make_axis(name='N')
    W = ng.make_axis(name='W')

    delta = .001
    W.length = 20
    N.length = 128
    axes = ng.make_axes([W, N])
    p_u = ng.placeholder(axes)
    u = rng.uniform(-3.0, 3.0, p_u.axes)
    p_v = ng.placeholder(axes)
    v = rng.uniform(-3.0, 3.0, p_v.axes)

    y = ng.sigmoid(p_u)
    t = ng.softmax(p_v)
    val_u = ng.cross_entropy_binary_inner(y, t)

    ex = ExecutorFactory()
    dval_u_num_fun = ex.numeric_derivative(val_u, p_u, delta, p_v)
    dval_u_graph_fun = ex.derivative(val_u, p_u, p_v)

    dval_u_num = dval_u_num_fun(u, v)
    dval_u_graph = dval_u_graph_fun(u, v)
    np.testing.assert_allclose(dval_u_graph, dval_u_num, atol=1e-2, rtol=1e-2)


def check_derivative(f, x, delta, x_value, parameters=[], parameter_values=[], **kwargs):
    """
    Check that the numeric and symbolic derivatives of f with respect to x
    are the same when x has value x_value.

    Arguments:
        f: function to take the derivative of
        x: variable to take the derivative with respect to
        delta: distance to perturb x by for the numeric derivative
        x_value: the value of x at which to compute the derivative of f
        parameters: extra parameters to f
        parameter_values: values of the extra parameters to f
        kwargs: passed to assert_allclose. Useful for atol/rtol.
    """
    ex = ExecutorFactory()

    dfdx_numeric = ex.numeric_derivative(f, x, delta, *parameters)
    dfdx_symbolic = ex.derivative(f, x, *parameters)

    np.testing.assert_allclose(
        dfdx_numeric(x_value, *parameter_values),
        dfdx_symbolic(x_value, *parameter_values),
        **kwargs
    )
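

# Illustrative usage of check_derivative (a sketch; the axis name, function,
# values, and tolerances below are hypothetical, not taken from an existing
# test):
#
#     A = ng.make_axis(name='A', length=3)
#     x = ng.placeholder([A])
#     x_value = np.array([1.0, 2.0, 3.0], dtype='float32')
#     check_derivative(ng.sum(x * x, out_axes=()), x, 0.001, x_value,
#                      atol=1e-3, rtol=1e-3)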


def test_stack(transformer_factory):
    ax = ng.make_name_scope(name="ax")
    ax.W = ng.make_axis(length=4)
    ax.H = ng.make_axis(length=5)
    ax.I = ng.make_axis(length=3)

    axes = ng.make_axes([ax.W, ax.H])

    rng = RandomTensorGenerator(0, np.float32)
    a_v = [rng.uniform(0, 1, axes) for i in range(ax.I.length)]

    for pos in range(len(axes) + 1):
        a = [ng.placeholder(axes, initial_value=_) for _ in a_v]

        s = ng.stack(a, ax.I, pos)

        ex = ExecutorFactory()

        num_funs = [ex.numeric_derivative(s, _, delta) for _ in a]
        sym_funs = [ex.derivative(s, _) for _ in a]

        ex.transformer.initialize()

        for n_fun, s_fun, a_i in zip(num_funs, sym_funs, a_v):
            d_n = n_fun(a_i)
            d_s = s_fun(a_i)
            assert np.allclose(d_n, d_s, rtol=rtol, atol=atol)


def compare_tensors(func, inputs, expected_result, deriv=False, tol=0.):
    ex = ExecutorFactory()
    C = ng.make_axis(name='C')
    N = ng.make_axis(name='N', batch=True)
    C.length, N.length = inputs.shape
    x = ng.placeholder([C, N])

    if deriv is False:
        costfunc = ex.executor(func(x), x)
        result = costfunc(inputs)
    else:
        costfunc = ex.derivative(func(x), x)
        result = costfunc(inputs)

        # hack to get derivatives
        result = result.ravel()
        result = result[0:result.size:(C.length * N.length + 1)]
        result = result.reshape(inputs.shape)

    np.testing.assert_allclose(result, expected_result, rtol=tol)
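

# The derivative "hack" in compare_tensors extracts the diagonal of the
# Jacobian: an elementwise function has a diagonal Jacobian, and striding
# through the raveled array by (C.length * N.length + 1) visits exactly the
# diagonal entries. A self-contained numpy sketch of the same extraction
# (the helper name and the size 6 are ours, for illustration):
def _jacobian_diagonal_sketch():
    n = 6                                 # stand-in for C.length * N.length
    jac = 2.0 * np.eye(n)                 # Jacobian of f(x) = 2 * x
    diag = jac.ravel()[0:jac.size:n + 1]  # same striding as compare_tensors
    return diag                           # -> array of six 2.0 entries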


def test_elementwise_unary_ops_matched_args(transformer_factory):
    """TODO."""
    delta = .001
    axes = ng.make_axes([ng.make_axis(20), ng.make_axis(20)])

    for np_op, be_op in ELEMENTWISE_UNARY_OPS:
        p_u = ng.placeholder(axes)
        u = rng.uniform(1.0, 2.0, p_u.axes)
        u_np = np_op(u)
        result_op = be_op(p_u)

        ex = ExecutorFactory()
        fun = ex.executor(result_op, p_u)
        dudunum_fun = ex.numeric_derivative(result_op, p_u, delta)
        dudut_fun = ex.derivative(result_op, p_u)

        u_t = fun(u)
        np.testing.assert_allclose(u_np, u_t, atol=1e-4, rtol=1e-4)
        dudunum = dudunum_fun(u)
        dudut = dudut_fun(u)
        np.testing.assert_allclose(dudunum, dudut, atol=1e-3, rtol=1e-3)


def test_dot_sum_backprop(transformer_factory):
    delta = 1e-3
    rtol = atol = 1e-2

    C = ng.make_axis(name='C', length=2)
    N = ng.make_axis(name='N', length=3, batch=True)

    x_axes = ng.make_axes((C - 1, N))
    y_axes = ng.make_axes((C,))
    x_np = np.random.random(x_axes.lengths).astype('float32')
    y_np = np.random.random(y_axes.lengths).astype('float32')

    # x_np[...] = [[1.0, 0.0, 1.0], [2.0, 0.0, 3.0]]
    # y_np[...] = [-1.0, 1.0]

    x = ng.placeholder(x_axes)
    y = ng.placeholder(y_axes)
    d = ng.dot(x, y)
    s = ng.sum(d, out_axes=())

    ex = ExecutorFactory()
    s_fun = ex.executor(s, x, y)
    d_fun = ex.executor(d, x, y)

    dd_dx_fun_num = ex.numeric_derivative(d, x, delta, y)
    dd_dx_fun_sym = ex.derivative(d, x, y)

    dd_dy_fun_num = ex.numeric_derivative(d, y, delta, x)
    dd_dy_fun_sym = ex.derivative(d, y, x)

    ds_dx_fun_num = ex.numeric_derivative(s, x, delta, y)
    ds_dx_fun_sym = ex.derivative(s, x, y)

    ds_dy_fun_num = ex.numeric_derivative(s, y, delta, x)
    ds_dy_fun_sym = ex.derivative(s, y, x)

    # assert outputs are equal
    d_np = x_np.T.dot(y_np)
    d_val = d_fun(x_np, y_np)
    np.testing.assert_allclose(d_np, d_val, rtol=rtol, atol=atol)

    dd_dx_val_num = dd_dx_fun_num(x_np, y_np)
    dd_dx_val_sym = dd_dx_fun_sym(x_np, y_np)
    np.testing.assert_allclose(dd_dx_val_num, dd_dx_val_sym, rtol=rtol, atol=atol)

    dd_dy_val_num = dd_dy_fun_num(y_np, x_np)
    dd_dy_val_sym = dd_dy_fun_sym(y_np, x_np)
    np.testing.assert_allclose(dd_dy_val_num, dd_dy_val_sym, rtol=rtol, atol=atol)

    s_np = np.sum(d_np)
    s_val = s_fun(x_np, y_np)
    np.testing.assert_allclose(s_val, s_np, rtol=rtol, atol=atol)

    # assert derivative wrt both tensors is the same when computed
    # symbolically by ngraph and numerically
    ds_dx_val_num = ds_dx_fun_num(x_np, y_np)
    ds_dx_val_sym = ds_dx_fun_sym(x_np, y_np)
    np.testing.assert_allclose(ds_dx_val_num, ds_dx_val_sym, rtol=rtol, atol=atol)

    ds_dy_val_num = ds_dy_fun_num(y_np, x_np)
    ds_dy_val_sym = ds_dy_fun_sym(y_np, x_np)
    np.testing.assert_allclose(ds_dy_val_num, ds_dy_val_sym, rtol=rtol, atol=atol)
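

# Why x_np.T.dot(y_np) is the reference in test_dot_sum_backprop: ng.dot
# contracts y's C axis against x's dual (C - 1) axis, leaving the batch axis
# N. With x of shape (C, N) and y of shape (C,), that is the matrix product
# x.T @ y. A sketch using the commented-out values from the test:
#
#     x = np.array([[1.0, 0.0, 1.0], [2.0, 0.0, 3.0]])  # axes (C - 1, N)
#     y = np.array([-1.0, 1.0])                         # axes (C,)
#     x.T.dot(y)                                        # -> [1., 0., 2.]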


def test_tensor_dot_tensor(transformer_factory):
    """TODO."""
    C = ng.make_axis(name='C')
    D = ng.make_axis(name='D')
    H = ng.make_axis(name='H')
    N = ng.make_axis(name='N')

    tests = [{
        'tensor1': [[1, 2], [4, 5], [3, 4]],
        'tensor1_axes': (C, D - 1),
        'tensor2': [2, 5],
        'tensor2_axes': (D,),
        'expected_output': [12, 33, 26],
        'axes_lengths': {C: 3, D: 2}
    }, {
        'tensor1': [[1, 4, 3], [2, 5, 4]],
        'tensor1_axes': (D - 1, C),
        'tensor2': [2, 5],
        'tensor2_axes': (D,),
        'expected_output': [12, 33, 26],
        'axes_lengths': {C: 3, D: 2}
    }, {
        'tensor1': [[[1, 4], [2, 5]], [[7, 12], [13, 2]]],
        'tensor1_axes': (N, D - 1, C - 1),
        'tensor2': [[[3, 6], [7, 2]], [[9, 8], [10, 4]]],
        'tensor2_axes': (H, D, C),
        'expected_output': [[51, 81], [188, 297]],
        'axes_lengths': {N: 2, D: 2, C: 2, H: 2}
    }, {
        'tensor1': [1, 2],
        'tensor1_axes': (C,),
        'tensor2': [7, 11, 13],
        'tensor2_axes': (D,),
        'expected_output': [[7, 11, 13], [14, 22, 26]],
        'axes_lengths': {C: 2, D: 3}
    }, {
        'tensor1': [[1, 4], [6, 2]],
        'tensor1_axes': (C - 1, D - 1),
        'tensor2': [[1, 4], [6, 2]],
        'tensor2_axes': (C, D),
        'expected_output': 57,
        'axes_lengths': {C: 2, D: 2}
    }]

    for test in tests:
        # set up the axis lengths for this case
        for axis, length in test['axes_lengths'].items():
            axis.length = length

        # set up tensors
        tensor1 = ng.placeholder(test['tensor1_axes'])
        value1 = np.array(test['tensor1'], dtype=np.float32)

        tensor2 = ng.placeholder(test['tensor2_axes'])
        value2 = np.array(test['tensor2'], dtype=np.float32)

        # compute outputs
        expected_output = np.array(test['expected_output'], dtype=np.float32)

        ex = ExecutorFactory()
        dot = ng.dot(tensor1, tensor2)

        evaluated_fun = ex.executor(dot, tensor1, tensor2)
        deriv1_fun_num = ex.numeric_derivative(dot, tensor1, 1e-3, tensor2)
        deriv1_fun_sym = ex.derivative(dot, tensor1, tensor2)
        deriv2_fun_num = ex.numeric_derivative(dot, tensor2, 1e-3, tensor1)
        deriv2_fun_sym = ex.derivative(dot, tensor2, tensor1)

        # assert outputs are equal
        evaluated = evaluated_fun(value1, value2)
        np.testing.assert_equal(evaluated, expected_output)

        # assert derivative wrt both tensors is the same when computed
        # symbolically by ngraph and numerically
        deriv1_val_num = deriv1_fun_num(value1, value2)
        deriv1_val_sym = deriv1_fun_sym(value1, value2)
        np.testing.assert_allclose(deriv1_val_num, deriv1_val_sym, rtol=1e-2, atol=1e-2)

        deriv2_val_num = deriv2_fun_num(value2, value1)
        deriv2_val_sym = deriv2_fun_sym(value2, value1)
        np.testing.assert_allclose(deriv2_val_num, deriv2_val_sym, rtol=1e-2, atol=1e-2)


def check_rnn(seq_len, input_size, hidden_size, batch_size, init_func, return_seq=True):
    # init_func is the initializer for the model params
    assert batch_size == 1, "the recurrent reference implementation only supports batch size 1"

    # ========== neon model ==========
    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, recurrent=True)
    N = ng.make_axis(batch_size, batch=True)
    H = ng.make_axis(hidden_size)
    ax_s = ng.make_axes([H, N])

    ex = ExecutorFactory()
    np.random.seed(0)

    rnn_ng = Recurrent(hidden_size, init_func, activation=Tanh(),
                       reset_cells=True, return_sequence=return_seq)

    inp_ng = ng.placeholder([Cin, REC, N])
    init_state_ng = ng.placeholder(ax_s)

    # fprop graph
    out_ng = rnn_ng.train_outputs(inp_ng, init_state=init_state_ng)
    out_ng.input = True

    rnn_W_input = rnn_ng.W_input
    rnn_W_input.input = True
    rnn_W_recur = rnn_ng.W_recur
    rnn_W_recur.input = True
    rnn_b = rnn_ng.b
    rnn_b.input = True

    fprop_neon_fun = ex.executor(out_ng, inp_ng, init_state_ng)

    dWrecur_s_fun = ex.derivative(out_ng, rnn_W_recur, inp_ng, rnn_W_input, rnn_b)
    dWrecur_n_fun = ex.numeric_derivative(out_ng, rnn_W_recur, delta,
                                          inp_ng, rnn_W_input, rnn_b)
    dWinput_s_fun = ex.derivative(out_ng, rnn_W_input, inp_ng, rnn_W_recur, rnn_b)
    dWinput_n_fun = ex.numeric_derivative(out_ng, rnn_W_input, delta,
                                          inp_ng, rnn_W_recur, rnn_b)
    dWb_s_fun = ex.derivative(out_ng, rnn_b, inp_ng, rnn_W_input, rnn_W_recur)
    dWb_n_fun = ex.numeric_derivative(out_ng, rnn_b, delta,
                                      inp_ng, rnn_W_input, rnn_W_recur)

    # fprop on random inputs
    input_value = rng.uniform(-1, 1, inp_ng.axes)
    init_state_value = rng.uniform(-1, 1, init_state_ng.axes)
    fprop_neon = fprop_neon_fun(input_value, init_state_value).copy()

    # after the rnn graph has been executed, we can get the W values. Get
    # copies so shared values don't confuse derivatives
    Wxh_neon = rnn_ng.W_input.value.get(None).copy()
    Whh_neon = rnn_ng.W_recur.value.get(None).copy()
    bh_neon = rnn_ng.b.value.get(None).copy()

    # bprop derivs
    dWrecur_s = dWrecur_s_fun(Whh_neon, input_value, Wxh_neon, bh_neon)
    dWrecur_n = dWrecur_n_fun(Whh_neon, input_value, Wxh_neon, bh_neon)
    np.testing.assert_allclose(dWrecur_s, dWrecur_n, rtol=rtol, atol=atol)

    dWb_s = dWb_s_fun(bh_neon, input_value, Wxh_neon, Whh_neon)
    dWb_n = dWb_n_fun(bh_neon, input_value, Wxh_neon, Whh_neon)
    np.testing.assert_allclose(dWb_s, dWb_n, rtol=rtol, atol=atol)

    dWinput_s = dWinput_s_fun(Wxh_neon, input_value, Whh_neon, bh_neon)
    dWinput_n = dWinput_n_fun(Wxh_neon, input_value, Whh_neon, bh_neon)
    np.testing.assert_allclose(dWinput_s, dWinput_n, rtol=rtol, atol=atol)

    # ========== reference model ==========
    output_shape = (hidden_size, seq_len * batch_size)

    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    #   input_shape: (seq_len, input_size, batch_size)
    #   output_shape: (seq_len, hidden_size, batch_size)
    deltas_ref = deltas.copy().T.reshape(
        seq_len, batch_size, hidden_size).swapaxes(1, 2)

    inp_ref = input_value.transpose([1, 0, 2])

    # reference numpy RNN
    rnn_ref = RefRecurrent(input_size, hidden_size)
    rnn_ref.Wxh[:] = Wxh_neon
    rnn_ref.Whh[:] = Whh_neon
    rnn_ref.bh[:] = bh_neon.reshape(rnn_ref.bh.shape)

    (dWxh_ref, dWhh_ref, db_ref,
     h_ref_list, dh_ref_list, d_out_ref) = rnn_ref.lossFun(inp_ref, deltas_ref,
                                                           init_states=init_state_value)

    # compare outputs
    if return_seq is False:
        h_ref_list = h_ref_list[:, -1].reshape(-1, 1)
    else:
        fprop_neon = fprop_neon[:, :, 0]

    np.testing.assert_allclose(fprop_neon, h_ref_list, rtol=0.0, atol=1.0e-5)
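

# For reference, the recurrence both implementations in check_rnn should
# compute, with h_0 supplied by init_state (the standard Elman step, stated
# here as a reading aid rather than quoted from RefRecurrent):
#
#     h_t = tanh(W_input . x_t + W_recur . h_{t-1} + b)
#
# RefRecurrent.lossFun evaluates this step-by-step in numpy, and the neon
# graph's fprop output is required to match it to within atol=1e-5.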


def test_slice(transformer_factory):
    """TODO."""
    C = ng.make_axis(name='C')
    D = ng.make_axis(name='D')

    tests = [{
        'tensor': [[1, 3], [2, 5]],
        'tensor_axes': (C, D),
        'slice': [0, 1],
        'sliced_axes': (),
        'axes_lengths': {C: 2, D: 2},
        'expected': 3
    }, {
        'tensor': [[1, 3], [2, 5]],
        'tensor_axes': (C, D),
        'slice': [slice(None), 0],
        'sliced_axes': (C,),
        'axes_lengths': {C: 2, D: 2},
        'expected': [1, 2]
    }, {
        'tensor': [[1, 3], [2, 5]],
        'tensor_axes': (C, D),
        'slice': [1, slice(None)],
        'sliced_axes': (D,),
        'axes_lengths': {C: 2, D: 2},
        'expected': [2, 5]
    }, {
        'tensor': [[1, 4, 5], [2, 5, 6]],
        'tensor_axes': (C, D),
        'slice': [1, slice(1, 3)],
        'sliced_axes': None,
        'axes_lengths': {C: 2, D: 3},
        'expected': [5, 6]
    }, {
        'tensor': [[1, 4, 5], [2, 5, 6]],
        'tensor_axes': (C, D),
        'slice': [1, slice(None, None, -1)],
        'sliced_axes': None,
        'axes_lengths': {C: 2, D: 3},
        'expected': [6, 5, 2]
    }, {
        'tensor': [[1, 4, 5], [2, 5, 6]],
        'tensor_axes': (C, D),
        'slice': [slice(None, None, -1), slice(None, None, -1)],
        'sliced_axes': None,
        'axes_lengths': {C: 2, D: 3},
        'expected': [[6, 5, 2], [5, 4, 1]]
    }]

    for test in tests:
        ex = ExecutorFactory()
        for axis, length in test['axes_lengths'].items():
            axis.length = length
        tensor_axes = test['tensor_axes']

        tensor_np = np.array(test['tensor'], dtype='float32')
        tensor = ng.placeholder(tensor_axes)
        expected = np.array(test['expected'], dtype='float32')

        s = test['slice']
        s_axes = test['sliced_axes']

        sliced = ng.Slice(tensor, s, s_axes)
        sliced_val_fun = ex.executor(sliced, tensor)
        num_deriv_fun = ex.numeric_derivative(sliced, tensor, delta)
        # Test backpropagation
        sym_deriv_fun = ex.derivative(sliced, tensor)

        sliced_val = sliced_val_fun(tensor_np)
        assert np.array_equal(sliced_val, expected)

        numeric_deriv = num_deriv_fun(tensor_np)
        sym_deriv = sym_deriv_fun(tensor_np)
        assert np.allclose(numeric_deriv, sym_deriv, rtol=rtol, atol=atol)


def test_expand_dims(transformer_factory):
    """TODO."""
    C = ng.make_axis(name='C')
    D = ng.make_axis(name='D')
    N = ng.make_axis(name='N')

    max_new_axis_length = 4

    tests = [{
        'tensor': [[2, 5], [13, 5]],
        'tensor_axes': (N, D),
        'tensor_axes_lengths': (2, 2),
        'new_axis': C,
    }, {
        'tensor': 2,
        'tensor_axes': (),
        'tensor_axes_lengths': (),
        'new_axis': D
    }]

    for test in tests:
        for new_axis_length in range(1, max_new_axis_length + 1):
            tensor_axes = test['tensor_axes']
            tensor_axes_lengths = test['tensor_axes_lengths']

            for dim in range(len(tensor_axes) + 1):
                ex = ExecutorFactory()
                for axis, length in zip(tensor_axes, tensor_axes_lengths):
                    axis.length = length

                new_axis = test['new_axis']
                new_axis.length = new_axis_length

                tensor_np = np.array(test['tensor'], dtype=np.float32)
                tensor = ng.placeholder(tensor_axes)

                expanded = ng.ExpandDims(tensor, new_axis, dim)
                expander_fun = ex.executor(expanded, tensor)
                num_deriv_fun = ex.numeric_derivative(expanded, tensor, delta)
                sym_deriv_fun = ex.derivative(expanded, tensor)

                expanded_shape = tensor_np.shape[:dim] \
                    + (new_axis.length,) + tensor_np.shape[dim:]
                expanded_strides = tensor_np.strides[:dim] \
                    + (0,) + tensor_np.strides[dim:]
                expanded_np = np.ndarray(
                    buffer=tensor_np,
                    shape=expanded_shape,
                    strides=expanded_strides,
                    dtype=tensor_np.dtype
                )

                expanded_result = expander_fun(tensor_np)
                assert np.array_equal(expanded_np, expanded_result)

                # Test backpropagation
                numeric_deriv = num_deriv_fun(tensor_np)
                sym_deriv = sym_deriv_fun(tensor_np)
                assert np.allclose(numeric_deriv, sym_deriv, rtol=rtol, atol=atol)
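

# Note on the expected value in test_expand_dims: building np.ndarray with a
# stride of 0 along the new dimension views the same data new_axis.length
# times without copying. An equivalent construction with standard numpy
# calls, shown as a sketch (reusing the variable names from the test):
#
#     expanded_np = np.broadcast_to(np.expand_dims(tensor_np, dim),
#                                   expanded_shape)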


def test_padding(transformer_factory):
    """TODO."""
    C = ng.make_axis(name='C')
    D = ng.make_axis(name='D')
    M = ng.make_axis(name='M')
    N = ng.make_axis(name='N')

    tests = [{
        'tensor': [[1, 3], [2, 5]],
        'tensor_axes': (C, D),
        'padding': [(0, 1), (1, 0)],
        'padded_axes': (M, N),
        'axes_lengths': {C: 2, D: 2, M: 3, N: 3}
    }, {
        'tensor': [[1, 4, 5], [1, 4, 6]],
        'tensor_axes': (C, D),
        'padding': [(0, 1), 1],
        'padded_axes': None,
        'axes_lengths': {C: 2, D: 3}
    }]

    for test in tests:
        ex = ExecutorFactory()
        for axis, length in test['axes_lengths'].items():
            axis.length = length
        tensor_axes = test['tensor_axes']
        tensor_np = np.array(test['tensor'], dtype='float32')
        tensor = ng.placeholder(tensor_axes)
        padding = test['padding']
        padded_axes = test['padded_axes']
        padded = ng.pad(tensor, padding, padded_axes)
        computed_val_fun = ex.executor(padded, tensor)

        # Test backpropagation
        numeric_deriv_fun = ex.numeric_derivative(padded, tensor, delta)
        sym_deriv_fun = ex.derivative(padded, tensor)

        def to_tuple(p):
            """Normalize a padding spec: an int p becomes the symmetric pair (p, p)."""
            return (p, p) if isinstance(p, int) else p

        np_padding = tuple(to_tuple(p) for p in padding)
        expected_val = np.pad(tensor_np, np_padding, mode='constant')

        computed_val = computed_val_fun(tensor_np)
        assert np.array_equal(expected_val, computed_val)

        numeric_deriv = numeric_deriv_fun(tensor_np)
        sym_deriv = sym_deriv_fun(tensor_np)
        assert np.allclose(numeric_deriv, sym_deriv, rtol=rtol, atol=atol)


def test_elementwise_ops_unmatched_args(transformer_factory):
    """TODO."""
    N = ng.make_axis(name='N')
    H = ng.make_axis(name='H')
    W = ng.make_axis(name='W')

    W.length = 5
    H.length = 5
    N.length = 32
    sample_axes = [W, H]
    batch_axes = [W, H, N]
    broadcast_dims = (W.length, H.length, 1)

    for np_op, be_op in ELEMENTWISE_BINARY_OPS:
        # Unmatched sizes: p_u lacks the batch axis N that p_v carries
        p_u = ng.placeholder(sample_axes)
        p_v = ng.placeholder(batch_axes)
        u = rng.uniform(1.0, 2.0, p_u.axes)
        v = rng.uniform(1.0, 2.0, p_v.axes)

        # u op v
        uv_np = np_op(u.reshape(broadcast_dims), v)
        uv_op = be_op(p_u, p_v)

        ex = ExecutorFactory()

        # fun(u, v)
        uv_fun = ex.executor(uv_op, p_u, p_v)
        duvdunum_fun = ex.numeric_derivative(uv_op, p_u, .001, p_v)
        duvdut_fun = ex.derivative(uv_op, p_u, p_v)
        duvdvnum_fun = ex.numeric_derivative(uv_op, p_v, .001, p_u)
        duvdvt_fun = ex.derivative(uv_op, p_v, p_u)

        # fun(v, u)
        vu_np = np_op(v, u.reshape(broadcast_dims))
        vu_op = be_op(p_v, p_u)

        vu_fun = ex.executor(vu_op, p_u, p_v)
        dvudunum_fun = ex.numeric_derivative(vu_op, p_u, .001, p_v)
        dvudut_fun = ex.derivative(vu_op, p_u, p_v)
        dvudvnum_fun = ex.numeric_derivative(vu_op, p_v, .001, p_u)
        dvudvt_fun = ex.derivative(vu_op, p_v, p_u)

        # u op v
        result_be = uv_fun(u, v)
        np.testing.assert_allclose(uv_np, result_be, atol=1e-4, rtol=1e-4)
        duvdunum = duvdunum_fun(u, v)
        duvdut = duvdut_fun(u, v)
        np.testing.assert_allclose(duvdunum, duvdut, atol=1e-3, rtol=1e-3)

        duvdvnum = duvdvnum_fun(v, u)
        duvdvt = duvdvt_fun(v, u)
        np.testing.assert_allclose(duvdvnum, duvdvt, atol=1e-3, rtol=1e-3)

        # v op u
        result_be = vu_fun(u, v)
        np.testing.assert_allclose(vu_np, result_be, atol=1e-4, rtol=1e-4)
        dvudunum = dvudunum_fun(u, v)
        dvudut = dvudut_fun(u, v)
        np.testing.assert_allclose(dvudunum, dvudut, atol=1e-3, rtol=1e-3)

        dvudvnum = dvudvnum_fun(v, u)
        dvudvt = dvudvt_fun(v, u)
        np.testing.assert_allclose(dvudvnum, dvudvt, atol=1e-3, rtol=1e-3)
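

# Note on broadcast_dims in test_elementwise_ops_unmatched_args: reshaping u
# from (W, H) to (W, H, 1) lets numpy broadcast it against v's (W, H, N)
# shape, mirroring ngraph's implicit broadcast of the sample-axes tensor
# along the batch axis N. A micro example of the same numpy rule:
#
#     a = np.ones((5, 5))
#     b = np.ones((5, 5, 32))
#     (a.reshape(5, 5, 1) + b).shape  # -> (5, 5, 32)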