def test_weight_clipping(w_clip, optimizer):
    opt_ng = optimizer(0.1, weight_clip_value=w_clip)

    # Set up data placeholders
    C = ng.make_axis(20)
    N = ng.make_axis(32, name='N')

    data = ng.placeholder([C, N])
    target = ng.placeholder([N])

    # params to be updated using the optimizer under test;
    # make sure initial values are higher than the clip values
    np_W = 10 * w_clip * (2 * np.random.rand(C.length) - 1)
    W = ng.variable([C], initial_value=np_W)

    # double check the generated initial W values
    assert np.max(np_W) > w_clip
    assert np.min(np_W) < -w_clip

    # Set up op graph
    cost = ng.sum(target - ng.dot(W, data), out_axes=())
    updated_weights = ng.sequential([opt_ng(cost), W])

    epsilon = w_clip * 1e-3

    # Set up the computation and run the "train" loop
    with ExecutorFactory() as ex:
        opt_ng_comp = ex.transformer.computation(updated_weights, data, target)
        mock_dataset = data_generator(20, C.length, N.length)

        for x, y in mock_dataset:
            ng_W = opt_ng_comp(x, y)  # updated weights for ngraph optimizer

            assert np.max(ng_W) < w_clip + epsilon
            assert np.min(ng_W) > -w_clip - epsilon

def compare_optimizer_variable_select(opt_ng, opt_ref):
    # Set up data placeholders
    C = ng.make_axis(20)
    N = ng.make_axis(32, name='N')

    data = ng.placeholder([C, N])
    target = ng.placeholder([N])

    # params to be updated using the optimizer under test
    np_W1 = np.random.rand(C.length)
    np_W2 = np.random.rand(C.length)
    W1 = ng.variable([C], initial_value=np_W1)
    W2 = ng.variable([C], initial_value=np_W2)

    # Set up op graph; only W1 is selected for optimization,
    # so W2 should remain unchanged
    cost = ng.sum(target - ng.dot(W1, data) - ng.dot(W2, data), out_axes=())
    updated_weights = ng.sequential([opt_ng(cost, variables=[W1]), W1])

    # Set up the computation and run the "train" loop
    with ExecutorFactory() as ex:
        opt_ng_comp = ex.transformer.computation([updated_weights, W2], data, target)
        mock_dataset = data_generator(20, C.length, N.length)

        for x, y in mock_dataset:
            [ng_W1, ng_W2] = opt_ng_comp(x, y)  # updated weights for ngraph optimizer
            np_W1 = opt_ref(x, np_W1)  # updated weights for reference optimizer

            ng.testing.assert_allclose(np_W1, ng_W1, rtol=1e-3)
            ng.testing.assert_allclose(np_W2, ng_W2, rtol=1e-3)

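# The two tests above iterate over `data_generator`, a helper defined elsewhere
# in this suite. A minimal sketch of what it is assumed to yield (the name
# `data_generator_sketch` is ours): `iterations` random (data, target) batches
# matching the (C, N) and (N,) placeholders.
def data_generator_sketch(iterations, C, N):
    for _ in range(iterations):
        yield (np.random.rand(C, N).astype('float32'),
               np.random.rand(N).astype('float32'))
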
def test_sequential_reduce(M):
    x = ng.variable([M], initial_value=1)
    x0 = x + x
    x1 = ng.sum(x0, out_axes=())
    x2 = ng.sum(x0, out_axes=()) + x0
    p = ng.sequential([x0, x1, x2])

    with ExecutorFactory() as ex:
        x0_val, x1_val, x2_val, p_val, x_val = ex.executor([x0, x1, x2, p, x])()
        x0_np = x_val + x_val
        x1_np = np.sum(x0_np)
        x2_np = x1_np + x0_np
        assert np.allclose(x0_val, x0_np)
        assert np.allclose(x1_val, x1_np)
        assert np.allclose(x2_val, x2_np)
        # sequential returns the value of its last op
        assert np.allclose(p_val, x2_np)

def test_sequential_side(M):
    x1_np = 2
    x2_np = 3
    b_np = 1
    x_np = np.array([1, 2, 3], dtype=np.float32)

    x = ng.variable([M], initial_value=x_np)
    x1 = ng.persistent_tensor(axes=(), initial_value=x1_np)
    x2 = ng.persistent_tensor(axes=(), initial_value=x2_np)
    # value_of snapshots the persistent tensors before the assigns below run
    x1_vo = ng.value_of(x1)
    x2_vo = ng.value_of(x2)
    b = ng.persistent_tensor(axes=(), initial_value=b_np)

    y = ng.sequential([
        x1_vo,
        x2_vo,
        ng.assign(x1, ng.sum(x, out_axes=()) + x1 * b + (1 - b)),
        ng.assign(x2, ng.mean(x, out_axes=()) + x2 * b + (1 - b)),
        x * 2
    ])

    with ExecutorFactory() as ex:
        main_effect = ex.executor((y, x1_vo, x2_vo, x1, x2))
        current_values = ex.executor((x1, x2))

        # Run main path #1
        y_val, x1_init_val, x2_init_val, x1_final_val, x2_final_val = main_effect()
        y_np = x_np * 2

        assert np.allclose(y_val, y_np)
        assert np.allclose(x1_init_val, x1_np)
        assert np.allclose(x2_init_val, x2_np)
        x1_np = np.sum(x_np) + x1_np * b_np + (1 - b_np)
        x2_np = np.mean(x_np) + x2_np * b_np + (1 - b_np)
        assert np.allclose(x1_final_val, x1_np)
        assert np.allclose(x2_final_val, x2_np)

        x1_val, x2_val = current_values()
        assert np.allclose(x1_val, x1_np)
        assert np.allclose(x2_val, x2_np)

        # Run main path #2 (should behave the same as before)
        y_val, x1_init_val, x2_init_val, x1_final_val, x2_final_val = main_effect()
        y_np = x_np * 2

        assert np.allclose(y_val, y_np)
        assert np.allclose(x1_init_val, x1_np)
        assert np.allclose(x2_init_val, x2_np)
        x1_np = np.sum(x_np) + x1_np * b_np + (1 - b_np)
        x2_np = np.mean(x_np) + x2_np * b_np + (1 - b_np)
        assert np.allclose(x1_final_val, x1_np)
        assert np.allclose(x2_final_val, x2_np)

def test_4d_reduction(transformer_factory, input_axes):
    # Limiting the maximum absolute value of tensor elements to 7.9.
    # See the description in test_exit_condition above.
    is_flex = is_flex_factory(transformer_factory)
    clip_val = 7.9 if is_flex else 0

    x_val = rng.randn_abs_clip(input_axes, clip_max=clip_val)
    x = ng.constant(x_val, input_axes)

    out1 = ng.sum(x, reduction_axes=input_axes[1])
    out2 = ng.sum(x, reduction_axes=input_axes[3])

    with executor([out1, out2]) as ex:
        graph_val1, graph_val2 = ex()
        np_val1 = np.sum(x_val, 1)
        np_val2 = np.sum(x_val, 3)
        # atol scales with the number of elements summed along the reduced axis
        ng.testing.assert_allclose(graph_val1, np_val1, rtol=1e-4,
                                   atol_multiplier=x_val.shape[1])
        ng.testing.assert_allclose(graph_val2, np_val2, rtol=1e-4,
                                   atol_multiplier=x_val.shape[3])

def __call__(self, cost_func, variables=None, subgraph=None, warning=False):
    """
    Arguments:
        cost_func (Op): The cost function to optimize
        variables (list of variables): List of variables to optimize
        subgraph (SubGraph): A subgraph instance containing all variables to optimize
        warning (bool): If True, displays a warning message if any of the
            specified variables do not participate in the batch cost computation

    .. Note::
        If `subgraph` is provided, the variables to optimize will be taken from it.
        Otherwise, they can be provided explicitly by passing a list as `variables`.
        If neither `subgraph` nor `variables` is provided, the variables to optimize
        will be all trainable variables on which `cost_func` depends.
    """
    all_updates = []
    batch_cost = ng.sum(cost_func, out_axes=())
    if cost_func.axes.batch_axis() is None:
        batch_size = 1
    else:
        batch_size = cost_func.axes.batch_axis().length

    # determine variables to optimize
    if subgraph is not None:
        if variables is not None:
            raise ValueError("variables and subgraph cannot both be specified.")
        variables = list(subgraph.variables.values())

    if variables is None:
        variables = batch_cost.variables()
    elif warning:
        all_variables = batch_cost.variables()
        selected_variables = all_variables & set(variables)
        if len(selected_variables) < len(variables):
            logger.warn("not all selected variables participate in cost computation")

    # gradients
    grads = [ng.deriv(batch_cost, v) / batch_size for v in variables]
    scale_factor = clip_gradient_norm(grads, self.gradient_clip_norm)

    # updates
    for variable, grad in zip(variables, grads):
        updates = self.variable_update(variable, grad, scale_factor, self.weight_clip_value)
        all_updates.append(updates)
    updates = ng.doall(all_updates)
    # grads = ng.doall(grads)
    # clips = ng.doall([ng.assign(variable,
    #                             clip_weight_value(variable, self.weight_clip_value))
    #                   for variable in variables])
    # return ng.sequential([grads, updates, clips, 0])
    # return ng.sequential([grads, updates, 0])
    return ng.sequential([updates, 0])

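# `clip_gradient_norm` above produces a scale factor that is applied uniformly
# to all gradients. A minimal NumPy sketch of the global-norm clipping rule it
# is assumed to implement (illustrative only; the real ngraph helper builds
# this computation into the graph rather than evaluating it eagerly):
def global_norm_scale_sketch(grads, clip_norm):
    if clip_norm is None:
        return 1.0
    # global L2 norm across all gradient tensors
    total_norm = np.sqrt(sum((g ** 2).sum() for g in grads))
    # scale down only when the global norm exceeds the threshold
    return clip_norm / max(total_norm, clip_norm)
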
def test_rng_repetition():
    """
    Tests rng ops to make sure they run on every execution, not just at
    initialization.
    """
    axes = ng.make_axes([ng.make_axis(2), ng.make_axis(2)])
    y = ng.uniform(axes)
    mysum = ng.sum(y)
    trans = ng.transformers.make_transformer()
    rand_comp = trans.computation(mysum)
    val1 = rand_comp().copy()
    val2 = rand_comp().copy()
    # scalar results, so a direct inequality comparison is fine
    assert val1 != val2
    trans.close()

def test_sum():
    H = ng.make_axis(length=2)
    W = ng.make_axis(length=2)
    H1 = ng.make_axis(length=1)
    W1 = ng.make_axis(length=4)

    input1 = ng.placeholder(axes=[H, W])
    input2 = ng.placeholder(axes=[H1, W1])

    # reduction sum along axes[0] (H)
    sum_op_1 = ng.sum(input1, reduction_axes=H)
    # sum elements across all axes
    sum_op_2 = ng.sum(input2)

    with ExecutorFactory() as ex:
        _sum = ex.executor(sum_op_1, input1)
        _sum_val = _sum([[1, 2], [3, 4]])
        assert np.array_equal(_sum_val, [4, 6])

        _sum = ex.executor(sum_op_2, input2)
        _sum_val = _sum([1, 2, 3, 4])
        assert np.array_equal(_sum_val, 10)

def test_flatten_deriv_simplified():
    """
    Test derivative with dot and flatten
    """
    ax_N = ng.make_axis(length=3)
    ax_Y = ng.make_axis(length=2)

    x = ng.placeholder(ng.make_axes([ax_N]))
    w = ng.constant([5, 2], axes=ng.make_axes([ax_Y]))
    logits = ng.dot(x, w)
    cost = ng.sum(logits, reduction_axes=logits.axes)

    delta = 0.001
    u = rng.uniform(.1, 5.0, x.axes)
    check_derivative(cost, x, delta, u, atol=1e-2, rtol=1e-2)

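# `check_derivative` is a test helper defined elsewhere in this suite. A
# minimal sketch of the central-difference check it is assumed to perform;
# the name and signature here are illustrative (it takes plain callables
# rather than ngraph ops):
def check_derivative_sketch(f, df, x0, delta, atol, rtol):
    """Compare an analytic derivative df(x0) of a scalar-valued f against
    (f(x0 + delta) - f(x0 - delta)) / (2 * delta), elementwise."""
    numeric = np.zeros_like(x0)
    for i in np.ndindex(x0.shape):
        xp, xm = x0.copy(), x0.copy()
        xp[i] += delta
        xm[i] -= delta
        numeric[i] = (f(xp) - f(xm)) / (2 * delta)
    np.testing.assert_allclose(numeric, df(x0), atol=atol, rtol=rtol)
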
def test_4d_chained(transformer_factory, input_axes):
    # Limiting the maximum absolute value of tensor elements to 7.9.
    # See the description in test_exit_condition above.
    #
    # Also limiting the minimum absolute value of elements fed to the
    # reciprocal operation to 1/7.9. This is a consequence of the above and of
    # flexpoint accuracy: very small numbers have poor absolute accuracy, so
    # their reciprocals become very large with even worse accuracy. If small
    # numbers were accepted as input to reciprocal, the absolute maximum value
    # of the result (and hence the absolute tolerance) would be unbounded. To
    # make it possible to set an atol the test can pass with, the minimum
    # element of the tensor fed to reciprocal has to be limited.
    is_flex = is_flex_factory(transformer_factory)
    clip_val_max = 7.9 if is_flex else 0
    clip_val_min = 1.0 / 7.9 if is_flex else 0

    x_val = rng.randn_abs_clip(input_axes, clip_min=clip_val_min, clip_max=clip_val_max)
    y_val = rng.randn_abs_clip(input_axes, clip_max=clip_val_max)
    x = ng.constant(x_val, input_axes)
    y = ng.constant(y_val, input_axes)

    im = ng.reciprocal(x)
    out = ng.sum(ng.add(im, y), reduction_axes=input_axes[0])

    with executor(out) as ex:
        graph_val = ex()
        np_val = np.sum(np.add(np.reciprocal(x_val), y_val), 0)

        # atol_multiplier = 15 * x_val.shape[0]
        #
        # x_val.shape[0] is the number of elements added together by
        # ng.sum(x, reduction_axes=input_axes[0]).
        #
        # The factor 15 is derived as follows:
        #   The input tensor has values in the range 1/7.9 to 7.9.
        #   For DEC=12 the absolute error is 0.5 * 2^-12 = 0.000122.
        #   1/7.9 = 0.126582; with this error it becomes 0.126704.
        #   The reciprocal of 1/7.9 is 7.9, but the reciprocal of
        #   1/7.9 + err is 7.892389, an absolute difference of 0.007611.
        #   That is 15.2 times larger than the atol limit of 5e-4 from the
        #   Argon transformer.
        ng.testing.assert_allclose(graph_val, np_val, rtol=1e-4,
                                   atol_multiplier=15 * x_val.shape[0])

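# A quick numeric check of the error analysis in the comment above
# (illustrative only, not part of the test suite):
def _flex_reciprocal_error_check():
    err = 0.5 * 2 ** -12                      # DEC=12 absolute error, 0.000122
    worst = abs(7.9 - 1.0 / (1.0 / 7.9 + err))  # error after reciprocal
    assert abs(worst - 0.007611) < 1e-6
    # roughly 15.2x the 5e-4 atol limit, hence the factor 15
    assert 15.0 < worst / 5e-4 < 15.5
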
def test_idempotent_axes_c():
    """
    Test axes transformations with autodiff, case c, with broadcast, slice,
    cast and dim-shuffle
    """
    with ExecutorFactory() as ex:
        axes = ng.make_axes([ng.make_axis(3), ng.make_axis(1)])
        result_axes = [ng.make_axis(length=axis.length) for axis in axes]

        # variable
        w = ng.variable(axes, initial_value=np.ones((3, 1)))

        # broadcast l / r, introducing dummy length-1 axes
        l = ng.broadcast(w, axes)
        r = ng.broadcast(w, axes)

        # slice
        axes_slice = [slice(None, None, None), slice(None, None, None)]
        l_sliced = ng.tensor_slice(l, axes_slice)
        r_sliced = ng.tensor_slice(r, axes_slice)

        # cast r
        r_sliced_casted = ng.cast_axes(r_sliced, axes)

        # perform add
        result = ng.add(l_sliced, r_sliced_casted)

        # cast / dimshuffle
        result = ng.cast_axes(result, result_axes)
        result = ng.axes_with_order(result, result_axes)

        # cost and grad
        cost = ng.sum(result, reduction_axes=result.axes)
        grad = ng.deriv(cost, w)

        grad_comp = ex.executor(grad)
        cost_comp = ex.executor(cost)

        cost_comp_ng = cost_comp()
        grad_comp_ng = grad_comp()
        grad_comp_np = np.ones((3, 1)) * 2.

        assert cost_comp_ng == 6.0
        assert np.array_equal(grad_comp_ng, grad_comp_np)

def test_logreg():
    # xs: (C, N), ys: (N,)
    xs = np.array([[0.52, 0.88, 0.52, 0.74],
                   [1.12, -1.08, 0.06, -2.49],
                   [0.77, 0.15, -1.3, 1.39]])
    ys = np.array([1, 1, 0, 1])
    max_iter = 10
    alpha = 0.1
    thetas = np.array([0., 0., 0.])

    np_logreg = NumpyLogreg(xs, ys, thetas)

    C, N = ng.make_axis(length=3), ng.make_axis(length=4)

    # input tensors
    xs_v = ng.placeholder((C, N))
    ys_v = ng.placeholder([N])
    alpha_v = ng.placeholder(())
    thetas_var = ng.variable([C], initial_value=thetas)

    # define ops
    ys_pred = ng.sigmoid(ng.dot(thetas_var, xs_v))
    log_likelihoods = ng.log(ys_pred) * ys_v + ng.log(1 - ys_pred) * (1 - ys_v)
    loss = -ng.sum(log_likelihoods, reduction_axes=[N])
    grad_comp = ng.deriv(loss, thetas_var)
    weight_update = ng.sequential(
        [ng.assign(thetas_var, thetas_var - alpha_v * grad_comp), thetas_var])

    # transformer
    with ExecutorFactory() as ex:
        train_eval_func = ex.executor([grad_comp, loss, weight_update],
                                      xs_v, ys_v, alpha_v)

        # evaluate
        for i in range(max_iter):
            grad_np, loss_np, thetas_np = np_logreg.optimize(alpha)
            grad_ng, loss_ng, thetas_ng = train_eval_func(xs, ys, alpha)
            ng.testing.assert_allclose(loss_np, loss_ng, rtol=1e-05, atol=1e-05,
                                       transformer_overwrite=False)
            ng.testing.assert_allclose(grad_np, grad_ng, rtol=1e-05, atol=1e-05,
                                       transformer_overwrite=False)
            ng.testing.assert_allclose(thetas_np, thetas_ng, rtol=1e-05, atol=1e-05,
                                       transformer_overwrite=False)

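# `NumpyLogreg` is the reference implementation the test above compares
# against; it is defined elsewhere in the suite. A minimal sketch of what it
# is assumed to compute (one full-batch gradient step of logistic regression
# per call; the name `NumpyLogregSketch` is ours):
class NumpyLogregSketch(object):
    def __init__(self, xs, ys, thetas):
        self.xs, self.ys, self.thetas = xs, ys, thetas

    def optimize(self, alpha):
        # p = sigmoid(theta . x), negative log-likelihood loss
        ys_pred = 1.0 / (1.0 + np.exp(-np.dot(self.thetas, self.xs)))
        log_likelihoods = (np.log(ys_pred) * self.ys +
                           np.log(1 - ys_pred) * (1 - self.ys))
        loss = -np.sum(log_likelihoods)
        # dloss/dtheta = sum_n (p_n - y_n) * x_n
        grad = np.dot(self.xs, ys_pred - self.ys)
        self.thetas = self.thetas - alpha * grad
        return grad, loss, self.thetas
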
def test_dropout_bprop_single_comp(nin, batch_size, keep):
    # set inputs
    N = ng.make_axis(batch_size, name='N')
    F = ng.make_axis(nin, name='F')

    mul_factor = ng.placeholder(())

    inp = ng.placeholder([F, N])
    layer = Dropout(keep=keep)
    fprop = layer(inp * mul_factor)
    out_graph = ng.sum(fprop, out_axes=())
    bprop = ng.deriv(out_graph, mul_factor)

    # create data
    x = np.random.uniform(size=(nin, batch_size))

    # evaluate
    with ExecutorFactory() as ex:
        comp = ex.executor([fprop, bprop, layer.mask], inp, mul_factor)
        fout, bout, mask = comp(x, 2)

        # calculate the derivative by hand and compare
        ng.testing.assert_allclose(bout, (x * mask[:, None]).sum(), rtol=1e-6)

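# The hand derivation behind the assert above (illustrative; assumes the layer
# applies a fixed mask during fprop): with fprop = mask * (x * m) for a scalar
# m, d(sum(fprop))/dm = sum(mask * x), independent of m, so evaluating at
# m = 2 still matches. A per-feature mask is broadcast over the batch axis,
# as in the test:
def dropout_scalar_grad_sketch(x, mask):
    # analytic gradient of sum(mask * (x * m)) with respect to the scalar m
    return (x * mask[:, None]).sum()
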
def test_idempotent_axes_b():
    """
    Test axes transformations with autodiff, case b, with broadcast applied
    to the same tensor
    """
    with ExecutorFactory() as ex:
        axes = ng.make_axes([ng.make_axis(3), ng.make_axis(1)])

        w = ng.variable(axes, initial_value=np.ones((3, 1)))
        l = ng.broadcast(w, axes)
        r = ng.broadcast(w, axes)
        result = ng.add(l, r)
        result = ng.cast_axes(result, axes)
        cost = ng.sum(result, reduction_axes=axes)
        grad = ng.deriv(cost, w)

        grad_comp = ex.executor(grad)
        cost_comp = ex.executor(cost)

        assert cost_comp() == 6.0
        assert np.array_equal(grad_comp(), np.ones((3, 1)) * 2.)

def test_idempotent_axes_a():
    """
    Test axes transformations with autodiff, case a, reference test
    """
    with ExecutorFactory() as ex:
        axes = ng.make_axes([ng.make_axis(3), ng.make_axis(1)])

        w = ng.variable(axes, initial_value=np.ones((3, 1)))
        result = w + w

        result = ng.cast_axes(result, axes)
        cost = ng.sum(result, reduction_axes=axes)
        grad = ng.deriv(cost, w)

        grad_comp = ex.executor(grad)
        cost_comp = ex.executor(cost)
        cost_comp_val = cost_comp()
        grad_comp_val = grad_comp()
        grad_comp_np = np.ones((3, 1)) * 2.

        assert cost_comp_val == 6.0
        assert np.array_equal(grad_comp_val, grad_comp_np)

def test_sum(num_units, sequence_length, batch_size):
    """
    This tests for a non-deterministic error that arose in ng.sum following
    a dot product using the gpu transformer.
    """
    shape = (num_units, sequence_length, batch_size)
    np_inp = np.random.uniform(-1, 1, shape)

    # Use an identity weight matrix on top of it
    np_w = np.eye(shape[0])

    # Create ngraph versions
    inp = ng.constant(np_inp)
    reduction_axes = inp.axes[:-2]
    other_axes = inp.axes[-2:]
    new_axis = ng.make_axis(length=shape[0])
    w_axes = ng.make_axes(new_axis) | reduction_axes
    w = ng.constant(np_w, axes=w_axes)

    # Reshape to do the equivalent dot in numpy
    inp_reshape = np.reshape(
        np_inp, (np.prod(reduction_axes.lengths), np.prod(other_axes.lengths)))
    w_reshape = np.reshape(np_w, (new_axis.length, inp_reshape.shape[0]))

    # Reduce dimensions with the identity weight matrix
    np_x = np.dot(w_reshape, inp_reshape)
    x = ng.dot(w, inp)

    # Sum over all but the first axis
    output_axes = ng.make_axes(x.axes[0])
    y = ng.sum(x, out_axes=output_axes)
    np_y = np.sum(np_x, axis=1)

    with executor([y, x]) as f:
        y_val, x_val = f()

        assert_allclose(x_val.ravel(), np_x.ravel(), atol=1e-1)
        assert_allclose(y_val, np_y, atol=1e-1)

def test_dot_sum_backprop():
    delta = 1e-3
    rtol = atol = 1e-2

    C = ng.make_axis(length=2).named('C')
    N = ng.make_axis(length=3, name='N')

    x_axes = ng.make_axes((C, N))
    y_axes = ng.make_axes((C,))
    x_np = np.random.random(x_axes.lengths).astype('float32')
    y_np = np.random.random(y_axes.lengths).astype('float32')
    x_np[...] = [[1.0, 0.0, 1.0], [2.0, 0.0, 3.0]]
    y_np[...] = [-1.0, 1.0]

    x = ng.placeholder(x_axes).named('x')
    y = ng.placeholder(y_axes).named('y')
    d = ng.dot(x, y)
    s = ng.sum(d, out_axes=())

    with ExecutorFactory() as ex:
        s_fun = ex.executor(s, x, y)
        d_fun = ex.executor(d, x, y)

        dd_dx_fun_num = ex.numeric_derivative(d, x, delta, y)
        dd_dx_fun_sym = ex.derivative(d, x, y)
        dd_dy_fun_num = ex.numeric_derivative(d, y, delta, x)
        dd_dy_fun_sym = ex.derivative(d, y, x)

        ds_dx_fun_num = ex.numeric_derivative(s, x, delta, y)
        ds_dx_fun_sym = ex.derivative(s, x, y)
        ds_dy_fun_num = ex.numeric_derivative(s, y, delta, x)
        ds_dy_fun_sym = ex.derivative(s, y, x)

        # assert outputs are equal
        d_np = x_np.T.dot(y_np)
        d_val = d_fun(x_np, y_np)
        ng.testing.assert_allclose(d_np, d_val, rtol=rtol, atol=atol)

        s_np = np.sum(d_np)
        s_val = s_fun(x_np, y_np)
        ng.testing.assert_allclose(s_val, s_np, rtol=rtol, atol=atol)

        # assert that the derivative w.r.t. each tensor is the same whether
        # computed symbolically by ngraph or numerically
        dd_dx_val_sym = dd_dx_fun_sym(x_np, y_np)
        dd_dy_val_sym = dd_dy_fun_sym(y_np, x_np)
        ds_dx_val_sym = ds_dx_fun_sym(x_np, y_np)
        ds_dy_val_sym = ds_dy_fun_sym(y_np, x_np)

        dd_dx_val_num = dd_dx_fun_num(x_np, y_np)
        dd_dy_val_num = dd_dy_fun_num(y_np, x_np)
        ds_dx_val_num = ds_dx_fun_num(x_np, y_np)
        ds_dy_val_num = ds_dy_fun_num(y_np, x_np)

        ng.testing.assert_allclose(dd_dx_val_num, dd_dx_val_sym, rtol=rtol, atol=atol)
        ng.testing.assert_allclose(dd_dy_val_num, dd_dy_val_sym, rtol=rtol, atol=atol)
        ng.testing.assert_allclose(ds_dx_val_num, ds_dx_val_sym, rtol=rtol, atol=atol)
        ng.testing.assert_allclose(ds_dy_val_num, ds_dy_val_sym, rtol=rtol, atol=atol)

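# A NumPy restatement of the gradients exercised above (illustrative, not part
# of the test): with d[n] = sum_c x[c, n] * y[c] and s = sum_n d[n],
#   ds/dx[c, n] = y[c]  (broadcast along N), and
#   ds/dy[c]    = sum_n x[c, n].
def analytic_dot_sum_grads(x_np, y_np):
    ds_dx = np.broadcast_to(y_np[:, None], x_np.shape)
    ds_dy = x_np.sum(axis=1)
    return ds_dx, ds_dy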