def test_deriv_missing_connection(N):
    """
    Taking the derivative of an expression with respect to a variable not
    used to compute the expression should raise an exception.
    """
    x = ng.variable([N])
    y = ng.variable([N])
    z = ng.variable([N])

    with pytest.raises(ValueError):
        ng.deriv(x + y, z)
def test_batchnorm_bprop(input_placeholder, bn_params):
    layer = BatchNorm(**bn_params)
    fprop = layer(input_placeholder)

    # Derivatives to check
    bprop_vars = [input_placeholder, layer.gamma, layer.beta]

    delta_placeholder = ng.placeholder(fprop.axes)
    bprops = [ng.deriv(fprop, var, delta_placeholder) for var in bprop_vars]

    with ExecutorFactory() as ex:
        # Create derivative executor
        bprop_function = ex.executor(bprops, input_placeholder, delta_placeholder)

        # Generate data
        x = rng.uniform(0, 1, input_placeholder.axes)
        delta = rng.uniform(-.1, .1, delta_placeholder.axes)

        # Compute reference bprop
        dx_ref, dgamma_ref, dbeta_ref = BatchNormReference(x, **bn_params).bprop(delta)

        # Compute ngraph bprop
        dx, dgamma, dbeta = bprop_function(x, delta)

        ng.testing.assert_allclose(dx, dx_ref, rtol=rtol, atol=atol)
        ng.testing.assert_allclose(dgamma, dgamma_ref, rtol=rtol, atol=atol)
        ng.testing.assert_allclose(dbeta, dbeta_ref, rtol=rtol, atol=atol)
def __call__(self, cost_func, variables=None, subgraph=None, warning=False):
    """
    Arguments:
        cost_func (Op): The cost function to optimize
        variables (list of variables): List of variables to optimize
        subgraph (SubGraph): A subgraph instance containing all variables to optimize
        warning (bool): If True, displays a warning message if any of the specified
                        variables do not participate in the batch cost computation

    .. Note::
        If subgraph is provided, the variables to optimize will be taken from it.
        Otherwise, they can be provided explicitly by passing a list as `variables`.
        If neither `subgraph` nor `variables` is provided, the variables to optimize
        will be all trainable variables on which `cost` depends.
    """
    all_updates = []
    batch_cost = ng.sum(cost_func, out_axes=())
    if cost_func.axes.batch_axis() is None:
        batch_size = 1
    else:
        batch_size = cost_func.axes.batch_axis().length

    # determine variables to optimize
    if subgraph is not None:
        if variables is not None:
            raise ValueError("variables and subgraph cannot both be specified.")
        variables = list(subgraph.variables.values())

    if variables is None:
        variables = batch_cost.variables()
    elif variables is not None and warning is True:
        all_variables = batch_cost.variables()
        selected_variables = all_variables & set(variables)
        if len(selected_variables) < len(variables):
            logger.warning("not all selected variables participate in cost computation")

    # gradients
    grads = [ng.deriv(batch_cost, v) / batch_size for v in variables]
    scale_factor = clip_gradient_norm(grads, self.gradient_clip_norm)

    # updates
    for variable, grad in zip(variables, grads):
        updates = self.variable_update(variable, grad, scale_factor, self.weight_clip_value)
        all_updates.append(updates)
    updates = ng.doall(all_updates)
    # grads = ng.doall(grads)
    # clips = ng.doall([ng.assign(variable,
    #                             clip_weight_value(variable, self.weight_clip_value))
    #                   for variable in variables])
    # return ng.sequential([grads, updates, clips, 0])
    # return ng.sequential([grads, updates, 0])
    return ng.sequential([updates, 0])
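# A minimal, hedged usage sketch for the optimizer __call__ above. The optimizer class
# name (GradientDescentMomentum) and the toy cost are illustrative assumptions, not taken
# from this file; only the call pattern (build the update op, then execute it together
# with the cost) mirrors the method and tests above.
def sketch_optimizer_call_usage():
    N = ng.make_axis(length=4, name='N')
    x = ng.placeholder([N])
    w = ng.variable([N], initial_value=np.ones(N.length))

    # scalar cost depending on the trainable variable w
    cost = ng.sum((x - w) * (x - w), out_axes=())

    optimizer = GradientDescentMomentum(learning_rate=0.1)  # assumed optimizer class
    updates = optimizer(cost)  # optimize all trainable variables that cost depends on
    # or restrict to an explicit list:
    # updates = optimizer(cost, variables=[w], warning=True)

    with ExecutorFactory() as ex:
        step = ex.executor([cost, updates], x)
        step(np.zeros(N.length))  # one gradient step on w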
def test_concatenate(concatenate_variables):
    x_list, np_list, pos = concatenate_variables

    with ExecutorFactory() as ex:
        v = ng.concat_along_axis(x_list, x_list[0].axes[pos])
        d = ng.deriv(v, x_list[0],
                     error=ng.constant(np.ones(v.axes.lengths), axes=v.axes))

        f = ex.executor([v, d])
        e_v, e_d = f()

        np_v = np.concatenate(np_list, axis=pos)
        ng.testing.assert_allclose(e_v.copy(), np_v)
        ng.testing.assert_allclose(e_d.copy(), np.ones(x_list[0].axes.lengths))
def test_idempotent_axes_c():
    """
    Test axes transformations with autodiff, case c, with broadcast,
    slice, cast and dim-shuffle
    """
    with ExecutorFactory() as ex:
        axes = ng.make_axes([ng.make_axis(3), ng.make_axis(1)])
        result_axes = [ng.make_axis(length=axis.length) for axis in axes]

        # variable
        w = ng.variable(axes, initial_value=np.ones((3, 1)))

        # broadcast l / r, introducing dummy length 1 axes
        l = ng.broadcast(w, axes)
        r = ng.broadcast(w, axes)

        # slice
        axes_slice = [slice(None, None, None), slice(None, None, None)]
        l_sliced = ng.tensor_slice(l, axes_slice)
        r_sliced = ng.tensor_slice(r, axes_slice)

        # cast r
        r_sliced_casted = ng.cast_axes(r_sliced, axes)

        # perform add
        result = ng.add(l_sliced, r_sliced_casted)

        # cast / dimshuffle
        result = ng.cast_axes(result, result_axes)
        result = ng.axes_with_order(result, result_axes)

        # cost and grad
        cost = ng.sum(result, reduction_axes=result.axes)
        grad = ng.deriv(cost, w)

        grad_comp = ex.executor(grad)
        cost_comp = ex.executor(cost)

        cost_comp_ng = cost_comp()
        grad_comp_ng = grad_comp()
        grad_comp_np = np.ones((3, 1)) * 2.

        assert cost_comp_ng == 6.0
        assert np.array_equal(grad_comp_ng, grad_comp_np)
def test_logreg():
    # xs: (C, N), ys: (N,)
    xs = np.array([[0.52, 0.88, 0.52, 0.74],
                   [1.12, -1.08, 0.06, -2.49],
                   [0.77, 0.15, -1.3, 1.39]])
    ys = np.array([1, 1, 0, 1])
    max_iter = 10
    alpha = 0.1
    thetas = np.array([0., 0., 0.])

    np_logreg = NumpyLogreg(xs, ys, thetas)

    C, N = ng.make_axis(length=3), ng.make_axis(length=4)

    # input tensors
    xs_v = ng.placeholder((C, N))
    ys_v = ng.placeholder([N])
    alpha_v = ng.placeholder(())
    thetas_var = ng.variable([C], initial_value=thetas)

    # define ops
    ys_pred = ng.sigmoid(ng.dot(thetas_var, xs_v))
    log_likelihoods = ng.log(ys_pred) * ys_v + ng.log(1 - ys_pred) * (1 - ys_v)
    loss = -ng.sum(log_likelihoods, reduction_axes=[N])
    grad_comp = ng.deriv(loss, thetas_var)
    weight_update = ng.sequential([ng.assign(thetas_var, thetas_var - alpha_v * grad_comp),
                                   thetas_var])

    # transformer
    with ExecutorFactory() as ex:
        train_eval_func = ex.executor([grad_comp, loss, weight_update], xs_v, ys_v, alpha_v)

        # evaluate
        for i in range(max_iter):
            grad_np, loss_np, thetas_np = np_logreg.optimize(alpha)
            grad_ng, loss_ng, thetas_ng = train_eval_func(xs, ys, alpha)

            ng.testing.assert_allclose(loss_np, loss_ng, rtol=1e-05, atol=1e-05,
                                       transformer_overwrite=False)
            ng.testing.assert_allclose(grad_np, grad_ng, rtol=1e-05, atol=1e-05,
                                       transformer_overwrite=False)
            ng.testing.assert_allclose(thetas_np, thetas_ng, rtol=1e-05, atol=1e-05,
                                       transformer_overwrite=False)
def test_specific_slice_deriv():
    with ExecutorFactory() as ex:
        A = ng.make_axis(name='A', length=3)
        B = ng.make_axis(name='B', length=4)
        np_shape = (A.length, B.length)
        x_np = np.empty(np_shape, dtype=np.float32)
        for i in range(A.length):
            for j in range(B.length):
                x_np[i, j] = 10 * i + j
        x_ng = ng.persistent_tensor([A, B], initial_value=x_np)

        # The derivative of a single-element slice should be one at that
        # element's position and zero everywhere else.
        for i in range(A.length):
            for j in range(B.length):
                x_slice = ng.tensor_slice(x_ng, (i, j))
                dslice_dx = ng.deriv(x_slice, x_ng)
                dslice_dx_fun = ex.executor(dslice_dx)
                dslice_dx_val = dslice_dx_fun()
                dslice_dx_np = np.zeros_like(x_np)
                dslice_dx_np[i, j] = 1
                ng.testing.assert_allclose(dslice_dx_val, dslice_dx_np)
def minimize(self, cost, variables):
    """
    Minimize cost by returning update Ops.

    Arguments:
        cost: The cost Op to be minimized
        variables: TODO

    Returns:
        A doall op containing setitems to variable ops.
    """
    assert cost is not None
    assert variables is not None

    return ng.doall([ng.assign(variable,
                               variable - self.compute_lr_op * ng.deriv(cost, variable))
                     for variable in variables])
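# A hedged usage sketch for minimize() above. The `optimizer` argument is assumed to be
# an object exposing minimize() exactly as defined above, with `compute_lr_op` already
# built as a scalar learning-rate op; the toy cost, axis, and variable are illustrative.
def sketch_minimize_usage(optimizer):
    D = ng.make_axis(length=3, name='D')
    x = ng.placeholder([D])
    w = ng.variable([D], initial_value=np.zeros(D.length))
    cost = ng.sum((x - w) * (x - w), out_axes=())

    updates = optimizer.minimize(cost, [w])  # doall of ng.assign ops, one per variable

    with ExecutorFactory() as ex:
        step = ex.executor([cost, updates], x)
        step(np.ones(D.length))  # one update: w <- w - lr * d(cost)/dw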
def test_dropout_bprop_single_comp(nin, batch_size, keep):
    # set inputs
    N = ng.make_axis(batch_size, name='N')
    F = ng.make_axis(nin, name='F')

    mul_factor = ng.placeholder(())

    inp = ng.placeholder([F, N])
    layer = Dropout(keep=keep)
    fprop = layer(inp * mul_factor)
    out_graph = ng.sum(fprop, out_axes=())
    bprop = ng.deriv(out_graph, mul_factor)

    # create data
    x = np.random.uniform(size=(nin, batch_size))

    # evaluate
    with ExecutorFactory() as ex:
        comp = ex.executor([fprop, bprop, layer.mask], inp, mul_factor)
        fout, bout, mask = comp(x, 2)

        # Calculate derivative by hand and compare
        ng.testing.assert_allclose(bout, (x * mask[:, None]).sum(), rtol=1e-6)
def test_idempotent_axes_b():
    """
    Test axes transformations with autodiff, case b, with broadcast applied
    to the same tensor
    """
    with ExecutorFactory() as ex:
        axes = ng.make_axes([ng.make_axis(3), ng.make_axis(1)])

        w = ng.variable(axes, initial_value=np.ones((3, 1)))
        l = ng.broadcast(w, axes)
        r = ng.broadcast(w, axes)
        result = ng.add(l, r)
        result = ng.cast_axes(result, axes)
        cost = ng.sum(result, reduction_axes=axes)
        grad = ng.deriv(cost, w)

        grad_comp = ex.executor(grad)
        cost_comp = ex.executor(cost)

        assert cost_comp() == 6.0
        assert np.array_equal(grad_comp(), np.ones((3, 1)) * 2.)
def test_idempotent_axes_a():
    """
    Test axes transformations with autodiff, case a, reference test
    """
    with ExecutorFactory() as ex:
        axes = ng.make_axes([ng.make_axis(3), ng.make_axis(1)])

        w = ng.variable(axes, initial_value=np.ones((3, 1)))
        result = w + w
        result = ng.cast_axes(result, axes)
        cost = ng.sum(result, reduction_axes=axes)
        grad = ng.deriv(cost, w)

        grad_comp = ex.executor(grad)
        cost_comp = ex.executor(cost)
        cost_comp_val = cost_comp()
        grad_comp_val = grad_comp()
        grad_comp_np = np.ones((3, 1)) * 2.

        assert cost_comp_val == 6.0
        assert np.array_equal(grad_comp_val, grad_comp_np)
def test_rnn_deriv_ref(sequence_length, input_size, hidden_size, batch_size,
                       return_sequence, weight_initializer, bias_initializer, init_state):

    assert batch_size == 1, "the recurrent reference implementation only supports batch size 1"
    assert return_sequence is True, "the reference rnn only supports sequences for deriv"

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size, sequence_length, batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(input_placeholder, hidden_size,
                                                                weight_initializer,
                                                                bias_initializer,
                                                                init_state)

    # Compute reference numpy RNN
    rnn_ref = RefRecurrent(input_size, hidden_size, return_sequence=return_sequence)
    rnn_ref.set_weights(W_in, W_rec, b.reshape(rnn_ref.bh.shape))

    # Prepare deltas for gradient check
    output_shape = (hidden_size, sequence_length, batch_size)

    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    dW_in, dW_rec, db = rnn_ref.lossFun(input_value.transpose([1, 0, 2]),
                                        deltas.copy().transpose([1, 0, 2]),
                                        init_states=init_state_value)[:3]

    # Generate ngraph RNN
    rnn_ng = RNNCell(hidden_size, init=W_in, init_h2h=W_rec, activation=Tanh(),
                     reset_cells=True)

    # fprop ngraph RNN
    num_steps = input_placeholder.axes.recurrent_axis().length
    init_states = {'h': init_state} if init_state is not None else init_state
    out_ng = unroll(rnn_ng, num_steps, input_placeholder, init_states=init_states,
                    return_sequence=return_sequence)

    deltas_constant = ng.constant(deltas, axes=out_ng.axes)
    params = [(rnn_ng.i2h.linear.W, W_in),
              (rnn_ng.h2h.W, W_rec),
              (rnn_ng.i2h.bias.W, b)]

    with ExecutorFactory() as ex:
        # Create derivative computations and execute
        param_updates = list()
        for px, _ in params:
            update = ng.deriv(out_ng, px, error=deltas_constant)
            if init_state is not None:
                param_updates.append(ex.executor(update, input_placeholder, init_state))
            else:
                param_updates.append(ex.executor(update, input_placeholder))

        for update_fun, ref_val in zip(param_updates, [dW_in, dW_rec, db]):
            if init_state is not None:
                grad_neon = update_fun(input_value, init_state_value)
            else:
                grad_neon = update_fun(input_value)

            ng.testing.assert_allclose(grad_neon, ref_val.squeeze(),
                                       rtol=bprop_rtol, atol=bprop_atol)
def derivative(self, f, px, *parameters):
    """
    Full derivative of f wrt placeholder px

    Arguments:
        f: TODO
        px: TODO
        parameters: TODO

    Returns:

    """
    fshape = f.axes.lengths
    xshape = px.axes.lengths

    # print "============="
    # for op in Op.ordered_ops([dfdx]):
    #     print '-----'
    #     print op, op.axes
    #     print op.args
    #     print '------'
    # print "============="

    if len(fshape) == 0:
        return self.transformer.computation(ng.deriv(f, px), px, *parameters)
    else:
        initial_adjoint = ng.placeholder(f.axes).named('adj')
        adjoint = np.zeros(fshape, dtype=f.dtype)
        dfdx = ng.deriv(f, px, error=initial_adjoint)
        comp = self.transformer.computation(dfdx, initial_adjoint, px, *parameters)

        def helper(x, *args):
            # Build the full Jacobian one output element at a time by seeding a
            # unit adjoint at each position of f and backpropagating.
            dfdxshape = list(fshape)
            dfdxshape.extend(xshape)
            npdfdx = np.empty(dfdxshape, dtype=x.dtype)

            dindex = [0 for _ in fshape]
            dindex.extend([slice(None) for _ in xshape])

            idxiter = np.nditer(adjoint, flags=['multi_index'], op_flags=['readwrite'])
            for dfdxiter in idxiter:
                dfdxiter[...] = 1
                df = comp(adjoint, x, *args)
                if is_flex_transformer(comp.transformer):
                    reset_flex_entries(comp)
                # import pytest; pytest.set_trace()
                # with open("code_sum.py", "w") as f:
                #     f.write(comp.transformer.code.code)
                dindex[0:len(fshape)] = idxiter.multi_index
                npdfdx[tuple(dindex)] = df
                dfdxiter[...] = 0

            return npdfdx

        return helper
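# Hedged usage sketch for derivative() above: compare the symbolic full Jacobian against a
# finite-difference estimate. The companion method name `numeric_derivative` (and its
# step-size argument) is assumed to exist on the same executor factory; the toy function
# f = x * x is illustrative, not taken from this file.
def sketch_derivative_check():
    N = ng.make_axis(length=5, name='N')
    x = ng.placeholder([N])
    f = x * x  # elementwise square, so the Jacobian is diag(2 * x)

    with ExecutorFactory() as ex:
        sym_jac_fun = ex.derivative(f, x)                # symbolic Jacobian via ng.deriv
        num_jac_fun = ex.numeric_derivative(f, x, 1e-3)  # finite-difference Jacobian (assumed helper)

        x_val = np.random.rand(N.length).astype(np.float32)
        ng.testing.assert_allclose(sym_jac_fun(x_val), num_jac_fun(x_val), rtol=1e-2)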
def test_recurrent_batchnorm_bprop(RNN, recurrent_input, output_size, bn_params):
    """Compare bprop of a gated RNN with batch norm to numpy batch norm followed by an RNN without"""

    helper = RNNHelper(recurrent_input, output_size, RNN, bn_params)

    # Get rnn + batch norm bprop graph
    fprop = helper.rnn(recurrent_input)
    bprop_vars = [recurrent_input, helper.gamma, helper.beta]

    # Get bprop graph
    delta_placeholder = ng.placeholder(fprop.axes)
    bprops = [ng.deriv(fprop, var, delta_placeholder) for var in bprop_vars]

    # Get reference graphs
    reference_fprop = helper.reference_rnn(helper.reference_input)

    # Handle the case where we have gates in the RNN object
    bprop_vars = [helper.reference_input]
    if helper.has_gates:
        bprop_vars.append(helper.get_ancestor_op(reference_fprop))

    reference_delta_placeholder = ng.placeholder(reference_fprop.axes)
    reference_bprop = [ng.deriv(reference_fprop, var, reference_delta_placeholder)
                       for var in bprop_vars]

    # Begin execution
    with ExecutorFactory() as ex:
        bprop_function = ex.executor(bprops, recurrent_input, delta_placeholder)
        reference_function = ex.executor(reference_bprop, helper.reference_input,
                                         reference_delta_placeholder)

        # Create data
        input_value = rng.uniform(0, 1, recurrent_input.axes)
        delta = rng.uniform(-.1, .1, fprop.axes)

        # Compute reference weighted input
        weighted_input = np.dot(helper.W_in, input_value.swapaxes(0, 1))

        # Set the reduction axes used for reference
        bn_params['axis'] = (1, 2)

        # Get reference batch normed input
        batch_norm_reference = BatchNormReference(weighted_input, **bn_params)
        normed_input = batch_norm_reference.fprop[0]

        # Reference backprop through RNN
        reference_result = reference_function(normed_input, delta)
        # This is because of a HETR bug where return collections aren't handled properly
        if isinstance(reference_result, tuple):
            rnn_delta = reference_result[0]
        else:
            rnn_delta = reference_result

        # Reference backprop through BN
        dx_ref, dgamma_ref, dbeta_ref = batch_norm_reference.bprop(rnn_delta)

        # Backprop through reference batch norm for a single gate
        if helper.has_gates:
            rnn_gate_delta = reference_result[1]
            _, dgamma_ref, dbeta_ref = batch_norm_reference.bprop(rnn_gate_delta)

        # Backprop through weighted input
        dx_ref = np.dot(helper.W_in.T, dx_ref.swapaxes(0, 1))

        # Compute ngraph bprop
        dx, dgamma, dbeta = bprop_function(input_value, delta)

        ng.testing.assert_allclose(dx, dx_ref, rtol=rtol, atol=recurrent_atol)
        ng.testing.assert_allclose(dgamma, dgamma_ref, rtol=rtol, atol=recurrent_atol)
        ng.testing.assert_allclose(dbeta, dbeta_ref, rtol=rtol, atol=recurrent_atol)
def test_seq2seq_deriv_ref(batch_size, sequence_length_enc, sequence_length_dec,
                           input_size, hidden_size,
                           weight_initializer, bias_initializer):

    # TODO: are these assumptions true?
    assert batch_size == 1, "the seq2seq reference implementation only supports batch size 1"

    # Get input placeholders and numpy arrays
    input_placeholder_enc, input_value_enc = \
        make_placeholder(input_size, sequence_length_enc, batch_size)
    input_placeholder_dec, input_value_dec = \
        make_placeholder(input_size, sequence_length_dec, batch_size)

    # Construct encoder weights
    W_in_enc, W_rec_enc, b_enc, _, _ = make_weights(input_placeholder_enc, hidden_size,
                                                    weight_initializer, bias_initializer,
                                                    init_state=False)

    # Construct decoder weights
    W_in_dec, W_rec_dec, b_dec, _, _ = make_weights(input_placeholder_dec, hidden_size,
                                                    weight_initializer, bias_initializer,
                                                    init_state=False)

    # Reference numpy seq2seq
    seq2seq_ref = RefSeq2Seq(input_size, hidden_size, decoder_return_sequence=True)
    seq2seq_ref.set_weights(W_in_enc, W_rec_enc, b_enc.reshape(seq2seq_ref.bh_enc.shape),
                            W_in_dec, W_rec_dec, b_dec.reshape(seq2seq_ref.bh_dec.shape))

    # Prepare deltas for gradient check
    output_shape = (hidden_size, sequence_length_dec, batch_size)

    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    dW_in_enc, dW_rec_enc, db_enc, dW_in_dec, dW_rec_dec, db_dec, encoding_ref, hs_return_dec = \
        seq2seq_ref.lossFun(input_value_enc.transpose([1, 0, 2]),
                            input_value_dec.transpose([1, 0, 2]),
                            deltas.copy().transpose([1, 0, 2]))

    # Generate ngraph Seq2Seq
    rnn_enc_ng = Recurrent(hidden_size, init=W_in_enc, init_inner=W_rec_enc,
                           activation=Tanh(), reset_cells=True, return_sequence=False)
    rnn_dec_ng = Recurrent(hidden_size, init=W_in_dec, init_inner=W_rec_dec,
                           activation=Tanh(), reset_cells=True, return_sequence=True)

    # ngraph fprop graph
    encoding_ng = rnn_enc_ng(input_placeholder_enc, init_state=None)
    output_ng = rnn_dec_ng(input_placeholder_dec, init_state=encoding_ng)

    deltas_constant = ng.constant(deltas, axes=output_ng.axes)

    params = [(rnn_dec_ng.b, db_dec),
              (rnn_dec_ng.W_input, dW_in_dec),
              (rnn_dec_ng.W_recur, dW_rec_dec),
              (rnn_enc_ng.b, db_enc),
              (rnn_enc_ng.W_input, dW_in_enc),
              (rnn_enc_ng.W_recur, dW_rec_enc)]

    with ExecutorFactory() as ex:
        # fprop computations
        fprop_fun = ex.executor([encoding_ng, output_ng],
                                input_placeholder_enc, input_placeholder_dec)

        # gradient computations
        update_funs = []
        for px, _ in params:
            update = ng.deriv(output_ng, px, error=deltas_constant)
            update_funs.append(ex.executor(update, input_placeholder_enc, input_placeholder_dec))

        # check forward pass
        encoding, output = fprop_fun(input_value_enc, input_value_dec)
        ng.testing.assert_allclose(encoding, encoding_ref,
                                   rtol=1e-5, atol=1e-5, transformer_overwrite=False)
        ng.testing.assert_allclose(np.squeeze(output), np.squeeze(hs_return_dec),
                                   rtol=1e-5, atol=1e-5, transformer_overwrite=False)

        # check gradient computations
        for update_fun, (_, deriv_ref_val) in zip(update_funs, params):
            grad_neon = update_fun(input_value_enc, input_value_dec)
            ng.testing.assert_allclose(grad_neon, deriv_ref_val.squeeze(), rtol=1e-5, atol=1e-4)