def test_negative_log_likelihood_vs_softmax_cross_entropy(
    data: st.DataObject, labels_as_tensor: bool
):
    s = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=10, min_dims=2, max_dims=2),
            dtype=float,
            elements=st.floats(-100, 100),
        )
    )
    y_true = data.draw(
        hnp.arrays(
            shape=(s.shape[0],),
            dtype=hnp.integer_dtypes(),
            elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
        ).map(Tensor if labels_as_tensor else lambda x: x)
    )
    scores = Tensor(s)
    nll = negative_log_likelihood(mg.log(mg.nnet.softmax(scores)), y_true)
    nll.backward()

    cross_entropy_scores = Tensor(s)
    ce = softmax_crossentropy(cross_entropy_scores, y_true)
    ce.backward()

    assert_allclose(nll.data, ce.data, atol=1e-5, rtol=1e-5)
    assert_allclose(scores.grad, cross_entropy_scores.grad, atol=1e-5, rtol=1e-5)

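# Note on the identity exercised above: softmax cross-entropy is, by
# definition, the negative log-likelihood of the log-softmax scores,
#     L = -(1/N) * sum_i log(softmax(s)[i, y_i]),
# so the two code paths must agree in both their forward values and in the
# gradients they backpropagate to the scores.
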
def test_softmax_crossentropy(data: st.DataObject, labels_as_tensor: bool):
    s = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=10, min_dims=2, max_dims=2),
            dtype=float,
            elements=st.floats(-100, 100),
        )
    )
    y_true = data.draw(
        hnp.arrays(
            shape=(s.shape[0],),
            dtype=hnp.integer_dtypes(),
            elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
        ).map(Tensor if labels_as_tensor else lambda x: x)
    )
    scores = Tensor(s)
    softmax_cross = softmax_crossentropy(scores, y_true, constant=False)
    softmax_cross.backward()

    mygrad_scores = Tensor(s)
    probs = softmax(mygrad_scores)

    correct_labels = (range(len(y_true)), y_true.data if labels_as_tensor else y_true)
    truth = np.zeros(mygrad_scores.shape)
    truth[correct_labels] = 1

    mygrad_cross = (-1 / s.shape[0]) * (log(probs) * truth).sum()
    mygrad_cross.backward()

    assert_allclose(softmax_cross.data, mygrad_cross.data, atol=1e-5, rtol=1e-5)
    assert_allclose(scores.grad, mygrad_scores.grad, atol=1e-5, rtol=1e-5)

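# For reference, a minimal pure-numpy sketch of the loss computed above; this
# helper and its name are illustrative only (not mygrad API), and it assumes
# the module-level `np` import:
def _softmax_crossentropy_sketch(scores, labels):
    """Mean negative log-softmax score of the true classes."""
    shifted = scores - scores.max(axis=1, keepdims=True)  # for numerical stability
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -log_probs[np.arange(len(labels)), labels].mean()
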
def test_weighted_negative_log_likelihood_vs_softmax_cross_entropy(
    data: st.DataObject, labels_as_tensor: bool
):
    s = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(min_side=1, max_side=10, min_dims=2, max_dims=2),
            dtype=float,
            elements=st.floats(-100, 100),
        )
    )
    y_true = data.draw(
        hnp.arrays(
            shape=(s.shape[0],),
            dtype=hnp.integer_dtypes(),
            elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
        ).map(Tensor if labels_as_tensor else lambda x: x)
    )
    weights = data.draw(
        hnp.arrays(
            shape=(s.shape[1],),
            dtype=float,
            elements=st.floats(1e-8, 100),
        )
    )
    scores = Tensor(s)
    weights = Tensor(weights)

    for score, y in zip(scores, y_true):
        score = mg.log(mg.nnet.softmax(score.reshape(1, -1)))
        y = y.reshape(-1)
        nll = negative_log_likelihood(score, y)
        weighted_nll = negative_log_likelihood(score, y, weights=weights)
        assert np.isclose(weighted_nll.data, weights[y.data].data * nll.data)

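# Class weights simply rescale each sample's loss: for a sample with true
# class y_i, weighted_nll_i = weights[y_i] * nll_i, which is exactly the
# per-sample relationship asserted in the loop above.
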
def test_transpose_method():
    dat = np.arange(24).reshape(2, 3, 4)

    for axes in permutations(range(3)):
        # passing tuple of integers
        x = Tensor(dat)
        f = x.transpose(axes)
        f.backward(dat.transpose(axes))
        assert_allclose(f.data, dat.transpose(axes))
        assert_allclose(x.grad, dat)

        # passing integers directly
        x = Tensor(dat)
        f = x.transpose(*axes)
        f.backward(dat.transpose(axes))
        assert_allclose(f.data, dat.transpose(axes), err_msg="{}".format(axes))
        assert_allclose(x.grad, dat, err_msg="{}".format(axes))

    # passing no axes reverses the axis order by default
    x = Tensor(dat)
    f = x.transpose()
    f.backward(dat.transpose())
    assert_allclose(f.data, dat.transpose())
    assert_allclose(x.grad, dat)

    # check that constant=True works
    x = Tensor(dat)
    f = x.transpose(constant=True)
    assert f.constant and not x.constant

    f = x.transpose(1, 0, 2, constant=True)
    assert f.constant and not x.constant

def test_softmax_crossentropy(data):
    """ Test the built-in implementation of softmax cross-entropy
    against the pure pygrad version"""
    s = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=10, min_dims=2, max_dims=2),
            dtype=float,
            elements=st.floats(-100, 100),
        )
    )
    l = data.draw(
        hnp.arrays(
            shape=(s.shape[0],),
            dtype=hnp.integer_dtypes(),
            elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
        )
    )
    scores = Tensor(s)
    softmax_cross = softmax_crossentropy(scores, l, constant=False)
    softmax_cross.backward()

    pygrad_scores = Tensor(s)
    probs = softmax(pygrad_scores)

    correct_labels = (range(len(l)), l)
    truth = np.zeros(pygrad_scores.shape)
    truth[correct_labels] = 1

    pygrad_cross = (-1 / s.shape[0]) * (log(probs) * truth).sum()
    pygrad_cross.backward()

    assert_allclose(softmax_cross.data, pygrad_cross.data, atol=1e-5, rtol=1e-5)
    assert_allclose(scores.grad, pygrad_scores.grad, atol=1e-5, rtol=1e-5)

def test_maxpool():
    # case 1
    x = np.zeros((1, 1, 2, 2))
    pool = 2
    stride = 1

    a = Tensor(x)
    f = max_pool(a, pool, stride)
    assert np.all(f.data == np.zeros((1, 1, 1, 1)))

    f.backward()
    assert np.all(a.grad == np.array([1, 0, 0, 0]).reshape(1, 1, 2, 2))

    # case 2
    x = np.arange(2 * 4 * 3).reshape(1, 2, 4, 3)
    x *= x[..., ::-1, ::-1]
    x[0, 0, 0, 1] = 400
    out = np.array([[[[400, 400], [30, 28]], [[304, 306], [306, 304]]]])

    pool = 2
    stride = [2, 1]

    a = Tensor(x)
    f = max_pool(a, pool, stride)
    assert np.all(f.data == out)

    f.backward(np.arange(1, 9).reshape(1, 2, 2, 2))
    da = np.array(
        [
            [
                [[0, 3, 0], [0, 0, 0], [3, 4, 0], [0, 0, 0]],
                [[0, 0, 0], [0, 5, 6], [7, 8, 0], [0, 0, 0]],
            ]
        ]
    )
    assert np.all(da == a.grad)

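# Case 1 above also pins down tie-breaking behavior: with an all-zero input,
# every element of the 2x2 window is a maximum, yet the asserted gradient is
# [1, 0, 0, 0]; the full upstream gradient is routed to a single winning
# element rather than being split evenly among the tied maxima.
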
def test_conv2d(data, mem, choice_1, choice_2, choice_3):
    num_filters = choice_1([1, 2, 3])
    c = choice_2([1, 2])

    # kernel size, padding, stride
    ws, pad, stride = choice_3([(1, 0, 4), (1, 0, 1), (3, 1, 2), (5, 0, 1)])

    dat = data.draw(
        hnp.arrays(shape=(2, c, 5, 5), dtype=float, elements=st.floats(1, 100))
    )
    w_dat = data.draw(
        hnp.arrays(
            shape=(num_filters, c, ws, ws), dtype=float, elements=st.floats(1, 100)
        )
    )

    x = Tensor(dat)
    w = Tensor(w_dat)
    f = conv2d(x, w, stride, pad, memory_constrained=mem)

    b = np.zeros((w.shape[0],))
    out, cache = conv_forward_naive(dat, w_dat, b, {'stride': stride, 'pad': pad})
    assert np.allclose(f.data, out)

    dout = data.draw(
        hnp.arrays(shape=f.shape, dtype=float, elements=st.floats(-100, 100))
    )
    f.backward(dout)

    dx, dw, db = conv_backward_naive(dout, cache)
    assert np.allclose(x.grad, dx)
    assert np.allclose(w.grad, dw)

def test_minimum_bkwd(x, data):
    """ Check the backprop of `minimum` against the numerical gradient;
    draws where any element of x is (nearly) equal to its counterpart in y
    are excluded, since the derivative is ambiguous there"""
    y = data.draw(
        hnp.arrays(
            shape=broadcastable_shape(x.shape, max_dim=5),
            dtype=float,
            elements=st.floats(-10.0, 10.0),
        ),
        label="y",
    )
    assume(not np.any(np.isclose(x, y)))

    x_arr = Tensor(np.copy(x))
    y_arr = Tensor(np.copy(y))
    o = minimum(x_arr, y_arr)

    grad = data.draw(
        hnp.arrays(shape=o.shape, dtype=float, elements=st.floats(1, 10), unique=True),
        label="grad",
    )
    (o * grad).sum().backward()

    dx, dy = numerical_gradient_full(np.minimum, x, y, back_grad=grad, as_decimal=True)

    assert_allclose(x_arr.grad, dx)
    assert_allclose(y_arr.grad, dy)

def test_nonconstant_s0_raises(s0, dropout: float, out_constant: bool):
    T, N, C, D = 5, 1, 3, 2
    X = Tensor(np.random.rand(T, N, C))
    Wz, Wr, Wh = Tensor(np.random.rand(3, D, D))
    Uz, Ur, Uh = Tensor(np.random.rand(3, C, D))
    bz, br, bh = Tensor(np.random.rand(3, D))

    with does_not_raise() if (
        out_constant or s0 is None or isinstance(s0, np.ndarray) or s0.constant
    ) else pytest.raises(ValueError):
        gru(
            X,
            Uz,
            Wz,
            bz,
            Ur,
            Wr,
            br,
            Uh,
            Wh,
            bh,
            s0=s0,
            dropout=dropout,
            constant=out_constant,
        )

def test_divide_backward(data):
    a = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=3, max_dims=3),
            dtype=float,
            elements=st.floats(0.01, 100),
        )
    )
    b = data.draw(hnp.arrays(shape=a.shape, dtype=float, elements=st.floats(0.01, 100)))
    grad = data.draw(
        hnp.arrays(shape=a.shape, dtype=float, elements=st.floats(-100, 100))
    )

    x = Tensor(a)
    c = x / b
    c.backward(grad)
    assert np.allclose(x.grad, grad / b)

    x = Tensor(b)
    c = a / x
    c.backward(grad)
    assert np.allclose(x.grad, grad * -a / x.data ** 2)

    x = Tensor(a)
    c = divide(x, b)
    c.backward(grad)
    assert np.allclose(x.grad, grad / b)

    x = Tensor(b)
    c = divide(a, x)
    c.backward(grad)
    assert np.allclose(x.grad, grad * -a / x.data ** 2)

def test_setitem_sanity_check(x_constant, y_constant, data):
    """ Ensure proper setitem behavior for all combinations of
    constant/variable Tensors"""
    x = Tensor([1.0, 2.0, 3.0, 4.0], constant=x_constant)
    w = 4 * x

    as_tensor = data.draw(st.booleans()) if y_constant else True
    y = Tensor([1.0, 0.0], constant=y_constant) if as_tensor else np.array([1.0, 0.0])

    w[::2] = np.array([-1.0, -2.0]) * y
    assert_allclose(np.array((-1.0, 8.0, 0.0, 16.0)), w.data)
    w.sum().backward()

    assert isinstance(w, Tensor)
    assert_allclose(w.data, np.array([-1.0, 8.0, 0.0, 16.0]))
    assert w.constant is (x.constant and (not as_tensor or y.constant))

    if x.constant:
        assert x.grad is None
    else:
        assert_allclose(x.grad, np.array([0.0, 4.0, 0.0, 4.0]))

    if as_tensor:
        if y.constant:
            assert y.grad is None
        else:
            assert_allclose(y.grad, np.array([-1.0, -2.0]))

    w.null_gradients()
    assert x.grad is None, "null_gradients failed"

    if as_tensor:
        assert y.grad is None, "null_gradients failed"

def test_multiclass_hinge(data):
    """ Test the built-in implementation of multiclass hinge
    against the pure pygrad version"""
    s = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=10, min_dims=2, max_dims=2),
            dtype=float,
            elements=st.floats(-100, 100),
        )
    )
    l = data.draw(
        hnp.arrays(
            shape=(s.shape[0],),
            dtype=int,
            elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
        )
    )
    hinge_scores = Tensor(s)
    hinge_loss = multiclass_hinge(hinge_scores, l, constant=False)
    hinge_loss.backward()

    pygrad_scores = Tensor(s)
    correct_labels = (range(len(l)), l)
    correct_class_scores = pygrad_scores[correct_labels]  # Nx1

    Lij = pygrad_scores - correct_class_scores[:, np.newaxis] + 1.0  # NxC margins
    Lij[Lij <= 0] = 0
    Lij[correct_labels] = 0

    pygrad_loss = Lij.sum() / pygrad_scores.shape[0]
    pygrad_loss.backward()

    assert_allclose(hinge_loss.data, pygrad_loss.data)
    assert_allclose(pygrad_scores.grad, hinge_scores.grad)

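# For reference, a minimal pure-numpy sketch of the loss verified above; the
# helper is illustrative only (not mygrad API) and assumes the module-level
# `np` import. It computes L = (1/N) * sum_i sum_{j != y_i} max(0, s_ij - s_{i,y_i} + 1):
def _multiclass_hinge_sketch(scores, labels):
    n = scores.shape[0]
    correct = scores[np.arange(n), labels][:, np.newaxis]  # each row's true-class score
    margins = np.maximum(0.0, scores - correct + 1.0)      # hinge applied to every margin
    margins[np.arange(n), labels] = 0.0                    # the true class contributes no loss
    return margins.sum() / n
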
def test_squeeze(x, data):
    axes = data.draw(valid_axes(x.ndim), label="axes")
    x_arr = Tensor(np.copy(x))
    x_arr2 = Tensor(np.copy(x))

    def f(x):
        return np.squeeze(x, axes)

    try:
        numpy_out = np.squeeze(x, axes)
    except ValueError:
        with raises(ValueError):
            squeeze(x_arr, axes, constant=False)
        return

    o = squeeze(x_arr, axes, constant=False)
    o_method = x_arr2.squeeze(axes)
    assert_allclose(o.data, numpy_out)
    assert_allclose(o_method.data, numpy_out)

    grad = data.draw(
        hnp.arrays(shape=o.shape, dtype=float, elements=st.floats(1, 10), unique=True),
        label="grad",
    )
    o.backward(grad)
    o_method.backward(grad)

    (dx,) = numerical_gradient_full(f, x, back_grad=grad)
    assert_allclose(x_arr.grad, dx)
    assert_allclose(x_arr2.grad, dx)

def test_max_back(a, num_axes, keepdims):
    """ Test Tensor.max for arbitrary data, axis, and keepdim"""
    if num_axes == 0:
        axes = None
    else:
        axes = np.random.choice(
            range(0, a.ndim), size=min(num_axes, a.ndim), replace=False
        )
        axes = tuple(sorted(axes))

    # single global maximum
    if axes is None or axes == tuple(range(a.ndim)):
        index = tuple(np.random.choice(i.flat) for i in np.indices(a.shape))
        a[index] = a.max() + 1

        grad = np.zeros_like(a)
        grad[index] = 1

        a = Tensor(a)
        out = a.max(axis=axes, keepdims=keepdims)
        out.backward()

        assert np.allclose(grad, a.grad)
        return None

    # explicitly place maxima within tensor
    static_axes = tuple(sorted(set(range(a.ndim)) - set(axes)))
    static_shape = tuple(a.shape[i] for i in static_axes)
    red_shapes = tuple(a.shape[i] for i in axes)

    sorter = np.argsort(static_axes + axes)

    # generate indices to span static axes
    static_indices = tuple(i for i in np.indices(static_shape))

    # generate random index-runs along reduction axes
    choose_indices = tuple(
        np.random.choice(range(i), size=static_indices[0].shape) for i in red_shapes
    )

    # create index tuple that selects random runs along the reduction axes
    static_indices += choose_indices
    indices = []
    for i in sorter:
        indices.append(static_indices[i])
    indices = tuple(indices)

    # place extrema
    a[indices] = a.max() + np.random.rand(*indices[0].shape)

    a = Tensor(a)
    out = a.max(axis=axes)

    # break degeneracy amongst grad values
    tmp = np.arange(1, out.data.size + 1).reshape(out.shape)
    out2 = out * tmp
    out2.backward()

    grad = np.zeros_like(a.data)
    grad[indices] = np.arange(1, out.data.size + 1).reshape(out.shape)

    assert np.allclose(grad, a.grad)

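# The gradient of max is an indicator: the upstream gradient flows only to the
# entries that achieved the maximum over the reduced axes, and is zero
# elsewhere. This is why the test plants unique maxima at known indices and
# checks that exactly those positions receive the (de-degenerated) gradients.
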
def test_no_mutate():
    """ Ensure that setitem doesn't mutate the version of a (non-constant)
    tensor that already participated in an operation"""
    x = Tensor([1.0, 2.0])
    y = Tensor([3.0, 4.0])
    x + y
    y[:] = 0

    y_old = x._ops.pop().variables[-1]  # version of y that participated in x + y
    assert_allclose(np.array([3.0, 4.0]), y_old.data)
    assert_allclose(np.array([0.0, 0.0]), y.data)

def test_setitem_basic_index(x: np.ndarray, data: st.DataObject):
    """ index conforms strictly to basic indexing """
    index = data.draw(basic_indices(x.shape), label="index")
    o = np.asarray(x[index])
    note("x[index]: {}".format(o))

    y = data.draw(
        (
            hnp.arrays(
                # Permit shapes that are broadcast-compatible with x[index].
                # The only excess dimensions permitted in this shape are
                # leading singletons
                shape=broadcastable_shapes(o.shape).map(
                    lambda _x: tuple(
                        1 if (len(_x) - n) > o.ndim else s for n, s in enumerate(_x)
                    )
                ),
                dtype=float,
                elements=st.floats(-10.0, 10.0),
            )
            if o.shape and o.size
            else st.floats(-10.0, 10.0).map(lambda _x: np.array(_x))
        ),
        label="y",
    )

    x0 = np.copy(x)
    y0 = np.copy(y)

    x_arr = Tensor(np.copy(x))
    y_arr = Tensor(np.copy(y))
    x1_arr = x_arr[:]

    try:
        x0[index] = y0  # don't permit invalid set-items
    except ValueError:
        assume(False)
        return

    grad = data.draw(
        hnp.arrays(shape=x.shape, dtype=float, elements=st.floats(1, 10), unique=True),
        label="grad",
    )

    x1_arr[index] = y_arr
    (x1_arr * grad).sum().backward()
    assert_allclose(x1_arr.data, x0)
    assert_allclose(y_arr.data, y0)

    dx, dy = numerical_gradient_full(
        setitem, x, y, back_grad=grad, kwargs=dict(index=index)
    )

    assert_allclose(x_arr.grad, dx)
    assert_allclose(y_arr.grad, dy)

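# Backprop semantics checked here and in the setitem tests that follow: for
# x1 = x[:]; x1[index] = y, the gradient w.r.t. x is the upstream gradient
# with zeros written into `index`, while the gradient w.r.t. y is the upstream
# gradient gathered from `index` (summed over any axes along which y was
# broadcast). Both are verified against `numerical_gradient_full`.
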
def test_practical_scalar_only(constant, operation):
    a = Tensor([1, 2, 3], constant=constant)
    b = Tensor(3, constant=constant)
    out = getattr(a, "__" + operation + "__")(b)

    if constant:
        out.backward()
    else:
        with raises(InvalidBackprop):
            out.backward()

def test_subtract_fwd(data):
    a = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=3, max_dims=3),
            dtype=float,
            elements=st.floats(-100, 100),
        )
    )
    b = data.draw(hnp.arrays(shape=a.shape, dtype=float, elements=st.floats(-100, 100)))

    result = a - b
    assert np.allclose((Tensor(a) - b).data, result)
    assert np.allclose((a - Tensor(b)).data, result)
    assert np.allclose((Tensor(a) - Tensor(b)).data, result)

def test_setitem_bool_axes_index(x, data):
    """ index consists of boolean arrays specified for each axis """
    index = data.draw(
        st.tuples(
            hnp.arrays(shape=(3,), dtype=bool), hnp.arrays(shape=(3,), dtype=bool)
        )
    )
    try:
        o = np.asarray(x[index])
    except IndexError:
        return None

    y = data.draw(
        hnp.arrays(
            shape=broadcastable_shapes(o.shape, max_dims=o.ndim, max_side=max(o.shape)),
            dtype=float,
            elements=st.floats(-10.0, 10.0),
        )
        if o.shape and o.size
        else st.floats(-10.0, 10.0).map(lambda _x: np.array(_x)),
        label="y",
    )

    grad = data.draw(
        hnp.arrays(shape=x.shape, dtype=float, elements=st.floats(1, 10), unique=True),
        label="grad",
    )

    x0 = np.copy(x)
    y0 = np.copy(y)

    try:
        x0[index] = y0  # don't permit invalid set-items
    except ValueError:
        assume(False)
        return

    x_arr = Tensor(np.copy(x))
    y_arr = Tensor(np.copy(y))
    x1_arr = x_arr[:]

    x1_arr[index] = y_arr
    (x1_arr * grad).sum().backward()
    assert_allclose(x1_arr.data, x0)
    assert_allclose(y_arr.data, y0)

    dx, dy = numerical_gradient_full(
        setitem, x, y, back_grad=grad, kwargs=dict(index=index)
    )

    assert_allclose(x_arr.grad, dx)
    assert_allclose(y_arr.grad, dy)

def test_setitem_sanity_check2():
    x = Tensor([1.0, 2.0, 3.0, 4.0])
    y = Tensor([-1.0, -2.0, -3.0, -4.0])
    z = x * y
    y[:] = 0
    z.backward()

    assert_allclose(np.array([-1.0, -2.0, -3.0, -4.0]), x.grad)
    assert_allclose(np.array([0.0, 0.0, 0.0, 0.0]), y.data)
    assert y.grad is None

def test_recurrent(data, choice):
    X = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=3, min_dims=3, max_dims=3),
            dtype=float,
            elements=st.floats(-10, 10),
        )
    )
    T, N, C = X.shape
    D = choice(list(range(1, 5)))

    s0 = data.draw(
        hnp.arrays(shape=(N, D), dtype=float, elements=st.floats(0.0, 0.0))
    )
    W = data.draw(
        hnp.arrays(shape=(D, D), dtype=float, elements=st.floats(-10.0, 10.0))
    )
    U = data.draw(
        hnp.arrays(shape=(C, D), dtype=float, elements=st.floats(-10.0, 10.0))
    )
    V = data.draw(
        hnp.arrays(shape=(D, C), dtype=float, elements=st.floats(-10.0, 10.0))
    )

    W = Tensor(W)
    W2 = W.__copy__()

    U = Tensor(U)
    U2 = U.__copy__()

    V = Tensor(V)
    V2 = V.__copy__()

    s0 = Tensor(s0)
    s2 = s0.__copy__()

    rec = RecurrentUnit(U, W, V, T)
    if X.shape[0] > 1:
        ls = add_sequence(*(dense(i, V).sum() for i in rec(X)[1:]))
    else:
        ls = dense(rec(X)[1], V).sum()
    ls.backward()

    s = s2
    ls2 = 0
    for x in X:
        s = tanh(dense(x, U2) + dense(s, W2))
        o = dense(s, V2)
        ls2 += o.sum()
    ls2.backward()

    assert np.allclose(W.data, W2.data, atol=1e-3)
    assert np.allclose(W.grad, W2.grad, atol=1e-3)
    assert np.allclose(U.data, U2.data, atol=1e-3)
    assert np.allclose(U.grad, U2.grad, atol=1e-3)
    assert np.allclose(V.data, V2.data, atol=1e-3)
    assert np.allclose(V.grad, V2.grad, atol=1e-3)

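# Both code paths above realize the same vanilla-RNN recurrence,
#     s_t = tanh(x_t @ U + s_{t-1} @ W),    o_t = s_t @ V,
# once through the fused RecurrentUnit and once through an explicit per-step
# loop, so the parameters and their accumulated gradients must agree to
# within the stated tolerance.
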
def test_divide_fwd(data):
    a = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=3, max_dims=3),
            dtype=float,
            elements=st.floats(0.01, 100),
        )
    )
    b = data.draw(hnp.arrays(shape=a.shape, dtype=float, elements=st.floats(0.01, 100)))

    result = a / b
    assert np.allclose((Tensor(a) / b).data, result)
    assert np.allclose((a / Tensor(b)).data, result)
    assert np.allclose(divide(Tensor(a), b).data, result)
    assert np.allclose(divide(a, Tensor(b)).data, result)

def test_minimum_bkwd_equal():
    """ regression test for documented behavior of minimum/maximum
    where x == y"""
    x = Tensor([1.0, 0.0, 2.0])
    y = Tensor([2.0, 0.0, 1.0])

    o = minimum(x, y)
    o.backward()

    assert_allclose(x.grad, [1.0, 0.0, 0.0])
    assert_allclose(y.grad, [0.0, 0.0, 1.0])
    o.null_gradients()

def test_constant_arg():
    """ test that the `constant` arg works as intended in Tensor._op"""
    a = Tensor(1)
    b = Tensor(1)

    o_true = dummy(a, b, constant=True)
    assert o_true.constant is True
    assert a._ops == set()
    assert b._ops == set()

    o_false = dummy(a, b, constant=False)
    assert o_false.constant is False
    assert a._ops == {o_false.creator}
    assert b._ops == {o_false.creator}

def test_subtract_broadcast():
    a = Tensor([3])
    b = Tensor([1, 2, 3])
    c = Tensor(2)

    f = a - b - c
    g = f.sum(keepdims=False)
    g.backward()

    assert np.allclose(f.data, a.data - b.data - c.data)
    assert a.grad.shape == (1,)
    assert np.allclose(a.grad, np.array([3]))

    assert b.grad.shape == (3,)
    assert np.allclose(b.grad, np.array([-1, -1, -1]))

    assert c.grad.ndim == 0
    assert np.allclose(c.grad, np.array(-3))

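# The expected gradients follow the broadcasting rule: the gradient of a
# broadcast operand is the upstream gradient summed over the axes along which
# it was stretched. With g = sum(a - b - c): a (shape (1,)) collects +1 from
# three broadcast positions, b receives -1 elementwise, and the scalar c
# collects -1 three times, giving [3], [-1, -1, -1], and -3.
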
def test_setitem_mixed_index(x, data):
    """ index mixes basic and advanced int-array indexing"""
    index = (slice(1, 2), [1, 2])
    o = np.asarray(x[index])

    y = data.draw(
        hnp.arrays(
            shape=broadcastable_shapes(o.shape, max_dims=o.ndim),
            dtype=float,
            elements=st.floats(-10.0, 10.0),
        ),
        label="y",
    )

    grad = data.draw(
        hnp.arrays(shape=x.shape, dtype=float, elements=st.floats(1, 10), unique=True),
        label="grad",
    )

    x0 = np.copy(x)
    y0 = np.copy(y)

    try:
        x0[index] = y0  # don't permit invalid set-items
    except ValueError:
        assume(False)
        return

    x_arr = Tensor(np.copy(x))
    y_arr = Tensor(np.copy(y))
    x1_arr = x_arr[:]

    x1_arr[index] = y_arr
    (x1_arr * grad).sum().backward()
    assert_allclose(x1_arr.data, x0)
    assert_allclose(y_arr.data, y0)

    dx, dy = numerical_gradient_full(
        setitem, x, y, back_grad=grad, kwargs=dict(index=index)
    )

    assert_allclose(x_arr.grad, dx)
    assert_allclose(y_arr.grad, dy)

def test_identical_inputs():
    v1 = Tensor(2.0, constant=False)
    v2 = v1 + v1
    v3 = v2 + v2
    v3.backward(1.0)  # v3 = 4 * v1
    assert v3.data.item() == 8.0
    assert v1.grad.item() == 4.0

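# When a tensor is fed to an op multiple times, its gradients accumulate:
# dv2/dv1 = 1 + 1 = 2 and dv3/dv2 = 2, so the chain rule gives
# dv3/dv1 = 2 * 2 = 4, the value asserted above.
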
def test_scalar_only_op(a_const, a_scalar_only, b_const, b_scalar_only):
    """ op produces a scalar-only result unless the result is a constant. """
    a = Tensor(0, constant=a_const, _scalar_only=a_scalar_only)
    b = Tensor(0, constant=b_const, _scalar_only=b_scalar_only)
    out = Tensor._op(ScalarOnlyOp, a, b)

    scalar_only = not out.constant
    assert scalar_only is out.scalar_only

    # check out.backward()
    if scalar_only:
        with raises(Exception):
            out.backward()
    else:
        out.backward()  # a, b, out are constant (nothing is computed)

def test_setitem_multiple_input():
    """ Ensures proper backprop through a computational graph in which
    the tensor being set on serves as multiple inputs to a single operation.

    Ensures that null-gradients and clear-graph work properly.
    """
    from mygrad import add_sequence

    x = Tensor([1.0])
    y = x + 0

    assert_array_equal(y.data, np.array([1.0]))

    o = add_sequence(y, y, y)
    y[0] = 4

    assert_array_equal(y.data, np.array([4.0]))

    f = o * y  # 3 * 4
    f.backward()

    assert_array_equal(o.data, np.array([3.0]))
    assert_array_equal(f.data, np.array([12.0]))

    assert_array_equal(x.grad, np.array([12.0]))
    assert_array_equal(o.grad, np.array([4.0]))
    assert_array_equal(y.grad, np.array([3.0]))

    f.null_gradients()
    assert x.grad is None and not x._ops and not x._accum_ops
    assert y.grad is None and not y._ops and not y._accum_ops
    assert o.grad is None and not o._ops and not o._accum_ops
    assert f.grad is None and not f._ops and not f._accum_ops

def test_max_fwd(a, num_axes, keepdims):
    a = Tensor(a)
    if num_axes == 0:
        axes = None
    else:
        axes = tuple(
            np.random.choice(
                range(0, a.ndim), size=min(num_axes, a.ndim), replace=False
            )
        )

    np_out = a.data.max(axis=axes, keepdims=keepdims)
    pygrad_out = a.max(axis=axes, keepdims=keepdims).data
    if pygrad_out.ndim == 0:
        pygrad_out = pygrad_out.item()  # np.asscalar was removed from numpy
    assert np.allclose(np_out, pygrad_out)

    if num_axes:
        neg_axes = tuple(
            np.random.choice(
                range(-a.ndim, 0), size=min(num_axes, a.ndim), replace=False
            )
        )
        np_out = a.data.max(axis=neg_axes, keepdims=keepdims)
        pygrad_out = a.max(axis=neg_axes, keepdims=keepdims).data
        if pygrad_out.ndim == 0:
            pygrad_out = pygrad_out.item()
        assert np.allclose(np_out, pygrad_out)