def test_gradients():
    """Check the gradients of element-wise division for both operands.

    The quotient is seeded with the upstream gradient ``[1, 2, 3]``. The
    expected values are ``g / right`` for the numerator and
    ``-g * left / right ** 2`` for the denominator.
    """
    numerator = Tensor.from_builtin([2, 3, 4])
    denominator = Tensor.from_builtin([3, 2, 1])
    quotient = numerator / denominator

    seed = Gradient(tensor=quotient, gradient=np.array([1, 2, 3]))
    traced = Gradients._trace(seed)

    expected_numerator = [1 / 3, 2 / 2, 3 / 1]
    assert np.allclose(traced[numerator], expected_numerator)

    expected_denominator = [-2 / 9, -6 / 4, -12 / 1]
    assert np.allclose(traced[denominator], expected_denominator)
def test_gradients():
    """Check the gradients of element-wise subtraction for both operands.

    With the upstream gradient ``[1, 2, 3]`` the minuend is expected to
    receive it unchanged and the subtrahend to receive its negation.
    """
    minuend = Tensor.from_builtin([2, 3, 4])
    subtrahend = Tensor.from_builtin([3, 2, 1])
    difference = minuend - subtrahend

    seed = Gradient(tensor=difference, gradient=np.array([1, 2, 3]))
    traced = Gradients._trace(seed)

    assert np.all(traced[minuend] == [1, 2, 3])
    assert np.all(traced[subtrahend] == [-1, -2, -3])
def test_matrix():
    """Tiling a 2x2 matrix repeats it along rows or along columns."""

    def make_matrix():
        # Build a fresh tensor per assertion, as the original test did.
        return Tensor.from_builtin([[1, 2], [3, 4]])

    stacked = make_matrix().tile([2, 1])
    assert stacked == Tensor.from_builtin([[1, 2], [3, 4], [1, 2], [3, 4]])

    side_by_side = make_matrix().tile([1, 2])
    assert side_by_side == Tensor.from_builtin([[1, 2, 1, 2], [3, 4, 3, 4]])
def test_no_activation():
    """A layer with an identity activation returns ``inputs @ weights + biases``."""

    def identity(tensor):
        return tensor

    weights = Tensor.from_builtin([[1, 2, -1], [-3, 5, 0]])
    biases = Tensor.from_builtin([-1, 1, 2])
    layer = Layer(weights=weights, biases=biases, activation=identity)

    outputs = layer([[0, 1], [3, 1]])

    assert outputs == Tensor.from_builtin([[-4, 6, 2], [-1, 12, -1]])
def test_relu():
    """A layer whose activation clips at zero zeroes out negative outputs."""

    def relu(tensor):
        return tensor.clip(low=0)

    weights = Tensor.from_builtin([[1, 2, -1], [-3, 5, 0]])
    biases = Tensor.from_builtin([-1, 1, 2])
    layer = Layer(weights=weights, biases=biases, activation=relu)

    outputs = layer([[0, 1], [3, 1]])

    assert outputs == Tensor.from_builtin([[0, 6, 2], [0, 12, 0]])
def test_gradients():
    """Check matrix-multiplication gradients for both factors.

    For an upstream gradient ``g`` the expected values equal
    ``g @ right.T`` for the left factor and ``left.T @ g`` for the right.
    """
    lhs = Tensor.from_builtin([[1, 2, 3], [3, 4, 1]])
    rhs = Tensor.from_builtin([[3], [1], [-1]])
    product = matrix_multiply(lhs, rhs)

    upstream = np.array([[1], [-1]])
    traced = product.backward(upstream)

    expected_lhs = [[3, 1, -1], [-3, -1, 1]]
    assert np.all(traced[lhs] == expected_lhs)

    expected_rhs = [[-2], [-2], [2]]
    assert np.all(traced[rhs] == expected_rhs)
def test_read_only():
    """A tensor rejects both attribute rebinding and in-place data writes.

    The tensors are built outside the ``pytest.raises`` blocks so that only
    the mutation under test can satisfy the expected exception. Otherwise an
    error raised by ``Tensor.from_builtin`` itself (a ``ValueError``, say)
    would make the test pass for the wrong reason.
    """
    # Rebinding the ``data`` attribute must be rejected.
    tensor = Tensor.from_builtin([1, 2, 3])
    with pytest.raises(FrozenInstanceError):
        tensor.data = 3

    # Writing into the underlying data must be rejected as well.
    tensor = Tensor.from_builtin([1, 2, 3])
    with pytest.raises(ValueError):
        tensor.data[1] = 3
def test_tensor():
    """Multiplying a 2x3 matrix by a 3x1 column yields the expected 2x1 result."""
    lhs = Tensor.from_builtin([[1, 2, 3], [3, 4, 1]])
    rhs = Tensor.from_builtin([[3], [1], [-1]])

    product = matrix_multiply(lhs, rhs)

    assert product == Tensor.from_builtin([[2], [12]])
def test_gradients():
    """Check clip gradients: passed through unless the value was clipped.

    The last element (4, upper bound 2) is clipped, so its gradient is
    expected to be zero. Looking up either bound in the traced gradients
    must raise ``KeyError``.
    """
    values = Tensor.from_builtin([2, 3, 4])
    lower = Tensor.from_builtin([1, 2, 1])
    upper = Tensor.from_builtin([3, 4, 2])
    clipped = values.clip(lower, upper)

    seed = Gradient(tensor=clipped, gradient=np.array([1, 2, 3]))
    traced = Gradients._trace(seed)

    assert np.allclose(traced[values], [1, 2, 0])
    for bound in (lower, upper):
        with pytest.raises(KeyError):
            traced[bound]
def test_gradients():
    """Each output of ``coalesce`` carries gradients only to its own input.

    The expected vector gradient ``[4, 8, 12]`` is consistent with the
    vector being repeated across the matrix's two rows (presumably via
    broadcasting; confirm against ``coalesce``).
    """
    vector = Tensor.from_builtin([1, 2, 3])
    matrix = Tensor.from_builtin([[1, 2, 3], [4, 5, 6]])
    coalesced_vector, coalesced_matrix = coalesce(vector, matrix)

    # Sum of squares of the first output: only ``vector`` is reachable.
    vector_loss = (coalesced_vector * coalesced_vector).sum()
    from_vector = Gradients.trace(vector_loss)
    assert np.all(from_vector[vector] == [4, 8, 12])
    with pytest.raises(KeyError):
        from_vector[matrix]

    # Sum of squares of the second output: only ``matrix`` is reachable.
    matrix_loss = (coalesced_matrix * coalesced_matrix).sum()
    from_matrix = Gradients.trace(matrix_loss)
    assert np.all(from_matrix[matrix] == [[2, 4, 6], [8, 10, 12]])
    with pytest.raises(KeyError):
        from_matrix[vector]
def test_gradients():
    """Check batch-normalization gradients in test mode.

    The expected values match normalizing with the stored mean and variance:
    ``scale / sqrt(variance)`` per input element, the batch size for
    ``shift``, and the column sums of the normalized inputs for ``scale``.
    """
    inputs = Tensor.from_builtin([[1, 2], [3, 4]])
    layer = BatchNormalization(
        mean=np.array([4, -1]),
        variance=np.array([1, 0.25]),
        persistence=0.9,
        shift=Tensor.from_builtin([3, 2]),
        scale=Tensor.from_builtin([1, 1]),
    )

    # Forward pass and trace both run under the test-mode context.
    test_mode = BatchNormalization.Mode.test
    with BatchNormalization.mode(test_mode):
        total = layer(inputs).sum()
        traced = Gradients.trace(total)

    assert np.allclose(traced[inputs], [[1, 2], [1, 2]])
    assert np.allclose(traced[layer.shift], [2, 2])
    assert np.allclose(traced[layer.scale], [-4, 16])
def test_gradients():
    """Check sigmoid gradients against precomputed values (tolerance 1e-4)."""
    inputs = Tensor.from_builtin([1, 2, 3])
    activated = inputs.sigmoid()

    upstream = np.array([1, 0, -1])
    traced = Gradients._trace(Gradient(tensor=activated, gradient=upstream))

    expected = [0.1966, 0, -0.0452]
    assert np.allclose(traced[inputs], expected, atol=1e-4)
def test_chain():
    """Gradients flow through a negated sum back to every input element."""
    inputs = Tensor.from_builtin([1, 2, 3])
    negated_total = -ops.sum(inputs)

    traced = Gradients.trace(negated_total)

    # Each input contributes with coefficient -1; the traced tensor itself
    # has gradient 1.
    assert np.allclose(traced[inputs], [-1, -1, -1])
    assert np.allclose(traced[negated_total], 1)
def test_gradients_full():
    """Check softmax gradients over a whole vector (tolerance 1e-4).

    The upstream gradient selects the first probability only.
    """
    inputs = Tensor.from_builtin([1, 2, 3])
    distribution = inputs.softmax()

    upstream = np.array([1, 0, 0])
    traced = Gradients._trace(Gradient(tensor=distribution, gradient=upstream))

    expected = [0.0819, -0.0220, -0.0599]
    assert np.allclose(traced[inputs], expected, atol=1e-4)
def test_accumulate():
    """Gradients reaching a tensor along several paths are accumulated.

    ``start`` feeds the final sum both directly and through ``start * start``,
    so its expected gradient is ``1 - 2 * start``.
    """
    source = Tensor.from_builtin([1, 2, 3])
    squared = source * source
    total = ops.sum(source - squared)

    traced = Gradients.trace(total)

    assert np.allclose(traced[source], [-1, -3, -5])
    assert np.allclose(traced[squared], [-1, -1, -1])
    assert np.allclose(traced[total], 1)
def test_gradients_one_axis():
    """Check softmax gradients along axis 1 of a matrix (tolerance 1e-4).

    Each row has its own upstream gradient selecting a single probability.
    """
    inputs = Tensor.from_builtin([[1, 2, 3], [4, 5, 6]])
    distribution = inputs.softmax(1)

    upstream = np.array([[1, 0, 0], [0, 1, 0]])
    traced = Gradients._trace(Gradient(tensor=distribution, gradient=upstream))

    expected = [
        [0.0819, -0.0220, -0.0599],
        [-0.0220, 0.1848, -0.1628],
    ]
    assert np.allclose(traced[inputs], expected, atol=1e-4)
def test_matrix_gradients():
    """Tile gradients sum the upstream gradient over every repeated copy.

    A 2x2 tensor tiled ``[2, 3]`` times produces a 4x6 result. Each source
    element is expected to collect the six upstream entries of its copies.
    """
    source = Tensor.from_builtin([[5, 6], [7, 8]])
    tiled = source.tile([2, 3])

    upstream = np.array(
        [
            [1, 2, 5, 3, 1, 0],
            [3, 5, 9, 2, 1, 4],
            [6, 2, 5, 3, 3, 2],
            [2, 5, 7, 2, 4, 7],
        ]
    )
    traced = tiled.backward(upstream)

    expected = [[21, 12], [26, 25]]
    assert np.all(traced[source] == expected)
def test_train_step():
    """One gradient step on a softmax classifier must lower the loss."""
    data = Tensor.from_builtin([[1, 2, 3], [4, 5, 0]])
    targets = Tensor.from_builtin([[1, 0], [0, 1]])
    weights = Tensor.from_builtin([[2, 0], [1, 1], [0, 3]])
    biases = Tensor.from_builtin([-2, -7])

    def compute_loss():
        # Reads ``weights`` and ``biases`` from the enclosing scope, so it
        # picks up the updated parameters after the step below.
        scores = matrix_multiply(data, weights) + biases
        log_probabilities = scores.softmax(-1).log()
        return -(log_probabilities * targets).sum()

    initial_loss = compute_loss()
    traced = Gradients.trace(initial_loss)

    weight_step = Tensor.from_numpy(traced[weights])
    bias_step = Tensor.from_numpy(traced[biases])
    weights -= weight_step
    biases -= bias_step

    assert compute_loss().data < initial_loss.data
def test_single():
    """Expanding a vector at a single non-negative axis inserts that axis."""
    cases = (
        (0, [[-1, 2]]),
        (1, [[-1], [2]]),
    )
    for axis, expected in cases:
        expanded = Tensor.from_builtin([-1, 2]).expand_dims(axis)
        assert expanded == Tensor.from_builtin(expected)
def test_gradients_negative_axis():
    """Gradients through ``expand_dims(-1)`` come back in the input's shape."""
    source = Tensor.from_builtin([2, 3, 4])
    expanded = source.expand_dims(-1)

    upstream = np.array([[1], [2], [3]])
    traced = expanded.backward(upstream)

    assert np.all(traced[source] == [1, 2, 3])
def test_none():
    """Calling ``expand_dims`` without arguments leaves the tensor unchanged."""
    unchanged = Tensor.from_builtin([-1, 2]).expand_dims()
    assert unchanged == Tensor.from_builtin([-1, 2])
def test_negative():
    """Negative axes for ``expand_dims`` count from the end of the new shape."""
    cases = (
        (-2, [[-1, 2]]),
        (-1, [[-1], [2]]),
    )
    for axis, expected in cases:
        expanded = Tensor.from_builtin([-1, 2]).expand_dims(axis)
        assert expanded == Tensor.from_builtin(expected)
def test_list():
    """Passing a list of axes to ``expand_dims`` inserts all of them."""
    expanded = Tensor.from_builtin([-1, 2]).expand_dims([0, 2])
    assert expanded == Tensor.from_builtin([[[-1], [2]]])
def test_righthand():
    """Subtraction works when the tensor is the right-hand operand of a list."""
    difference = [1, 2.5] - Tensor.from_builtin([3, -1])
    assert difference == Tensor.from_builtin([-2, 3.5])
def test_values():
    """Sigmoid of 0 and 1 equals 0.5 and ``e / (1 + e)`` respectively."""
    activated = Tensor.from_builtin([0, 1]).sigmoid()
    assert activated == Tensor.from_builtin([0.5, np.e / (1 + np.e)])
def make_normalizer(train_features: np.ndarray):
    """Build a function that standardizes features with training statistics.

    The mean and standard deviation are each computed once, over all
    elements of ``train_features`` (no axis is given), and captured by the
    returned callable.

    Args:
        train_features: Array the statistics are computed from.

    Returns:
        A callable taking ``features`` and returning
        ``Tensor.from_numpy((features - mean) / std)``.
    """
    std = np.std(train_features)
    mean = np.mean(train_features)

    def normalize(features):
        centered = features - mean
        return Tensor.from_numpy(centered / std)

    return normalize
def test_float():
    """Subtracting a Python float from a scalar tensor yields a float result."""
    difference = Tensor.from_builtin(3) - 3.5
    assert difference == Tensor.from_builtin(-0.5)
def test_int():
    """Subtracting a Python int from a scalar tensor yields the difference."""
    difference = Tensor.from_builtin(2) - 3
    assert difference == Tensor.from_builtin(-1)
def test_tensor():
    """Subtracting two same-shaped tensors works element-wise."""
    minuend = Tensor.from_builtin([1, 2.5])
    subtrahend = Tensor.from_builtin([3, -1])

    difference = minuend - subtrahend

    assert difference == Tensor.from_builtin([-2, 3.5])
def test_broadcast():
    """A scalar tensor minus a vector tensor subtracts element-wise."""
    scalar = Tensor.from_builtin(2)
    vector = Tensor.from_builtin([1, 2])

    difference = scalar - vector

    assert difference == Tensor.from_builtin([1, 0])