Example #1
def test_gradients():
    # coalesce broadcasts its arguments to a shared shape: a is repeated
    # along a new leading axis to match b's (2, 3) shape.
    a, b = Tensor.from_builtin([1, 2, 3]), Tensor.from_builtin([[1, 2, 3], [4, 5, 6]])
    x, y = coalesce(a, b)

    # d/da sum(x * x) = 2 * x, summed over the two broadcast rows: 2 * 2 * a.
    x_gradients = Gradients.trace((x * x).sum())
    assert np.all(x_gradients[a] == [4, 8, 12])
    # b never fed into x, so it has no gradient entry.
    with pytest.raises(KeyError):
        x_gradients[b]

    # d/db sum(y * y) = 2 * b.
    y_gradients = Gradients.trace((y * y).sum())
    assert np.all(y_gradients[b] == [[2, 4, 6], [8, 10, 12]])
    with pytest.raises(KeyError):
        y_gradients[a]
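The asserted values follow from the broadcast: a is repeated across both rows of b's shape, so its gradient is summed over that axis. A minimal plain-numpy check of the same arithmetic (assuming only that coalesce broadcasts its arguments to a common shape):

import numpy as np

a = np.array([1, 2, 3])
b = np.array([[1, 2, 3], [4, 5, 6]])
x = np.broadcast_to(a, b.shape)  # stand-in for what coalesce presumably yields for a

# d/dx sum(x * x) = 2 * x; summing over the broadcast axis gives d/da.
grad_a = (2 * x).sum(axis=0)
assert np.all(grad_a == [4, 8, 12])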
Example #2
def test_chain():
    tensor = Tensor.from_builtin([1, 2, 3])
    output = -ops.sum(tensor)
    gradients = Gradients.trace(output)

    # Chain rule through the negation: d(-sum(t))/dt = -1 for every element.
    assert np.allclose(gradients[tensor], [-1, -1, -1])
    # The traced output's gradient with respect to itself is 1.
    assert np.allclose(gradients[output], 1)
Example #3
def test_accumulate():
    start = Tensor.from_builtin([1, 2, 3])
    intermediate = start * start
    end = ops.sum(start - intermediate)
    gradients = Gradients.trace(end)

    # start reaches end twice (directly and via intermediate), so its
    # gradient accumulates: 1 + (-2 * start) = [-1, -3, -5].
    assert np.allclose(gradients[start], [-1, -3, -5])
    # d(end)/d(intermediate) = -1 for every element.
    assert np.allclose(gradients[intermediate], [-1, -1, -1])
    assert np.allclose(gradients[end], 1)
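The [-1, -3, -5] value is the accumulation of two paths from start to end: the direct term contributes 1 per element, and the path through intermediate contributes -2 * start. A quick numpy sketch of that sum:

import numpy as np

start = np.array([1, 2, 3])
# d(end)/d(start) = d(sum(start))/d(start) + d(sum(-start**2))/d(start)
grad_start = 1 + (-2 * start)
assert np.allclose(grad_start, [-1, -3, -5])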
Example #4
def test_gradients():
    tensor = Tensor.from_builtin([[1, 2], [3, 4]])
    batch_norm = BatchNormalization(
        mean=np.array([4, -1]),
        variance=np.array([1, 0.25]),
        persistence=0.9,  # running-statistics momentum; not exercised in test mode
        shift=Tensor.from_builtin([3, 2]),
        scale=Tensor.from_builtin([1, 1]),
    )
    # Test mode normalizes with the stored mean and variance rather than
    # batch statistics.
    with BatchNormalization.mode(BatchNormalization.Mode.test):
        loss = batch_norm(tensor).sum()
    gradients = Gradients.trace(loss)
    # d(loss)/dx = scale / sqrt(variance) = [1, 2] for each sample.
    assert np.allclose(gradients[tensor], [[1, 2], [1, 2]])
    # d(loss)/d(shift) = 1 per sample, summed over the batch of two.
    assert np.allclose(gradients[batch_norm.shift], [2, 2])
    # d(loss)/d(scale) = normalized inputs, summed over the batch.
    assert np.allclose(gradients[batch_norm.scale], [-4, 16])
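In test mode the layer presumably applies the standard inference-time transform out = (x - mean) / sqrt(variance) * scale + shift, which makes every asserted gradient a closed-form expression. A numpy sketch of that arithmetic (assuming no epsilon term):

import numpy as np

x = np.array([[1, 2], [3, 4]], dtype=float)
mean, variance = np.array([4.0, -1.0]), np.array([1.0, 0.25])
scale = np.array([1.0, 1.0])
normalized = (x - mean) / np.sqrt(variance)

assert np.allclose(scale / np.sqrt(variance), [1, 2])  # d(out)/dx per sample
assert np.allclose(normalized.sum(axis=0), [-4, 16])   # d(loss)/d(scale)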
def test_train_step():
    data = Tensor.from_builtin([[1, 2, 3], [4, 5, 0]])
    targets = Tensor.from_builtin([[1, 0], [0, 1]])
    weights = Tensor.from_builtin([[2, 0], [1, 1], [0, 3]])
    biases = Tensor.from_builtin([-2, -7])

    def compute_loss():
        # Softmax cross-entropy over a two-sample batch.
        logits = matrix_multiply(data, weights) + biases
        probabilities = logits.softmax(-1)
        return -(probabilities.log() * targets).sum()

    loss = compute_loss()
    gradients = Gradients.trace(loss)
    # One full gradient-descent step (implicit learning rate of 1).
    weights -= Tensor.from_numpy(gradients[weights])
    biases -= Tensor.from_numpy(gradients[biases])

    assert compute_loss().data < loss.data
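The step relies on the softmax-cross-entropy identity d(loss)/d(logits) = probabilities - targets, followed by a full gradient update with an implicit learning rate of 1. A self-contained numpy version of the same step, as a sketch of what the test exercises:

import numpy as np

data = np.array([[1, 2, 3], [4, 5, 0]], dtype=float)
targets = np.array([[1, 0], [0, 1]], dtype=float)
weights = np.array([[2, 0], [1, 1], [0, 3]], dtype=float)
biases = np.array([-2, -7], dtype=float)

def loss_and_grads():
    logits = data @ weights + biases
    exp = np.exp(logits - logits.max(axis=-1, keepdims=True))  # stable softmax
    probabilities = exp / exp.sum(axis=-1, keepdims=True)
    loss = -(np.log(probabilities) * targets).sum()
    d_logits = probabilities - targets  # softmax cross-entropy gradient
    return loss, data.T @ d_logits, d_logits.sum(axis=0)

loss, d_weights, d_biases = loss_and_grads()
weights -= d_weights  # full step, learning rate 1
biases -= d_biases
assert loss_and_grads()[0] < loss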