Ejemplo n.º 1
0
def tensor_matmul(t1: Tensor, t2: Tensor) -> Tensor:
    """Matrix-multiply two tensors and wire up the backward pass.

    For t3 = t1 @ t2 with t1 of shape (n1, m1) and t2 of shape (m1, m2),
    the upstream gradient grad3 has shape (n1, m2), and:
        grad1 = grad3 @ t2.T
        grad2 = t1.T @ grad3
    """
    product = t1.data @ t2.data
    needs_grad = t1.requires_grad or t2.requires_grad

    parents: List[Dependency] = []

    if t1.requires_grad:

        def backward_t1(grad: np.ndarray) -> np.ndarray:
            # dL/dt1 = upstream gradient times t2 transposed.
            return grad @ t2.data.T

        parents.append(Dependency(t1, backward_t1))

    if t2.requires_grad:

        def backward_t2(grad: np.ndarray) -> np.ndarray:
            # dL/dt2 = t1 transposed times upstream gradient.
            return t1.data.T @ grad

        parents.append(Dependency(t2, backward_t2))

    return Tensor(product, needs_grad, parents)
Ejemplo n.º 2
0
def tensor_neg(t: Tensor) -> Tensor:
    """Element-wise negation; the gradient of -t is simply the negated upstream gradient."""
    negated = -t.data
    needs_grad = t.requires_grad
    parents = [Dependency(t, lambda g: -g)] if needs_grad else []
    return Tensor(negated, needs_grad, parents)
Ejemplo n.º 3
0
def tensor_transpose(t: Tensor) -> Tensor:
    """Transpose a tensor.

    The gradient of a transpose is the transposed upstream gradient.
    """
    data = t.data.T
    # Renamed from the misspelled `required_grad` for consistency with the
    # `requires_grad` naming used by Tensor and the sibling ops in this file.
    requires_grad = t.requires_grad

    if requires_grad:

        def grad_fn(grad: np.ndarray) -> np.ndarray:
            return grad.T

        depends_on = [Dependency(t, grad_fn)]
    else:
        depends_on = []

    return Tensor(data, requires_grad, depends_on)
Ejemplo n.º 4
0
def relu(tensor: Tensor) -> Tensor:
    """Element-wise rectified linear unit.

        relu(s) = max(0., s)
        relu'(s) = 1 if s >= 0 else 0
    """
    data = np.maximum(tensor.data, 0)
    requires_grad = tensor.requires_grad

    if requires_grad:
        def grad_fn(grad: np.ndarray) -> np.ndarray:
            # `np.int` was removed in NumPy 1.24, so the old
            # `np.array(data >= 0.0, dtype=np.int)` raises AttributeError.
            # A boolean mask cast to the gradient's own dtype produces the
            # same values and keeps the gradient dtype stable.
            return grad * (data >= 0.0).astype(grad.dtype)
        depends_on = [Dependency(tensor, grad_fn)]
    else:
        depends_on = []

    return Tensor(data, requires_grad, depends_on)
Ejemplo n.º 5
0
def sigmoid(tensor: Tensor) -> Tensor:
    """Element-wise logistic sigmoid.

        sigmoid(s) = 1. / (1. + exp(-s))
        sigmoid'(s) = sigmoid(s) * (1. - sigmoid(s))

    Note: the formula above matches the code (exp(-s)); an earlier docstring
    incorrectly stated exp(s).
    """
    data = 1. / (1. + np.exp(-tensor.data))
    requires_grad = tensor.requires_grad

    if requires_grad:
        def grad_fn(grad: np.ndarray) -> np.ndarray:
            # Derivative expressed in terms of the cached forward output.
            return grad * data * (1. - data)
        depends_on = [Dependency(tensor, grad_fn)]
    else:
        depends_on = []

    return Tensor(data, requires_grad, depends_on)
Ejemplo n.º 6
0
def tanh(tensor: Tensor) -> Tensor:
    """Element-wise hyperbolic tangent.

        tanh(s)  = (exp(s) - exp(-s)) / (exp(s) + exp(-s))
        tanh'(s) = 1 - tanh(s) * tanh(s)
    """
    out = np.tanh(tensor.data)
    needs_grad = tensor.requires_grad

    parents = []
    if needs_grad:
        def backward(grad: np.ndarray) -> np.ndarray:
            # The derivative reuses the cached forward output.
            return grad * (1 - out * out)

        parents = [Dependency(tensor, backward)]

    return Tensor(out, needs_grad, parents)
Ejemplo n.º 7
0
def tensor_sum(t: Tensor) -> Tensor:
    """Sum every element of the tensor into a single scalar result."""
    total = t.data.sum()
    needs_grad = t.requires_grad

    parents = []
    if needs_grad:

        def backward(grad: np.ndarray) -> np.ndarray:
            # A scalar sum routes the same upstream gradient to every element.
            return grad * np.ones_like(t.data)

        parents = [Dependency(t, grad_fn=backward)]

    return Tensor(total, needs_grad, parents)
Ejemplo n.º 8
0
def tensor_slice(t: Tensor, idxs) -> Tensor:
    """Index/slice a tensor.

    Forward pass is t.data[idxs]; the backward pass scatters the upstream
    gradient back into a zero array shaped like the ORIGINAL tensor.
    """
    data = t.data[idxs]
    requires_grad = t.requires_grad

    if requires_grad:

        def grad_fn(grad: np.ndarray) -> np.ndarray:
            # Bug fix: the scatter target must have t.data's shape, not the
            # slice's shape (`np.zeros_like(data)` was wrong) — otherwise
            # `bigger_grad[idxs]` indexes into the already-sliced array and
            # the returned gradient does not match the input tensor.
            bigger_grad = np.zeros_like(t.data)
            bigger_grad[idxs] = grad
            return bigger_grad

        depends_on = [Dependency(t, grad_fn)]
    else:
        depends_on = []

    return Tensor(data, requires_grad, depends_on)
Ejemplo n.º 9
0
def tensor_mul(t1: Tensor, t2: Tensor) -> Tensor:
    """Element-wise (broadcasting) product of two tensors with backprop.

    For t3 = t1 * t2, the local gradients are grad * t2.data and
    grad * t1.data respectively, reduced back to each operand's shape
    when NumPy broadcasting expanded it during the forward pass.
    """
    product = t1.data * t2.data
    needs_grad = t1.requires_grad or t2.requires_grad

    parents: List[Dependency] = []

    def _unbroadcast(grad: np.ndarray, target: Tensor) -> np.ndarray:
        # Collapse gradient axes that broadcasting introduced or stretched.
        extra_dims = grad.ndim - target.data.ndim
        for _ in range(extra_dims):
            # Dimensions prepended by broadcasting are summed away.
            grad = grad.sum(axis=0)
        for axis, size in enumerate(target.shape):
            if size == 1:
                # Size-1 axes were stretched; sum back, keeping the axis.
                grad = grad.sum(axis=axis, keepdims=True)
        return grad

    if t1.requires_grad:

        def backward_t1(grad: np.ndarray) -> np.ndarray:
            return _unbroadcast(grad * t2.data, t1)

        parents.append(Dependency(t1, backward_t1))

    if t2.requires_grad:

        def backward_t2(grad: np.ndarray) -> np.ndarray:
            return _unbroadcast(grad * t1.data, t2)

        parents.append(Dependency(t2, backward_t2))

    return Tensor(product, needs_grad, parents)