def derivMulPar(x: ndarray,
                y: ndarray,
                sigma: Callable) -> "tuple[ndarray, ndarray]":
    """Return the partial derivatives of ``sigma(x * y)`` w.r.t. ``x`` and ``y``.

    With ``alpha = x * y`` the chain rule gives::

        d sigma(alpha) / dx = sigma'(alpha) * y
        d sigma(alpha) / dy = sigma'(alpha) * x

    (The book's variant uses ``alpha = x + y``, where d(x + y)/dx is 1; here
    the inner function is a product, so the inner derivatives are ``y`` and
    ``x``.)

    Args:
        x: First factor.
        y: Second factor; must have the same shape as ``x``.
        sigma: Function applied to ``x * y``; it is differentiated at
            ``alpha`` with ``ms.deriv``.

    Returns:
        The pair ``(dx, dy)``.

    Raises:
        AssertionError: If ``x`` and ``y`` differ in shape.
    """
    assert x.shape == y.shape

    alpha = x * y
    # sigma'(alpha) is shared by both partials, so evaluate it only once.
    dsigma_dalpha = ms.deriv(sigma, alpha)
    return dsigma_dalpha * y, dsigma_dalpha * x
def chainDeriv_3(chain: Chain,  # [f, g, h]
                 input_: ndarray,
                 diff: float = 0.001) -> ndarray:
    """Differentiate the three-function composition ``f(g(h(x)))`` at ``input_``.

    Applies the chain rule twice::

        d[f(g(h(x)))] / dx = f'(g(h(x))) * d[g(h(x))] / dx
                           = f'(g(h(x))) * g'(h(x)) * h'(x)

    Args:
        chain: Exactly three callables ``[f, g, h]``, outermost first.
        input_: Point(s) ``x`` at which the derivative is taken.
        diff: Forwarded unchanged as the third argument of ``ms.deriv``
            (presumably the finite-difference step -- confirm in ``ms``).

    Returns:
        The product of the three derivative factors.

    Raises:
        AssertionError: If ``chain`` does not hold exactly three functions.
    """
    assert len(chain) == 3

    f, g, h = chain[0], chain[1], chain[2]

    # h(x) feeds both the f' and the g' factor: compute it once and reuse it.
    h_x = h(input_)
    g_h_x = g(h_x)

    return (ms.deriv(f, g_h_x, diff)
            * ms.deriv(g, h_x, diff)
            * ms.deriv(h, input_, diff))
def derivMulMatSigma(X: ndarray,  # m x n
                     W: ndarray,  # n x p
                     sigma: Callable) -> ndarray:
    """Derivative of ``sigma(X . W)`` with respect to ``X``.

    The matrix product ``N = X . W`` is formed, ``sigma`` is differentiated
    at ``N`` with ``ms.deriv``, and that result is multiplied by ``W``
    transposed.

    Args:
        X: Left operand (m x n).
        W: Right operand (n x p).
        sigma: Function applied to the product ``X . W``.

    Returns:
        ``ms.deriv(sigma, X . W) . W^T`` -- (m x p) . (p x n) = m x n.

    Raises:
        AssertionError: If the inner dimensions of ``X`` and ``W`` differ.
    """
    assert X.shape[1] == W.shape[0]

    product = np.dot(X, W)
    sigma_slope = ms.deriv(sigma, product)
    return np.dot(sigma_slope, W.T)
def chainDeriv_3_with_chainDeriv_2(chain: Chain,  # [f, g, h]
                                   input_: ndarray,
                                   diff: float = 0.001) -> ndarray:
    """Differentiate ``f(g(h(x)))`` by delegating the inner pair to ``ms.chainDeriv``.

    Uses a single chain-rule step::

        d[f(g(h(x)))] / dx = f'(g(h(x))) * d[g(h(x))] / dx

    where the second factor is obtained from ``ms.chainDeriv([g, h], x, diff)``.

    Args:
        chain: Exactly three callables ``[f, g, h]``, outermost first.
        input_: Point(s) ``x`` at which the derivative is taken.
        diff: Forwarded unchanged to ``ms.deriv`` and ``ms.chainDeriv``.

    Returns:
        The product of the outer derivative and the inner chain derivative.

    Raises:
        AssertionError: If ``chain`` does not hold exactly three functions.
    """
    assert len(chain) == 3

    outer = chain[0]

    # Value of the inner composition g(h(x)), where f' is evaluated.
    inner_value = chain[1](chain[2](input_))
    outer_slope = ms.deriv(outer, inner_value, diff)

    # Derivative of the inner composition g(h(x)) itself.
    inner_slope = ms.chainDeriv([chain[1], chain[2]], input_, diff)

    return outer_slope * inner_slope
def matmul_backward_sum(X: ndarray,  # m x n
                        W: ndarray,  # n x p
                        sigma: Callable) -> "tuple[ndarray, ndarray]":
    """Return the gradients of a loss over ``sigma(X . W)`` w.r.t. ``X`` and ``W``.

    With ``N = X . W`` and ``S = sigma(N)``, the upstream gradient ``dL/dS``
    is taken to be all ones (presumably ``L`` is the sum of ``S``, as the
    function name suggests -- confirm against the caller), so::

        dL/dN = dS/dN * dL/dS = dS/dN
        dL/dX = dL/dN . W^T      # (m x p) . (p x n) = m x n
        dL/dW = X^T . dL/dN      # (n x m) . (m x p) = n x p

    The forward output ``S`` is not needed for the gradients, so it is not
    evaluated here.

    Args:
        X: Left operand (m x n).
        W: Right operand (n x p).
        sigma: Function applied to ``X . W``; it is differentiated at ``N``
            with ``ms.deriv`` using that function's default step.

    Returns:
        The pair ``(dLdX, dLdW)``.
    """
    N = np.dot(X, W)  # m x p

    dSdN = ms.deriv(sigma, N)  # m x p; equals dL/dN because dL/dS is all ones

    dLdX = np.dot(dSdN, np.transpose(W))  # (m x p) . (p x n) = m x n
    dLdW = np.dot(np.transpose(X), dSdN)  # (n x m) . (m x p) = n x p
    return dLdX, dLdW
# NOTE(review): the statements directly below use `return`, `chain`, `input_`
# and `diff`, so they are the tail of a function whose `def` line is not part
# of this span. They are kept unchanged and read like the body of
# chainDeriv_3_with_chainDeriv_2 -- confirm against the full file.
    assert len(chain) == 3
    f = chain[0]
    g_h = lambda input_: chain[1](chain[2](input_))
    # d[f(g(h(x)))] / dx = f'(g(h(x))) * d[g(h(x))] / dx
    return ms.deriv(f, g_h(input_), diff) * ms.chainDeriv([chain[1], chain[2]], input_, diff)


def g(input_: ndarray) -> ndarray:
    """Middle function of the demo chain: return ``input_ + 3``."""
    # x + 3
    return input_ + 3


def h(input_: ndarray) -> ndarray:
    """Innermost function of the demo chain: return ``input_ ** 3 - 5``."""
    # x^3 - 5
    return np.power(input_, 3) - 5


def f_g_h(input_: ndarray) -> ndarray:
    """Expanded closed form of the demo composition: ``x^6 - 4x^3 + 4``."""
    # f(g(h(x))) = (x^3 - 5 + 3)^2 =
    # (x^3 - 2)^2= x^6 - 4x^3 + 4
    return np.power(input_, 6) - 4 * np.power(input_, 3) + 4


# Demo script: evaluate the composition and its derivative at a few sample
# points, three different ways, and print the results for comparison.
x = np.array([1, 2, 3, 4])
print("x values", x, ", diff is 0.001")

# Closed form vs. explicit composition. These agree only if ms.parabel squares
# its input -- presumably it does (see the expansion in f_g_h); confirm in ms.
print("f_g_h(x)", f_g_h(x))
print("f(g(h(x)))", ms.parabel(g(h(x))), '\n')

# Derivative of the closed form vs. the two chain-rule implementations.
print("deriv of f_g_h at x", ms.deriv(f_g_h, x))
print("chainDeriv_3 of [f, g, h] at x", chainDeriv_3([ms.parabel, g, h], x))
print("chainDeriv_3 with chainDeriv_2 of [f, g, h] at x", chainDeriv_3_with_chainDeriv_2([ms.parabel, g, h], x))