def cost(Weights, X, Y, lambda_param=1.0):
    """Return the regularized logistic cost and its gradient.

    The misprediction term is the cross-entropy between ``Y`` and the
    probabilities produced by ``predict_prob(X, Weights)``; the
    regularization term is a squared-weight penalty scaled by
    ``lambda_param``. Row 0 of ``Weights`` is treated as the bias row and
    is left unregularized.

    Args:
        Weights: current weight array (1 to 4 dimensions).
        X: feature array; ``matmulTN(X, H - Y)`` must be conformable with
            ``Weights`` (assumes samples run along dim 0 -- TODO confirm).
        Y: target array whose first dimension is the number of samples.
        lambda_param: regularization strength.

    Returns:
        A ``(J, dJ)`` tuple: the cost reduced along dim 0, and the gradient
        of the cost with respect to ``Weights``.
    """
    num_samples = Y.dims()[0]

    # Build a regularization mask with the same shape as Weights; unused
    # trailing dimensions are passed as None, as af.constant expects.
    shape = tuple(Weights.dims())
    padded_shape = shape + (None,) * (4 - len(shape))
    reg_mask = af.constant(lambda_param, *padded_shape)

    # The bias weights (first row) are not regularized.
    reg_mask[0, :] = 0

    # Predicted probabilities for the current weights.
    probs = predict_prob(X, Weights)

    # Cross-entropy of the predictions, summed over samples.
    log_likelihood = Y * af.log(probs) + (1 - Y) * af.log(1 - probs)
    err_cost = -1 * af.sum(log_likelihood, dim=0)

    # lambda * W is both the penalty's gradient and, times W, its integrand.
    scaled_weights = reg_mask * Weights
    reg_cost = 0.5 * af.sum(scaled_weights * Weights, dim=0)

    total_cost = (err_cost + reg_cost) / num_samples

    # Gradient: X^T (H - Y) plus the regularization gradient.
    residual = probs - Y
    gradient = (af.matmulTN(X, residual) + scaled_weights) / num_samples

    return total_cost, gradient
def train(self, X, Y):
    """Fit the weights to ``(X, Y)`` with a fixed-step iterative update.

    Starts from all-zero weights and, for at most ``self.__maxiter``
    rounds, adds ``alpha * X^T (Y - P)`` to the weights, where ``P`` is
    the current output of ``self.predict_proba(X)``. Stops early as soon
    as the mean absolute error falls below ``self.__maxerr``.

    Args:
        X: training inputs; dim 1 gives the number of weight rows.
        Y: training targets; dim 1 gives the number of weight columns.
    """
    num_rows = X.dims()[1]
    num_cols = Y.dims()[1]

    # Start from zero weights (not a random initialization).
    self.__weights = af.constant(0, num_rows, num_cols)

    for step in range(self.__maxiter):
        iteration = step + 1

        residual = Y - self.predict_proba(X)
        mean_abs_err = af.mean(af.abs(residual))

        # Converged: the error is already within tolerance.
        if mean_abs_err < self.__maxerr:
            break

        # Report progress every 25th iteration when verbose.
        if self.__verbose and (iteration % 25 == 0):
            print("Iter: {}, Err: {}".format(iteration, mean_abs_err))

        update = self.__alpha * af.matmulTN(X, residual)
        self.__weights = self.__weights + update
def _cost(self, Weights: af.Array, X: af.Array, Y: af.Array,
          reg_constant: float, penalty: str) -> (af.Array, af.Array):
    """Return the regularized logistic cost and its gradient.

    The misprediction term is the cross-entropy between ``Y`` and
    ``self._predict_proba(X, Weights)``. The regularization term is
    ``0.5 * reg_constant * W**2`` when ``penalty == 'l2'`` and
    ``0.5 * reg_constant * |W|`` for any other value of ``penalty``.
    Row 0 of ``Weights`` is treated as the bias row and is never
    regularized.

    The gradient returned is the derivative of exactly the cost returned:
    ``reg_constant * W`` for the squared penalty and the subgradient
    ``0.5 * reg_constant * sign(W)`` (zero where ``W == 0``) for the
    absolute-value penalty.

    Args:
        Weights: current weight array (1 to 4 dimensions).
        X: feature array; ``matmulTN(X, H - Y)`` must be conformable with
            ``Weights`` (assumes samples run along dim 0 -- TODO confirm).
        Y: target array whose first dimension is the number of samples.
        reg_constant: regularization strength.
        penalty: ``'l2'`` selects the squared penalty; anything else
            selects the absolute-value penalty.

    Returns:
        A ``(J, dJ)`` tuple: the cost reduced along dim 0, and the gradient
        of the cost with respect to ``Weights``.
    """
    # Number of samples
    m = Y.dims()[0]

    dim0 = Weights.dims()[0]
    dim1 = Weights.dims()[1] if len(Weights.dims()) > 1 else None
    dim2 = Weights.dims()[2] if len(Weights.dims()) > 2 else None
    dim3 = Weights.dims()[3] if len(Weights.dims()) > 3 else None

    # Regularization mask with the same shape as Weights
    lambdat = af.constant(reg_constant, dim0, dim1, dim2, dim3)

    # No regularization for bias weights
    lambdat[0, :] = 0

    # Get the prediction
    H = self._predict_proba(X, Weights)

    # Cost of misprediction
    Jerr = -1 * af.sum(Y * af.log(H) + (1 - Y) * af.log(1 - H), dim=0)

    # Regularization cost and the matching gradient term. The two must be
    # chosen together so that dJ really is the derivative of J.
    if penalty == 'l2':
        penalty_norm = Weights * Weights
        # d/dW [0.5 * lambda * W^2] = lambda * W
        penalty_grad = lambdat * Weights
    else:
        penalty_norm = af.abs(Weights)
        # d/dW [0.5 * lambda * |W|] = 0.5 * lambda * sign(W).
        # Comparisons yield boolean arrays, so cast back to the weights'
        # dtype before subtracting; this gives -1 / 0 / +1.
        wtype = Weights.dtype()
        sign = (Weights > 0).as_type(wtype) - (Weights < 0).as_type(wtype)
        penalty_grad = 0.5 * lambdat * sign

    Jreg = 0.5 * af.sum(lambdat * penalty_norm, dim=0)

    # Total cost
    J = (Jerr + Jreg) / m

    # Find the gradient of cost
    D = (H - Y)
    dJ = (af.matmulTN(X, D) + penalty_grad) / m

    return J, dJ
def simple_lapack(verbose=False):
    """Exercise ArrayFire's dense linear-algebra routines on random input.

    Runs, in order: LU (out-of-place and in-place), QR (out-of-place and
    in-place), Cholesky (out-of-place and in-place), inverse, linear solve
    (direct and via a precomputed LU), rank / determinant / norms, and SVD
    (out-of-place and in-place). Results are handed to the display/print
    helpers obtained from ``_util``; nothing is returned or asserted.

    Args:
        verbose: forwarded to ``_util.display_func`` / ``_util.print_func``
            (presumably enables actual output -- confirm in ``_util``).
    """
    display_func = _util.display_func(verbose)
    print_func = _util.print_func(verbose)

    # --- LU decomposition ---
    a = af.randu(5, 5)

    l, u, p = af.lu(a)

    display_func(l)
    display_func(u)
    display_func(p)

    # In-place variant: `a` is overwritten and only the pivot is returned.
    p = af.lu_inplace(a, "full")

    display_func(a)
    display_func(p)

    # --- QR decomposition (non-square 5x3 input) ---
    a = af.randu(5, 3)

    q, r, t = af.qr(a)

    display_func(q)
    display_func(r)
    display_func(t)

    af.qr_inplace(a)

    display_func(a)

    # --- Cholesky decomposition ---
    # a^T a + 10*I is symmetric positive definite, which Cholesky requires.
    # NOTE(review): the second operand is an explicit copy of `a`; the
    # reason is not visible here -- presumably avoids passing the same
    # array as both operands.
    a = af.randu(5, 5)
    a = af.matmulTN(a, a.copy()) + 10 * af.identity(5, 5)

    R, info = af.cholesky(a)
    display_func(R)
    print_func(info)

    af.cholesky_inplace(a)
    display_func(a)

    # --- Inverse ---
    a = af.randu(5, 5)
    ai = af.inverse(a)

    display_func(a)
    display_func(ai)

    # --- Linear solve: build b = a * x0, then recover x0 from (a, b) ---
    x0 = af.randu(5, 3)
    b = af.matmul(a, x0)
    x1 = af.solve(a, b)

    display_func(x0)
    display_func(x1)

    # Same system, solved from an in-place LU factorization and its pivot.
    p = af.lu_inplace(a)

    x2 = af.solve_lu(a, p, b)

    display_func(x2)

    # --- Rank, determinant and norms ---
    # NOTE(review): `a` was passed to lu_inplace just above, so these
    # presumably operate on the factorized contents rather than the
    # original random matrix -- confirm whether that is intended.
    print_func(af.rank(a))
    print_func(af.det(a))
    print_func(af.norm(a, af.NORM.EUCLID))
    print_func(af.norm(a, af.NORM.MATRIX_1))
    print_func(af.norm(a, af.NORM.MATRIX_INF))
    print_func(af.norm(a, af.NORM.MATRIX_L_PQ, 1, 1))

    # --- SVD: display u * diag(s) * vt, the product of the three factors ---
    a = af.randu(10, 10)
    display_func(a)
    u, s, vt = af.svd(a)
    display_func(af.matmul(af.matmul(u, af.diag(s, 0, False)), vt))
    u, s, vt = af.svd_inplace(a)
    display_func(af.matmul(af.matmul(u, af.diag(s, 0, False)), vt))
def simple_lapack(verbose=False):
    """Exercise ArrayFire's dense linear-algebra routines on random input.

    Runs, in order: LU (out-of-place and in-place), QR (out-of-place and
    in-place), Cholesky (out-of-place and in-place), inverse, linear solve
    (direct and via a precomputed LU), and rank / determinant / norms.
    Results are handed to the display/print helpers obtained from
    ``_util``; nothing is returned or asserted.

    Args:
        verbose: forwarded to ``_util.display_func`` / ``_util.print_func``
            (presumably enables actual output -- confirm in ``_util``).
    """
    display_func = _util.display_func(verbose)
    print_func = _util.print_func(verbose)

    # --- LU decomposition ---
    a = af.randu(5,5)

    l,u,p = af.lu(a)

    display_func(l)
    display_func(u)
    display_func(p)

    # In-place variant: `a` is overwritten and only the pivot is returned.
    p = af.lu_inplace(a, "full")

    display_func(a)
    display_func(p)

    # --- QR decomposition (non-square 5x3 input) ---
    a = af.randu(5,3)

    q,r,t = af.qr(a)

    display_func(q)
    display_func(r)
    display_func(t)

    af.qr_inplace(a)

    display_func(a)

    # --- Cholesky decomposition ---
    # a^T a + 10*I is symmetric positive definite, which Cholesky requires.
    a = af.randu(5, 5)
    a = af.matmulTN(a, a) + 10 * af.identity(5,5)

    R,info = af.cholesky(a)
    display_func(R)
    print_func(info)

    af.cholesky_inplace(a)
    display_func(a)

    # --- Inverse ---
    a = af.randu(5,5)
    ai = af.inverse(a)

    display_func(a)
    display_func(ai)

    # --- Linear solve: build b = a * x0, then recover x0 from (a, b) ---
    x0 = af.randu(5, 3)
    b = af.matmul(a, x0)
    x1 = af.solve(a, b)

    display_func(x0)
    display_func(x1)

    # Same system, solved from an in-place LU factorization and its pivot.
    p = af.lu_inplace(a)

    x2 = af.solve_lu(a, p, b)

    display_func(x2)

    # --- Rank, determinant and norms ---
    # NOTE(review): `a` was passed to lu_inplace just above, so these
    # presumably operate on the factorized contents rather than the
    # original random matrix -- confirm whether that is intended.
    print_func(af.rank(a))
    print_func(af.det(a))
    print_func(af.norm(a, af.NORM.EUCLID))
    print_func(af.norm(a, af.NORM.MATRIX_1))
    print_func(af.norm(a, af.NORM.MATRIX_INF))
    print_func(af.norm(a, af.NORM.MATRIX_L_PQ, 1, 1))
af.display(p) a = af.randu(5, 3) q, r, t = af.qr(a) af.display(q) af.display(r) af.display(t) af.qr_inplace(a) af.display(a) a = af.randu(5, 5) a = af.matmulTN(a, a) + 10 * af.identity(5, 5) R, info = af.cholesky(a) af.display(R) print(info) af.cholesky_inplace(a) af.display(a) a = af.randu(5, 5) ai = af.inverse(a) af.display(a) af.display(ai) x0 = af.randu(5, 3)