def _fit(self, X, t, clip_min_norm=1e-10):
    """Fit a two-class Fisher linear discriminant.

    Computes the projection direction ``self.w`` from the class means and
    the within-class scatter matrix, fits one ``Gaussian`` to each class's
    projected samples, and stores in ``self.threshold`` the point on the
    projected axis where the two fitted densities are equal.

    Parameters
    ----------
    X : np.ndarray
        Training inputs. Rows are selected with boolean masks built from
        ``t`` and averaged over axis 0 (presumably shape (N, D) -- confirm
        against the caller).
    t : np.ndarray
        Training targets; rows with ``t == 0`` and ``t == 1`` form the two
        classes.
    clip_min_norm : float, optional
        Lower bound applied to ``||w||`` before normalising, so that a zero
        vector does not cause a division by zero.
    """
    self._check_input(X)
    self._check_target(t)
    self._check_binary(t)
    X0 = X[t == 0]
    X1 = X[t == 1]
    m0 = np.mean(X0, axis=0)
    m1 = np.mean(X1, axis=0)

    # equation 4.28: within-class covariance
    cov_inclass = (X0 - m0).T @ (X0 - m0) + (X1 - m1).T @ (X1 - m1)

    # equation 4.30
    self.w = np.linalg.solve(cov_inclass, m1 - m0)
    self.w /= np.linalg.norm(self.w).clip(min=clip_min_norm)

    # estimate variance and mean of the projected x0 and x1
    g0 = Gaussian()
    g0.fit((X0 @ self.w)[:, None])
    g1 = Gaussian()
    g1.fit((X1 @ self.w)[:, None])

    # calculate threshold: a root of a * x**2 + 2 * b * x + c = 0, obtained
    # by equating the log-densities of the two fitted Gaussians
    a = g1.var - g0.var
    b = g0.var * g1.mu - g1.var * g0.mu
    c = (
        g1.var * g0.mu ** 2
        - g0.var * g1.mu ** 2
        - g1.var * g0.var * np.log(g1.var / g0.var)
    )
    disc_root = np.sqrt(b ** 2 - a * c)
    if np.all(b > 0):
        # Algebraically the same root as (disc_root - b) / a, but without
        # dividing by ``a``: when both variances are equal (a == 0) this
        # yields -c / (2 * b), the midpoint of the two projected means,
        # instead of 0 / 0, and it avoids cancellation when a is tiny.
        self.threshold = -c / (b + disc_root)
    else:
        self.threshold = (disc_root - b) / a
def fit(self, X: np.ndarray, t: np.ndarray):
    """
    estimate parameter given training dataset

    Sets ``self.w`` (unit-norm projection direction) and ``self.threshold``
    (decision boundary on the projected axis).

    Parameters
    ----------
    X : (N, D) np.ndarray
        training dataset independent variable
    t : (N,) np.ndarray
        training dataset dependent variable
        binary 0 or 1
    """
    X0 = X[t == 0]
    X1 = X[t == 1]
    m0 = np.mean(X0, axis=0)
    m1 = np.mean(X1, axis=0)

    # np.cov squeezes its result, so for D == 1 it returns a 0-d array that
    # np.linalg.solve rejects; atleast_2d restores the (D, D) shape.
    cov_inclass = np.atleast_2d(
        np.cov(X0, rowvar=False) + np.cov(X1, rowvar=False)
    )
    self.w = np.linalg.solve(cov_inclass, m1 - m0)
    # clip guards against dividing by zero when w is the zero vector
    self.w /= np.linalg.norm(self.w).clip(min=1e-10)

    # fit one Gaussian to each class's projected samples
    g0 = Gaussian()
    g0.fit((X0 @ self.w))
    g1 = Gaussian()
    g1.fit((X1 @ self.w))

    # points where the two fitted densities are equal: roots of the quadratic
    # obtained by equating their log-densities
    root = np.roots([
        g1.var - g0.var,
        2 * (g0.var * g1.mu - g1.var * g0.mu),
        g1.var * g0.mu**2 - g0.var * g1.mu**2
        - g1.var * g0.var * np.log(g1.var / g0.var)
    ])

    # Prefer the root lying between the two projected means. np.roots drops
    # a zero leading coefficient (equal variances) and then returns a single
    # root, so root[1] must not be indexed in that case.
    first_between = g0.mu < root[0] < g1.mu or g1.mu < root[0] < g0.mu
    if first_between or len(root) == 1:
        self.threshold = root[0]
    else:
        self.threshold = root[1]
def _fit(self, X, t, clip_min_norm=1e-10):
    """Estimate the discriminant direction and decision threshold.

    Stores the unit-norm projection vector in ``self.w`` and, after fitting
    a ``Gaussian`` to each class's projected samples, stores the crossing
    point of the two fitted densities in ``self.threshold``.

    Parameters
    ----------
    X : np.ndarray
        Training inputs, one sample per row (presumably (N, D); the code
        masks rows with ``t`` and averages over axis 0).
    t : np.ndarray
        Training targets; values 0 and 1 select the two classes.
    clip_min_norm : float, optional
        Minimum value used for ``||w||`` when normalising ``w``.
    """
    self._check_input(X)
    self._check_target(t)
    self._check_binary(t)
    X0 = X[t == 0]
    X1 = X[t == 1]

    # (4.21)
    m0 = np.mean(X0, axis=0)
    m1 = np.mean(X1, axis=0)

    # (4.24) s_k^2 = \sum_{n \in C_k} (y_n - m_k)^2
    cov_inclass = (X0 - m0).T @ (X0 - m0) + (X1 - m1).T @ (X1 - m1)

    # (4.30) w \propto S_w^{-1} (m_2 - m_1)
    self.w = np.linalg.solve(cov_inclass, m1 - m0)
    self.w /= np.linalg.norm(self.w).clip(min=clip_min_norm)

    # one Gaussian per class, fitted to the samples projected onto w
    g0 = Gaussian()
    g0.fit((X0 @ self.w)[:, None])
    g1 = Gaussian()
    g1.fit((X1 @ self.w)[:, None])

    # Coefficients of a * x**2 + 2 * b * x + c = 0, whose roots are the
    # points where the two fitted densities are equal.
    a = g1.var - g0.var
    b = g0.var * g1.mu - g1.var * g0.mu
    c = (g1.var * g0.mu**2 - g0.var * g1.mu**2
         - g1.var * g0.var * np.log(g1.var / g0.var))
    disc_root = np.sqrt(b**2 - a * c)
    if np.all(b > 0):
        # Same root as (disc_root - b) / a written without the division by
        # ``a``; with equal variances (a == 0) it reduces to -c / (2 * b),
        # the midpoint of the projected means, rather than 0 / 0.
        self.threshold = -c / (b + disc_root)
    else:
        self.threshold = (disc_root - b) / a
def fit(self, x_train: np.ndarray, y_train: np.ndarray):
    """Estimate parameter given training dataset.

    Sets ``self.w`` (unit-norm projection direction) and ``self.threshold``
    (decision boundary on the projected axis).

    Parameters
    ----------
    x_train : np.ndarray
        training dataset independent variable (N, D)
    y_train : np.ndarray
        training dataset dependent variable (N,)
        binary 0 or 1
    """
    x0 = x_train[y_train == 0]
    x1 = x_train[y_train == 1]
    m0 = np.mean(x0, axis=0)
    m1 = np.mean(x1, axis=0)

    # np.cov squeezes its output, giving a 0-d array when D == 1, which
    # np.linalg.solve cannot handle; atleast_2d restores the (D, D) shape.
    cov_inclass = np.atleast_2d(
        np.cov(x0, rowvar=False) + np.cov(x1, rowvar=False)
    )
    self.w = np.linalg.solve(cov_inclass, m1 - m0)
    # clip guards against dividing by zero when w is the zero vector
    self.w /= np.linalg.norm(self.w).clip(min=1e-10)

    # fit one Gaussian to each class's projected samples
    g0 = Gaussian()
    g0.fit((x0 @ self.w))
    g1 = Gaussian()
    g1.fit((x1 @ self.w))

    # roots of the quadratic obtained by equating the two log-densities
    root = np.roots([
        g1.var - g0.var,
        2 * (g0.var * g1.mu - g1.var * g0.mu),
        g1.var * g0.mu**2 - g0.var * g1.mu**2
        - g1.var * g0.var * np.log(g1.var / g0.var),
    ])

    # Prefer the root between the two projected means. When the variances
    # are equal the leading coefficient is zero and np.roots returns only
    # one root, so root[1] must not be indexed in that case.
    first_between = g0.mu < root[0] < g1.mu or g1.mu < root[0] < g0.mu
    if first_between or len(root) == 1:
        self.threshold = root[0]
    else:
        self.threshold = root[1]
def _fit(self, X, t, clip_min_norm=1e-10):
    """Fit the two-class linear discriminant to training data.

    After validating the inputs with the instance's ``_check_*`` helpers,
    this sets ``self.w`` to the normalised solution of
    ``cov_inclass @ w = m1 - m0`` and ``self.threshold`` to the point where
    the Gaussians fitted to each class's projections have equal density.

    Parameters
    ----------
    X : np.ndarray
        Training inputs, one sample per row (presumably (N, D) -- confirm
        with the caller).
    t : np.ndarray
        Training targets; rows where ``t == 0`` / ``t == 1`` define the two
        classes.
    clip_min_norm : float, optional
        Floor for ``||w||`` used when normalising ``w``.
    """
    self._check_input(X)
    self._check_target(t)
    self._check_binary(t)
    X0 = X[t == 0]
    X1 = X[t == 1]
    m0 = np.mean(X0, axis=0)
    m1 = np.mean(X1, axis=0)

    # sum of the two per-class scatter matrices
    cov_inclass = (X0 - m0).T @ (X0 - m0) + (X1 - m1).T @ (X1 - m1)
    self.w = np.linalg.solve(cov_inclass, m1 - m0)
    self.w /= np.linalg.norm(self.w).clip(min=clip_min_norm)

    # one Gaussian per class, fitted to the samples projected onto w
    g0 = Gaussian()
    g0.fit((X0 @ self.w)[:, None])
    g1 = Gaussian()
    g1.fit((X1 @ self.w)[:, None])

    # Coefficients of a * x**2 + 2 * b * x + c = 0; its roots are where the
    # two fitted densities coincide.
    a = g1.var - g0.var
    b = g0.var * g1.mu - g1.var * g0.mu
    c = (g1.var * g0.mu**2 - g0.var * g1.mu**2
         - g1.var * g0.var * np.log(g1.var / g0.var))
    disc_root = np.sqrt(b**2 - a * c)
    if np.all(b > 0):
        # Equivalent to (disc_root - b) / a but never divides by ``a``:
        # equal variances (a == 0) give -c / (2 * b), the midpoint of the
        # projected means, instead of an undefined 0 / 0.
        self.threshold = -c / (b + disc_root)
    else:
        self.threshold = (disc_root - b) / a
def _fit(self, X, t, clip_min_norm=1e-10):
    """Fit the two-class linear discriminant: direction ``w`` and threshold.

    Parameters
    ----------
    X : np.ndarray
        Training inputs, one sample per row (presumably (N, D) -- confirm
        with the caller).
    t : np.ndarray
        Training targets; rows where ``t == 0`` / ``t == 1`` define the two
        classes.
    clip_min_norm : float, optional
        Floor for ``||w||`` used when normalising ``w``.
    """
    self._check_input(X)
    self._check_target(t)
    self._check_binary(t)
    X0 = X[t == 0]
    X1 = X[t == 1]
    m0 = np.mean(X0, axis=0)
    m1 = np.mean(X1, axis=0)

    # within-class scatter matrix
    cov_inclass = (X0 - m0).T @ (X0 - m0) + (X1 - m1).T @ (X1 - m1)

    # Solve for the projection direction. np.linalg.solve(a, b) takes an
    # N x N matrix ``a`` and a length-N vector ``b`` and returns the length-N
    # vector x with a @ x == b, i.e. the solution of the linear system.
    self.w = np.linalg.solve(cov_inclass, m1 - m0)
    # normalise w to unit length (the norm is clipped to avoid dividing by 0)
    self.w /= np.linalg.norm(self.w).clip(min=clip_min_norm)

    # The lines above fit the parameter w. Below, the samples are projected
    # (X @ w gives the transformed variable) and a Gaussian is fitted to each
    # class's projections.
    g0 = Gaussian()
    g0.fit((X0 @ self.w)[:, None])
    g1 = Gaussian()
    g1.fit((X1 @ self.w)[:, None])

    # The threshold is a root of a * x**2 + 2 * b * x + c = 0, the quadratic
    # obtained by equating the log-densities of the two fitted Gaussians;
    # the expression below is the quadratic root formula for that equation.
    a = g1.var - g0.var
    b = g0.var * g1.mu - g1.var * g0.mu
    c = (g1.var * g0.mu**2 - g0.var * g1.mu**2
         - g1.var * g0.var * np.log(g1.var / g0.var))
    disc_root = np.sqrt(b**2 - a * c)
    if np.all(b > 0):
        # Same root as (disc_root - b) / a, written without dividing by
        # ``a``: equal variances (a == 0) give -c / (2 * b), the midpoint of
        # the projected means, instead of an undefined 0 / 0.
        self.threshold = -c / (b + disc_root)
    else:
        self.threshold = (disc_root - b) / a
def _fit(self, X, t, clip_min_norm=1e-10):
    """Learn the projection vector and class-separating threshold.

    Validates ``X`` and ``t`` through the instance's ``_check_*`` helpers,
    then stores the unit-norm direction in ``self.w`` and the equal-density
    point of the two per-class fitted Gaussians in ``self.threshold``.

    Parameters
    ----------
    X : np.ndarray
        Training inputs, one sample per row (presumably (N, D) -- confirm
        with the caller).
    t : np.ndarray
        Training targets; values 0 and 1 select the two classes.
    clip_min_norm : float, optional
        Smallest value allowed for ``||w||`` when normalising ``w``.
    """
    self._check_input(X)
    self._check_target(t)
    self._check_binary(t)
    X0 = X[t == 0]
    X1 = X[t == 1]
    m0 = np.mean(X0, axis=0)
    m1 = np.mean(X1, axis=0)

    # sum of the two per-class scatter matrices
    cov_inclass = (X0 - m0).T @ (X0 - m0) + (X1 - m1).T @ (X1 - m1)
    self.w = np.linalg.solve(cov_inclass, m1 - m0)
    self.w /= np.linalg.norm(self.w).clip(min=clip_min_norm)

    # one Gaussian per class, fitted to the samples projected onto w
    g0 = Gaussian()
    g0.fit((X0 @ self.w)[:, None])
    g1 = Gaussian()
    g1.fit((X1 @ self.w)[:, None])

    # Coefficients of a * x**2 + 2 * b * x + c = 0; its roots are the points
    # where the two fitted densities are equal.
    a = g1.var - g0.var
    b = g0.var * g1.mu - g1.var * g0.mu
    c = (
        g1.var * g0.mu ** 2
        - g0.var * g1.mu ** 2
        - g1.var * g0.var * np.log(g1.var / g0.var)
    )
    disc_root = np.sqrt(b ** 2 - a * c)
    if np.all(b > 0):
        # Identical root to (disc_root - b) / a, rearranged so that ``a`` is
        # never a divisor: with equal variances (a == 0) the result is
        # -c / (2 * b), the midpoint of the projected means, not 0 / 0.
        self.threshold = -c / (b + disc_root)
    else:
        self.threshold = (disc_root - b) / a