예제 #1
0
    def _fit(self, X, t, clip_min_norm=1e-10):
        """
        Fit a two-class Fisher linear discriminant.

        Computes the projection direction ``self.w`` from the class means and
        the within-class scatter, fits one ``Gaussian`` per class to the
        projected samples, and stores the decision boundary on the projected
        axis in ``self.threshold``.

        Parameters
        ----------
        X : np.ndarray
            training inputs; rows are selected with the boolean masks
            ``t == 0`` and ``t == 1``
        t : np.ndarray
            class labels, 0 or 1
        clip_min_norm : float, optional
            lower bound applied to the norm of ``w`` before normalising, so
            that a zero vector does not cause a division by zero
        """
        # Validation helpers are defined elsewhere on the class.
        self._check_input(X)
        self._check_target(t)
        self._check_binary(t)
        X0 = X[t == 0]
        X1 = X[t == 1]
        m0 = np.mean(X0, axis=0)
        m1 = np.mean(X1, axis=0)
        # Equation 4.28: within-class scatter (sum of per-class scatter
        # matrices, not divided by the sample counts).
        cov_inclass = (X0 - m0).T @ (X0 - m0) + (X1 - m1).T @ (X1 - m1)
        # Equation 4.30: w is proportional to S_w^{-1} (m1 - m0).
        self.w = np.linalg.solve(cov_inclass, m1 - m0)
        self.w /= np.linalg.norm(self.w).clip(min=clip_min_norm)
        # Estimate mean and variance of each class along the projection.
        g0 = Gaussian()
        g0.fit((X0 @ self.w)[:, None])
        g1 = Gaussian()
        g1.fit((X1 @ self.w)[:, None])
        # Threshold: a root of a*x**2 + 2*b*x + c = 0, which is where the two
        # fitted densities are equal (assuming mu/var are mean and variance).
        a = g1.var - g0.var
        b = g0.var * g1.mu - g1.var * g0.mu
        c = (
            g1.var * g0.mu ** 2 - g0.var * g1.mu ** 2
            - g1.var * g0.var * np.log(g1.var / g0.var)
        )
        if np.all(a == 0):
            # Equal variances: the quadratic term vanishes and the closed-form
            # root below would divide by zero. The remaining linear equation
            # is solved by the midpoint of the two projected means.
            self.threshold = 0.5 * (g0.mu + g1.mu)
        else:
            self.threshold = (np.sqrt(b ** 2 - a * c) - b) / a
    def fit(self, X: np.ndarray, t: np.ndarray):
        """
        Estimate the projection vector and decision threshold from training data.

        Stores the unit-norm projection direction in ``self.w`` and the
        decision boundary on the projected axis in ``self.threshold``.

        Parameters
        ----------
        X : (N, D) np.ndarray
            training dataset independent variable
        t : (N,) np.ndarray
            training dataset dependent variable
            binary 0 or 1
        """
        X0 = X[t == 0]
        X1 = X[t == 1]
        m0 = np.mean(X0, axis=0)
        m1 = np.mean(X1, axis=0)
        # Sum of the per-class sample covariance matrices.
        cov_inclass = np.cov(X0, rowvar=False) + np.cov(X1, rowvar=False)
        self.w = np.linalg.solve(cov_inclass, m1 - m0)
        # Clip the norm so a zero vector does not cause a division by zero.
        self.w /= np.linalg.norm(self.w).clip(min=1e-10)

        # Fit one Gaussian per class to the projected samples.
        g0 = Gaussian()
        g0.fit((X0 @ self.w))
        g1 = Gaussian()
        g1.fit((X1 @ self.w))
        # Roots of the quadratic whose zeros are the points where the two
        # fitted densities are equal (assuming mu/var are mean and variance).
        roots = np.roots([
            g1.var - g0.var, 2 * (g0.var * g1.mu - g1.var * g0.mu),
            g1.var * g0.mu**2 - g0.var * g1.mu**2 -
            g1.var * g0.var * np.log(g1.var / g0.var)
        ])
        midpoint = 0.5 * (g0.mu + g1.mu)
        if roots.size == 0:
            # np.roots strips zero leading coefficients; with identical
            # projected distributions no root is left, so use the midpoint.
            self.threshold = midpoint
        else:
            # At most one root can lie between the projected means, and that
            # root is always the one nearest their midpoint. Choosing by
            # distance also gives a well-defined answer when neither root is
            # between the means, or when only one root exists.
            self.threshold = roots[np.argmin(np.abs(roots - midpoint))]
예제 #3
0
    def _fit(self, X, t, clip_min_norm=1e-10):
        """
        Fit a two-class Fisher linear discriminant.

        Sets ``self.w`` to the unit-norm projection direction and
        ``self.threshold`` to the decision boundary on the projected axis,
        obtained from one ``Gaussian`` fitted to each class's projections.

        Parameters
        ----------
        X : np.ndarray
            training inputs; rows are selected with the boolean masks
            ``t == 0`` and ``t == 1``
        t : np.ndarray
            class labels, 0 or 1
        clip_min_norm : float, optional
            lower bound applied to the norm of ``w`` before normalising, so
            that a zero vector does not cause a division by zero
        """
        # Validation helpers are defined elsewhere on the class.
        self._check_input(X)
        self._check_target(t)
        self._check_binary(t)
        X0 = X[t == 0]
        X1 = X[t == 1]
        # (4.21) class means
        m0 = np.mean(X0, axis=0)
        m1 = np.mean(X1, axis=0)

        # (4.28) within-class scatter S_w = \sum_k \sum_{n \in C_k}
        # (x_n - m_k)(x_n - m_k)^T
        cov_inclass = (X0 - m0).T @ (X0 - m0) + (X1 - m1).T @ (X1 - m1)

        # (4.30) w \propto S_w^{-1} (m_2 - m_1)
        self.w = np.linalg.solve(cov_inclass, m1 - m0)
        self.w /= np.linalg.norm(self.w).clip(min=clip_min_norm)

        # Fit one Gaussian per class to the projected samples.
        g0 = Gaussian()
        g0.fit((X0 @ self.w)[:, None])
        g1 = Gaussian()
        g1.fit((X1 @ self.w)[:, None])

        # Coefficients of a*x**2 + 2*b*x + c = 0, whose roots are the points
        # where the two fitted densities are equal (assuming mu/var are the
        # fitted mean and variance).
        a = g1.var - g0.var
        b = g0.var * g1.mu - g1.var * g0.mu
        c = (g1.var * g0.mu**2 - g0.var * g1.mu**2 -
             g1.var * g0.var * np.log(g1.var / g0.var))
        if np.all(a == 0):
            # Equal variances: the closed-form root would divide by zero.
            # The equation is then linear and its solution is the midpoint
            # of the two projected means.
            self.threshold = 0.5 * (g0.mu + g1.mu)
        else:
            self.threshold = (np.sqrt(b**2 - a * c) - b) / a
예제 #4
0
    def fit(self, x_train: np.ndarray, y_train: np.ndarray):
        """Estimate parameter given training dataset.

        Stores the unit-norm projection direction in ``self.w`` and the
        decision boundary on the projected axis in ``self.threshold``.

        Parameters
        ----------
        x_train : np.ndarray
            training dataset independent variable (N, D)
        y_train : np.ndarray
            training dataset dependent variable (N,)
            binary 0 or 1
        """
        x0 = x_train[y_train == 0]
        x1 = x_train[y_train == 1]
        m0 = np.mean(x0, axis=0)
        m1 = np.mean(x1, axis=0)
        # Sum of the per-class sample covariance matrices.
        cov_inclass = np.cov(x0, rowvar=False) + np.cov(x1, rowvar=False)
        self.w = np.linalg.solve(cov_inclass, m1 - m0)
        # Clip the norm so a zero vector does not cause a division by zero.
        self.w /= np.linalg.norm(self.w).clip(min=1e-10)

        # Fit one Gaussian per class to the projected samples.
        g0 = Gaussian()
        g0.fit((x0 @ self.w))
        g1 = Gaussian()
        g1.fit((x1 @ self.w))
        # Roots of the quadratic whose zeros are the points where the two
        # fitted densities are equal (assuming mu/var are mean and variance).
        roots = np.roots([
            g1.var - g0.var,
            2 * (g0.var * g1.mu - g1.var * g0.mu),
            g1.var * g0.mu**2 - g0.var * g1.mu**2 -
            g1.var * g0.var * np.log(g1.var / g0.var),
        ])
        midpoint = 0.5 * (g0.mu + g1.mu)
        if roots.size == 0:
            # np.roots strips zero leading coefficients; with identical
            # projected distributions no root is left, so use the midpoint.
            self.threshold = midpoint
        else:
            # At most one root can lie between the projected means, and that
            # root is always the one nearest their midpoint. Choosing by
            # distance also gives a well-defined answer when neither root is
            # between the means, or when only one root exists.
            self.threshold = roots[np.argmin(np.abs(roots - midpoint))]
예제 #5
0
 def _fit(self, X, t, clip_min_norm=1e-10):
     """
     Fit a two-class Fisher linear discriminant.

     Sets ``self.w`` to the unit-norm projection direction and
     ``self.threshold`` to the decision boundary on the projected axis.

     Parameters
     ----------
     X : np.ndarray
         training inputs; rows are selected with the boolean masks
         ``t == 0`` and ``t == 1``
     t : np.ndarray
         class labels, 0 or 1
     clip_min_norm : float, optional
         lower bound applied to the norm of ``w`` before normalising, so
         that a zero vector does not cause a division by zero
     """
     # Validation helpers are defined elsewhere on the class.
     self._check_input(X)
     self._check_target(t)
     self._check_binary(t)
     X0 = X[t == 0]
     X1 = X[t == 1]
     m0 = np.mean(X0, axis=0)
     m1 = np.mean(X1, axis=0)
     # Within-class scatter: sum of the per-class scatter matrices.
     cov_inclass = (X0 - m0).T @ (X0 - m0) + (X1 - m1).T @ (X1 - m1)
     # Solve cov_inclass @ w = m1 - m0 rather than forming an inverse.
     self.w = np.linalg.solve(cov_inclass, m1 - m0)
     self.w /= np.linalg.norm(self.w).clip(min=clip_min_norm)
     # Fit one Gaussian per class to the projected samples.
     g0 = Gaussian()
     g0.fit((X0 @ self.w)[:, None])
     g1 = Gaussian()
     g1.fit((X1 @ self.w)[:, None])
     # Coefficients of a*x**2 + 2*b*x + c = 0, whose roots are the points
     # where the two fitted densities are equal (assuming mu/var are the
     # fitted mean and variance).
     a = g1.var - g0.var
     b = g0.var * g1.mu - g1.var * g0.mu
     c = (g1.var * g0.mu**2 - g0.var * g1.mu**2 -
          g1.var * g0.var * np.log(g1.var / g0.var))
     if np.all(a == 0):
         # Equal variances: the closed-form root would divide by zero.
         # The equation is then linear and its solution is the midpoint
         # of the two projected means.
         self.threshold = 0.5 * (g0.mu + g1.mu)
     else:
         self.threshold = (np.sqrt(b**2 - a * c) - b) / a
	def _fit(self, X, t, clip_min_norm=1e-10):
		"""
		Fit a two-class Fisher linear discriminant.

		Sets ``self.w`` to the unit-norm projection direction and
		``self.threshold`` to the decision boundary on the projected axis.

		Parameters
		----------
		X : np.ndarray
			training inputs; rows are selected with the boolean masks
			``t == 0`` and ``t == 1``
		t : np.ndarray
			class labels, 0 or 1
		clip_min_norm : float, optional
			lower bound applied to the norm of ``w`` before normalising, so
			that a zero vector does not cause a division by zero
		"""
		# Validation helpers are defined elsewhere on the class.
		self._check_input(X)
		self._check_target(t)
		self._check_binary(t)
		X0 = X[t == 0]
		X1 = X[t == 1]
		m0 = np.mean(X0, axis=0)
		m1 = np.mean(X1, axis=0)
		# Within-class scatter matrix.
		cov_inclass = (X0 - m0).T @ (X0 - m0) + (X1 - m1).T @ (X1 - m1)
		# np.linalg.solve(A, b) returns the x satisfying A @ x = b; here that
		# is the (unnormalised) projection direction.
		self.w = np.linalg.solve(cov_inclass, m1 - m0)
		# Normalise w to unit length (the original line was missing the "."
		# before clip and did not parse).
		self.w /= np.linalg.norm(self.w).clip(min=clip_min_norm)
		# The steps above fit the parameter w. The steps below project each
		# class onto w and fit a Gaussian to the projected values.
		g0 = Gaussian()
		g0.fit((X0 @ self.w)[:, None])
		g1 = Gaussian()
		g1.fit((X1 @ self.w)[:, None])
		# Coefficients of a*x**2 + 2*b*x + c = 0, whose roots are the points
		# where the two fitted densities are equal (assuming mu/var are the
		# fitted mean and variance). The threshold is the quadratic-formula
		# root (-b + sqrt(b**2 - a*c)) / a.
		a = g1.var - g0.var
		b = g0.var * g1.mu - g1.var * g0.mu
		c = (g1.var * g0.mu**2 - g0.var * g1.mu**2 - g1.var * g0.var * np.log(g1.var / g0.var))
		if np.all(a == 0):
			# Equal variances: the closed-form root would divide by zero.
			# The equation is then linear and its solution is the midpoint
			# of the two projected means.
			self.threshold = 0.5 * (g0.mu + g1.mu)
		else:
			self.threshold = (np.sqrt(b**2 - a * c) - b) / a

		"""
 def _fit(self, X, t, clip_min_norm=1e-10):
     # Fit a two-class Fisher linear discriminant.
     #
     # Sets self.w to the unit-norm projection direction and self.threshold
     # to the decision boundary on the projected axis.
     #
     # X: training inputs; rows are selected with the masks t == 0 / t == 1.
     # t: class labels, 0 or 1.
     # clip_min_norm: lower bound applied to the norm of w before
     #     normalising, so a zero vector does not cause a division by zero.

     # Validation helpers are defined elsewhere on the class.
     self._check_input(X)
     self._check_target(t)
     self._check_binary(t)
     X0 = X[t == 0]
     X1 = X[t == 1]
     m0 = np.mean(X0, axis=0)
     m1 = np.mean(X1, axis=0)
     # Within-class scatter: sum of the per-class scatter matrices.
     cov_inclass = (X0 - m0).T @ (X0 - m0) + (X1 - m1).T @ (X1 - m1)
     # Solve cov_inclass @ w = m1 - m0 rather than forming an inverse.
     self.w = np.linalg.solve(cov_inclass, m1 - m0)
     self.w /= np.linalg.norm(self.w).clip(min=clip_min_norm)
     # Fit one Gaussian per class to the projected samples.
     g0 = Gaussian()
     g0.fit((X0 @ self.w)[:, None])
     g1 = Gaussian()
     g1.fit((X1 @ self.w)[:, None])
     # Coefficients of a*x**2 + 2*b*x + c = 0, whose roots are the points
     # where the two fitted densities are equal (assuming mu/var are the
     # fitted mean and variance).
     a = g1.var - g0.var
     b = g0.var * g1.mu - g1.var * g0.mu
     c = (
         g1.var * g0.mu ** 2 - g0.var * g1.mu ** 2
         - g1.var * g0.var * np.log(g1.var / g0.var)
     )
     if np.all(a == 0):
         # Equal variances: the closed-form root would divide by zero.
         # The equation is then linear and its solution is the midpoint
         # of the two projected means.
         self.threshold = 0.5 * (g0.mu + g1.mu)
     else:
         self.threshold = (np.sqrt(b ** 2 - a * c) - b) / a