def __init__(self, A, frac=1.0, center=True, scale=True, cov=False, tol=1e-8):
    '''
    Perform the initial principal components calculation, and store
    information for future queries. Note that the stored principal
    components do not include any axes corresponding to zero eigenvalues.

    @param A: Original data matrix (d x N)
    @type A: 2-dimensional NumPy array
    @keyword frac: The initial proportion of variance to be explained.
        This is used to select the number of principal components, using
        L{setfrac}. Should be between 0 and 1 (inclusive).
    @type frac: float
    @keyword center: C{True} if C{A} should be centred to zero mean
        before processing
    @type center: boolean
    @keyword scale: C{True} if C{A} should be scaled to unit variance in
        each component before processing
    @type scale: boolean
    @keyword cov: C{True} if C{A} represents a covariance matrix. This
        should typically be used with C{center=False}, C{scale=False},
        and C{frac=1.0}.
    @type cov: boolean
    @keyword tol: Numerical tolerance; stored on the instance for use by
        other methods (not used directly here).
    @type tol: float
    '''
    self.d = A.shape[0]
    self.N = A.shape[1]
    self.center = center
    self.scale = scale
    self.tol = tol
    A, self.mean, self.std = utils.centerscale(A, center, scale)
    self.mean = self.mean.ravel()
    self.std = self.std.ravel()
    if not cov:
        # Covariance eigenvalues from the data's singular values: D = S^2 / N.
        self.U, self.D = np.linalg.svd(A)[:2]
        self.D *= self.D
        self.D /= self.N
    else:
        # eigh returns eigenvalues in ascending order; reorder descending.
        self.D, self.U = np.linalg.eigh(A)
        idx = self.D.argsort()[::-1]
        self.D = self.D[idx]
        self.U = self.U[:, idx]
    # Drop axes corresponding to zero eigenvalues.
    self.r = np.count_nonzero(self.D)
    # Cumulative proportion of variance explained by the first i components,
    # normalized so the final entry is exactly 1.
    self.props = np.cumsum(self.D[:self.r])
    self.props /= self.props[-1]
    self.setfrac(frac)
def __init__(self, A, classes, labels, center=True, scale=True):
    '''
    Perform multi-class linear discriminant analysis on C{A}.

    @param A: data (d x N)
    @type A: 2-dimensional NumPy array
    @param classes: class names (k)
    @type classes: list
    @param labels: class membership of each point (N)
    @type labels: list
    @keyword center: C{True} if C{A} should be centred to zero mean
        before processing
    @type center: boolean
    @keyword scale: C{True} if C{A} should be scaled to unit variance in
        each component before processing
    @type scale: boolean
    '''
    self.d = A.shape[0]
    self.N = A.shape[1]
    k = len(classes)
    self.center = center
    self.scale = scale
    A, self.mean, self.std = utils.centerscale(A, center, scale)
    self.mean = self.mean.ravel()
    self.std = self.std.ravel()
    mean = np.sum(A, axis=1) / self.N
    # np.float was removed in NumPy 1.20; the builtin float is the
    # documented equivalent (float64 arrays).
    self.aveclasscov = np.zeros((self.d, self.d), dtype=float)
    self.classmeans = np.zeros((self.d, k), dtype=float)
    self.sizes = np.zeros((k,), dtype=float)
    sbCalc = np.zeros((self.d, k), dtype=float)
    # labels is documented as a list; elementwise == needs an ndarray,
    # otherwise the comparison is a scalar False and every class looks empty.
    labels = np.asarray(labels)
    for i in range(k):
        idx = np.where(labels == classes[i])[0]
        B = A[:, idx]
        n = idx.shape[0]
        p = n / self.N
        self.sizes[i] = n
        # Within-class covariances weighted by class prior p.
        self.aveclasscov += np.cov(B, bias=1) * p
        self.classmeans[:, i] = np.sum(B, axis=1) / n
        # Column of the between-class scatter factor, weighted by sqrt(p).
        sbCalc[:, i] = (self.classmeans[:, i] - mean) * np.sqrt(p)
    # Whiten with respect to the average within-class covariance, then take
    # principal components of the (whitened) between-class scatter.
    self.pca1 = PCA(self.aveclasscov, 1.0, False, False, True, tol=0.1e-5)
    self.pca2 = PCA(self.pca1.transform(sbCalc, True), 1.0, False, False, False)
    # Rows of W project data onto the discriminant directions.
    self.W = self.pca2.transform(self.pca1.transform(np.eye(self.d), True)).T
def __init__(self, A, frac=1.0, center=True, scale=True, cov=False, tol=1e-8):
    '''
    Set up the principal-components decomposition of C{A} and cache the
    quantities needed by later queries. Components whose eigenvalues are
    zero are excluded.

    @param A: Original data matrix (d x N)
    @type A: 2-dimensional NumPy array
    @keyword frac: Initial proportion of variance to explain; passed to
        L{setfrac} to pick how many components are kept. Must lie in
        [0, 1].
    @type frac: float
    @keyword center: whether to shift C{A} to zero mean first
    @type center: boolean
    @keyword scale: whether to rescale each component of C{A} to unit
        variance first
    @type scale: boolean
    @keyword cov: set to C{True} when C{A} is already a covariance
        matrix (normally combined with C{center=False}, C{scale=False},
        C{frac=1.0})
    @type cov: boolean
    '''
    self.d = A.shape[0]
    self.N = A.shape[1]
    self.center = center
    self.scale = scale
    self.tol = tol
    A, m, s = utils.centerscale(A, center, scale)
    self.mean = m.ravel()
    self.std = s.ravel()
    if cov:
        # Direct eigendecomposition; eigh yields ascending eigenvalues,
        # so sort them into descending order.
        eigvals, eigvecs = np.linalg.eigh(A)
        order = eigvals.argsort()[::-1]
        self.D = eigvals[order]
        self.U = eigvecs[:, order]
    else:
        # SVD of the (centred/scaled) data; squared singular values over N
        # give the covariance eigenvalues.
        self.U, self.D = np.linalg.svd(A)[:2]
        self.D *= self.D
        self.D /= self.N
    # Keep only components with nonzero eigenvalue.
    self.r = np.count_nonzero(self.D)
    # Running totals of explained variance, normalized to end at 1.
    props = np.zeros(self.r)
    running = 0.0
    for i in range(self.r):
        running = running + self.D[i]
        props[i] = running
    self.props = props / props[self.r - 1]
    self.setfrac(frac)
def __init__(self, A, classes, labels, center=True, scale=True):
    '''
    Perform multi-class linear discriminant analysis on C{A}.

    @param A: data (d x N)
    @type A: 2-dimensional NumPy array
    @param classes: class names (k)
    @type classes: list
    @param labels: class membership of each point (N)
    @type labels: list
    @keyword center: C{True} if C{A} should be centred to zero mean
        before processing
    @type center: boolean
    @keyword scale: C{True} if C{A} should be scaled to unit variance in
        each component before processing
    @type scale: boolean
    '''
    self.d = A.shape[0]
    self.N = A.shape[1]
    k = len(classes)
    self.center = center
    self.scale = scale
    A, self.mean, self.std = utils.centerscale(A, center, scale)
    self.mean = self.mean.ravel()
    self.std = self.std.ravel()
    mean = np.sum(A, axis=1) / self.N
    # dtype=float replaces the np.float alias, which NumPy 1.20 deprecated
    # and NumPy 1.24 removed outright.
    self.aveclasscov = np.zeros((self.d, self.d), dtype=float)
    self.classmeans = np.zeros((self.d, k), dtype=float)
    self.sizes = np.zeros((k,), dtype=float)
    sbCalc = np.zeros((self.d, k), dtype=float)
    # Convert up front: a plain Python list compared with == produces a
    # single False instead of an elementwise mask, making np.where useless.
    labels = np.asarray(labels)
    for i in range(k):
        idx = np.where(labels == classes[i])[0]
        B = A[:, idx]
        n = idx.shape[0]
        p = n / self.N
        self.sizes[i] = n
        # Accumulate prior-weighted within-class covariance.
        self.aveclasscov += np.cov(B, bias=1) * p
        self.classmeans[:, i] = np.sum(B, axis=1) / n
        # sqrt(p)-weighted mean offsets form the between-class scatter factor.
        sbCalc[:, i] = (self.classmeans[:, i] - mean) * np.sqrt(p)
    # First PCA whitens the average within-class covariance; second PCA
    # diagonalizes the between-class scatter in the whitened space.
    self.pca1 = PCA(self.aveclasscov, 1.0, False, False, True, tol=0.1e-5)
    self.pca2 = PCA(self.pca1.transform(sbCalc, True), 1.0, False, False, False)
    # Projection matrix onto the discriminant directions (one per row).
    self.W = self.pca2.transform(self.pca1.transform(np.eye(self.d), True)).T