def _init_random_gaussians(self, X):
    """Set uniform priors and append one initial Gaussian per component.

    ``self.priors`` becomes a length-``self.k`` array whose entries are all
    ``1 / self.k``. For each of the ``self.k`` components a dict with the
    keys ``"mean"`` and ``"cov"`` is appended to ``self.parameters``:
    the mean is a randomly chosen row of ``X`` and the covariance is
    ``calculate_covariance_matrix(X)`` evaluated on the full data.

    Args:
        X: Data indexed by sample along its first axis.
    """
    sample_count = np.shape(X)[0]
    uniform_weight = 1 / self.k
    self.priors = uniform_weight * np.ones(self.k)

    for _ in range(self.k):
        # Pick the row first, then compute a fresh covariance for this
        # component so that no two components share the same array object.
        row = np.random.choice(range(sample_count))
        self.parameters.append(
            {"mean": X[row], "cov": calculate_covariance_matrix(X)}
        )
def _init_random_gaussians(self, X):
    """Initialise the mixture: uniform priors plus ``self.k`` starting Gaussians.

    Every component starts with a mean equal to a randomly selected sample
    of ``X`` and a covariance equal to ``calculate_covariance_matrix(X)``.
    The resulting ``{"mean": ..., "cov": ...}`` dicts are appended to
    ``self.parameters`` and ``self.priors`` is filled with ``1 / self.k``.

    Args:
        X: Data indexed by sample along its first axis.
    """
    num_rows = np.shape(X)[0]
    self.priors = (1 / self.k) * np.ones(self.k)

    component = 0
    while component < self.k:
        gaussian = {}
        chosen = np.random.choice(range(num_rows))
        gaussian["mean"] = X[chosen]
        # Computed per component so each entry owns its covariance array.
        gaussian["cov"] = calculate_covariance_matrix(X)
        self.parameters.append(gaussian)
        component += 1
def fit(self, X, y):
    """Compute the two-class discriminant direction and store it in ``self.w``.

    The samples labelled 0 and 1 are separated, their covariance matrices
    are summed, and ``self.w`` is set to the pseudo-inverse of that sum
    applied to the difference of the class means, i.e.
    ``w = pinv(cov_0 + cov_1) . (mean_0 - mean_1)``.

    Args:
        X: Feature array indexed by sample along its first axis.
        y: Label array aligned with ``X``; only labels 0 and 1 are used.
    """
    # Split the samples by class label.
    class_zero, class_one = X[y == 0], X[y == 1]

    # Summed covariance of the two class distributions.
    summed_cov = (
        calculate_covariance_matrix(class_zero)
        + calculate_covariance_matrix(class_one)
    )

    # Difference of the class means, forced to be at least 1-D.
    delta = np.atleast_1d(class_zero.mean(0) - class_one.mean(0))

    # Direction onto which projecting X best separates the two classes.
    self.w = np.linalg.pinv(summed_cov).dot(delta)
def fit(self, X, y):
    """Fit the two-class linear discriminant; the result is kept in ``self.w``.

    ``self.w`` is the pseudo-inverse of the sum of the two class covariance
    matrices multiplied by the difference between the class means
    (class 0 minus class 1).

    Args:
        X: Feature array indexed by sample along its first axis.
        y: Label array aligned with ``X``; only labels 0 and 1 are used.
    """
    # Partition the data by label.
    first_class = X[y == 0]
    second_class = X[y == 1]

    # Per-class covariances, combined into one matrix.
    first_cov = calculate_covariance_matrix(first_class)
    second_cov = calculate_covariance_matrix(second_class)
    combined_cov = first_cov + second_cov

    # Gap between the class centroids.
    centroid_gap = np.atleast_1d(first_class.mean(0) - second_class.mean(0))

    # w = pinv(cov_0 + cov_1) . (mean_0 - mean_1)
    inverse_cov = np.linalg.pinv(combined_cov)
    self.w = inverse_cov.dot(centroid_gap)
def _transform(self, X, dim):
    """Project ``X`` onto the ``dim`` leading eigenvectors of its covariance.

    Args:
        X: Data array; ``X.dot(...)`` is applied to it directly (no centring
            is done here).
        dim: Number of eigenvectors (columns) to keep.

    Returns:
        ``X`` multiplied by the eigenvectors belonging to the ``dim`` largest
        eigenvalues of ``calculate_covariance_matrix(X)``.
    """
    eig_vals, eig_vecs = np.linalg.eig(calculate_covariance_matrix(X))

    # Column order that sorts the eigenvalues from largest to smallest.
    descending = eig_vals.argsort()[::-1]

    # Reorder the eigenvector columns accordingly and keep the first `dim`.
    components = np.atleast_1d(eig_vecs[:, descending])[:, :dim]

    # Project the data onto the selected components.
    return X.dot(components)
def transform(self, X, n_components):
    """Project ``X`` onto its ``n_components`` leading principal directions.

    The covariance matrix of ``X`` is eigen-decomposed, the eigenvectors are
    ordered by descending eigenvalue, and ``X`` is multiplied by the first
    ``n_components`` of them.

    Args:
        X: Data array; it is projected as-is (no centring is done here).
        n_components: Number of eigenvectors (columns) to keep.

    Returns:
        ``X.dot(E)`` where the columns of ``E`` are the eigenvectors with the
        ``n_components`` largest eigenvalues.
    """
    covariance = calculate_covariance_matrix(X)
    # eigenvectors[:, i] is the eigenvector belonging to eigenvalues[i].
    eigenvalues, eigenvectors = np.linalg.eig(covariance)

    # Sort by eigenvalue, largest first, and pick the n_components largest.
    idx = eigenvalues.argsort()[::-1]
    eigenvectors = np.atleast_1d(eigenvectors[:, idx])[:, :n_components]

    # Project the data onto the selected eigenvectors.
    X_transformed = X.dot(eigenvectors)
    return X_transformed
def transform(self, X, n_components):
    """Project ``X`` onto the top ``n_components`` covariance eigenvectors.

    Args:
        X: Data array; it is projected as-is (no centring is done here).
        n_components: Number of eigenvectors (columns) to keep.

    Returns:
        ``X`` multiplied by the eigenvectors that belong to the
        ``n_components`` largest eigenvalues of
        ``calculate_covariance_matrix(X)``.
    """
    # Column i of `vectors` is the eigenvector paired with `values[i]`.
    values, vectors = np.linalg.eig(calculate_covariance_matrix(X))

    # Indices that order the eigenvalues from largest to smallest.
    ranking = values.argsort()[::-1]

    # Reorder the columns to match and truncate to the requested count.
    basis = np.atleast_1d(vectors[:, ranking])[:, :n_components]

    # Express the data in the selected basis.
    return X.dot(basis)
def transform(self, X, n_components):
    """Reduce ``X`` to ``n_components`` columns via covariance eigenvectors.

    Args:
        X: Data array; it is projected as-is (no centring is done here).
        n_components: Number of eigenvectors (columns) to keep.

    Returns:
        The product of ``X`` with the eigenvectors of
        ``calculate_covariance_matrix(X)`` that have the ``n_components``
        largest eigenvalues.
    """
    cov = calculate_covariance_matrix(X)
    # w[i] is the eigenvalue for the eigenvector stored in column v[:, i].
    w, v = np.linalg.eig(cov)

    # Largest eigenvalue first.
    by_magnitude = w.argsort()[::-1]
    sorted_vectors = v[:, by_magnitude]

    # Keep only the leading columns and project the data onto them.
    leading = np.atleast_1d(sorted_vectors)[:, :n_components]
    projected = X.dot(leading)
    return projected
def _calculate_scatter_matrices(self, X, y):
    """Compute the within-class and between-class scatter matrices.

    Within-class scatter:
        SW = sum over classes of (n_c - 1) * cov(X_c)
    Between-class scatter:
        SB = sum over classes of n_c * outer(mean_c - mean, mean_c - mean)

    Args:
        X: 2-D array of shape (n_samples, n_features).
        y: Label array aligned with the rows of ``X``.

    Returns:
        Tuple ``(SW, SB)``, each of shape (n_features, n_features).
    """
    n_features = np.shape(X)[1]
    labels = np.unique(y)
    total_mean = np.mean(X, axis=0)

    # The accumulators must start at zero: np.empty returns uninitialised
    # memory, so summing into it yields arbitrary results.
    SW = np.zeros((n_features, n_features))
    SB = np.zeros((n_features, n_features))

    for label in labels:
        _X = X[y == label]
        n_class = len(_X)

        # A class with a single sample has no spread around its own mean,
        # and its sample covariance is undefined, so it adds nothing to SW.
        # NOTE(review): the (n - 1) factor assumes the helper normalises by
        # n - 1 -- confirm against calculate_covariance_matrix.
        if n_class > 1:
            SW += (n_class - 1) * calculate_covariance_matrix(_X)

        # The class-mean offset is a 1-D vector; the outer product gives the
        # (n_features, n_features) matrix, whereas .dot() on 1-D vectors
        # would collapse to a scalar.
        mean_offset = np.mean(_X, axis=0) - total_mean
        SB += n_class * np.outer(mean_offset, mean_offset)

    return SW, SB
def _calculate_scatter_matrices(self, X, y):
    """Compute the within-class and between-class scatter matrices.

    Within-class scatter:
        SW = sum over classes of (n_c - 1) * cov(X_c)
    Between-class scatter:
        SB = sum over classes of n_c * outer(mean_c - mean, mean_c - mean)

    Args:
        X: 2-D array of shape (n_samples, n_features).
        y: Label array aligned with the rows of ``X``.

    Returns:
        Tuple ``(SW, SB)``, each of shape (n_features, n_features).
    """
    n_features = np.shape(X)[1]
    labels = np.unique(y)
    total_mean = np.mean(X, axis=0)

    # The accumulators must start at zero: np.empty returns uninitialised
    # memory, so summing into it yields arbitrary results.
    SW = np.zeros((n_features, n_features))
    SB = np.zeros((n_features, n_features))

    for label in labels:
        _X = X[y == label]
        n_class = len(_X)

        # A class with a single sample has no spread around its own mean,
        # and its sample covariance is undefined, so it adds nothing to SW.
        # NOTE(review): the (n - 1) factor assumes the helper normalises by
        # n - 1 -- confirm against calculate_covariance_matrix.
        if n_class > 1:
            SW += (n_class - 1) * calculate_covariance_matrix(_X)

        # The class-mean offset is a 1-D vector; the outer product gives the
        # (n_features, n_features) matrix, whereas .dot() on 1-D vectors
        # would collapse to a scalar.
        mean_offset = np.mean(_X, axis=0) - total_mean
        SB += n_class * np.outer(mean_offset, mean_offset)

    return SW, SB
def get_covariance(self, X):
    """Return ``calculate_covariance_matrix(X)`` for the given data."""
    return calculate_covariance_matrix(X)
# Import helper functions dir_path = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, dir_path + "/../utils") from data_operation import calculate_covariance_matrix, calculate_correlation_matrix from data_manipulation import normalize # Load dataset and only use the two first classes data = datasets.load_iris() X = normalize(data.data[data.target < 2]) y = data.target[data.target < 2] X1 = X[y == 0] X2 = X[y == 1] # Calculate the covariances of the two class distributions cov1 = calculate_covariance_matrix(X1) cov2 = calculate_covariance_matrix(X2) cov_tot = cov1 + cov2 # Get the means of the two class distributions mean1 = X1.mean(0) mean2 = X2.mean(0) mean_diff = np.atleast_1d(mean1 - mean2) # Calculate w as (x1_mean - x2_mean) / (cov1 + cov2) w = np.linalg.pinv(cov_tot).dot(mean_diff) # Project X onto w x1 = X.dot(w) x2 = X.dot(w)
def get_covariance(self, X):
    """Compute the covariance matrix of ``X`` via ``calculate_covariance_matrix``.

    Args:
        X: Data passed through unchanged to the helper.

    Returns:
        Whatever ``calculate_covariance_matrix(X)`` returns.
    """
    result = calculate_covariance_matrix(X)
    return result
# Import helper functions dir_path = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, dir_path + "/../utils") from data_operation import calculate_covariance_matrix, calculate_correlation_matrix from data_manipulation import normalize # Load dataset and only use the two first classes data = datasets.load_iris() X = normalize(data.data[data.target < 2]) y = data.target[data.target < 2] X1 = X[y == 0] X2 = X[y == 1] # Calculate the covariances of the two class distributions cov1 = calculate_covariance_matrix(X1) cov2 = calculate_covariance_matrix(X2) cov_tot = cov1 + cov2 # Get the means of the two class distributions mean1 = X1.mean(0) mean2 = X2.mean(0) mean_diff = np.atleast_1d(mean1 - mean2) # Calculate w as (x1_mean - x2_mean) / (cov1 + cov2) w = np.linalg.pinv(cov_tot).dot(mean_diff) # Project X onto w x1 = X.dot(w) x2 = X.dot(w)