def plot_iris(y, y_classes, maxit=25, *args, **kwargs):
    # np.random.seed(0)
    fig, ax = plot_grid(5)

    # Variational Bayes
    vbpca = VBPCA(y, *args, **kwargs)
    for i in range(maxit):
        vbpca.update()
    plot_scatter(vbpca.transform(), y_classes, ax[0])
    ax[0].set_title('VBPCA')

    # Laplace approximation
    lbpca = LBPCA(y.T)
    lbpca.fit(maxit)
    plot_scatter(lbpca.transform(2).T, y_classes, ax[1])
    ax[1].set_title('LBPCA')

    # Streaming LBPCA
    stream = create_distributed(np.copy(y.T), 10)
    stream.randomized_fit(1)
    plot_scatter(stream.transform(y.T, 2).T, y_classes, ax[2])
    ax[2].set_title('Batch BPCA')

    # Distributed LBPCA
    stream = create_distributed(np.copy(y.T), 10)
    stream.averaged_fit(maxit)
    plot_scatter(stream.transform(y.T, 2).T, y_classes, ax[3])
    ax[3].set_title('Parallel BPCA')

    # PCA
    pca = PCA(y.T)
    plot_scatter(pca.fit_transform().T, y_classes, ax[4])
    ax[4].set_title('PCA')

    plt.show()
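# plot_grid() and plot_scatter() are helpers defined elsewhere in this repo.
# A minimal sketch of what plot_iris() assumes of them: plot_grid(n) returns
# a figure and n axes, and plot_scatter(points, classes, ax) draws an
# (n_samples, 2) array of points colored by class label. The bodies below
# are assumptions for illustration, not the repo's actual implementations.
import numpy as np
import matplotlib.pyplot as plt

def plot_grid(n):
    fig, axes = plt.subplots(1, n, figsize=(4 * n, 4))
    return fig, np.atleast_1d(axes)

def plot_scatter(points, classes, ax):
    points = np.asarray(points)
    classes = np.asarray(classes)
    for c in np.unique(classes):
        mask = classes == c
        ax.scatter(points[mask, 0], points[mask, 1], s=10, label=str(c))
    ax.legend(loc='best', fontsize='small')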
def generate_pca_embedding_files():
    '''
    Generate PCA embedding csv files for the experiments.
    '''
    raw = genfromtxt('digits-raw.csv', delimiter=',')
    # Keep the first two (non-pixel) columns and replace the pixel
    # columns with the 10-dimensional PCA embedding.
    X = raw[:, 2:]
    pca = PCA(10)
    X_new = pca.fit_transform(X)
    raw_new = hstack((raw[:, :2], X_new))
    savetxt('digits-pca-embedding.csv', raw_new, delimiter=',')
def Bonus3():
    '''
    Scatter plot of samples projected onto the first two eigenvectors.
    '''
    raw = genfromtxt('digits-raw.csv', delimiter=',')
    X = raw[:, 2:]
    pca = PCA(2)
    X_new = pca.fit_transform(X)
    # Plot a random subsample of 1000 points, colored by digit label.
    perm = permutation(X.shape[0])[:1000]
    labels = array(raw[perm, 1], dtype=int)
    colors = rand(10, 3)[labels, :]
    scatter(X_new[perm, 0], X_new[perm, 1], c=colors, alpha=0.9, s=10)
    show()
def show_hinton_weights(data):
    np.set_printoptions(precision=3)
    lbpca = LBPCA(data)
    pca = PCA(data)

    # LBPCA
    iterations = 50
    lbpca.fit_transform(iterations)
    weight = lbpca.W
    hinton(weight)
    figure = plt.gcf()
    figure.canvas.set_window_title('BPCA, iterations=' + str(iterations))
    plt.title('BPCA')
    plt.show()

    # PCA
    weight = pca.fit_transform()
    pcs = pca.params
    hinton(pcs[:, :-1])
    figure = plt.gcf()
    figure.canvas.set_window_title('PCA')
    plt.title('PCA')
    plt.show()

    # Streaming LBPCA
    iterations = 50
    coord = create_distributed(data, 10)
    coord.randomized_fit(iterations)
    weight = coord.W
    hinton(weight)
    figure = plt.gcf()
    figure.canvas.set_window_title('Batch BPCA')
    plt.title('Batch BPCA')
    plt.show()

    # Distributed LBPCA
    iterations = 50
    coord = create_distributed(data, 10)
    coord.averaged_fit(iterations)
    weight = coord.W
    hinton(weight)
    figure = plt.gcf()
    figure.canvas.set_window_title('Parallel BPCA, iterations=' + str(iterations))
    plt.title('Parallel BPCA')
    plt.show()
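# The hinton() helper used above is not defined in this file. A minimal
# sketch follows, adapted from the well-known matplotlib "Hinton diagram"
# demo: square area encodes |weight|, color encodes sign. This is an
# assumed stand-in, not necessarily the repo's own implementation.
import numpy as np
import matplotlib.pyplot as plt

def hinton(matrix, max_weight=None, ax=None):
    '''Draw a Hinton diagram for visualizing a weight matrix.'''
    ax = ax if ax is not None else plt.gca()
    if not max_weight:
        max_weight = 2 ** np.ceil(np.log2(np.abs(matrix).max()))
    ax.patch.set_facecolor('gray')
    ax.set_aspect('equal', 'box')
    ax.xaxis.set_major_locator(plt.NullLocator())
    ax.yaxis.set_major_locator(plt.NullLocator())
    for (x, y), w in np.ndenumerate(matrix):
        color = 'white' if w > 0 else 'black'
        size = np.sqrt(abs(w) / max_weight)
        ax.add_patch(plt.Rectangle([x - size / 2, y - size / 2], size, size,
                                   facecolor=color, edgecolor=color))
    ax.autoscale_view()
    ax.invert_yaxis()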
def main():
    datafile = "data.txt"
    data = loaddata(datafile)
    k = 2
    pca = PCA(k)
    return pca.fit_transform(data)
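# loaddata() is defined elsewhere in the repo. A minimal stand-in, assuming
# data.txt holds one whitespace-separated numeric sample per row (the exact
# file format is an assumption, not confirmed by this file):
import numpy as np

def loaddata(path):
    return np.loadtxt(path)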
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pca import PCA

# Read file
columns = [
    'sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'
]
iris = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    header=None,
    names=columns)

# Extract features
features = iris.drop('class', axis=1)

# Apply PCA to the data
p = PCA(k=3)
p = p.fit_transform(features)

# Create a dataframe with the new data
names = ['pc1', 'pc2', 'pc3', 'pc4']
principalDf = pd.DataFrame(data=p, columns=names[0:p.shape[1]])

# Concat with class
finalDf = pd.concat([principalDf, iris['class']], axis=1)

# Show the new space
sns.pairplot(data=finalDf, hue='class', diag_kind='kde')
plt.show()
data = data.drop(test_data.index)

X_train = data.loc[:, 'Alcohol':]
y_train = data['target']
X_test = test_data.loc[:, 'Alcohol':]
y_test = test_data['target']
target_names = [str(i) for i in np.unique(y_train)]

print()
print('test data of class', y_test)

""" scale """
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

""" PCA """
pca = PCA()
pca.fit_transform(X_train)
pca.add_data(X_test)
plt = pca.plot(y_train, target_names, title='PCA of Wine dataset',
               plot_ellipse=True)
plt.savefig('output/pca_wine.png')
plt.show()
                labels.append(label)
                abs_path = dir + "/" + file
                print("[INFO] Reading file : " + abs_path)
                img = cv2.imread(abs_path)
                hog_emb, grad_magnitude = hog(img)
                hog_embeddings.append(hog_emb)

    print("-----------------------------------------------------------")
    print("[INFO] Implementing Principal component analysis ... ")
    hog_embeddings = np.array(hog_embeddings)
    labels = np.array(labels)
    hog_embeddings = pca.fit_transform(hog_embeddings)
    pickle.dump(hog_embeddings, open("hog_embeddings.pickle", "wb"))
    pickle.dump(labels, open("labels.pickle", "wb"))
else:
    hog_embeddings = pickle.load(open("hog_embeddings.pickle", "rb"))
    labels = pickle.load(open("labels.pickle", "rb"))

print("-------------------------------------------------------------")
print("[INFO] Reading validation data")
VAL_DIR = 'validation/'
if (not os.path.exists("hog_embeddings_val.pickle")
        or not os.path.exists("labels_val.pickle")):
    for (dir, dirs, files) in os.walk(VAL_DIR):
        if (dir != VAL_DIR):
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        img = cv2.resize(img, (30, 30), interpolation=cv2.INTER_AREA)
        # add to dataset after flattening into a row vector
        dataset.append(img.flatten())
    return np.array(dataset)


"""First testing on some non image data"""
# read data
dataset = pd.read_csv("Wine.csv")
x = dataset.iloc[:, :-1].values
p = PCA(nb_components=2)
x2 = p.fit_transform(x)
print(p.get_variance_score())

# Visualization time
plt.scatter(x2[:, 0], x2[:, 1])
plt.plot()
p = None

"""now image compression"""
x = load_imgs()
print("The x matrix contains {} images converted to row vectors of length {}".
      format(x.shape[0], x.shape[1]))
# Now let's convert them to 25x25 size images. So the number of
# principal components becomes 25*25 = 625
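# A hedged continuation of the compression step sketched by the comment
# above: project the 900-pixel rows (30x30 images) down to 625 components
# and view each compressed vector as a 25x25 image. This uses only the PCA
# interface already seen in this file (nb_components, fit_transform);
# reshaping the component scores into an image for display is this
# sketch's own assumption.
p = PCA(nb_components=625)
x_compressed = p.fit_transform(x)
plt.imshow(x_compressed[0].reshape(25, 25), cmap='gray')
plt.show()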
class EigenFaceRecognizer:
    pca = None
    labels = None
    trained_imgs = None
    i_to_label = defaultdict(int)

    def __init__(self):
        self.pca = PCA()
        self.labels = []

    def train(self, mat, label):
        # Group consecutive samples that share a label; samples are
        # assumed to arrive sorted so equal labels are adjacent.
        tmp = []
        for i in range(len(label)):
            self.i_to_label[i] = label[i]
            if i != 0 and label[i] != label[i - 1]:
                self.labels.append(tmp)
                tmp = []
            tmp.append((i, label[i]))
        self.labels.append(tmp)
        # Flatten each image into a row vector and project with PCA.
        to_fit = []
        for i in range(len(mat)):
            to_fit.append(np.ndarray.flatten(mat[i]))
        self.trained_imgs = self.pca.fit_transform(to_fit)

    def predict(self, img):
        input_img = img
        tmp_img = np.ndarray.flatten(img)
        tmp_img = np.array([tmp_img])
        tmp_img = self.pca.transform(tmp_img)
        min_mean = 1e100
        min_label = 0
        re_imgs = self.pca.inverse_transform(self.trained_imgs)
        re_imgs = re_imgs.astype(np.uint8)
        # Track the mean distance from the projected query to each label's
        # block of training projections; keep the closest label.
        sum = 0
        size = 0
        for i in range(len(self.trained_imgs)):
            if i != 0 and self.i_to_label[i] != self.i_to_label[i - 1]:
                mean = sum / size
                if mean < min_mean:
                    min_mean = mean
                    min_label = self.i_to_label[i - 1]
                sum = 0
                size = 0
            trained_img = self.trained_imgs[i]
            distance = self.dis(tmp_img, trained_img)
            size += 1
            sum += distance
            # print(self.i_to_label[i], distance)
        mean = sum / size
        if mean < min_mean:
            min_mean = mean
            min_label = self.i_to_label[len(self.trained_imgs) - 1]
        for i in self.i_to_label:
            if self.i_to_label[i] == min_label:
                result_img = np.reshape(re_imgs[i], (100, 100))
                tmp = np.concatenate((input_img, result_img))
                break
        sum = 0
        size = 0
        return [min_label, min_mean]

    def dis(self, a, b):
        return np.linalg.norm(a - b)
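# A hedged usage sketch for EigenFaceRecognizer. The 100x100 grayscale size
# matches the reshape in predict(); the random images and labels below are
# hypothetical placeholders standing in for a real face dataset. Note that
# train() groups consecutive equal labels, so samples must be sorted by label.
import numpy as np

rng = np.random.default_rng(0)
faces = rng.integers(0, 255, size=(4, 100, 100)).astype(np.uint8)
labels = ['alice', 'alice', 'bob', 'bob']  # equal labels kept adjacent

recognizer = EigenFaceRecognizer()
recognizer.train(faces, labels)
predicted_label, mean_distance = recognizer.predict(faces[0])
print(predicted_label, mean_distance)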
# Read wine
wine = datasets.load_wine()
wine = pd.DataFrame(data=np.c_[wine['data'], wine['target']],
                    columns=wine['feature_names'] + ['target'])

# Separate into features and target
features = wine.iloc[:, 0:13]
target = wine.iloc[:, 13]

# Standardize the dataset
features = StandardScaler().fit_transform(features)

# Create PCA
pca = PCA(k=5)
newFeatures1 = pca.fit_transform(features)

# Create adaptive PCA
aPCA = AdaptivePCA(13, 5, 100)
newFeatures2 = aPCA.fit_transform(features)

# Split dataset into train and test folds
kf = KFold(n_splits=10)

# MLP model
mlp = MLPClassifier(solver='adam', hidden_layer_sizes=(25,))
score = {'None': [], 'pca': [], 'aPca': []}

# Run for each fold
for train_index, test_index in kf.split(features):
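    # The loop body is not shown in this excerpt. A hedged sketch of what
    # it presumably does, given the three score lists above: fit the MLP on
    # raw, PCA, and AdaptivePCA features and record accuracy per fold. The
    # variable names and exact scoring below are this sketch's assumptions.
    for name, feats in [('None', features), ('pca', newFeatures1),
                        ('aPca', newFeatures2)]:
        mlp.fit(feats[train_index], target.iloc[train_index])
        score[name].append(mlp.score(feats[test_index],
                                     target.iloc[test_index]))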