def learn(X, Y):
    pca = None
    dictionary = None
    model = None

    # Data augmentation
    if DO_DATA_AUGMENTATION:
        print("Augmenting data")
        X, Y = transform_T(X, Y)
        print("Number of samples augmented to {}".format(X.shape[0]))

    # Dictionary learning
    if DO_DICTIONARY_LEARNING:
        dictionary = Dictionary(n_atoms=128, atom_width=16)
        if dictionary.weights_available:
            print("Loading dictionary")
            dictionary.load()
        else:
            print("Learning dictionary")
            tic = time.time()
            dictionary.fit(X)
            dictionary.save()
            print("Dictionary learned in {0:.1f}s".format(time.time() - tic))
        print("Getting dictionary representation")
        X = dictionary.get_representation(X)

    # PCA
    if DO_PCA:
        tic = time.time()
        print("Applying PCA")
        n_components = 100
        pca = PCA(n_components=n_components)
        X = pca.fit(X, scale=False)
        print("Variance explained: {:.2f}".format(np.sum(pca.e_values_ratio_)))
        print("PCA applied in {0:.1f}s".format(time.time() - tic))

    # Training
    print("Starting training")
    tic = time.time()
    model = KernelSVM(C=1, kernel='rbf')
    model.train(X, Y)
    print("Model trained in {0:.1f}s".format(time.time() - tic))

    return pca, dictionary, model
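
# A minimal test-time sketch, not part of the original pipeline: it mirrors the
# preprocessing above on unseen data. It assumes the custom KernelSVM exposes a
# `predict` method and the custom PCA exposes `transform` (as the other scripts
# in this repo suggest); both method names are assumptions here.
def predict(X, pca, dictionary, model):
    if dictionary is not None:
        X = dictionary.get_representation(X)  # same sparse coding as at train time
    if pca is not None:
        X = pca.transform(X)                  # project onto the learned components
    return model.predict(X)                   # assumed predict API on KernelSVM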
def create(data_manager, components_limit=0):
    '''
    Creates a statistical model by aligning shapes and performing PCA
    :param data_manager: data manager providing training data
    :param components_limit: limit on how many PCA components to keep (0 == all)
    :return: resulting PCA
    '''
    teeth = data_manager.get_all_teeth(True)
    mean_shape = deepcopy(teeth[0])
    assert isinstance(mean_shape, Tooth)
    mean_shape.move_to_origin()
    mean_shape.normalize_shape()

    # Iteratively align all shapes to the current mean (generalized Procrustes
    # analysis) until the mean shape stops moving.
    error = float("inf")
    while error > 0.05:
        mean_acum = np.zeros(mean_shape.landmarks.shape)
        for tooth in teeth:
            tooth.align(mean_shape)
            mean_acum += tooth.landmarks
        new_mean_shape = Tooth(mean_acum / len(teeth))
        new_mean_shape.align(mean_shape)
        error = new_mean_shape.sum_of_squared_distances(mean_shape)
        mean_shape = new_mean_shape

    # Realign all teeth with the final mean shape
    for tooth in teeth:
        tooth.align(mean_shape)

    data = np.zeros((len(teeth), teeth[0].landmarks.size))
    for i, tooth in enumerate(teeth):
        data[i, :] = tooth.landmarks.flatten()

    pca = PCA()
    pca.train(deepcopy(data), components_limit)
    return pca
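
# For intuition, a self-contained NumPy sketch of the same generalized Procrustes
# loop on raw (N, 2) landmark arrays; `procrustes_align` and `gpa_mean_shape` are
# hypothetical stand-ins for Tooth.align and the iteration above, not repo APIs.
import numpy as np

def procrustes_align(shape, target):
    """Optimally rotate and scale `shape` onto `target` (both origin-centered)."""
    u, s, vt = np.linalg.svd(shape.T @ target)
    rotation = u @ vt                        # optimal rotation (Kabsch solution)
    scale = s.sum() / (shape ** 2).sum()     # optimal isotropic scaling
    return scale * shape @ rotation

def gpa_mean_shape(shapes, tol=0.05):
    mean = shapes[0] / np.linalg.norm(shapes[0])
    error = float("inf")
    while error > tol:
        aligned = [procrustes_align(s, mean) for s in shapes]
        new_mean = procrustes_align(np.mean(aligned, axis=0), mean)  # remove drift
        error = ((new_mean - mean) ** 2).sum()
        mean = new_mean
    return mean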
def phase1(self, data, trueLabels):
    step1ResultsFolder = Path(self.config["resultsDir"]) / "phase1"
    step1ResultsFolder.mkdir(exist_ok=True, parents=True)

    # step 2: plot the original dataset (the N_COMPONENTS dims with highest variance)
    stds = data.std(axis=0)
    dims = np.argsort(stds)[::-1][:N_COMPONENTS]
    Visualizer.labeledScatter3D(data[:, dims], trueLabels,
                                path=step1ResultsFolder / f"{N_COMPONENTS}_dims_originalScatter.png")

    # step 3: report the mean of the original data
    dataMean = np.mean(data, axis=0)
    print(f"Original data mean: {dataMean}")

    # steps 4, 5, 6, 7: fit PCA and project the data onto the principal components
    pca = PCA(n_components=N_COMPONENTS, print_=True)
    reducedData = pca.fit_transform(data)

    # step 8: plot the projected data
    Visualizer.labeledScatter3D(reducedData, trueLabels,
                                path=step1ResultsFolder / f"{N_COMPONENTS}_dims_pcaScatter.png")

    # step 9: reconstruct from the reduced representation and plot again
    reconstructedData = pca.inverse_transform(reducedData)
    Visualizer.labeledScatter3D(reconstructedData[:, dims], trueLabels,
                                path=step1ResultsFolder / f"{N_COMPONENTS}_dims_reconstructedScatter.png")

    return reducedData
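
# A quick numeric companion to the step-9 plot: the mean squared reconstruction
# error quantifies what that scatter shows visually. A sketch only, assuming the
# same custom PCA fit_transform / inverse_transform API used in phase1.
pca = PCA(n_components=N_COMPONENTS)
reconstructed = pca.inverse_transform(pca.fit_transform(data))
mse = np.mean((data - reconstructed) ** 2)
print(f"Mean squared reconstruction error ({N_COMPONENTS} components): {mse:.4f}")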
from src.pca import PCA
# custom One-Versus-Rest SVM
from src.ovr import OVR

SHAPE = (46, 56)
M = 121
standard = False

data = fetch_data(ratio=0.8)
X_train, y_train = data['train']
D, N = X_train.shape

pca = PCA(n_comps=M, standard=standard)
W_train = pca.fit(X_train)

X_test, y_test = data['test']
I, K = X_test.shape
W_test = pca.transform(X_test)

params = {'C': 1, 'gamma': 2e-4, 'kernel': 'linear'}
ovr = OVR(**params)
ovr.fit(W_train, y_train)
# predict on the test projections; W_test must keep the same sample order as
# y_test, so the original reversal (W_test[::-1]) was a bug
y_hat = ovr.predict(W_test).ravel()
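
# Sketch (not in the original script): score the predictions; assumes y_test is
# a flat label vector aligned with X_test and that numpy is imported as np.
accuracy = np.mean(y_hat == y_test.ravel())
print(f"OVR test accuracy with M={M} PCA components: {accuracy:.3f}")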
    # return features and labels
    return unsupervisedFeatures, y


def parseArguments():
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--jsonConfig", type=Path, required=True)
    return parser.parse_args()


if __name__ == "__main__":
    args = parseArguments()
    with open(args.jsonConfig, 'r') as f:
        config = json.load(f)

    step1ResultsFolder = Path(config["resultsDir"]) / "phase2"
    step1ResultsFolder.mkdir(exist_ok=True, parents=True)

    data, trueLabels = loadArffFile(Path(config["path"]))
    data = data.to_numpy()

    pca = PCA(data.shape[1])
    reducedData = pca.fit_transform(data)
    print(len(pca.varianceRatios))
    print(f"Explained variance ratios: {np.round(pca.varianceRatios, 2)}")
    # Debug lines kept from development (sklearn-style PCA attributes):
    # print(f"Explained Ratio Variance {np.round(pca.explained_variance_ratio_, 2)}")
    # print(f"Sum {np.sum(pca.explained_variance_ratio_)}")
    # print(f"Mean {pca.mean_}")
    # print(f"Variance {pca.var_}")
    # print(f"Noise Variance {pca.noise_variance_}")
    # Visualizer.labeledScatter3D(reducedData, trueLabels, path=step1ResultsFolder / f"caca2_dims_pcaScatter.png")
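
    # Sketch (new, not in the original script): choose the smallest number of
    # components reaching 90% explained variance; assumes `varianceRatios`
    # covers all components and sums to 1, as the print above suggests.
    cumulative = np.cumsum(pca.varianceRatios)
    n_keep = int(np.searchsorted(cumulative, 0.90) + 1)
    print(f"{n_keep} components explain {cumulative[n_keep - 1]:.2%} of the variance")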
# prettify plots
plt.rcParams['figure.figsize'] = [12.0, 9.0]
sns.set_palette(sns.color_palette("muted"))
_palette = sns.color_palette("muted")
sns.set_style("ticks")

M = 121
standard = True

data = fetch_data(ratio=0.8)
X_train, y_train = data['train']
D, N = X_train.shape

pca = PCA(n_comps=M, standard=standard)
W_train = pca.fit(X_train)

X_test, y_test = data['test']
I, K = X_test.shape
W_test = pca.transform(X_test)

params = {'gamma': 2e-4, 'kernel': 'linear'}
fine = 5

# validate OVR over a grid of C values
C_ovr = np.logspace(-5, 10, fine)
accuracy_ovr = []
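
# Sketch of the validation loop this sets up (an assumption about what follows:
# OVR takes C alongside the other params and exposes fit/predict as in the
# earlier script; a proper run would score against a held-out validation split
# rather than the test set).
for C in C_ovr:
    ovr = OVR(C=C, **params)
    ovr.fit(W_train, y_train)
    y_hat = ovr.predict(W_test).ravel()
    accuracy_ovr.append(np.mean(y_hat == y_test.ravel()))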