# Shared imports assumed by the strategy functions below: clean, sfs,
# normalize, pca and performance come from pybalu (the library this project
# uses); classifier_tests, get_class_by_name and FeatureExtractor are
# project-local helpers defined elsewhere in the repo.
import numpy as np
from scipy import stats
from sklearn.neural_network import MLPClassifier
from pybalu.feature_selection import clean, sfs
from pybalu.feature_transformation import normalize, pca
from pybalu.performance_eval import performance


def strategy01(X_train, labels_train, X_test, labels_test):
    ''' Strategy 1 for the first classifier '''
    print('\nRunning strategy 1 of the first classifier...')

    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean the data
    # > Training: 5040 x 82
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: Mean-Std normalization of the data
    X_train, a, b = normalize(X_train)

    # Step 3: Feature selection using the Fisher criterion
    # > Training: 5040 x 50
    s_sfs = sfs(X_train, labels_train, n_features=50, method="fisher")
    X_train = X_train[:, s_sfs]

    # Step 4: PCA
    # > Training: 5040 x 50
    X_train, _, A, Xm, _ = pca(X_train, n_components=50)

    # *** TESTING DATA DEFINITION ***
    # Replay the training-fitted transforms on the test set, in the same order
    X_test = X_test[:, s_clean]         # Step 1: Clean
    X_test = X_test * a + b             # Step 2: Normalization
    X_test = X_test[:, s_sfs]           # Step 3: SFS
    X_test = np.matmul(X_test - Xm, A)  # Step 4: PCA

    return classifier_tests(X_train, labels_train, X_test, labels_test)
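# Sanity-check sketch (not part of the original pipeline): assuming pybalu's
# normalize contract, the returned (a, b) reproduce the training normalization
# when replayed as X * a + b, which is exactly how the test set is
# transformed above.
_X = np.random.rand(10, 5)
_Xn, _a, _b = normalize(_X)
assert np.allclose(_Xn, _X * _a + _b)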
def strategy02(X_train, labels_train, X_test, labels_test):
    ''' Strategy 2 for the first classifier '''
    print('\nRunning strategy 2 of the first classifier...')

    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean
    # > Training: 5040 x 82
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: PCA with 70 components
    # > Training: 5040 x 70
    X_train, _, A, Xm, _ = pca(X_train, n_components=70)

    # Step 3: Normalization
    # > Training: 5040 x 70
    X_train, a, b = normalize(X_train)

    # Step 4: SFS
    # > Training: 5040 x 20
    s_sfs = sfs(X_train, labels_train, n_features=20, method="fisher")
    X_train = X_train[:, s_sfs]

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]         # Step 1: Clean
    X_test = np.matmul(X_test - Xm, A)  # Step 2: PCA
    X_test = X_test * a + b             # Step 3: Normalization
    X_test = X_test[:, s_sfs]           # Step 4: SFS

    return classifier_tests(X_train, labels_train, X_test, labels_test)
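# Analogous sanity-check sketch for the PCA replay: assuming pybalu's pca
# returns the transformed data, the projection matrix A and the mean Xm,
# projecting manually with np.matmul(X - Xm, A) reproduces pca's own output.
_X = np.random.rand(10, 5)
_Y, _, _A, _Xm, _ = pca(_X, n_components=3)
assert np.allclose(_Y, np.matmul(_X - _Xm, _A))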
def strategy04(X_train, labels_train, X_test, labels_test):
    ''' Strategy 4 for the first classifier '''
    print('\nRunning strategy 4 of the first classifier...')

    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean the data
    # > Training: 5040 x 82
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: Mean-Std normalization of the data
    X_train, a, b = normalize(X_train)

    # Step 3: Feature selection using the Fisher criterion
    # > Training: 5040 x 26
    s_sfs = sfs(X_train, labels_train, n_features=26, method="fisher")
    X_train = X_train[:, s_sfs]

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]  # Step 1: Clean
    X_test = X_test * a + b      # Step 2: Normalization
    X_test = X_test[:, s_sfs]    # Step 3: SFS

    return classifier_tests(X_train, labels_train, X_test, labels_test)
def strategy01(X_train, labels_train, X_test, labels_test, groups):
    ''' Strategy 1 for the second classifier '''
    print('\nRunning strategy 1 of the second classifier...')

    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean the data
    # > Training: 8000 x 250
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: Mean-Std normalization of the data
    X_train, a, b = normalize(X_train)

    # Step 3: Feature selection using the Fisher criterion
    # > Training: 8000 x 50
    s_sfs = sfs(X_train, labels_train, n_features=50, method="fisher")
    X_train = X_train[:, s_sfs]

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]  # Step 1: Clean
    X_test = X_test * a + b      # Step 2: Normalization
    X_test = X_test[:, s_sfs]    # Step 3: SFS

    return classifier_tests(X_train, labels_train, X_test, labels_test, groups)
def WinnerStrategy(X_train, labels_train, X_test, labels_test, groups):
    '''
    Strategy 1 with neural networks, rewritten to collect statistics
    (second classifier)
    '''
    # Step 1: Clean the data
    # > Training: 8000 x 250
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: Mean-Std normalization of the data
    X_train, a, b = normalize(X_train)

    # Step 3: Feature selection using the Fisher criterion
    # > Training: 8000 x 50
    s_sfs = sfs(X_train, labels_train, n_features=50, method="fisher")
    X_train = X_train[:, s_sfs]

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]  # Step 1: Clean
    X_test = X_test * a + b      # Step 2: Normalization
    X_test = X_test[:, s_sfs]    # Step 3: SFS

    classifier = MLPClassifier(alpha=1, max_iter=1000, random_state=2)
    results = {}
    Y_pred = np.array([])
    labels_test = np.array([])  # rebuilt below, one label per sample

    # Evaluate on the testing set: classify every patch of a sample, then
    # assign the sample the majority (mode) of its patch-level predictions
    classifier.fit(X_train, labels_train)
    for sample in groups['test']:
        patch_data = np.array([])
        for patch in groups['test'][sample]:
            features = X_test[groups['test'][sample][patch], :].reshape(1, -1)
            patch_data = np.append(patch_data, classifier.predict(features)[0])
        # Predicted class: mode of the patch predictions (see the note below)
        Y_pred = np.append(Y_pred, stats.mode(patch_data)[0][0])
        labels_test = np.append(labels_test, get_class_by_name(sample))

    results['Accuracy'] = performance(Y_pred, labels_test) * 100
    results['Y_pred'] = Y_pred
    results['labels_test'] = labels_test
    return results
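# Note on the majority vote above: the stats.mode(patch_data)[0][0] indexing
# assumes SciPy < 1.9, where mode returns arrays. A version-independent
# equivalent, sketched with numpy only (ties break toward the smallest value,
# as with stats.mode):
def majority_vote(patch_data):
    ''' Return the most frequent patch-level prediction '''
    values, counts = np.unique(patch_data, return_counts=True)
    return values[np.argmax(counts)]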
def strategy05(X_train, labels_train, X_test, labels_test, groups):
    ''' Strategy 5 for the second classifier '''
    print('\nRunning strategy 5 of the second classifier...')

    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean
    # > Training: 5040 x 82
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: PCA, keeping every component
    # > Training: 5040 x 82
    X_train, _, A1, Xm1, _ = pca(X_train, n_components=X_train.shape[1])

    # Step 3: Normalization
    # > Training: 5040 x 82
    X_train, a, b = normalize(X_train)

    # Step 4: SFS
    # > Training: 5040 x 80
    s_sfs = sfs(X_train, labels_train, n_features=80, method="fisher")
    X_train = X_train[:, s_sfs]
    X_train_sfs80 = X_train.copy()

    # Step 5: PCA
    # > Training: 5040 x 10
    X_train, _, A2, Xm2, _ = pca(X_train, n_components=10)

    # Step 6: SFS over the fusion of the 10 PCA components and the
    # 80 SFS-selected features
    # > Training: 5040 x 20
    X_train = np.concatenate((X_train, X_train_sfs80), axis=1)
    s_sfs2 = sfs(X_train, labels_train, n_features=20, method="fisher")
    X_train = X_train[:, s_sfs2]

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]           # Step 1: Clean
    X_test = np.matmul(X_test - Xm1, A1)  # Step 2: PCA
    X_test = X_test * a + b               # Step 3: Normalization
    X_test = X_test[:, s_sfs]             # Step 4: SFS
    X_test_sfs80 = X_test.copy()
    X_test = np.matmul(X_test - Xm2, A2)  # Step 5: PCA
    X_test = np.concatenate((X_test, X_test_sfs80), axis=1)
    X_test = X_test[:, s_sfs2]            # Step 6: SFS

    # *** TRAIN ON THE TRAINING DATA, TEST ON THE TESTING DATA ***
    return classifier_tests(X_train, labels_train, X_test, labels_test, groups)
def WinnerStrategy(X_train, labels_train, X_test, labels_test):
    '''
    Strategy 1 with neural networks, rewritten to collect statistics
    (first classifier)
    '''
    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean the data
    # > Training: 5040 x 82
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: Mean-Std normalization of the data
    X_train, a, b = normalize(X_train)

    # Step 3: Feature selection using the Fisher criterion
    # > Training: 5040 x 50
    s_sfs = sfs(X_train, labels_train, n_features=50, method="fisher")
    X_train = X_train[:, s_sfs]

    # Step 4: PCA
    # > Training: 5040 x 50
    X_train, _, A, Xm, _ = pca(X_train, n_components=50)

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]         # Step 1: Clean
    X_test = X_test * a + b             # Step 2: Normalization
    X_test = X_test[:, s_sfs]           # Step 3: SFS
    X_test = np.matmul(X_test - Xm, A)  # Step 4: PCA

    classifier = MLPClassifier(alpha=1, max_iter=1000, random_state=2)
    results = {}

    # Classify the testing samples
    classifier.fit(X_train, labels_train)
    Y_pred = classifier.predict(X_test)
    accuracy = performance(Y_pred, labels_test)
    results['Accuracy'] = accuracy * 100
    results['Y_pred'] = Y_pred
    results['labels_test'] = labels_test
    return results
def strategy03(X_train, labels_train, X_test, labels_test, groups):
    ''' Strategy 3 for the second classifier '''
    print('\nRunning strategy 3 of the second classifier...')

    # *** TRAINING DATA DEFINITION ***
    # Step 1: Clean
    # > Training: 5040 x 82
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 2: Normalization
    # > Training: 5040 x 82
    X_train, a, b = normalize(X_train)

    # Step 3: SFS
    # > Training: 5040 x 80
    s_sfs = sfs(X_train, labels_train, n_features=80, method="fisher")
    X_train = X_train[:, s_sfs]

    # Step 4: PCA
    # > Training: 5040 x 20
    X_train, _, A, Xm, _ = pca(X_train, n_components=20)

    # Step 5: SFS
    # > Training: 5040 x 15
    s_sfs2 = sfs(X_train, labels_train, n_features=15, method="fisher")
    X_train = X_train[:, s_sfs2]

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]         # Step 1: Clean
    X_test = X_test * a + b             # Step 2: Normalization
    X_test = X_test[:, s_sfs]           # Step 3: SFS
    X_test = np.matmul(X_test - Xm, A)  # Step 4: PCA
    X_test = X_test[:, s_sfs2]          # Step 5: SFS

    # *** TRAIN ON THE TRAINING DATA, TEST ON THE TESTING DATA ***
    return classifier_tests(X_train, labels_train, X_test, labels_test, groups)
from scipy.io import loadmat
from sklearn.model_selection import train_test_split

data = loadmat('set05-face-detection.mat')
X, d = data["f"], data["d"].squeeze()

# Step 1: Data selection: first 80% for training, remaining 20% for testing
# > Training: 211 x 1589
# > Testing : 53 x 1589
X_train, X_test, d_train, d_test = train_test_split(
    X, d, test_size=0.2, shuffle=False)

# *** TRAINING DATA DEFINITION ***
# Step 2 (training): Clean
# > Training: 211 x 387
s_clean = clean(X_train)
X_train = X_train[:, s_clean]

# Step 3 (training): Normalization
# > Training: 211 x 387
X_train, a, b = normalize(X_train)

# Step 4 (training): SFS
# > Training: 211 x 40
s_sfs = sfs(X_train, d_train, n_features=40, method="fisher", show=True)
X_train = X_train[:, s_sfs]

# Step 5 (training): PCA
# > Training: 211 x 10
X_train, _, A, Xm, _ = pca(X_train, n_components=10)
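# The test-side replay is not shown in this excerpt; following the same
# pattern as the strategies above, it would be (a sketch):
X_test = X_test[:, s_clean]         # Step 2 (testing): Clean
X_test = X_test * a + b             # Step 3 (testing): Normalization
X_test = X_test[:, s_sfs]           # Step 4 (testing): SFS
X_test = np.matmul(X_test - Xm, A)  # Step 5 (testing): PCA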
import json


def main():
    '''
    Flow of the scratched-wall recognition program.

    The methodology follows the one described in:
    https://github.com/domingomery/patrones/blob/master/clases/Cap03_Seleccion_de_Caracteristicas/presentations/PAT03_GeneralSchema.pdf

    The code structure is based on the example code from the in-class activity:
    https://github.com/domingomery/patrones/tree/master/clases/Cap03_Seleccion_de_Caracteristicas/ejercicios/PCA_SFS
    '''
    # Step 1: Feature extraction
    # > 4000 scratched training images
    # > 4000 non-scratched training images
    # > 1000 scratched testing images
    # > 1000 non-scratched testing images
    # > 357 features per image
    features = FeatureExtractor(classes=CLASSES)

    # Step 2: Training/testing data definition
    # > Training: 8000 x 357
    # > Testing:  2000 x 357
    X_train, labels_train = features['feature_values_train'], features['labels_train']
    X_test, labels_test = features['feature_values_test'], features['labels_test']
    X_train, labels_train = np.array(X_train), np.array(labels_train)
    X_test, labels_test = np.array(X_test), np.array(labels_test)

    # *** TRAINING DATA DEFINITION ***
    # Step 3: Clean the data
    # > Training: 8000 x 250
    s_clean = clean(X_train)
    X_train = X_train[:, s_clean]

    # Step 4: Mean-Std normalization of the data
    X_train, a, b = normalize(X_train)

    # Step 5: Feature selection using the Fisher criterion
    # > Training: 8000 x 50
    s_sfs = sfs(X_train, labels_train, n_features=50)
    X_train = X_train[:, s_sfs]

    # *** TESTING DATA DEFINITION ***
    X_test = X_test[:, s_clean]  # Step 3: Clean
    X_test = X_test * a + b      # Step 4: Normalization
    X_test = X_test[:, s_sfs]    # Step 5: SFS

    # *** TRAIN ON THE TRAINING DATA, TEST ON THE TESTING DATA ***
    # KNN and confusionMatrix are project-local helpers (KNN wraps a
    # k-nearest-neighbors classifier)
    knn = KNN(n_neighbors=3)
    knn.fit(X_train, labels_train)
    Y_pred = knn.predict(X_test)

    # *** Classifier statistics and performance ***
    accuracy = performance(Y_pred, labels_test)
    print("Accuracy = " + str(accuracy))
    confusionMatrix(Y_pred, labels_test)

    printChoosenFeatures = True
    if printChoosenFeatures:
        feature_names = np.array(features['feature_names'])
        feature_names = feature_names[s_sfs]
        print('Features selected by the system:')
        for name in feature_names:
            print(name, end=' -- ')

    # *** Save the fitted parameters for the external recognizer ***
    with open('data/reconocedor.json', 'w') as file:
        file.write(
            json.dumps({
                's_clean': s_clean.tolist(),
                'a': a.tolist(),
                'b': b.tolist(),
                's_sfs': s_sfs.tolist()
            }))
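# Sketch (hypothetical helper, not in the original code) of how the external
# recognizer could load the parameters saved by main() and replay the
# transforms; the key names match the JSON written above.
def load_recognizer_params(path='data/reconocedor.json'):
    ''' Load the fitted cleaning/normalization/selection parameters '''
    with open(path) as file:
        params = json.load(file)
    return (np.array(params['s_clean']), np.array(params['a']),
            np.array(params['b']), np.array(params['s_sfs']))

# Replaying on new feature vectors X_new mirrors the testing pipeline:
# X_new = (X_new[:, s_clean] * a + b)[:, s_sfs]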
X0_train = extract_features('Training_0', 'png')
X1_train = extract_features('Training_1', 'png')
X0_test = extract_features('Testing_0', 'png')
X1_test = extract_features('Testing_1', 'png')

# Training dataset
print('Training Subset:')
X_train = np.concatenate((X0_train, X1_train), axis=0)
d0_train = np.zeros([X0_train.shape[0], 1], dtype=int)
d1_train = np.ones([X1_train.shape[0], 1], dtype=int)
d_train = np.concatenate((d0_train, d1_train), axis=0)
print('Original extracted features: ' + str(X_train.shape[1]) +
      ' (' + str(X_train.shape[0]) + ' samples)')

# Training: Cleaning
sclean = clean(X_train, show=True)
X_train_clean = X_train[:, sclean]
print('           cleaned features: ' + str(X_train_clean.shape[1]) +
      ' (' + str(X_train_clean.shape[0]) + ' samples)')

# Training: Normalization
X_train_norm, a, b = normalize(X_train_clean)
print('        normalized features: ' + str(X_train_norm.shape[1]) +
      ' (' + str(X_train_norm.shape[0]) + ' samples)')

# Training: Feature selection
ssfs = sfs(X_train_norm, d_train, n_features=20, method="fisher", show=True)
X_train_sfs = X_train_norm[:, ssfs]
print('          selected features: ' + str(X_train_sfs.shape[1]) +
      ' (' + str(X_train_sfs.shape[0]) + ' samples)')
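# The testing subset is not shown in this excerpt; it would be assembled and
# transformed the same way (a sketch, mirroring the training steps above):
X_test = np.concatenate((X0_test, X1_test), axis=0)
d_test = np.concatenate((np.zeros([X0_test.shape[0], 1], dtype=int),
                         np.ones([X1_test.shape[0], 1], dtype=int)), axis=0)
X_test_sfs = (X_test[:, sclean] * a + b)[:, ssfs]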
        # (inside the loop that splits samples into training and testing)
        Ytest.append(ID)
    else:
        Xtrain.append(lbp)
        Ytrain.append(ID)

# Convert to numpy arrays to make them easier to work with (required by pybalu)
Xtrain = np.array(Xtrain)
Ytrain = np.array(Ytrain)
Xtest = np.array(Xtest)
Ytest = np.array(Ytest)

before = len(Xtrain[0])
# Run clean before selecting features with sfs, as recommended in class
p_clean = clean(Xtrain)
after = len(p_clean)
print(f"Cleaned.\nBefore: {before} features\nNow: {after} features")

# Keep only the features that survived cleaning
Xtrain_cleaned = Xtrain[:, p_clean]
Xtest_cleaned = Xtest[:, p_clean]

# A bug in the sfs code does not allow class labels to skip integers
# (1, 3, 5, ...), so we remap the labels just to run sfs
Ytrain_sfs = np.array([int((y - 1) / 2) for y in Ytrain])

# Get the features selected by sfs
p_sfs = sfs(Xtrain_cleaned, Ytrain_sfs, 100, show=True)
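# A more general remapping for the sfs workaround above, sketched with
# np.unique: it compresses any label set (not just the odd integers
# 1, 3, 5, ...) into consecutive integers 0..k-1.
_, Ytrain_sfs = np.unique(Ytrain, return_inverse=True)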
def clean_norm(X):
    ''' Clean and normalize X; return the transformed matrix together with
    the cleaning indices and normalization parameters, so the same
    transform can be replayed on new data '''
    sclean = clean(X, show=True)
    X = X[:, sclean]
    X, a, b = normalize(X)
    return X, sclean, a, b
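# Usage sketch (hypothetical X_train / X_test arrays): fit the cleaning and
# normalization on the training data, then replay both on the test data with
# the returned parameters.
X_train, sclean, a, b = clean_norm(X_train)
X_test = X_test[:, sclean] * a + b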