def benchmark_classifiers_on_images(path_input, path_output, mask, resize_H, resize_W, grayscaled, use_cashed_predictions=True):
    """Run several classifiers end-to-end on an image folder and plot their confusion matrices.

    For every classifier (SVM, RF, LM, KNN) the predictions are expected in
    ``<path_output>predictions/<name>/<name>_predictions.txt``.  When that file
    is missing, or when caching is disabled, ``tools_ML.E2E_images`` is run to
    produce it.  Afterwards one confusion matrix per available predictions
    file is drawn into a shared figure, which is then shown.

    Args:
        path_input: image folder, forwarded unchanged to ``E2E_images``.
        path_output: output root; concatenated as a string prefix, so it is
            expected to end with a path separator.
        mask, resize_H, resize_W, grayscaled: forwarded unchanged to
            ``E2E_images``.
        use_cashed_predictions: when falsy, predictions are always recomputed;
            otherwise an existing predictions file is reused.
    """
    classifiers = [
        classifier_SVM.classifier_SVM(),
        classifier_RF.classifier_RF(),
        classifier_LM.classifier_LM(),
        classifier_KNN.classifier_KNN(),
    ]

    # Build the per-classifier output locations once; both phases below use them.
    jobs = []
    for classifier in classifiers:
        folder_predictions = path_output + 'predictions/' + classifier.name + '/'
        filename_predictions = folder_predictions + classifier.name + '_predictions.txt'
        jobs.append((classifier, folder_predictions, filename_predictions))

    # Phase 1: (re)compute predictions where needed.
    for classifier, folder_predictions, filename_predictions in jobs:
        if not use_cashed_predictions or not os.path.isfile(filename_predictions):
            ML = tools_ML.tools_ML(classifier)
            ML.E2E_images(path_input, folder_predictions, mask=mask,
                          resize_W=resize_W, resize_H=resize_H,
                          grayscaled=grayscaled, verbose=False)

    # Phase 2: plot.  The figure is created only now so that it is the current
    # pyplot figure regardless of what phase 1 may have opened.
    fig = plt.figure(figsize=(12, 6))
    fig.subplots_adjust(hspace=0.01)

    # 2x2 for up to four classifiers; extra rows are added if the list grows,
    # so a fifth entry no longer produces an out-of-range subplot index.
    n_cols = 2
    n_rows = max(2, (len(jobs) + n_cols - 1) // n_cols)

    for i, (classifier, _, filename_predictions) in enumerate(jobs):
        if os.path.isfile(filename_predictions):
            tools_IO.plot_confusion_mat(plt.subplot(n_rows, n_cols, i + 1), fig,
                                        filename_mat=filename_predictions,
                                        caption=classifier.name)

    plt.tight_layout()
    plt.show()
    return
def benchmark_classifiers_on_extractor(use_cashed_predictions=True):
    """Run several classifiers on pre-extracted feature files and plot their confusion matrices.

    Features are read from ``data/features-natural/FC/``.  For every classifier
    the predictions are expected in
    ``data/features-natural/predictions/<name>/<name>_predictions.txt``; when
    that file is missing, or when caching is disabled,
    ``tools_ML.E2E_features`` is run (limited to 20 classes and 100 instances)
    to produce it.  Afterwards one confusion matrix per available predictions
    file is drawn into a shared figure, which is then shown.

    Args:
        use_cashed_predictions: when falsy, predictions are always recomputed;
            otherwise an existing predictions file is reused.
    """
    path_features = 'data/features-natural/FC/'
    path_output = 'data/features-natural/'

    # Only the classifiers that are actually benchmarked are instantiated.
    # classifier_XGBoost, classifier_Gauss2 and classifier_Bayes2 can be
    # appended here; the subplot grid below grows with the list.
    classifiers = [
        classifier_SVM.classifier_SVM(),
        classifier_RF.classifier_RF(),
        classifier_LM.classifier_LM(),
        classifier_KNN.classifier_KNN(),
    ]

    # Build the per-classifier output locations once; both phases below use them.
    jobs = []
    for classifier in classifiers:
        folder_predictions = path_output + 'predictions/' + classifier.name + '/'
        filename_predictions = folder_predictions + classifier.name + '_predictions.txt'
        jobs.append((classifier, folder_predictions, filename_predictions))

    # Phase 1: (re)compute predictions where needed.
    for classifier, folder_predictions, filename_predictions in jobs:
        if not use_cashed_predictions or not os.path.isfile(filename_predictions):
            ML = tools_ML.tools_ML(classifier)
            ML.E2E_features(path_features, folder_predictions,
                            limit_classes=20, limit_instances=100)

    # Phase 2: plot.  The figure is created only now so that it is the current
    # pyplot figure regardless of what phase 1 may have opened.
    fig = plt.figure(figsize=(7, 6))
    fig.subplots_adjust(hspace=0.01)

    # 2x2 for up to four classifiers; extra rows are added if the list grows,
    # so a longer classifier list no longer overflows a fixed 2x2 grid.
    n_cols = 2
    n_rows = max(2, (len(jobs) + n_cols - 1) // n_cols)

    for i, (classifier, _, filename_predictions) in enumerate(jobs):
        if os.path.isfile(filename_predictions):
            tools_IO.plot_confusion_mat(plt.subplot(n_rows, n_cols, i + 1), fig,
                                        filename_mat=filename_predictions,
                                        caption=classifier.name)

    plt.tight_layout()
    plt.show()
    return
def benchmark_extractors():
    """Compare feature extractors by classifying their features with a random forest.

    For each extractor (AlexNet, Inception):
      1. features are generated from ``data/ex-natural`` (``*.jpg``, limit 200)
         unless the extractor's feature folder already exists;
      2. ``tools_ML.E2E_features`` is run on those features unless the
         predictions file already exists;
      3. the confusion matrix of the predictions is drawn into one panel of a
         shared figure.
    The shared figure is shown at the end.
    """
    mask = '*.jpg'
    path_input = 'data/ex-natural'
    path_output = 'data/features/'

    extractors = [
        CNN_AlexNet_TF.CNN_AlexNet_TF(),
        CNN_Inception_TF.CNN_Inception_TF(),
    ]

    classifier = classifier_RF.classifier_RF()
    ML = tools_ML.tools_ML(classifier)

    fig = plt.figure(figsize=(12, 6))
    fig.subplots_adjust(hspace=0.01)

    # Three columns as before; widened automatically if more than three
    # extractors are listed, so the subplot index can never exceed the grid.
    n_cols = max(3, len(extractors))

    for i, extractor in enumerate(extractors):
        # NOTE(review): unlike the feature path used by
        # benchmark_classifiers_on_extractor, this one has no trailing
        # separator - confirm that E2E_features handles both forms as intended.
        path_features = path_output + extractor.name
        if not os.path.exists(path_features):
            extractor.generate_features(path_input, path_features, mask=mask, limit=200)

        folder_predictions = path_output + 'predictions/' + extractor.name + '/'
        filename_predictions = folder_predictions + classifier.name + '_predictions.txt'
        if not os.path.isfile(filename_predictions):
            ML.E2E_features(path_features, folder_predictions,
                            limit_classes=20, limit_instances=100)

        # E2E_features creates its own pyplot figure, which then becomes the
        # current one.  Re-activate ours, otherwise plt.subplot() below (and
        # the final tight_layout/show) would act on the wrong figure.
        plt.figure(fig.number)
        tools_IO.plot_confusion_mat(plt.subplot(1, n_cols, i + 1), fig,
                                    filename_mat=filename_predictions,
                                    caption=classifier.name + ' + ' + extractor.name)

    plt.tight_layout()
    plt.show()
    return
def stage_train_stats(self, path_output, labels_fact, labels_train_pred, labels_test_pred, labels_train_prob, labels_test_prob, patterns, X=None, Y=None, verbose=False):
    """Persist prediction results and accuracy reports; optionally plot them.

    Train and test predictions/probabilities are concatenated (train first),
    so ``labels_fact`` must be ordered the same way.  All output files are
    named ``<path_output><classifier name>_<suffix>``:

      * ``_predictions.txt``   - rows of (actual pattern, predicted pattern, probability)
      * ``_confusion_mat.txt`` - written by ``tools_IO.print_accuracy``
      * ``_errors.txt``        - written by ``tools_IO.print_top_fails``
      * ``_accuracy.txt``      - written by ``tools_IO.print_reject_rate``

    Accuracy is additionally printed without a filename.

    Args:
        path_output: string prefix for the output files; concatenated as-is.
        labels_fact: actual label indices, usable as an index into ``patterns``.
        labels_train_pred, labels_test_pred: predicted label indices.
        labels_train_prob, labels_test_prob: prediction probabilities.
        patterns: array of class names indexed by label.
        X, Y: only needed by the PCA panel, which is currently switched off.
        verbose: when truthy, show a figure with the learning rates (read from
            ``_learn_rates.txt``) and the confusion matrix.
    """
    prefix = path_output + self.classifier.name
    filename_predictions = prefix + '_predictions.txt'

    labels_pred = numpy.hstack((labels_train_pred, labels_test_pred))
    labels_prob = numpy.hstack((labels_train_prob, labels_test_prob))

    # One row per sample: actual class name, predicted class name, probability.
    predictions = numpy.array([patterns[labels_fact], patterns[labels_pred], labels_prob]).T
    tools_IO.save_mat(predictions, filename_predictions)

    tools_IO.print_accuracy(labels_fact, labels_pred, patterns)
    tools_IO.print_accuracy(labels_fact, labels_pred, patterns,
                            filename=prefix + '_confusion_mat.txt')
    tools_IO.print_top_fails(labels_fact, labels_pred, patterns,
                             filename=prefix + '_errors.txt')
    tools_IO.print_reject_rate(labels_fact, labels_pred, labels_prob,
                               filename=prefix + '_accuracy.txt')

    if not verbose:
        return

    # The PCA panel is deliberately disabled.  To enable it, set this flag
    # only when both X and Y were supplied.
    verbose_PCA = False

    features = None
    if verbose_PCA:
        print('Extracting features for PCA')
        features = self.classifier.images_to_features(X)

    fig = plt.figure(figsize=(12, 6))
    fig.subplots_adjust(hspace=0.01)

    n_panels = 3 if verbose_PCA else 2
    panel = 1
    if verbose_PCA:
        tools_IO.plot_features_PCA(plt.subplot(1, n_panels, panel), features, Y, patterns)
        panel += 1

    tools_IO.plot_learning_rates1(plt.subplot(1, n_panels, panel), fig,
                                  filename_mat=prefix + '_learn_rates.txt')
    tools_IO.plot_confusion_mat(plt.subplot(1, n_panels, panel + 1), fig,
                                filename_mat=filename_predictions,
                                caption=self.classifier.name)

    plt.tight_layout()
    plt.show()
    return
def E2E_features(self, path_input, path_output, mask='.txt', limit_classes=1000000, limit_instances=1000000, has_header=True, has_labels_first_col=True):
    """Train and test the classifier on per-class feature files and save a summary figure.

    Steps:
      1. class names ("patterns") are derived from the files directly inside
         ``path_input`` by stripping the ``path_input`` prefix and everything
         from ``mask`` onwards; at most ``limit_classes`` are kept;
      2. features are loaded with ``prepare_arrays_from_feature_files``;
      3. samples are split at random into two halves (train / test);
      4. features are normalised and rescaled to the range 0..255;
      5. the classifier is trained/tested and the statistics are written by
         ``stage_train_stats``;
      6. a figure (PCA, confusion matrix and - with a header - feature
         importance) is saved as ``<path_output>fig_roc.png`` and closed.

    Args:
        path_input: folder with one feature file per class; used as a string
            prefix when building file names.
        path_output: output folder prefix; created if missing.
        mask: feature-file suffix.
        limit_classes: maximum number of classes to use.
        limit_instances: forwarded as ``limit`` to the feature loader.
        has_header: whether feature files start with a header row; the header
            of the first class file is used for the feature-importance panel.
        has_labels_first_col: forwarded to the feature loader.
    """
    print('E2E train-test on features: classifier=%s\n\n' % (self.classifier.name))

    os.makedirs(path_output, exist_ok=True)

    class_names = [f.path[len(path_input):].split(mask)[0]
                   for f in os.scandir(path_input) if f.is_file()]
    patterns = numpy.unique(numpy.array(class_names))[:limit_classes]

    (X, Y, filenames) = self.prepare_arrays_from_feature_files(
        path_input, patterns, feature_mask=mask, limit=limit_instances,
        has_header=has_header, has_labels_first_col=has_labels_first_col)

    # Random 50/50 split; the test set is the sorted complement of the train set.
    n_samples = X.shape[0]
    idx_train = numpy.sort(numpy.random.choice(n_samples, n_samples // 2, replace=False))
    idx_test = numpy.setdiff1d(numpy.arange(n_samples), idx_train)

    if has_header:
        header = tools_IO.load_mat(path_input + ('%s%s' % (patterns[0], mask)),
                                   numpy.chararray, delim='\t')
        header = header[0]
    else:
        header = None

    # Normalise, then shift/scale into 0..255.
    X = normalize(X)
    x_min = numpy.min(X)
    X -= x_min
    x_max = numpy.max(X)
    if x_max > 0:
        # Skipped when every value is identical: dividing by a zero range
        # would turn the whole feature matrix into NaN.
        X *= 255.0 / x_max

    # train_test returns eight values; the "challanger" outputs are not used here.
    (labels_train_pred, labels_train_prob, _, _,
     labels_test_pred, labels_test_prob, _, _) = self.train_test(X, Y, idx_train, idx_test)

    # Same train-then-test order in which stage_train_stats stacks the predictions.
    labels_fact = numpy.hstack((Y[idx_train], Y[idx_test]))

    self.stage_train_stats(path_output, labels_fact, labels_train_pred, labels_test_pred,
                           labels_train_prob, labels_test_prob, patterns)

    # The feature-importance panel needs feature names, i.e. a header.
    n_panels = 3 if has_header else 2

    fig = plt.figure(figsize=(12, 6))
    fig.subplots_adjust(hspace=0.01)

    tools_IO.plot_features_PCA(plt.subplot(1, n_panels, 1), X, Y, patterns)
    tools_IO.plot_confusion_mat(plt.subplot(1, n_panels, 2), fig,
                                path_output + self.classifier.name + '_predictions.txt',
                                self.classifier.name)
    if has_header:
        tools_IO.plot_feature_importance(plt.subplot(1, n_panels, 3), fig, X, Y, header)

    plt.tight_layout()
    plt.savefig(path_output + 'fig_roc.png')
    # The figure exists only to be written to disk.  Close it so that repeated
    # calls do not accumulate open figures and so that it does not remain the
    # current figure for (or get displayed by) the caller's own plotting.
    plt.close(fig)
    return