コード例 #1
0
def benchmark_classifiers_on_images(path_input,
                                    path_output,
                                    mask,
                                    resize_H,
                                    resize_W,
                                    grayscaled,
                                    use_cashed_predictions=True):
    """Run four classifiers on an image folder and plot their confusion matrices.

    For each of the SVM, RF, LM and KNN classifiers the predictions file
    ``<path_output>predictions/<name>/<name>_predictions.txt`` is looked up.
    When it is missing, or when ``use_cashed_predictions`` equals ``False``,
    ``tools_ML.E2E_images`` is run for that classifier with the classifier's
    prediction folder as output. Afterwards every predictions file that
    exists is drawn into its own cell of a 2x2 figure, which is then shown.

    Args:
        path_input: image folder forwarded to ``E2E_images``.
        path_output: output root; it is concatenated directly with
            ``'predictions/'``, so it has to end with a path separator.
        mask: forwarded to ``E2E_images``.
        resize_H: forwarded to ``E2E_images``.
        resize_W: forwarded to ``E2E_images``.
        grayscaled: forwarded to ``E2E_images``.
        use_cashed_predictions: existing prediction files are reused unless
            this compares equal to ``False``.
    """

    def predictions_location(model):
        # Folder and predictions file that belong to one classifier.
        folder = path_output + 'predictions/' + model.name + '/'
        return folder, folder + model.name + '_predictions.txt'

    models = [
        classifier_SVM.classifier_SVM(),
        classifier_RF.classifier_RF(),
        classifier_LM.classifier_LM(),
        classifier_KNN.classifier_KNN(),
    ]

    # Stage 1: (re)compute predictions wherever no usable cache exists.
    for model in models:
        out_folder, cached_file = predictions_location(model)
        if use_cashed_predictions == False or not os.path.isfile(cached_file):
            runner = tools_ML.tools_ML(model)
            runner.E2E_images(path_input,
                              out_folder,
                              mask=mask,
                              resize_W=resize_W,
                              resize_H=resize_H,
                              grayscaled=grayscaled,
                              verbose=False)

    # Stage 2: one confusion matrix per classifier on a 2x2 grid.
    figure = plt.figure(figsize=(12, 6))
    figure.subplots_adjust(hspace=0.01)

    for cell, model in enumerate(models, start=1):
        _, cached_file = predictions_location(model)
        if not os.path.isfile(cached_file):
            # Nothing to draw for this classifier; its cell stays empty.
            continue
        tools_IO.plot_confusion_mat(plt.subplot(2, 2, cell),
                                    figure,
                                    filename_mat=cached_file,
                                    caption=model.name)

    plt.tight_layout()
    plt.show()

    return
コード例 #2
0
def benchmark_classifiers_on_extractor(use_cashed_predictions=True):
    """Benchmark several classifiers on pre-extracted features and plot results.

    Features are read from ``data/features-natural/FC/``; predictions go to
    ``data/features-natural/predictions/<name>/<name>_predictions.txt``.
    For every classifier, ``tools_ML.E2E_features`` is run (limited to 20
    classes and 100 instances) when the predictions file is missing or when
    caching is disabled. Every predictions file that exists is then drawn as
    a confusion matrix in a two-column grid, and the figure is shown.

    Args:
        use_cashed_predictions: when truthy, an existing predictions file is
            reused instead of re-running the classifier.
    """
    path_features = 'data/features-natural/FC/'
    path_output = 'data/features-natural/'

    # Only the classifiers that are actually benchmarked are instantiated;
    # building unused ones costs start-up time for nothing.
    classifiers = [
        classifier_SVM.classifier_SVM(),
        classifier_RF.classifier_RF(),
        classifier_LM.classifier_LM(),
        classifier_KNN.classifier_KNN(),
    ]

    # Build each predictions path once and reuse it for the plotting stage.
    prediction_files = []
    for classifier in classifiers:
        folder_predictions = (path_output + 'predictions/' +
                              classifier.name + '/')
        filename_predictions = (folder_predictions + classifier.name +
                                '_predictions.txt')
        prediction_files.append(filename_predictions)

        if not use_cashed_predictions or not os.path.isfile(
                filename_predictions):
            ML = tools_ML.tools_ML(classifier)
            ML.E2E_features(path_features,
                            folder_predictions,
                            limit_classes=20,
                            limit_instances=100)

    fig = plt.figure(figsize=(7, 6))
    fig.subplots_adjust(hspace=0.01)

    # Two columns; the row count follows the number of classifiers so the
    # grid keeps working if the list above grows (4 classifiers -> 2x2).
    n_cols = 2
    n_rows = max(1, (len(classifiers) + n_cols - 1) // n_cols)

    for i, (classifier, filename_predictions) in enumerate(
            zip(classifiers, prediction_files)):
        if os.path.isfile(filename_predictions):
            tools_IO.plot_confusion_mat(plt.subplot(n_rows, n_cols, i + 1),
                                        fig,
                                        filename_mat=filename_predictions,
                                        caption=classifier.name)

    plt.tight_layout()
    plt.show()

    return
コード例 #3
0
def benchmark_extractors():
    """Compare feature extractors using one classifier and plot the results.

    For each extractor (AlexNet and Inception wrappers):

    1. features are generated from ``data/ex-natural`` into
       ``data/features/<extractor name>`` unless that path already exists;
    2. ``tools_ML.E2E_features`` is run with a ``classifier_RF`` instance
       unless ``data/features/predictions/<extractor name>/
       <classifier name>_predictions.txt`` already exists;
    3. the confusion matrix read from that predictions file is drawn into
       the extractor's cell of a shared 1x3 figure.

    The figure is shown once all extractors have been processed.
    """
    mask = '*.jpg'
    path_input = 'data/ex-natural'
    path_output = 'data/features/'

    extractors = [
        CNN_AlexNet_TF.CNN_AlexNet_TF(),
        CNN_Inception_TF.CNN_Inception_TF(),
    ]
    Classifier = classifier_RF.classifier_RF()
    ML = tools_ML.tools_ML(Classifier)

    fig = plt.figure(figsize=(12, 6))
    fig.subplots_adjust(hspace=0.01)

    for i, extractor in enumerate(extractors):
        path_features = path_output + extractor.name
        if not os.path.exists(path_features):
            extractor.generate_features(path_input,
                                        path_features,
                                        mask=mask,
                                        limit=200)

        folder_predictions = (path_output + 'predictions/' +
                              extractor.name + '/')
        filename_predictions = (folder_predictions + Classifier.name +
                                '_predictions.txt')
        if not os.path.isfile(filename_predictions):
            ML.E2E_features(path_features,
                            folder_predictions,
                            limit_classes=20,
                            limit_instances=100)

        # E2E_features opens a pyplot figure of its own, which then becomes
        # the current figure. Re-activate the summary figure so that
        # plt.subplot / tight_layout / show below act on `fig` and not on
        # whatever figure was created last.
        plt.figure(fig.number)

        tools_IO.plot_confusion_mat(plt.subplot(1, 3, i + 1),
                                    fig,
                                    filename_mat=filename_predictions,
                                    caption=Classifier.name + ' + ' +
                                    extractor.name)

    plt.tight_layout()
    plt.show()

    return
コード例 #4
0
    def stage_train_stats(self,
                          path_output,
                          labels_fact,
                          labels_train_pred,
                          labels_test_pred,
                          labels_train_prob,
                          labels_test_prob,
                          patterns,
                          X=None,
                          Y=None,
                          verbose=False):
        """Write prediction reports for a train/test run and optionally plot them.

        Train and test predictions (and probabilities) are concatenated in
        that order, so ``labels_fact`` must be ordered train-first as well.
        All reports share the prefix ``path_output + self.classifier.name``:

        * ``_predictions.txt``   - rows of (actual pattern, predicted
          pattern, probability), written with ``tools_IO.save_mat``;
        * ``_confusion_mat.txt`` - ``tools_IO.print_accuracy`` output (the
          same helper is first called once without a filename);
        * ``_errors.txt``        - ``tools_IO.print_top_fails`` output;
        * ``_accuracy.txt``      - ``tools_IO.print_reject_rate`` output.

        Args:
            path_output: prefix for report files; concatenated directly with
                the classifier name.
            labels_fact: actual label indices into ``patterns``.
            labels_train_pred: predicted label indices, train part.
            labels_test_pred: predicted label indices, test part.
            labels_train_prob: prediction probabilities, train part.
            labels_test_prob: prediction probabilities, test part.
            patterns: array of class names, indexed by label.
            X: samples; only read by the PCA panel, which is switched off.
            Y: labels; only read by the PCA panel, which is switched off.
            verbose: when equal to ``True``, a figure with the learning
                rates (read from ``_learn_rates.txt``) and the confusion
                matrix is shown.
        """
        report_prefix = path_output + self.classifier.name
        predictions_file = report_prefix + '_predictions.txt'
        learn_rates_file = report_prefix + '_learn_rates.txt'

        labels_pred = numpy.hstack((labels_train_pred, labels_test_pred))
        labels_prob = numpy.hstack((labels_train_prob, labels_test_prob))

        # One row per sample: actual pattern, predicted pattern, probability.
        table = numpy.array(
            [patterns[labels_fact], patterns[labels_pred], labels_prob]).T
        tools_IO.save_mat(table, predictions_file)

        tools_IO.print_accuracy(labels_fact, labels_pred, patterns)
        tools_IO.print_accuracy(labels_fact,
                                labels_pred,
                                patterns,
                                filename=report_prefix + '_confusion_mat.txt')
        tools_IO.print_top_fails(labels_fact,
                                 labels_pred,
                                 patterns,
                                 filename=report_prefix + '_errors.txt')
        tools_IO.print_reject_rate(labels_fact,
                                   labels_pred,
                                   labels_prob,
                                   filename=report_prefix + '_accuracy.txt')

        if not (verbose == True):
            return

        # The PCA panel is currently disabled; it was meant to be enabled
        # when both X and Y are supplied.
        verbose_PCA = False

        if verbose_PCA:
            print('Extracting features for PCA')
            features = self.classifier.images_to_features(X)

        fig = plt.figure(figsize=(12, 6))
        fig.subplots_adjust(hspace=0.01)

        # With PCA enabled the figure has three panels and the PCA plot takes
        # the first one; otherwise only the two remaining panels are drawn.
        panel_count = 3 if verbose_PCA else 2
        first_panel = panel_count - 1
        if verbose_PCA:
            tools_IO.plot_features_PCA(plt.subplot(1, 3, 1), features, Y,
                                       patterns)

        tools_IO.plot_learning_rates1(plt.subplot(1, panel_count,
                                                  first_panel),
                                      fig,
                                      filename_mat=learn_rates_file)
        tools_IO.plot_confusion_mat(plt.subplot(1, panel_count,
                                                first_panel + 1),
                                    fig,
                                    filename_mat=predictions_file,
                                    caption=self.classifier.name)

        plt.tight_layout()
        plt.show()

        return
コード例 #5
0
    def E2E_features(self,
                     path_input,
                     path_output,
                     mask='.txt',
                     limit_classes=1000000,
                     limit_instances=1000000,
                     has_header=True,
                     has_labels_first_col=True):
        """Train and test the classifier on feature files and save a summary figure.

        Steps:

        1. Class names ("patterns") are derived from the regular files
           directly inside ``path_input``: for each entry, the part of its
           path after the ``path_input`` prefix and before the first
           occurrence of ``mask``. At most ``limit_classes`` unique names
           are kept.
        2. Samples are loaded with ``prepare_arrays_from_feature_files``.
        3. A random half of the samples (drawn without replacement) is used
           for training; the rest is used for testing.
        4. Features are passed through ``normalize`` and then shifted and
           scaled into the range [0, 255].
        5. ``train_test`` is run and ``stage_train_stats`` writes the
           reports into ``path_output``.
        6. A figure with the PCA plot, the confusion matrix and (only when
           ``has_header``) the feature importances is saved as
           ``path_output + 'fig_roc.png'``.

        Args:
            path_input: folder with one feature file per class. File paths
                are built by plain string concatenation with this value.
            path_output: output prefix/folder; created when missing.
            mask: suffix that separates the class name from the rest of the
                file name.
            limit_classes: maximum number of classes to use.
            limit_instances: forwarded as ``limit`` to
                ``prepare_arrays_from_feature_files``.
            has_header: when true, the first row of the first class file is
                loaded as the feature names.
            has_labels_first_col: forwarded to
                ``prepare_arrays_from_feature_files``.
        """
        print('E2E train-test on features: classifier=%s\n\n' %
              (self.classifier.name))
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists test.
        os.makedirs(path_output, exist_ok=True)

        # NOTE: the prefix is removed by slicing rather than via f.name on
        # purpose: when path_input has no trailing separator the class names
        # keep a leading separator, which makes the plain concatenation
        # `path_input + pattern + mask` below resolve to the right file.
        patterns = numpy.unique(
            numpy.array([
                f.path[len(path_input):].split(mask)[0]
                for f in os.scandir(path_input) if f.is_file()
            ]))[:limit_classes]

        (X, Y, filenames) = self.prepare_arrays_from_feature_files(
            path_input,
            patterns,
            feature_mask=mask,
            limit=limit_instances,
            has_header=has_header,
            has_labels_first_col=has_labels_first_col)

        n_samples = X.shape[0]
        idx_train = numpy.sort(
            numpy.random.choice(n_samples, n_samples // 2, replace=False))
        # Complement of the training indices. setdiff1d returns it sorted and
        # integer-typed, and avoids a Python-level membership scan of
        # idx_train for every sample.
        idx_test = numpy.setdiff1d(numpy.arange(n_samples), idx_train)

        if has_header:
            header = tools_IO.load_mat(path_input + ('%s%s' %
                                                     (patterns[0], mask)),
                                       numpy.chararray,
                                       delim='\t')
            header = header[0]
        else:
            header = None

        # Rescale the normalised features to [0, 255].
        X = normalize(X)
        X -= numpy.min(X)
        peak = numpy.max(X)
        if peak > 0:
            # When all features are equal X is all zeros after the shift;
            # skipping the scale avoids a division by zero that would turn
            # the whole matrix into NaN.
            X *= 255.0 / peak

        (labels_train_pred, labels_train_prob, _, _,
         labels_test_pred, labels_test_prob, _, _) = \
            self.train_test(X, Y, idx_train, idx_test)

        # Train part first, then test part - the same order in which
        # stage_train_stats stacks the predictions.
        labels_fact = numpy.hstack((Y[idx_train], Y[idx_test]))

        self.stage_train_stats(path_output, labels_fact, labels_train_pred,
                               labels_test_pred, labels_train_prob,
                               labels_test_prob, patterns)

        # The feature-importance panel needs feature names from the header.
        N = 3 if has_header else 2

        fig = plt.figure(figsize=(12, 6))
        fig.subplots_adjust(hspace=0.01)
        tools_IO.plot_features_PCA(plt.subplot(1, N, 1), X, Y, patterns)
        tools_IO.plot_confusion_mat(
            plt.subplot(1, N, 2), fig,
            path_output + self.classifier.name + '_predictions.txt',
            self.classifier.name)
        if has_header:
            tools_IO.plot_feature_importance(plt.subplot(1, N, 3), fig, X, Y,
                                             header)
        plt.tight_layout()
        plt.savefig(path_output + 'fig_roc.png')
        # The figure only exists to be written to disk. Closing it releases
        # its memory and keeps it from staying pyplot's current figure (and
        # from popping up in a caller's later plt.show()).
        plt.close(fig)

        return