Esempio n. 1
0
def _train_and_score_mlp(feats, labels, hldout, cv_it, mlp_params, plot_stem):
    """
    Trains a single MLP classifier and cross-validates it on the held-out partition
    :param feats: 2D array, feature space; transposed before splitting
    :param labels: 1D array, assignments matching feats; transposed before splitting
    :param hldout: scalar, test partition ratio passed to train_test_split
    :param cv_it: scalar, passed as ``cv`` to cross_val_score
    :param mlp_params: dict, keyword arguments for MLPClassifier
    :param plot_stem: str, file name prefix for the saved confusion-matrix figures
    :return clf: obj, fitted MLP classifier
    :return sc: 1D array, cross-validation scores computed on the held-out partition
    """
    feats_train, feats_test, labels_train, labels_test = train_test_split(
        feats.T, labels.T, test_size=hldout, random_state=23)
    logging.info(
        'Training feedforward neural network on randomly partitioned {}% of training data...'
        .format((1 - hldout) * 100))
    clf = MLPClassifier(**mlp_params)
    clf.fit(feats_train, labels_train)
    logging.info(
        'Done training feedforward neural network mapping {} features to {} assignments.'
        .format(feats_train.shape, labels_train.shape))
    # Report the ratio that was actually used for the split, not the module default.
    logging.info(
        'Predicting randomly sampled (non-overlapped) assignments '
        'using the remaining {}%...'.format(hldout * 100))
    # Score the estimator that was just fitted (not a container of estimators).
    sc = cross_val_score(clf,
                         feats_test,
                         labels_test,
                         cv=cv_it,
                         n_jobs=-1)
    if PLOT_TRAINING:
        timestr = time.strftime("_%Y%m%d_%H%M")
        np.set_printoptions(precision=2)
        # (figure title, normalize argument, file name tag)
        plot_variants = [("Non-normalized confusion matrix", None, "counts"),
                         ("Normalized confusion matrix", 'true', "norm")]
        for title, normalize, tag in plot_variants:
            # NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2;
            # consider ConfusionMatrixDisplay.from_estimator if the pin is lifted.
            disp = plot_confusion_matrix(clf,
                                         feats_test,
                                         labels_test,
                                         cmap=plt.cm.Blues,
                                         normalize=normalize)
            disp.ax_.set_title(title)
            print(title)
            print(disp.confusion_matrix)
            file_name = ''.join(('{}_{}'.format(plot_stem, tag), timestr, '.svg'))
            disp.figure_.savefig(os.path.join(OUTPUT_PATH, file_name))
        plt.show()
    return clf, sc


def bsoid_nn(feats,
             labels,
             comp=COMP,
             hldout=HLDOUT,
             cv_it=CV_IT,
             mlp_params=MLP_PARAMS):
    """
    Trains MLP classifier(s) and cross-validates them on a held-out partition
    :param feats: 2D array when comp == 1, original feature space, standardized;
        otherwise an indexable collection of such arrays (one per classifier)
    :param labels: 1D array when comp == 1, GMM output assignments;
        otherwise an indexable collection of such arrays, aligned with feats
    :param comp: scalar, 1 trains a single classifier on feats/labels as a whole;
        any other value trains one classifier per element of feats/labels
    :param hldout: scalar, test partition ratio for validating MLP performance in GLOBAL_CONFIG
    :param cv_it: scalar, iterations for cross-validation in GLOBAL_CONFIG
    :param mlp_params: dict, MLP parameters in GLOBAL_CONFIG
    :return classifier: obj, MLP classifier (list of classifiers when comp != 1)
    :return scores: 1D array, cross-validated accuracy (list of arrays when comp != 1)
    """
    if comp == 1:
        classifier, scores = _train_and_score_mlp(
            feats, labels, hldout, cv_it, mlp_params, 'confusion_matrix')
    else:
        classifier = []
        scores = []
        for i, group_feats in enumerate(feats):
            clf, sc = _train_and_score_mlp(
                group_feats, labels[i], hldout, cv_it, mlp_params,
                'confusion_matrix_clf{}'.format(i))
            classifier.append(clf)
            # Keep every classifier's scores so the returned list is not empty.
            scores.append(sc)
    # Plain message: it has no placeholders, and formatting it with the
    # training shapes would fail when no classifier was trained.
    logging.info(
        'Scored cross-validated feedforward neural network performance.')
    return classifier, scores