def _fit_and_score_mlp(feats, labels, hldout, cv_it, mlp_params):
    """
    Fits one MLP on a random train partition and cross-validates on the held-out partition
    :param feats: 2D array, features; transposed before splitting
    :param labels: 1D array, assignments; transposed before splitting
    :param hldout: scalar, test partition ratio passed to train_test_split
    :param cv_it: scalar, passed as ``cv`` to cross_val_score
    :param mlp_params: dict, keyword arguments for MLPClassifier
    :return clf: obj, MLP classifier fitted on the train partition
    :return scores: 1D array, cross-validation scores computed on the held-out partition
    :return feats_test: 2D array, held-out features
    :return labels_test: 1D array, held-out assignments
    """
    # Fixed random_state keeps the partition reproducible between runs.
    feats_train, feats_test, labels_train, labels_test = train_test_split(
        feats.T, labels.T, test_size=hldout, random_state=23)
    logging.info(
        'Training feedforward neural network on randomly partitioned {}% of training data...'
        .format((1 - hldout) * 100))
    clf = MLPClassifier(**mlp_params)
    clf.fit(feats_train, labels_train)
    logging.info(
        'Done training feedforward neural network mapping {} features to {} assignments.'
        .format(feats_train.shape, labels_train.shape))
    # Report the ratio actually used for this call, not the module-level default.
    logging.info(
        'Predicting randomly sampled (non-overlapped) assignments '
        'using the remaining {}%...'.format(hldout * 100))
    scores = cross_val_score(clf, feats_test, labels_test, cv=cv_it, n_jobs=-1)
    return clf, scores, feats_test, labels_test


def _save_confusion_matrices(clf, feats_test, labels_test, file_prefix, timestr):
    """
    Plots, prints and saves raw-count and normalized confusion matrices as .svg in OUTPUT_PATH
    :param clf: obj, fitted classifier
    :param feats_test: 2D array, held-out features
    :param labels_test: 1D array, held-out assignments
    :param file_prefix: str, file name prefix placed before the 'counts'/'norm' suffix
    :param timestr: str, timestamp appended to the file name
    """
    np.set_printoptions(precision=2)
    # (plot title, ``normalize`` argument, file name suffix)
    variants = (
        ("Non-normalized confusion matrix", None, "counts"),
        ("Normalized confusion matrix", 'true', "norm"),
    )
    for title, normalize, suffix in variants:
        # NOTE(review): plot_confusion_matrix is kept because it is the name this file
        # already uses; confirm it exists in the installed scikit-learn version.
        disp = plot_confusion_matrix(clf, feats_test, labels_test,
                                     cmap=plt.cm.Blues, normalize=normalize)
        disp.ax_.set_title(title)
        print(title)
        print(disp.confusion_matrix)
        file_name = ''.join((file_prefix, suffix, timestr, '.svg'))
        disp.figure_.savefig(os.path.join(OUTPUT_PATH, file_name))
    plt.show()


def bsoid_nn(feats, labels, comp=COMP, hldout=HLDOUT, cv_it=CV_IT, mlp_params=MLP_PARAMS):
    """
    Trains MLP classifier
    :param feats: 2D array, original feature space, standardized
        (a sequence of such arrays when comp != 1)
    :param labels: 1D array, GMM output assignments
        (a sequence of such arrays when comp != 1)
    :param comp: scalar, 1 trains a single classifier on feats/labels; any other value
        trains one classifier per entry feats[i]/labels[i]
    :param hldout: scalar, test partition ratio for validating MLP performance in GLOBAL_CONFIG
    :param cv_it: scalar, iterations for cross-validation in GLOBAL_CONFIG
    :param mlp_params: dict, MLP parameters in GLOBAL_CONFIG
    :return classifier: obj, MLP classifier (list of classifiers when comp != 1)
    :return scores: 1D array, cross-validated accuracy (list of arrays when comp != 1)
    """
    if comp == 1:
        classifier, scores, feats_test, labels_test = _fit_and_score_mlp(
            feats, labels, hldout, cv_it, mlp_params)
        timestr = time.strftime("_%Y%m%d_%H%M")
        if PLOT_TRAINING:
            _save_confusion_matrices(classifier, feats_test, labels_test,
                                     'confusion_matrix_', timestr)
    else:
        classifier = []
        scores = []
        for i, (feats_i, labels_i) in enumerate(zip(feats, labels)):
            # Score and plot the classifier fitted in this iteration, not the growing list.
            clf, sc, feats_test, labels_test = _fit_and_score_mlp(
                feats_i, labels_i, hldout, cv_it, mlp_params)
            classifier.append(clf)
            # Keep each classifier's scores so the caller gets one entry per classifier.
            scores.append(sc)
            timestr = time.strftime("_%Y%m%d_%H%M")
            if PLOT_TRAINING:
                _save_confusion_matrices(clf, feats_test, labels_test,
                                         'confusion_matrix_clf{}_'.format(i), timestr)
    logging.info('Scored cross-validated feedforward neural network performance.')
    return classifier, scores