Example #1
def save_confusion_matrix(y_test, y_pred, target_names, path, figsize=(15, 15), suffix=''):
    import os
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.metrics import confusion_matrix
    # Decode one-hot labels / per-class scores, then plot and save the matrix
    # (plot_confusion_matrix is a project-level helper).
    cm = confusion_matrix(np.argmax(y_test, axis=1), np.argmax(y_pred, axis=1))
    plt.figure(figsize=figsize)
    plot_confusion_matrix(cm, target_names, normalize=True, suffix=suffix)
    plt.savefig(os.path.join(path, 'Confusion_matrix{}.png'.format(suffix)), bbox_inches='tight')
    plt.close()
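For reference, a minimal usage sketch with made-up data (it assumes a project-level plot_confusion_matrix helper is importable, as in the examples below); the function expects one-hot true labels and per-class scores, both decoded with argmax:

import numpy as np

# Hypothetical 3-class example: one-hot truth, predicted probabilities.
y_test = np.eye(3)[[0, 1, 2, 1]]
y_pred = np.array([[0.9, 0.05, 0.05],
                   [0.1, 0.80, 0.10],
                   [0.2, 0.20, 0.60],
                   [0.3, 0.60, 0.10]])
save_confusion_matrix(y_test, y_pred, ['a', 'b', 'c'], '/tmp', suffix='_demo')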
Example #2
def crossval_on_all(noise_session, gesture_data, n_splits=10):
    dataset = gesture_data.dataset()
    noise_dataset = noise_session.dataset()

    # Stratified shuffle splits: n_splits random train/test partitions
    # (note: StratifiedShuffleSplit resamples, it does not build disjoint folds)
    sss = StratifiedShuffleSplit(n_splits=n_splits)
    fold_accuracies = np.zeros((n_splits, ))
    fold_f1 = np.zeros((n_splits, ))
    confusion_matrices = np.zeros((n_splits, N_CLASSES, N_CLASSES))

    for i, (train_indexes, test_indexes) in enumerate(
            sss.split(np.zeros(dataset.length()), dataset.labels_gesture_type)):
        # Balance the training data (due to double sequences)
        dataset_train = dataset.select_indexes(train_indexes)
        dataset_test = dataset.select_indexes(test_indexes)
        (fold_accuracies[i], fold_f1[i], confusion_matrices[i], probs_d,
         probs_t, labels) = evaluate_fold(noise_dataset, dataset_train,
                                          dataset_test)
        print("Fold {} accuracy: {}, f1: {}".format(i, fold_accuracies[i],
                                                    fold_f1[i]))
    mean_acc = np.mean(fold_accuracies)
    mean_f1 = np.mean(fold_f1)
    print("Mean accuracy: {}".format(mean_acc))
    print("Mean f1 score: {}".format(mean_f1))
    cm_sum = np.sum(confusion_matrices, axis=0)
    plot_confusion_matrix(cm_sum, [
        'Null', 'Snap left', 'Snap right', 'Knock left', 'Knock right', 'Clap',
        'Knock left 2x', 'Knock right 2x', 'Clap 2x'
    ],
                          normalize=True)
    return mean_acc
Example #3
def save_plots(y_test, predictions, categories, pipe_name, report):
    plot_confusion_matrix(y_test,
                          predictions,
                          categories,
                          pipe_name,
                          current_checkpoint_directory,
                          normalize=True)
    save_text(
        os.path.join(current_checkpoint_directory,
                     "{}.{}".format(pipe_name, report_file_extension)), report)
Example #4
def run_classifier(data, target):
    data_train, data_test, target_train, target_test = train_test_split(
        data, target, test_size=0.25, random_state=42)
    svc = SVC(probability=True)
    print("Shape of training set: %s" % (data_train.shape,))
    print("Shape of test set: %s" % (data_test.shape,))

    # Hyperparameter grid for the SVM ('degree' only affects the 'poly' kernel)
    params = [
        {'C': [0.1, 0.3, 1, 3, 10, 30],
         'gamma': [0.01, 0.03, 0.1, 0.3, 1, 3, 5],
         'degree': [2, 3, 4],
         'kernel': ['rbf', 'poly', 'sigmoid']},
    ]

    gsearch = GridSearchCV(svc, params, n_jobs=2,
                           verbose=1, scoring='f1_micro', cv=5)

    gsearch.fit(data_train, target_train)
    print('Best score: %0.3f' % gsearch.best_score_)

    print('Best parameters set:')
    best_params = gsearch.best_estimator_.get_params()
    for dict_item in params:
        for param_name in sorted(dict_item.keys()):
            print('\t%s: %r' % (param_name, best_params[param_name]))

    preds = gsearch.predict(data_test)
    print(classification_report(target_test, preds, target_names=mg_fft.GENRES))

    conf_matr = confusion_matrix(target_test, preds)
    conf_matr = normalize(conf_matr)
    plots.plot_confusion_matrix(conf_matr, mg_fft.GENRES, "Confusion Matrix", "Music Genres")

    # Gather data for ROC curves; predict the probabilities once, outside the loop
    proba = gsearch.predict_proba(data_test)
    fprs = []
    tprs = []
    AUCs = []
    for label in range(len(mg_fft.GENRES)):
        target_label_test = np.asarray(target_test == label, dtype=int)
        proba_label = proba[:, label]
        fpr, tpr, roc_thresholds = roc_curve(target_label_test, proba_label)
        fprs.append(fpr)
        tprs.append(tpr)
        AUCs.append(roc_auc_score(target_label_test, proba_label))

    plots.plot_roc_curves(fprs, tprs, AUCs, mg_fft.GENRES)
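Since the example above depends on the project's mg_fft and plots modules, here is a self-contained sketch of just the grid-search step, using scikit-learn's bundled iris data in place of the genre features (dataset choice and parameter values are illustrative only):

from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC

# Illustrative stand-in for the genre features/labels above.
X, y = load_iris(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=42)

gs = GridSearchCV(SVC(probability=True),
                  [{'C': [0.1, 1, 10], 'gamma': [0.01, 0.1, 1],
                    'kernel': ['rbf', 'sigmoid']}],
                  scoring='f1_micro', cv=5)
gs.fit(X_tr, y_tr)
print('Best score: %0.3f' % gs.best_score_)
print(gs.best_params_)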
Example #5
File: base.py  Project: prengifo/r2c2usb
    def train(self, test_size=0.2, random_state=None):
        # Split dataset into training and validation.
        # (sklearn's cross_validation module was removed; train_test_split
        # now lives in sklearn.model_selection.)
        X_train, X_test, y_train, y_test = model_selection\
            .train_test_split(self.dataset.matrix,
                              self.dataset.labels,
                              test_size=test_size,
                              random_state=random_state)
        self.clf.fit(X_train, y_train)
        y_pred = self.clf.predict(X_test)
        print('accuracy on testing set', self.clf.score(X_test, y_test))
        cm = confusion_matrix(y_test, y_pred)
        print(cm)
        print(classification_report(y_test, y_pred,
                                    target_names=['verde', 'amarillo', 'rojo']))

        plot_confusion_matrix(cm)

        return cm
Example #6
    def train(self, test_size=0.2, random_state=None):
        # Split dataset into training and validation.
        # (sklearn's cross_validation module was removed; train_test_split
        # now lives in sklearn.model_selection.)
        X_train, X_test, y_train, y_test = model_selection\
            .train_test_split(self.dataset.matrix,
                              self.dataset.labels,
                              test_size=test_size,
                              random_state=random_state)
        self.clf.fit(X_train, y_train)
        y_pred = self.clf.predict(X_test)
        print('accuracy on testing set', self.clf.score(X_test, y_test))
        cm = confusion_matrix(y_test, y_pred)
        print(cm)
        print(classification_report(y_test,
                                    y_pred,
                                    target_names=['verde', 'amarillo', 'rojo']))

        plot_confusion_matrix(cm)

        return cm
Example #7
# save predictions to csv
y_pred = predictions.argmax(axis=-1)
y_pred = label_encoder.inverse_transform(y_pred)
df_results = pd.read_csv(os.path.join('data', 'test.csv'))
df_predictions = pd.DataFrame(y_pred)
df_results['predictions'] = df_predictions
df_results.to_csv(os.path.join(results_dir, 'predictions.csv'), index=False)

# plot confusion matrix
test_gen = custom_generator(df_test, label_encoder, bin_size, False,
                            (224, 224), 1)
plots.plot_confusion_matrix(figures_dir,
                            'confusion_' + checkpoint_filename[:-5],
                            test_gen,
                            df_test.shape[0],
                            label_encoder,
                            predictions,
                            show_values=True)

# plot ROC curves
auc_micro, auc_macro = plots.plot_ROC(figures_dir,
                                      'ROC_' + checkpoint_filename[:-5],
                                      test_gen, df_test.shape[0],
                                      label_encoder, predictions)

# save results
d = {
    'learning_rate': [learning_rate],
    'batch_size': [batch_size],
    'num_frozen_layers': [num_frozen_layers],
Example #8
print("Training Accuracy: {:.4f}".format(accuracy))
loss, accuracy = model.evaluate(valid_seq_x, valid_y, verbose=False)
print("Testing Accuracy:  {:.4f}".format(accuracy))

# predict the labels on validation dataset
predictions = model.predict(valid_seq_x)
predictions = predictions.argmax(axis=-1)
# sklearn convention is (y_true, y_pred); keeping that order avoids a
# transposed confusion matrix
print(metrics.accuracy_score(valid_y, predictions))
matrix = metrics.confusion_matrix(valid_y, predictions)
print(matrix)

np.set_printoptions(precision=2)
# Plot non-normalized confusion matrix
plot_confusion_matrix(
    valid_y,
    predictions,
    classes=class_names,
    title='Confusion matrix, without normalization; '
          'testing accuracy: {:.4f}'.format(accuracy))
# Plot normalized confusion matrix
#plot_confusion_matrix(valid_y, predictions, classes=class_names, normalize=True,
#                      title='Normalized confusion matrix')

#plt.show()
plt.savefig('confusion_matrix.png')

sleep(60)

shuffle(tweets)
for t in tweets[:20]:
    tweet = p.clean(t[3])
    text = sequence.pad_sequences(tokenizer.texts_to_sequences([tweet]),
Example #9
best_val_acc = 0.0
train_losses, train_accuracies = [], []
valid_losses, valid_accuracies = [], []

for epoch in range(NUM_EPOCHS):
    train_loss, train_accuracy = train(model, device, train_loader, criterion,
                                       optimizer, epoch)
    valid_loss, valid_accuracy, valid_results = evaluate(
        model, device, valid_loader, criterion)

    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    train_accuracies.append(train_accuracy)
    valid_accuracies.append(valid_accuracy)

    is_best = valid_accuracy > best_val_acc  # let's keep the model that has the best accuracy, but you can also use another metric.
    if is_best:
        torch.save(model, os.path.join(PATH_OUTPUT, save_file))

plot_learning_curves(train_losses, valid_losses, train_accuracies,
                     valid_accuracies)

best_model = torch.load(os.path.join(PATH_OUTPUT, save_file))
test_loss, test_accuracy, test_results = evaluate(best_model, device,
                                                  test_loader, criterion)

class_names = ['Seizure', 'TumorArea', 'HealthyArea', 'EyesClosed', 'EyesOpen']
plot_confusion_matrix(test_results, class_names)
Example #10
    folds = 100
    cv = KFold(n_splits=folds)
    scores = model_selection.cross_val_score(lr, x, y, cv=cv)
    y_pred = model_selection.cross_val_predict(lr, x, y, cv=cv)
    print("%1d-fold cross validation average accuracy: %.3f" %
          (folds, scores.mean()))

    print("--- %s seconds ---" % (time.time() - start_time))

    # classification report
    print(classification_report(y, y_pred))

    # confusion matrix
    cm = normalize(confusion_matrix(y, y_pred), axis=1, norm='l1')
    plot_confusion_matrix(cm, genre_list, "Name", "Confusion Matrix")

    print("Creating roc curves")

    # ROC curve
    fprs = []
    tprs = []
    AUCs = []

    # Predict class probabilities once; with 100-fold CV this is the
    # expensive step, so keep it out of the per-label loop.
    proba = model_selection.cross_val_predict(lr,
                                              x,
                                              y,
                                              cv=cv,
                                              method='predict_proba')

    for label in range(len(genre_list)):
        y_label = np.asarray(y == label, dtype=int)
Example #11
# inverse transform true labels
y_true = np.array(y_true).argmax(axis=-1)
y_true = label_encoder.inverse_transform(y_true)

# save predictions to csv
y_pred = predictions.argmax(axis=-1)
y_pred = label_encoder.inverse_transform(y_pred)
df_results = pd.read_csv(os.path.join('data', 'test.csv'))
df_predictions = pd.DataFrame(y_pred)
df_results['ytrue'] = pd.DataFrame(y_true)
df_results['predictions'] = df_predictions
df_results.to_csv(os.path.join(results_dir, 'predictions.csv'), index=False)

# plot confusion matrix
test_gen = custom_generator(df_test, label_encoder, bin_size, False, (224, 224), 1)
plots.plot_confusion_matrix(results_dir, 'confusion_matrix',
                            test_gen, df_test.shape[0], label_encoder, predictions, show_values=True)

# plot ROC curves
auc_micro, auc_macro = plots.plot_ROC(results_dir, 'roc_curves', test_gen, df_test.shape[0], label_encoder, predictions)

# save results
d = {'model': [model], 'learning_rate': [learning_rate], 'batch_size': [batch_size],
     'num_frozen_layers': [num_frozen_layers], 'use_class_weights': [use_class_weights], 'bin_size': [bin_size],
     'test_loss': [test_loss], 'test_accuracy': [test_accuracy], 'test_mean_absolute_bin': [test_mean_absolute_bin_error],
     'micro_auc': [auc_micro], 'macro_auc': [auc_macro], 'folder': [start_time]}
df_parameters = pd.DataFrame(d, columns=['model', 'learning_rate', 'batch_size', 'num_frozen_layers', 'use_class_weights',
                                          'bin_size', 'test_loss', 'test_accuracy', 'test_mean_absolute_bin',
                                          'micro_auc', 'macro_auc', 'folder'])
df_parameters.to_csv(os.path.join(results_dir, 'results.csv'), index=False)
Example #12
          validation_data = (X_test_norm, y_test),
          show_accuracy = True, verbose=2)

    # Calculate metrics
    y_pred_train = model.predict_classes(X_train_norm)    # Predicted y_train classification
    print("Train Acc:")
    metrics.calc_acc(y_pred_train, y_train)
    y_pred_test = model.predict_classes(X_test_norm)      # Predicted y_test classification
    print("Test Acc:")
    metrics.calc_acc(y_pred_test, y_test)
    # Classification report
    y_act = get_labels.get_label_1D(y_test)
    print(classification_report(y_act, y_pred_test, target_names=['spiral', 'elliptical', 'uncertain']))
    # Plot confusion matrix
    cm = confusion_matrix(y_act, y_pred_test)
    plots.plot_confusion_matrix(cm, normed=True)
    # Get probabilities for each classification
    probas = model.predict_proba(X_test_norm)
    # Calculate true positive and false positive rates treating each class as a binary problem
    sp_fpr, sp_tpr, _ = roc_curve(y_test20[:, 0], probas[:, 0])
    ell_fpr, ell_tpr, _ = roc_curve(y_test20[:, 1], probas[:, 1])
    unc_fpr, unc_tpr, _ = roc_curve(y_test20[:, 2], probas[:, 2])
    # Plot ROC curves
    plots.plot_ROC_curve(sp_tpr, sp_fpr, ell_tpr, ell_fpr, unc_tpr, unc_fpr)
    # Print AUC scores (each class scored against its own truth column)
    print("AUC for spirals:")
    print(roc_auc_score(y_test[:, 0], probas[:, 0]))
    print("AUC for ellipticals:")
    print(roc_auc_score(y_test[:, 1], probas[:, 1]))
    print("AUC for uncertain:")
    print(roc_auc_score(y_test[:, 2], probas[:, 2]))
Example #13
def main():

    data_path = '../data/'
    model_path = '../model_6/model_6'
    mod_types = [
        'gfsk', 'gmsk', 'qam4', 'qam16', 'qam64', 'psk2', 'psk4', 'psk8'
    ]
    feature_types = ['cumulants', 'amplitude_stats', 'phase_stats']
    # feature_types = ['amplitude_stats', 'phase_stats']
    feature_names = [
        '|C20|', '|C21|', '|C40|', '|C41|', '|C42|', '|C60|', '|C61|', '|C62|',
        '|C63|', '∠C20', '∠C21', '∠C40', '∠C41', '∠C42', '∠C60', '∠C61',
        '∠C62', '∠C63', 'Magnitude mean', 'Magnitude std', 'Phase mean',
        'Phase std'
    ]

    num_frames = int(1e3)
    frame_len = 2048
    frame_step = 256
    snr_list = None

    files = dict()
    for mod_type in mod_types:

        files[mod_type] = list()
        files[mod_type].append(data_path + mod_type + '.txt')

    print('Extracting features')
    features_dict = extract_features(files,
                                     mod_types,
                                     feature_types=feature_types,
                                     frame_len=frame_len,
                                     frame_step=frame_step,
                                     num_frames=num_frames,
                                     snr_list=snr_list,
                                     verbose=True)
    plot_feature_stats(features_dict, feature_names)
    print('Number of features: {0}'.format(
        features_dict[mod_types[0]].shape[1]))

    print('Converting features')
    (features, labels) = convert_features(features_dict)
    (train_feats, test_feats, train_labels,
     test_labels) = train_test_split(features, labels, test_size=0.1)
    print('Number of samples: {0}'.format(features.shape[0]))
    print('Number of training samples: {0}'.format(train_feats.shape[0]))
    print('Number of testing samples: {0}'.format(test_feats.shape[0]))

    print('Training model')
    classifier = train_neural_network(train_feats, train_labels)

    print('Saving parameters')
    save_parameters(model_path, classifier.coefs_, classifier.intercepts_)

    print('Testing model')
    pred_labels = classifier.predict(test_feats)
    plot_confusion_matrix(test_labels,
                          pred_labels,
                          np.array(mod_types),
                          title='',
                          normalize=True)
    # plot_learning_curve(classifier, train_feats, train_labels,
    #     title='Learning Curve', cv=3, n_jobs=2, train_sizes=np.linspace(0.1, 1.0, 10))

    accuracy = accuracy_score(test_labels, pred_labels)
    print('Accuracy: {:.2f}'.format(accuracy))

    report = classification_report(test_labels,
                                   pred_labels,
                                   target_names=mod_types)
    print(report)

    print('Saving model')
    import pickle
    with open(model_path + '.pkl', 'wb') as file:
        pickle.dump(classifier, file)

    plt.show()
Example #14
File: ml.py  Project: chrisworld/kws_game
    def eval(self):
        """
    evaluation
    """

        print("\n--Evaluation on Test Set:")

        # evaluation of model
        eval_score = self.net_handler.eval_nn(eval_set='test',
                                              batch_archive=self.batch_archive,
                                              collect_things=True,
                                              verbose=False)

        # score print of collected
        eval_score.info_collected(self.net_handler.nn_arch,
                                  self.audio_dataset.param_path,
                                  self.cfg_ml['train_params'],
                                  info_file=self.score_file,
                                  do_print=False)

        # log to file
        if self.cfg_ml['logging_enabled']:
            logging.info(
                eval_score.info_detail_log(self.net_handler.nn_arch,
                                           self.audio_dataset.param_path,
                                           self.cfg_ml['train_params']))

        # print confusion matrix
        print("confusion matrix:\n{}\n".format(eval_score.cm))

        # plot confusion matrix
        plot_confusion_matrix(eval_score.cm,
                              self.batch_archive.classes,
                              plot_path=self.model_path,
                              name='confusion_test')

        # --
        # evaluation on my set

        if self.batch_archive.x_my is None:
            if self.cfg_ml['logging_enabled']: logging.info('\n')
            return

        print("\n--Evaluation on My Set:")

        # evaluation of model
        eval_score = self.net_handler.eval_nn(eval_set='my',
                                              batch_archive=self.batch_archive,
                                              collect_things=True,
                                              verbose=True)

        # score print of collected
        eval_score.info_collected(self.net_handler.nn_arch,
                                  self.audio_dataset.param_path,
                                  self.cfg_ml['train_params'],
                                  info_file=self.score_file,
                                  do_print=False)

        # log to file
        if self.cfg_ml['logging_enabled']:
            logging.info(
                eval_score.info_detail_log(self.net_handler.nn_arch,
                                           self.audio_dataset.param_path,
                                           self.cfg_ml['train_params']))

        # confusion matrix
        print("confusion matrix:\n{}\n".format(eval_score.cm))

        # plot confusion matrix
        plot_confusion_matrix(eval_score.cm,
                              self.batch_archive.classes,
                              plot_path=self.model_path,
                              name='confusion_my')

        # new line for log
        if self.cfg_ml['logging_enabled']: logging.info('\n')
Example #15
def main():
    num_epochs = 3
    batch_size = 50

    # Define working directory locations
    data_dir = '../../data/'
    graph_dir = '../../graphs'

    output_dir = '../output/model'
    os.makedirs(output_dir, exist_ok=True)

    # Set environment seeds
    # (note: the later torch.manual_seed(42) call overrides the CPU seed
    # of 0 set just below; the CUDA seed remains 0)
    torch.manual_seed(0)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(0)
    torch.manual_seed(42)
    prng = np.random.RandomState(42)

    # Prepare values for model metric reporting
    all_test_loss = []
    all_test_accuracy = []
    all_test_results = []
    all_best_acc = 0.0
    best_file = 'SleepCNNBest.pth'

    # Extract raw EEG and annotation data from full raw dataset
    eeg_list_all, annotation_list_all = load_sleep_dataset(data_dir)

    start = time.time()
    for fold in range(20):
        # Divide subject list for 20-fold cross validation
        subjects = [x for x in range(20)]
        test = [fold]
        subjects.remove(fold)
        training = prng.choice(subjects, 15, replace=False)
        validate = list(set(subjects) - set(training))

        # Format train, validation, and test datasets for PyTorch
        train_dataset = process_data_for_1d(load_sleep_dataset_targets(
            training, eeg_list_all, annotation_list_all),
                                            not_test_set=True)
        validation_dataset = process_data_for_1d(load_sleep_dataset_targets(
            validate, eeg_list_all, annotation_list_all),
                                                 not_test_set=True)
        test_dataset = process_data_for_1d(load_sleep_dataset_targets(
            test, eeg_list_all, annotation_list_all),
                                           not_test_set=False)

        # Load dataset splits into PyTorch
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
        valid_loader = torch.utils.data.DataLoader(validation_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
        test_loader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=True)

        # Load CNN model and parameters
        model = m.SleepCNN_1D()
        if torch.cuda.is_available():
            print('Unleashing CUDA, zoom zoom!')
            model = model.cuda()
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = model.to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters())
        criterion.to(device)
        save_file = 'SleepCNN.pth'

        best_val_acc = 0.0
        train_losses, train_accuracies = [], []
        valid_losses, valid_accuracies = [], []

        for epoch in range(num_epochs):
            # Iteratively train and validate network
            train_loss, train_accuracy = train(model, device, train_loader,
                                               criterion, optimizer, epoch,
                                               fold)

            valid_loss, valid_accuracy, valid_results = evaluate(
                model, device, valid_loader, criterion)

            train_losses.append(train_loss)
            valid_losses.append(valid_loss)

            train_accuracies.append(train_accuracy)
            valid_accuracies.append(valid_accuracy)

            # Save model with best accuracy for final evaluation
            if valid_accuracy > best_val_acc:
                best_val_acc = valid_accuracy
                torch.save(model, os.path.join(output_dir, save_file))
            if valid_accuracy > all_best_acc:
                all_best_acc = valid_accuracy
                torch.save(model, os.path.join(output_dir, best_file))

        # Run final evaluation with best seen performing model
        best_model = torch.load(os.path.join(output_dir, save_file))
        test_loss, test_accuracy, test_results = evaluate(
            best_model, device, test_loader, criterion)

        # Update master list of results for use in cross-validation
        all_test_loss.append(test_loss)
        all_test_accuracy.append(test_accuracy)
        all_test_results.extend(test_results)

    end = time.time()
    print(f'Total fold time took {(end - start) / 60.} mins.')

    # Summarize results across all folds from cross-validation
    joblib.dump(all_test_results, graph_dir + '/all_test_results.gz')
    y_true = [x[0] for x in all_test_results]
    y_pred = [x[1] for x in all_test_results]

    accuracy = metrics.accuracy_score(y_true, y_pred)
    recall = metrics.recall_score(y_true, y_pred, average=None)
    f1_score = metrics.f1_score(y_true, y_pred, average=None)
    precision = metrics.precision_score(y_true, y_pred, average=None)
    global_precision = metrics.precision_score(y_true, y_pred, average='micro')

    # Save model summary metrics to file
    with open(graph_dir + '/result_metrics.txt', 'w') as f:
        f.write(f'Class Precision: {precision}')
        f.write(f'\nGlobal precision: {global_precision}')
        f.write(f'\nClass Recall: {recall}')
        f.write(f'\nClass F1-score: {f1_score}')
        f.write(f'\nAccuracy: {accuracy}')

    # Plot final confusion matrices with info from all cv folds
    class_names = ['Non-REM 1', 'Non-REM 2', 'Non-REM 3', 'REM', 'Wake']
    plot_confusion_matrix(all_test_results,
                          class_names,
                          outdir=graph_dir,
                          normalize=False)
    plot_confusion_matrix(all_test_results,
                          class_names,
                          outdir=graph_dir,
                          normalize=True)
Example #16
cc_classifier.fit(train_cc)

# Evaluate classifier on train data
cm = np.zeros((len(classes), len(classes)))
for i, cc in enumerate(train_cc):
    print(f"Evaluation of [TRAIN DATA] {train_cc_files[i]}")
    cc.set_predicted_labels(cc_classifier.predict(cc))
    cc.eval_classification_error(ground_truth_type="componentwise")
    cc.eval_classification_error(ground_truth_type="pointwise")
    this_cm = cc.eval_classification_error(ground_truth_type="pointwise",
                                           include_unassociated_points=True,
                                           classes=np.array(
                                               list(classes.keys())))
    plot_confusion_matrix(this_cm,
                          list(classes.values()),
                          data_type='train_' + train_cc_files[i].split('.')[0],
                          id=id_experimentation,
                          folder=plot_backup_folder)
    cm += this_cm
plot_confusion_matrix(cm,
                      list(classes.values()),
                      data_type='train',
                      id=id_experimentation,
                      folder=plot_backup_folder)

# Evaluate classifier on test data
cm = np.zeros((len(classes), len(classes)))
for i, cc in enumerate(test_cc):
    print(f"Evaluation of [TEST DATA] {test_cc_files[i]}")
    cc.set_predicted_labels(cc_classifier.predict(cc))
    cc.eval_classification_error(ground_truth_type="componentwise")
Example #17
def main(experiment_path, plot_results=False):
    (kfold_data, X_test) = prepare_data_cv('../input')
    
    models_proba = []
    models_acc = []
    models_roc = []
    models_logloss = []
    models_map = []
    
    for idx, data in enumerate(kfold_data):
        X_train, y_train, X_valid, y_valid = data
        
        model = load_model(get_resnet_18, weights=None)
        callbacks = get_model_callbacks(save_dir=os.path.join(experiment_path, 'fold_%02d' % idx))
        data_generator = get_data_generator(X_train, y_train, batch_size=128)

        model.fit_generator(
            data_generator,
            steps_per_epoch=10,
            epochs=1000,
            verbose=True,
            validation_data=(X_valid, y_valid),
            callbacks=callbacks,
            shuffle=True)

        model.load_weights(filepath=os.path.join(experiment_path, ('fold_%02d/model/model_weights.hdf5' % idx)))

        _, acc_val = model.evaluate(X_valid, y_valid, verbose=False)
        proba = model.predict(X_valid)
        proba_test = model.predict(X_test)[:, 1]

        models_proba.append(proba_test)
        models_acc.append(acc_val)
        models_roc.append(roc_auc_score(y_valid.argmax(axis=1), proba[:, 1]))
        models_map.append(average_precision_score(y_valid.argmax(axis=1), proba[:, 1]))
        models_logloss.append(logloss_softmax(y_valid, proba))

        prepare_submission([proba_test], os.path.join(experiment_path, 'fold_%02d/prediction.csv' % idx))

        if plot_results:
            plots_path = os.path.join(experiment_path, 'fold_%02d/plots' % idx)
            if not os.path.exists(plots_path):
                os.makedirs(plots_path)

            plot_precision_recall(proba[:, 1], y_valid.argmax(axis=1),
                                  path=os.path.join(plots_path, 'recall_precision.jpg'))

            plot_roc(proba[:, 1], y_valid.argmax(axis=1),
                     path=os.path.join(plots_path, 'roc.jpg'))

            plot_confusion_matrix(proba[:, 1], y_valid.argmax(axis=1),
                                  path=os.path.join(plots_path, 'conf.jpg'))

        print('Loss:\nMean: %f\nStd: %f\nMin: %f\nMax: %f\n\n' % (np.mean(models_logloss),
                                                                  np.std(models_logloss),
                                                                  np.min(models_logloss),
                                                                  np.max(models_logloss)))

        print('Acc:\nMean: %f\nStd: %f\nMin: %f\nMax: %f\n\n' % (np.mean(models_acc),
                                                                 np.std(models_acc),
                                                                 np.min(models_acc),
                                                                 np.max(models_acc)))

        print('ROC AUC:\nMean: %f\nStd: %f\nMin: %f\nMax: %f\n\n' % (np.mean(models_roc),
                                                                     np.std(models_roc),
                                                                     np.min(models_roc),
                                                                     np.max(models_roc)))

        print('mAP:\nMean: %f\nStd: %f\nMin: %f\nMax: %f\n\n' % (np.mean(models_map),
                                                                 np.std(models_map),
                                                                 np.min(models_map),
                                                                 np.max(models_map)))

    prepare_submission(models_proba, os.path.join(experiment_path, 'submission.csv'))
Example #18
    if is_best:
        best_val_auc = valid_roc
        torch.save(model, os.path.join(PATH_OUTPUT, "MyCNN.pth"))

best_model = torch.load(os.path.join(PATH_OUTPUT, "MyCNN.pth"))

plot_learning_curves(train_losses, valid_losses, train_accuracies,
                     valid_accuracies)

plot_learning_curves_roc(train_rocs,
                         valid_rocs,
                         filename='learning_curves_roc.png')

train_loss, train_accuracy, train_results = evaluate(model, device,
                                                     train_loader, criterion)
plot_confusion_matrix(train_results, ["Alive", "Dead"])

valid_loss, valid_accuracy, valid_results = evaluate(model, device,
                                                     valid_loader, criterion)
plot_confusion_matrix(valid_results, ["Alive", "Dead"])

valid_results = np.array(valid_results)
actual = valid_results[:, :1]
pred = valid_results[:, 1:]
fpr, tpr, _ = roc_curve(actual, pred)
roc_auc = auc(fpr, tpr)
print("Roc_auc: " + str(roc_auc))

y_true = [x[0] for x in valid_results]
y_pred = [x[1] for x in valid_results]
Example #19
def main():
    global args
    parser = argparse.ArgumentParser(
        description="Convolutional NN Testing Script")
    parser.add_argument("-c",
                        "--config",
                        dest="configfile",
                        default='config.yml',
                        help="Path to yaml configuration file")
    parser.add_argument("-m",
                        "--modelnames",
                        dest="modelnames",
                        nargs="*",
                        default=None,
                        required=False,
                        help="Model name to test")

    rot_parse = parser.add_mutually_exclusive_group()
    rot_parse.add_argument("-r",
                           "--rand_rot_angle",
                           dest="rand_rot_angle",
                           default=0.,
                           type=float,
                           help="Random image rotation angle range [deg]")
    rot_parse.add_argument(
        "-f",
        "--fixed_rot_angle",
        dest="fixed_rot_angle",
        nargs=3,
        type=float,
        help="(low, high, spacing) fixed image rotation angle [deg]")

    args = parser.parse_args()

    target_names = [
        'Planes', 'Cars', 'Birds', 'Cats', 'Deer', 'Dogs', 'Frogs', 'Horses',
        'Boats', 'Trucks'
    ]

    # Determine which rotation to apply
    run_fixed_rotation = False
    i_results_prefix = 'random'
    rot_angle_list = [args.rand_rot_angle]
    rot_comment = "Random rotation range (deg): [-{}, {}]".format(
        rot_angle_list[0], rot_angle_list[0])

    if args.fixed_rot_angle is not None:
        i_results_prefix = 'fixed'
        run_fixed_rotation = True
        ang_range = args.fixed_rot_angle
        rot_angle_list = np.arange(ang_range[0], ang_range[1], ang_range[2])
        rot_comment = "Fixed rotation angle(s) (deg): {}".format(
            rot_angle_list)

    # Get configuration file
    hconfig = ModelConfigurator(args.configfile)

    # Extract config parameters
    datapath = hconfig.datapath

    # Get requested models, if None, take config's list
    model_list = args.modelnames
    if model_list is None:
        model_list = hconfig.avail_models

    # Directory structures for data and model saving
    data_dir_struct = DataDirStruct(datapath)

    # Dictionary of test results
    out_dict = {}
    out_dict['theta'] = np.array(rot_angle_list, dtype='float32')

    # List of accuracies for each model
    acc_model_list = []
    loss_model_list = []

    # Loop over requested models
    for mod_i in model_list:

        mod_i = mod_i.strip()
        print('\nTesting {} over following rotations: {} ...\n'.format(
            mod_i, rot_angle_list))

        # Set model config parameters
        hconfig.model_config(mod_i)

        # Extract model path from config
        model_dir_struct = ModelDirStruct(main_dir=hconfig.model_outpath,
                                          test_model=True)

        ## Load model to test
        # Load pretrained model from file
        with open(model_dir_struct.model_file, 'r') as json_file:
            trained_model_json = json_file.read()
        trained_model = model_from_json(trained_model_json, custom_layer_dict)

        # Load weights into model
        trained_model.load_weights(model_dir_struct.weights_file)
        print("Loaded {} from disk".format(mod_i))

        # Compile trained model
        trained_model.compile(loss='categorical_crossentropy',
                              optimizer='rmsprop',
                              metrics=['accuracy'])

        # Print test results to file
        results_file = os.path.join(model_dir_struct.main_dir, 'tests.log')
        glob_text_file = open(results_file, 'w')

        glob_text_file.write('#Index\tAngle\tAccuracies\n')

        # List of accuracies for each rotation
        acc_rot_list = []
        loss_rot_list = []
        # Run over rotation angles in list,
        # or just single value used for random range
        for i, rot_angle in enumerate(rot_angle_list):

            print('On {} angle {}'.format(i_results_prefix, rot_angle))

            test_prefix = 'test_%s_rot_%03i' % (i_results_prefix, i)

            # Print test results to file
            i_results_file = os.path.join(model_dir_struct.main_dir,
                                          test_prefix + '.log')
            i_text_file = open(i_results_file, 'w')

            # Testing generator
            test_gen = test_img_generator(dir_struct=data_dir_struct,
                                          config_struct=hconfig,
                                          fixed_rotation=run_fixed_rotation,
                                          rotation_angle=rot_angle)

            # Truth labels for sample
            y_truth = test_gen.classes

            # Evaluate loaded model on test data
            scores = trained_model.evaluate_generator(test_gen,
                                                      steps=None,
                                                      verbose=1)
            print("Test %s: %.2f%%" %
                  (trained_model.metrics_names[1], scores[1] * 100))

            # Save each rotation loss & accuracy
            loss_rot_list.append(scores[0])
            acc_rot_list.append(scores[1])

            # Running prediction
            Y_pred = trained_model.predict_generator(test_gen,
                                                     steps=None,
                                                     verbose=1)
            y_predict = np.argmax(Y_pred, axis=1)

            # Confusion matrix
            print('Confusion Matrix')
            cm = confusion_matrix(y_truth, y_predict)
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
            print(cm)
            plot_confusion_matrix(cm=cm,
                                  classes=target_names,
                                  outname='cm_%s' % test_prefix,
                                  model_dir_struct=model_dir_struct)

            # Classification report
            print('Classification Report')
            class_report = classification_report(y_truth,
                                                 y_predict,
                                                 target_names=target_names)
            print(class_report)

            # Print test results to file
            i_text_file.write(
                '\n\nRotation Angle: {} deg\n\n'.format(rot_angle))
            i_text_file.write('\n\nConfusion Matrix:\n\n')
            i_text_file.write('{}'.format(cm))

            i_text_file.write('\n\n\nClassification Report:\n\n')
            i_text_file.write('{}'.format(class_report))
            i_text_file.close()
            print('Saved single rotation test results to {}'.format(
                i_results_file))

            # Saving accuracy diagonals to file (one row per angle; the
            # trailing newline keeps rows from running together)
            glob_text_file.write('{}\t{}\t{}\n'.format(
                i, rot_angle, cm.diagonal()).replace('[', '').replace(']', ''))

        glob_text_file.close()
        print('Saved test results to {}'.format(results_file))

        # Model's accuracies
        acc_model_list.append(acc_rot_list)
        loss_model_list.append(loss_rot_list)
        out_dict[mod_i + '_accuracy'] = np.array(acc_rot_list, dtype='float32')
        out_dict[mod_i + '_loss'] = np.array(loss_rot_list, dtype='float32')
        print('Accuracies for {}: {}'.format(mod_i, acc_rot_list))

    print('\nRotations, accuracies and losses for all')
    print_dict(out_dict)

    if run_fixed_rotation:
        # Save test information to pickle file
        head_dir = os.path.split(model_dir_struct.main_dir)[0]
        model_names = '_'.join(model_list).replace(" ", "")

        rot_seq = rot_angle_list[0]
        rot_names = '%s' % rot_seq
        if len(rot_angle_list) > 1:
            rot_seq = (rot_angle_list[0], len(rot_angle_list) - 2,
                       rot_angle_list[-1])
            rot_names = '_'.join(map(str, rot_seq)).replace(" ", "")

        # Prefix
        pprefix = 'rot_' + i_results_prefix + '_test_' + model_names + "_" + rot_names

        # Pickle file
        pklname = pprefix + '.pkl'
        filename = os.path.join(head_dir, pklname)
        with open(filename, 'wb') as file_pi:
            pickle.dump(out_dict, file_pi)
        print("\nSaved rotation test to disk: {}\n".format(filename))

        # Plot rotation metrics
        plot_rotation_metrics(out_dict, ['Accuracy', 'Loss'], pprefix,
                              head_dir)
Example #20
def compute_confusion_matrix(gt,
                             pred,
                             classes,
                             class_names,
                             user_producer=True,
                             normalize=False,
                             axis=1,
                             plot=True,
                             title=None):
    """Compute the confusion matrix 
    
    arguments
    ---------
        gt: numpy.ndarray
            one-hot lables of patches
            shape = (n_patches, patch_size_padded, patch_size_padded, n_classes)    
        pred: numpy.ndarray
            probability maps of classes of patches
            shape = (n_patches, patch_size_padded, patch_size_padded, n_classes)  
        user_producer: boolean
            if true: the user, producer, and total accuracy will be calculated
        normalize: boolean
            default=False
        axis: int
            one of 0 or 1. Default=1
            0 for division by column total and 1 for division by row total
            i.e. thus in the TP-cells if axis=0:User's acc, if axis=1:Producer's acc.
        plot: boolean
            if true: the confusion matrix will be plotted. default is True. 
        title: string
            title of the plot. default = None
        cmap: matplotlib color map
            cmap of the plot. default = plt.cm.Blues
            
    calls:
    ------
        compute_user_producer_acc()
        plot_confusion_matrix()

    returns
    -------
        cm: numpy.ndarray
            confuion matrix of shape (n_classes, nclasses)
    if plot=True
        plot: fig
            plot of the confusion amtrix
    """

    y_true = np.zeros(gt.shape[:3], dtype=np.uint8)
    y_pred = np.zeros(pred.shape[:3], dtype=np.uint8)
    for i in range(gt.shape[0]):
        y_true[i] = np.argmax(gt[i], axis=2)
        y_pred[i] = np.argmax(pred[i], axis=2)

    # Compute confusion matrix
    cm = confusion_matrix(y_true.flatten(), y_pred.flatten(), labels=classes)

    if normalize:
        # Broadcast the totals along the matching dimension: rows for
        # axis=1, columns for axis=0.
        totals = cm.sum(axis=axis).astype('float')
        cm = (cm.astype('float') / totals[:, np.newaxis] if axis == 1
              else cm.astype('float') / totals[np.newaxis, :])

    if user_producer:
        if normalize:
            print("user and producer accuracy can only be calculated for "
                  "un-normalized confusion matrices")
        else:
            cm = compute_user_producer_acc(cm)
            class_names.append('accuracy')

    if plot:
        plot_confusion_matrix(cm, class_names, normalize, title)

    return cm
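A hypothetical usage sketch with synthetic patches (class names invented; user_producer=False and plot=False so no project helpers are needed):

import numpy as np

n_patches, patch, n_classes = 2, 4, 3
rng = np.random.default_rng(0)
# One-hot ground truth and random probability maps, shape (n, h, w, c).
gt = np.eye(n_classes)[rng.integers(0, n_classes, (n_patches, patch, patch))]
pred = rng.random((n_patches, patch, patch, n_classes))
cm = compute_confusion_matrix(gt, pred,
                              classes=np.arange(n_classes),
                              class_names=['water', 'forest', 'urban'],
                              user_producer=False, plot=False)
print(cm)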
Example #21
                os.path.join(
                    C.OUTPUT_DIR,
                    "{}SleepRNN_{}.pth".format("full_", model.details)))

            # Save results and confusion matrix in case of time-out
            with open(
                    C.OUTPUT_DIR +
                    "{}results_{}.csv".format("full_", model.details),
                    "w") as f:
                f.write("true,pred\n")
                for r in valid_results:
                    f.write("{},{}\n".format(r[0], r[1]))

            class_names = ['0', '1', '2', '3', '4']
            plot_confusion_matrix(valid_results, class_names, "full_",
                                  model.details)

    # plot learning curves
    plot_learning_curves(train_losses, valid_losses, train_accuracies,
                         valid_accuracies, "full_", model.details)
    """
	HYPERPARAMETER TUNING (COMMENT OUT IF NOT USING)
	"""
    # unit_range = [8, 16, 24, 32, 64]
    # layer_range = [2, 3, 4]
    # for fold in range(0, 3):
    # 	print("FOLD #{}".format(fold))
    # 	# Data loading
    # 	print("Train set loading")
    # 	train_path = C.SPLITS_DIR + 'cv_train{}.txt'.format(fold)
    # 	train_loader = import_to_dataloader(train_path)
Example #22
def main():
    global args
    parser = argparse.ArgumentParser(
        description="Convolutional NN Testing Script")
    parser.add_argument("-c",
                        "--config",
                        dest="configfile",
                        default='config.yml',
                        help="Path to yaml configuration file")
    parser.add_argument("-m",
                        "--modelnames",
                        dest="modelnames",
                        nargs="*",
                        default=None,
                        required=False,
                        help="Model name to test")
    parser.add_argument("-n",
                        "--num_samples",
                        dest="num_samples",
                        default=10,
                        type=int,
                        help="Number of test samples")
    parser.add_argument("-s",
                        "--seed",
                        dest="rngseed",
                        default=123,
                        type=int,
                        help="RNG Seed to test different samples")

    rot_parse = parser.add_mutually_exclusive_group()
    rot_parse.add_argument("-r",
                           "--rand_rot_angle",
                           dest="rand_rot_angle",
                           default=0.,
                           type=float,
                           help="Random image rotation angle range [deg]")
    rot_parse.add_argument(
        "-f",
        "--fixed_rot_angle",
        dest="fixed_rot_angle",
        nargs=3,
        type=float,
        help="(low, high, spacing) fixed image rotation angle [deg]")

    args = parser.parse_args()

    # Get requested sample size
    num_samples = args.num_samples

    # Determine which rotation to apply
    run_fixed_rotation = False
    i_results_prefix = 'random'
    rot_angle_list = [args.rand_rot_angle]
    rot_comment = "Random rotation range (deg): [-{}, {}]".format(
        rot_angle_list[0], rot_angle_list[0])

    if args.fixed_rot_angle is not None:
        i_results_prefix = 'fixed'
        run_fixed_rotation = True
        ang_range = args.fixed_rot_angle
        rot_angle_list = np.arange(ang_range[0], ang_range[1], ang_range[2])
        rot_comment = "Fixed rotation angle(s) (deg): {}".format(
            rot_angle_list)

    # Get configuration file
    hconfig = ModelConfigurator(args.configfile)

    # Extract config parameters
    datapath = hconfig.datapath

    # Class names
    class_labels = hconfig.labels

    # Get requested models, if None, take config's list
    model_list = args.modelnames
    if model_list is None:
        model_list = hconfig.avail_models

    # Directory structures for data and model saving
    data_dir_struct = DataDirStruct(datapath)

    # Dictionary of test results
    out_dict = {}
    out_dict['theta'] = np.array(rot_angle_list, dtype='float32')

    # List of accuracies, losses, probs for each model
    acc_model_list = []
    loss_model_list = []
    prob_model_list = []

    # Loop over requested models
    for mod_i in model_list:

        mod_i = mod_i.strip()
        print('\nTesting {} over following rotations: {} ...\n'.format(
            mod_i, rot_angle_list))

        # Set model config parameters
        hconfig.model_config(mod_i)

        # Extract model path from config
        model_dir_struct = ModelDirStruct(main_dir=hconfig.model_outpath,
                                          test_model=True)

        ## Load model to test
        # Load pretrained model from file
        with open(model_dir_struct.model_file, 'r') as json_file:
            trained_model_json = json_file.read()
        trained_model = model_from_json(trained_model_json, custom_layer_dict)

        # Load weights into model
        trained_model.load_weights(model_dir_struct.weights_file)
        print("Loaded model from disk")

        # Compile trained model
        trained_model.compile(loss='categorical_crossentropy',
                              optimizer='rmsprop',
                              metrics=['accuracy'])

        # List of accuracies for each rotation
        prob_rot_list = []
        acc_rot_list = []
        loss_rot_list = []
        # Run over rotation angles in list,
        # or just single value used for random range
        for i, rot_angle in enumerate(rot_angle_list):

            print('On {} angle {}'.format(i_results_prefix, rot_angle))

            # Choose same batch
            np.random.seed(args.rngseed)

            test_prefix = 'test_%s_rot_%.0f' % (i_results_prefix, rot_angle)

            # Testing generator
            test_gen = test_img_generator(dir_struct=data_dir_struct,
                                          config_struct=hconfig,
                                          fixed_rotation=run_fixed_rotation,
                                          rotation_angle=rot_angle)

            # Get Samples
            x_batch, y_truth = test_gen.next()

            # Evaluate scores
            scores = trained_model.evaluate(x_batch, y_truth, verbose=1)
            print("Test %s: %.2f%%" %
                  (trained_model.metrics_names[1], scores[1] * 100))
            # Predict classification
            Y_pred = trained_model.predict(x_batch)
            y_predict = np.argmax(Y_pred, axis=1)

            # Confusion matrix
            print('Confusion Matrix')
            cm = confusion_matrix(np.argmax(y_truth, axis=1), y_predict)
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
            print(cm)
            plot_confusion_matrix(cm=cm,
                                  classes=class_labels,
                                  outname='cm_%s' % test_prefix,
                                  model_dir_struct=model_dir_struct)

            # Classification report
            print('Classification Report')
            class_report = classification_report(np.argmax(y_truth, axis=1),
                                                 y_predict,
                                                 target_names=class_labels)
            print(class_report)

            # Mean accuracy for batch
            # Save each rotation loss & accuracy
            loss_rot_list.append(scores[0])
            acc_rot_list.append(scores[1])
            # Mean classification probability for truth class
            mean_prob = np.sum(Y_pred * y_truth) / num_samples
            # Save each rotation loss & accuracy
            prob_rot_list.append(mean_prob)

        # Model's accuracies
        acc_model_list.append(acc_rot_list)
        loss_model_list.append(loss_rot_list)
        prob_model_list.append(prob_rot_list)
        out_dict[mod_i + '_accuracy'] = np.array(acc_rot_list, dtype='float32')
        out_dict[mod_i + '_loss'] = np.array(loss_rot_list, dtype='float32')
        out_dict[mod_i + '_probability'] = np.array(prob_rot_list,
                                                    dtype='float32')
        print('Accuracies for {}: {}'.format(mod_i, acc_rot_list))
        print('Mean Accuracy for {}: {}'.format(
            mod_i, np.mean(np.array(acc_rot_list))))
        print('StdD Accuracy for {}: {}'.format(
            mod_i, np.std(np.array(acc_rot_list))))

    print('\nRotations and accuracies for all')
    print_dict(out_dict)

    print('Saved some figures in {}'.format(model_dir_struct.plots_dir))

    if run_fixed_rotation:
        # Save test information to pickle file
        head_dir = os.path.split(model_dir_struct.main_dir)[0]
        model_names = '_'.join(model_list).replace(" ", "")

        rot_seq = rot_angle_list[0]
        rot_names = '%s' % rot_seq
        if len(rot_angle_list) > 1:
            rot_seq = (rot_angle_list[0], len(rot_angle_list) - 2,
                       rot_angle_list[-1])
            rot_names = '_'.join(map(str, rot_seq)).replace(" ", "")

        # Prefix
        pprefix = 'rot_' + i_results_prefix + \
                  '_batch_' + str(num_samples) + '_test_' + \
                  model_names + "_" + rot_names

        # Pickle file
        pklname = pprefix + '.pkl'
        filename = os.path.join(head_dir, pklname)
        with open(filename, 'wb') as file_pi:
            pickle.dump(out_dict, file_pi)
        print("\nSaved rotation test to disk: {}\n".format(filename))

        # Plot rotation metrics
        plot_rotation_metrics(out_dict, ['Accuracy', 'Loss', 'Probability'],
                              pprefix, head_dir)
Example #23
def model_performance(Xtrain,
                      Xtest,
                      Ytrain,
                      Ytest,
                      k=5,
                      randseed=545510477,
                      analysis_type='sepsis1',
                      balanced_class_weight=True):
    '''
    :param Xtrain, Xtest, Ytrain, Ytest: Train and test data
    :param k: k for K-fold cross-validation
    :param randseed: seed for randomizer
    :param analysis_type: String for type of analysis to be used as filename
    :return: None
    '''

    print("Model performance start")

    # Grid-search hyperparameter optimization
    # Create regularization hyperparameter space
    C = np.power(2, np.arange(0, 20, 2)) * 0.1
    logr = LogisticRegression(
        random_state=randseed,
        max_iter=1000  # Use this if the solver doesn't converge; increases processing time
    )
    # Parameters to test for hyperparameter optimization
    if balanced_class_weight:
        class_weight_param = 'balanced'
    else:
        class_weight_param = None

    param_grid = {
        # 'pca__n_components': [2, 4, 6, 9], # comment out line if all features to be used
        'logr__C': C,
        'logr__penalty': ['l1', 'l2'],
        # 'logr__solver': ['newton-cg', 'lbfgs', 'sag'],  # for l2 penalty
        # 'logr__solver': ['liblinear','saga'], # for l1 penalty
        'logr__solver': ['newton-cg', 'lbfgs', 'liblinear', 'saga'],
        'logr__class_weight': [class_weight_param]
    }

    # Pipeline to optimize PCA and Logistic Regression parameters
    pca = PCA()
    pipe = Pipeline(steps=[('pca', pca), ('logr', logr)])
    clf = GridSearchCV(pipe,
                       param_grid,
                       cv=5,
                       scoring='roc_auc',
                       error_score=0.0,
                       verbose=0)
    print("   GridSearchCV hyperparameter optimization start")
    clf.fit(Xtrain, Ytrain)
    print("   GridSearchCV hyperparameter optimization end")
    modelFolder = '../../output/models'
    distutils.dir_util.mkpath(modelFolder)
    filename = modelFolder + '/' + analysis_type + '_model.sav'

    # Save best model as file
    with open(filename, 'wb') as f:
        pickle.dump(clf, f)

    # Using best performing parameters for LR classifier
    acc, auc_ = get_acc_auc_kfold(clf.best_estimator_, Xtrain, Ytrain, k=k)

    print("______________________________________________")
    print(("Classifier: Logistic Regression"))
    print("Best parameter (CV score=%0.3f):" % clf.best_score_)
    print(clf.best_params_)

    print(("Average Accuracy in KFold CV: %0.4f" % acc))
    print(("Average AUC in KFold CV:      %0.4f" % auc_))

    # Compute ROC curve and ROC area
    fpr, tpr, _ = roc_curve(Ytest, clf.predict_proba(Xtest)[:, 1])
    roc_auc = auc(fpr, tpr)

    # Check model performance on test set
    Ytest_pred = clf.best_estimator_.predict(Xtest)
    acc = accuracy_score(Ytest, Ytest_pred)
    auc_ = roc_auc_score(Ytest, Ytest_pred)

    print(("Accuracy in Test set:         %0.4f" % acc))
    print(("AUC in Test set:              %0.4f" % auc_))
    print("")
    # print (Ytest_pred)

    class_names = ['Control', 'Cases']

    # Compute confusion matrix
    cnf_matrix = confusion_matrix(Ytest, Ytest_pred)
    np.set_printoptions(precision=2)

    # Plot non-normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix,
                          classes=class_names,
                          analysis_type=analysis_type,
                          normalize=False,
                          title='Confusion matrix, without normalization')
    print("______________________________________________")

    return fpr, tpr, roc_auc
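A hypothetical smoke test with synthetic data; it assumes the module's own imports plus the project helpers get_acc_auc_kfold and plot_confusion_matrix are in scope:

import numpy as np
from sklearn.model_selection import train_test_split

# Random 9-feature binary-outcome data, purely illustrative.
X = np.random.randn(300, 9)
y = np.random.randint(0, 2, 300)
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.3, random_state=545510477)
fpr, tpr, roc_auc = model_performance(Xtr, Xte, ytr, yte, analysis_type='sepsis1')
print('Test-set ROC AUC: %0.3f' % roc_auc)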
Example #24
def evaluate(segmentation_module, loader, cfg, gpu, activations, num_class,
             patch_size, patch_size_padded, class_names, channels, index_test,
             visualize, results_dir, arch_encoder):
    acc_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    acc_meter_patch = AverageMeter()
    intersection_meter_patch = AverageMeter()
    union_meter_patch = AverageMeter()
    time_meter = AverageMeter()

    # initialise confusion matrices
    conf_matrix = np.zeros((num_class, num_class))
    conf_matrix_patch = np.zeros((num_class, num_class))
    # initialise arrays for the UMAP activations (one 8x8 = 64-value patch per test image)
    area_activations_mean = np.zeros((len(index_test), 32 // 4 * 32 // 4))
    area_activations_max = np.zeros((len(index_test), 32 // 4 * 32 // 4))
    area_cl = np.zeros((len(index_test), ), dtype=int)  # np.int is removed in NumPy >= 1.24
    area_loc = np.zeros((len(index_test), 3), dtype=int)
    j = 0

    segmentation_module.eval()

    pbar = tqdm(total=len(loader))
    for batch_data in loader:

        # process data
        batch_data = batch_data[0]
        seg_label = as_numpy(batch_data['seg_label'][0])
        img_resized_list = batch_data['img_data']

        torch.cuda.synchronize()
        tic = time.perf_counter()
        with torch.no_grad():
            segSize = (seg_label.shape[0], seg_label.shape[1])
            scores = torch.zeros(1, num_class, segSize[0], segSize[1])
            scores = async_copy_to(scores, gpu)

            for img in img_resized_list:
                feed_dict = batch_data.copy()
                feed_dict['img_data'] = img
                del feed_dict['img_ori']
                del feed_dict['info']
                feed_dict = async_copy_to(feed_dict, gpu)

                # forward pass
                scores_tmp = segmentation_module(feed_dict, segSize=segSize)
                scores = scores + scores_tmp

            _, pred = torch.max(scores, dim=1)
            pred = as_numpy(pred.squeeze(0).cpu())

        torch.cuda.synchronize()
        time_meter.update(time.perf_counter() - tic)

        # calculate accuracy
        acc, pix = accuracy(pred, seg_label)
        acc_patch, pix_patch = accuracy(
            pred[patch_size:2 * patch_size, patch_size:2 * patch_size],
            seg_label[patch_size:2 * patch_size, patch_size:2 * patch_size])

        intersection, union = intersectionAndUnion(pred, seg_label, num_class)
        intersection_patch, union_patch = intersectionAndUnion(
            pred[patch_size:2 * patch_size, patch_size:2 * patch_size],
            seg_label[patch_size:2 * patch_size,
                      patch_size:2 * patch_size], num_class)

        acc_meter.update(acc, pix)
        intersection_meter.update(intersection)
        union_meter.update(union)
        acc_meter_patch.update(acc_patch, pix_patch)
        intersection_meter_patch.update(intersection_patch)
        union_meter_patch.update(union_patch)

        conf_matrix = updateConfusionMatrix(conf_matrix, pred, seg_label)

        # update conf matrix patch
        conf_matrix_patch = updateConfusionMatrix(
            conf_matrix_patch, pred[patch_size:2 * patch_size,
                                    patch_size:2 * patch_size],
            seg_label[patch_size:2 * patch_size, patch_size:2 * patch_size])

        # visualization
        if visualize:
            info = batch_data['info']
            img_name = info.split('/')[-1]
            #np.save(os.path.join(test_dir, 'result', img_name), pred)
            np.save(os.path.join(results_dir, img_name), pred)


# =============================================================================
#         if visualize:
#             visualize_result(
#                 (batch_data['img_ori'], seg_label, batch_data['info']),
#                 pred,
#                 os.path.join(test_dir, 'result')
#             )
# =============================================================================

        pbar.update(1)

        # collect activations for UMAP
        row, col, cl = find_constant_area(
            seg_label, 32, patch_size_padded
        )  # TODO: patch_size_padded must be patch_size if only the inner patch is checked
        if row != 999999:  # 999999 is the sentinel for "no constant area found"
            activ_mean = np.mean(
                as_numpy(activations.features.squeeze(0).cpu()),
                axis=0,
                keepdims=True)[:, row // 4:row // 4 + 8,
                               col // 4:col // 4 + 8].reshape(1, 8 * 8)
            activ_max = np.max(as_numpy(activations.features.squeeze(0).cpu()),
                               axis=0,
                               keepdims=True)[:, row // 4:row // 4 + 8,
                                              col // 4:col // 4 + 8].reshape(
                                                  1, 8 * 8)

            area_activations_mean[j] = activ_mean
            area_activations_max[j] = activ_max
            area_cl[j] = cl
            area_loc[j, 0] = row
            area_loc[j, 1] = col
            area_loc[j, 2] = int(batch_data['info'].split('.')[0])
            j += 1
        else:
            area_activations_mean[j] = np.full((1, 64),
                                               np.nan,
                                               dtype=np.float32)
            area_activations_max[j] = np.full((1, 64),
                                              np.nan,
                                              dtype=np.float32)
            area_cl[j] = 999999
            area_loc[j, 0] = row
            area_loc[j, 1] = col
            area_loc[j, 2] = int(batch_data['info'].split('.')[0])
            j += 1

        #activ = np.mean(as_numpy(activations.features.squeeze(0).cpu()),axis=0)[row//4:row//4+8, col//4:col//4+8]
        #activ = as_numpy(activations.features.squeeze(0).cpu())

    # summary
    iou = intersection_meter.sum / (union_meter.sum + 1e-10)
    for i, _iou in enumerate(iou):
        print('class [{}], IoU: {:.4f}'.format(i, _iou))
    iou_patch = intersection_meter_patch.sum / (union_meter_patch.sum + 1e-10)
    for i, _iou_patch in enumerate(iou_patch):
        print('class [{}], patch IoU: {:.4f}'.format(i, _iou_patch))

    print('[Eval Summary]:')
    print(
        'Mean IoU: {:.4f}, Accuracy: {:.2f}%, Inference Time: {:.4f}s'.format(
            iou.mean(),
            acc_meter.average() * 100, time_meter.average()))
    print(
        'Patch: Mean IoU: {:.4f}, Accuracy: {:.2f}%, Inference Time: {:.4f}s'.
        format(iou_patch.mean(),
               acc_meter_patch.average() * 100, time_meter.average()))

    print('Confusion matrix:')
    plot_confusion_matrix(conf_matrix,
                          class_names,
                          normalize=True,
                          title='confusion matrix patch+padding',
                          cmap=plt.cm.Blues)
    plot_confusion_matrix(conf_matrix_patch,
                          class_names,
                          normalize=True,
                          title='confusion matrix patch',
                          cmap=plt.cm.Blues)

    np.save(os.path.join(results_dir, 'confmatrix.npy'), conf_matrix)
    np.save(os.path.join(results_dir, 'confmatrix_patch.npy'),
            conf_matrix_patch)
    # save the UMAP activation arrays
    np.save(os.path.join(results_dir, 'activations_mean.npy'),
            area_activations_mean)
    np.save(os.path.join(results_dir, 'activations_max.npy'),
            area_activations_max)
    np.save(os.path.join(results_dir, 'activations_labels.npy'), area_cl)
    np.save(os.path.join(results_dir, 'activations_loc.npy'), area_loc)

    mcc = compute_mcc(conf_matrix)
    mcc_patch = compute_mcc(conf_matrix_patch)
    # save summary of results in csv
    summary = pd.DataFrame([[
        arch_encoder, patch_size, channels,
        acc_meter.average(),
        acc_meter_patch.average(),
        iou.mean(),
        iou_patch.mean(), mcc, mcc_patch
    ]],
                           columns=[
                               'model', 'patch_size', 'channels',
                               'test_accuracy', 'test_accuracy_patch',
                               'meanIoU', 'meanIoU_patch', 'mcc', 'mcc_patch'
                           ])
    summary.to_csv(os.path.join(results_dir, 'summary_results.csv'))
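
compute_mcc above is another project-local helper. Assuming it implements the standard multiclass generalization of the Matthews correlation coefficient (Gorodkin's R_K) directly from the confusion matrix, a sketch would be:

import numpy as np

def compute_mcc(conf_matrix):
    # Multiclass MCC from a confusion matrix, assuming true classes in rows
    # and predicted classes in columns
    cm = conf_matrix.astype(np.float64)
    t = cm.sum(axis=1)  # samples per true class
    p = cm.sum(axis=0)  # samples per predicted class
    c = np.trace(cm)    # correctly classified samples
    s = cm.sum()        # total samples
    denom = np.sqrt((s ** 2 - (p ** 2).sum()) * (s ** 2 - (t ** 2).sum()))
    return (c * s - (t * p).sum()) / denom if denom > 0 else 0.0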
Example #25
def main(args):
    writer = SummaryWriter(comment=args.exp_name)
    os.makedirs(args.weights, exist_ok=True)

    train_transform = iaa.Sequential([
        iaa.Resize((args.size, args.size)),
        iaa.Fliplr(p=0.5),
        iaa.Flipud(p=0.5),
        iaa.Rotate(rotate=(-180, 180)),
        iaa.AdditivePoissonNoise(lam=(0, 10.0)),
        iaa.GammaContrast(gamma=(0.5, 1.5)),
        iaa.GaussianBlur(sigma=(0.0, 0.8)),
        iaa.Sometimes(0.25, iaa.CoarseDropout(p=(0, 0.03), size_percent=(0, 0.05))),
    ])

    valid_transform = iaa.Sequential([
        iaa.Resize((args.size, args.size)),
    ])

    train_dataset = YAMLClassificationDataset(dataset=args.in_ds, transform=train_transform, split=['training'],
                                              normalization=normalization_isic)
    valid_dataset = YAMLClassificationDataset(dataset=args.in_ds, transform=valid_transform, split=['validation'],
                                              normalization=normalization_isic)
    test_dataset = YAMLClassificationDataset(dataset=args.in_ds, transform=valid_transform, split=['test'],
                                             normalization=normalization_isic)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.workers,
                                  drop_last=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers,
                                  drop_last=False)
    test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers,
                                 drop_last=False)

    dataloaders = {"train": train_dataloader, "valid": valid_dataloader, 'test': test_dataloader}
    device = torch.device('cpu' if not args.gpu else 'cuda')

    # Model, loss, optimizer
    print('Loading model...')
    model = SkinLesionModel(args.model)

    if args.onnx_export:
        # export onnx
        dummy_input = torch.ones(4, 3, args.size, args.size, device='cpu')
        model.train()
        torch.onnx.export(model, dummy_input, f'{args.model}.onnx', verbose=True, export_params=True,
                          training=torch.onnx.TrainingMode.TRAINING,
                          opset_version=12,
                          do_constant_folding=False,
                          input_names=['input'],
                          output_names=['output'],
                          dynamic_axes={'input': {0: 'batch_size'},  # variable length axes
                                        'output': {0: 'batch_size'}})
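        # A quick sanity check of the exported graph could use onnxruntime
        # (not part of this script; sketch only):
        #   import onnxruntime as ort
        #   sess = ort.InferenceSession(f'{args.model}.onnx')
        #   out = sess.run(['output'], {'input': dummy_input.numpy()})[0]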

    # Change last linear layer
    model.fc = torch.nn.Linear(model.fc.in_features, args.num_classes)

    if torch.cuda.device_count() > 1 and args.gpu:
        model = torch.nn.DataParallel(model, device_ids=np.where(np.array(args.gpu) == 1)[0])
    print(f'Move model to {device}')
    model = model.to(device)

    # loss_fn = nn.modules.loss.CrossEntropyLoss(weight=torch.from_numpy(get_weights()).to(device))
    loss_fn = nn.modules.loss.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    if args.ckpts is None:
        best_valid_acc = 0.
        load_epoch = 0
    else:
        checkpoint = torch.load(args.ckpts)
        model.load_state_dict(checkpoint['state_dict'])
        load_epoch = checkpoint['epoch']
        optimizer.load_state_dict(checkpoint['optimizer'])
        best_valid_acc = checkpoint['best_metric']
        print("Loaded checkpoint epoch ", load_epoch, " with best metric ", best_valid_acc)

    train_acc = 0
    valid_acc = 0
    print('Starting training')
    for epoch in range(load_epoch, args.epochs):
        loss_train = []
        loss_valid = []
        for phase in ["train", "valid"]:
            if phase == "train":
                model.train()
            else:
                model.eval()

            correct = 0
            total = 0
            pred_list = []
            gt_list = []
            with tqdm(desc=f"{phase} {epoch}/{args.epochs}", unit="batch", total=len(dataloaders[phase]),
                      file=sys.stdout) as pbar:
                for i, (x, gt, names) in enumerate(dataloaders[phase]):
                    # torchvision.utils.save_image(x, f'batch_{i}.jpg')
                    x, gt = x.to(device), gt.to(device)
                    with torch.set_grad_enabled(phase == "train"):
                        pred = model(x)
                        loss = loss_fn(pred, gt)
                        loss_item = loss.item()
                        pred = torch.nn.functional.softmax(pred, dim=1)

                        pred_np = pred.detach().cpu().numpy()
                        pred_np = pred_np.argmax(axis=1)
                        pred_list.extend(pred_np)
                        gt_np = gt.detach().cpu().numpy()
                        gt_list.extend(gt_np)

                        correct += (pred_np == gt_np).sum()
                        total += pred_np.shape[0]

                        if phase == "train":
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()
                            loss_train.append(loss_item)

                        elif phase == "valid":
                            loss_valid.append(loss_item)

                        pbar.set_postfix(loss=loss_item, accuracy=correct / total)
                        pbar.update()

            accuracy = correct / total
            # sklearn's confusion_matrix expects (y_true, y_pred) in that order
            cm = confusion_matrix(np.array(gt_list).reshape(-1), np.array(pred_list).reshape(-1))
            print(f'{phase} {epoch}/{args.epochs}: accuracy={accuracy:.4f}')
            fig = plt.figure(figsize=(args.num_classes, args.num_classes))
            plot_confusion_matrix(cm, list(range(args.num_classes)))  # one label per class, rather than a hardcoded 8
            writer.add_figure(f'{phase}/confusion', fig, epoch)

            if phase == 'train':
                train_acc = accuracy
                writer.add_scalar(f'{phase}/loss', np.mean(loss_train), epoch)
                writer.add_scalar(f'{phase}/accuracy', train_acc, epoch)

            else:
                valid_acc = accuracy
                writer.add_scalar(f'{phase}/loss', np.mean(loss_valid), epoch)
                writer.add_scalar(f'{phase}/accuracy', valid_acc, epoch)

        if valid_acc > best_valid_acc:
            best_valid_acc = valid_acc
            state = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'best_metric': best_valid_acc
            }
            torch.save(state, os.path.join(args.weights, f'{args.model}.pth'))
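
To evaluate with the checkpoint saved above, the stored state dict can be restored along these lines (a sketch reusing the argument names from this script):

# Restore the best checkpoint for inference
checkpoint = torch.load(os.path.join(args.weights, f'{args.model}.pth'),
                        map_location=device)
model.load_state_dict(checkpoint['state_dict'])
model.eval()
print(f"Restored epoch {checkpoint['epoch']}, "
      f"best valid accuracy {checkpoint['best_metric']:.4f}")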
Example #26
def plot_confusion_matrix(cm, labels, split):
    fig, ax = plots.plot_confusion_matrix(cm, labels, split)
    fig.savefig(evaluation_dir + '/confusion_matrix_%s.png' % split)
Example #28
    valid_losses.append(valid_loss)

    train_accuracies.append(train_accuracy)
    valid_accuracies.append(valid_accuracy)

    is_best = valid_accuracy > best_val_acc  # let's keep the model that has the best accuracy, but you can also use another metric.
    if is_best:
        best_val_acc = valid_accuracy
        torch.save(model, os.path.join(PATH_OUTPUT, "MyVariableRNN.pth"))

best_model = torch.load(os.path.join(PATH_OUTPUT, "MyVariableRNN.pth"))
plot_learning_curves(train_losses, valid_losses, train_accuracies,
                     valid_accuracies)

class_names = ['Alive', 'Dead']
plot_confusion_matrix(valid_results, class_names)


# TODO: Complete predict_mortality
def predict_mortality(model, device, data_loader):
    model.eval()
    # TODO: Evaluate the data (from data_loader) using model,
    # TODO: return a List of probabilities
    results = []
    # https://piazza.com/class/jjjilbkqk8m1r4?cid=1103
    with torch.no_grad():
        for i, (input, target) in enumerate(data_loader):
            if isinstance(input, tuple):
                # move tensor elements of the packed input to the device
                input = tuple([
                    e.to(device) if type(e) == torch.Tensor else e
                    for e in input
                ])
            else:
                input = input.to(device)
            # completion sketch (the snippet is truncated here in the source):
            # forward pass, softmax, and collecting P(class=1), i.e. 'Dead'
            output = model(input)
            probs = torch.nn.functional.softmax(output, dim=1)[:, 1]
            results.extend(probs.detach().cpu().numpy().tolist())
    return results

Example #29
AI_Enhancer_probs = A_I_Enhancer_classifier.predict_proba(A_I_Enhancer_X_test)
AI_Enhancer_probs = AI_Enhancer_probs[:, 1]
y_AI_Enhancer_pred_labels = A_I_Enhancer_encoder.inverse_transform(
    y_AI_Enhancer_pred)
y_AI_Enhancer_test_labels = A_I_Enhancer_encoder.inverse_transform(
    A_I_Enhancer_y_test)

cm = confusion_matrix(y_AI_Enhancer_test_labels, y_AI_Enhancer_pred_labels)
print('Confusion Matrix:\n')
print(cm)
print('Accuracy score: ' +
      str(accuracy_score(A_I_Enhancer_y_test, y_AI_Enhancer_pred)))
print('F1 score: ' + str(f1_score(A_I_Enhancer_y_test, y_AI_Enhancer_pred)) +
      '\n')
plot_confusion_matrix(cm,
                      filename='A_I_Enhancer_RF_cm.png',
                      target_names=['A-E', 'I-E'],
                      title='Active Inactive Enhancer Random Forest')
fpr, tpr, roc_threshold = roc_curve(A_I_Enhancer_y_test, AI_Enhancer_probs)
precision, recall, precision_thresholds = precision_recall_curve(
    A_I_Enhancer_y_test, AI_Enhancer_probs)
roc_auc = auc(fpr, tpr)
pr_auc = auc(recall, precision)
print('AUROC: ' + str(roc_auc))
print('AUPRC: ' + str(pr_auc))
plotRoc_curve(fpr, tpr, roc_auc, 'AI_Enhancer_RF_roc.png',
              'ROC curve Active Inactive Enhancer Random Forest')
plotPrecisionRecall_curve(precision, recall, pr_auc, 'AI_Enhancer_RF_pr.png',
                          'P-R curve Active Inactive Enhancer Random Forest')

#Training and testing Active Inactive Enhancer Neural Network
print('Training Active Inactive Enhancer Neural Network')
def A_Enh_prom_NeuralNetwork(X_train, y_train, balanced=False):

    if balanced:
        name = 'balanced'
    else:
        name = ''

    print('Training Active Enhancer Active Promoter Neural Network ' + name)
    keras_classifier = KerasClassifier(build_fn=create_model,
                                       input_units=0,
                                       hidden_layers=0,
                                       hidden_units=0)
    param_grid = [{
        'input_units': [X_train.shape[1]],
        'hidden_layers': [1, 2, 3],
        'hidden_units': [10, 20, 50],
        'batch_size': [1000],
        'epochs': [100]
    }]
    AEP_neural_network = GridSearchCV(estimator=keras_classifier,
                                      param_grid=param_grid,
                                      scoring='f1',
                                      n_jobs=-1,
                                      cv=3)
    AEP_neural_network = AEP_neural_network.fit(X_train, y_train)
    print('best neural network parameters are: \n')
    print(AEP_neural_network.best_params_)
    print('best F1 score (the grid-search scoring metric) on 3-fold cross validation: ' +
          str(AEP_neural_network.best_score_))

    y_A_Enh_Prom_pred = AEP_neural_network.predict(A_Enh_Prom_X_test)
    A_Enh_Prom_probs = AEP_neural_network.predict_proba(A_Enh_Prom_X_test)
    A_Enh_Prom_probs = A_Enh_Prom_probs[:, 1]
    y_A_Enh_Prom_pred_labels = A_Enh_Prom_encoder.inverse_transform(
        y_A_Enh_Prom_pred)
    y_A_Enh_Prom_test_labels = A_Enh_Prom_encoder.inverse_transform(
        A_Enh_Prom_y_test)

    cm = confusion_matrix(y_A_Enh_Prom_test_labels, y_A_Enh_Prom_pred_labels)
    print('Confusion Matrix:\n')
    print(cm)
    plot_confusion_matrix(
        cm,
        filename='A_Enh_Prom_NN_cm_' + name + '.png',
        target_names=['A-P', 'A-E'],
        title='Active Enhancer Active Promoter Neural Network ' + name)
    print('Accuracy score: ' +
          str(accuracy_score(A_Enh_Prom_y_test, y_A_Enh_Prom_pred)))
    print('F1 score: ' + str(f1_score(A_Enh_Prom_y_test, y_A_Enh_Prom_pred)) +
          '\n')
    fpr, tpr, roc_threshold = roc_curve(A_Enh_Prom_y_test, A_Enh_Prom_probs)
    precision, recall, precision_thresholds = precision_recall_curve(
        A_Enh_Prom_y_test, A_Enh_Prom_probs)
    roc_auc = auc(fpr, tpr)
    pr_auc = auc(recall, precision)
    print('AUROC: ' + str(roc_auc))
    print('AUPRC: ' + str(pr_auc))
    plotRoc_curve(
        fpr, tpr, roc_auc, 'A_Enh_Prom_NN_roc_' + name + '.png',
        'ROC curve Active Enhancer Active Promoter Neural Network ' + name)
    plotPrecisionRecall_curve(
        precision, recall, pr_auc, 'A_Enh_Prom_NN_pr_' + name + '.png',
        'P-R curve Active Enhancer Active Promoter Neural Network ' + name)
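
The create_model build function passed to KerasClassifier is not shown in this snippet. Given that the grid above searches input_units, hidden_layers and hidden_units, a plausible sketch (the activations, optimizer and loss are assumptions) is:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def create_model(input_units=0, hidden_layers=1, hidden_units=10):
    # Fully connected binary classifier sized by the grid-search parameters
    model = Sequential()
    model.add(Dense(hidden_units, activation='relu', input_dim=input_units))
    for _ in range(hidden_layers - 1):
        model.add(Dense(hidden_units, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model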
Example #31
File: ml.py  Project: weimingtom/kws_game
  # collect the accuracy log (do_print=False: written to the log file below, not printed)
  eval_log = eval_score.info_log(do_print=False)


  # --
  # info output

  # log to file
  if cfg['ml']['logging_enabled']:
    logging.info(eval_log)

  # print confusion matrix
  print("confusion matrix:\n{}\n".format(eval_score.cm))

  # plot confusion matrix
  plot_confusion_matrix(eval_score.cm, batch_archiv.classes, plot_path=path_coll.model_path, name='confusion_test')


  # --
  # evaluation on my set
  if batch_archiv.x_my is not None:

    print("\n--Evaluation on My Set:")

    # evaluation of model
    eval_score = nn_handler.eval_nn(eval_set='my', batch_archiv=batch_archiv, calc_cm=True, verbose=True)
    print("confusion matrix:\n{}\n".format(eval_score.cm))

    # plot confusion matrix
    plot_confusion_matrix(eval_score.cm, batch_archiv.classes, plot_path=path_coll.model_path, name='confusion_my')
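
Most of the plot_confusion_matrix variants above accept a normalize flag. By convention this rescales each row of the matrix (the true classes) to proportions before plotting; a small helper showing the usual computation:

import numpy as np

def normalize_confusion_matrix(cm):
    # Row-normalize so each true class sums to 1, guarding against empty rows
    cm = cm.astype('float')
    row_sums = cm.sum(axis=1, keepdims=True)
    return np.divide(cm, row_sums, out=np.zeros_like(cm), where=row_sums != 0)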