import os

import numpy as np
import pandas as pd
from sklearn.base import is_regressor

# The following WORC-internal imports are assumed from the surrounding
# package; the exact module paths may differ per WORC version.
import WORC.addexceptions as ae
import WORC.processing.label_processing as lp
import WORC.plotting.compute_CI as compute_CI
from WORC.classification import metrics


def plot_SVM(prediction, label_data, label_type, show_plots=False,
             alpha=0.95, ensemble=False, verbose=True,
             ensemble_scoring=None, output='stats', modus='singlelabel'):
    '''
    Plot the output of a single binary estimator, e.g. an SVM.

    Parameters
    ----------
    prediction: pandas dataframe or string, mandatory
        Output of the trainclassifier function, either a pandas dataframe
        or an HDF5 file.

    label_data: string, mandatory
        Contains the path referring to a .txt file containing the
        patient label(s) and value(s) to be used for learning. See
        the GitHub Wiki for the format.

    label_type: string, mandatory
        Name of the label to extract from the label data to test the
        estimator on.

    show_plots: Boolean, default False
        Determine whether matplotlib performance plots are made.

    alpha: float, default 0.95
        Significance level of the confidence intervals.

    ensemble: False, integer or 'Caruana'
        Determine whether an ensemble will be created. If so, either provide
        an integer to determine how many of the top performing classifiers
        should be in the ensemble, or use the string "Caruana" to use
        smart ensembling based on Caruana et al. 2004.

    verbose: boolean, default True
        Print intermediate messages.

    ensemble_scoring: string, default None
        Metric to be used for evaluating the ensemble. If None, the option
        set in the prediction object will be used.

    output: string, default 'stats'
        Determine which results are put out. If 'stats', the statistics of
        the estimator will be returned. If 'scores', the scores will be
        returned.

    modus: string, default 'singlelabel'
        Determine whether a singlelabel or multilabel classification was
        performed.

    Returns
    ----------
    Depending on the output parameter, the following outputs are returned:

    If output == 'stats':
    stats: dictionary
        Contains the confidence intervals of the performance metrics and
        the number of times each patient was classified correctly or
        incorrectly.

    If output == 'scores':
    y_truths: list
        Contains the true label for each object.
    y_scores: list
        Contains the score (e.g. posterior) for each object.
    y_predictions: list
        Contains the predicted label for each object.
    PIDs: list
        Contains the patient ID/name for each object.
    '''
    # Load the prediction object if it is an HDF5 file
    if type(prediction) is not pd.core.frame.DataFrame:
        if os.path.isfile(prediction):
            prediction = pd.read_hdf(prediction)
        else:
            raise ae.WORCIOError(('{} is not an existing file!').format(str(prediction)))

    # Select the estimator from the pandas dataframe to use
    keys = prediction.keys()
    SVMs = list()
    if label_type is None:
        label_type = keys[0]

    # Load the label data
    if type(label_data) is not dict:
        if os.path.isfile(label_data):
            if type(label_type) is not list:
                # Singlelabel: convert to list
                label_type = [[label_type]]
            label_data = lp.load_labels(label_data, label_type)

    patient_IDs = label_data['patient_IDs']
    labels = label_data['label']

    if type(label_type) is list:
        # FIXME: Support for multiple label types not supported yet.
        print('[WORC Warning] Support for multiple label types not '
              'supported yet. Taking first label for plot_SVM.')
        label_type = keys[0]

    # Extract the estimators, features and labels
    SVMs = prediction[label_type]['classifiers']
    regression = is_regressor(SVMs[0].best_estimator_)
    Y_test = prediction[label_type]['Y_test']
    X_test = prediction[label_type]['X_test']
    X_train = prediction[label_type]['X_train']
    Y_train = prediction[label_type]['Y_train']
    feature_labels = prediction[label_type]['feature_labels']

    # Create lists for performance measures
    sensitivity = list()
    specificity = list()
    precision = list()
    accuracy = list()
    auc = list()
    f1_score_list = list()
    patient_classification_list = dict()

    if output in ['scores', 'decision']:
        # Keep track of all ground truths and scores
        y_truths = list()
        y_scores = list()
        y_predictions = list()
        PIDs = list()

    # Loop over the test sets, which correspond with the cross-validation
    # iterations
    for i in range(0, len(Y_test)):
        print("\n")
        print(("Cross validation {} / {}.").format(str(i + 1), str(len(Y_test))))
        test_patient_IDs = prediction[label_type]['patient_ID_test'][i]
        train_patient_IDs = prediction[label_type]['patient_ID_train'][i]
        X_test_temp = X_test[i]
        X_train_temp = X_train[i]
        Y_train_temp = Y_train[i]
        Y_test_temp = Y_test[i]
        test_indices = list()

        # Check which patients are in the test set.
        for i_ID in test_patient_IDs:
            test_indices.append(np.where(patient_IDs == i_ID)[0][0])

            # Initiate counting how many times a patient is classified
            # correctly
            if i_ID not in patient_classification_list:
                patient_classification_list[i_ID] = dict()
                patient_classification_list[i_ID]['N_test'] = 0
                patient_classification_list[i_ID]['N_correct'] = 0
                patient_classification_list[i_ID]['N_wrong'] = 0

            patient_classification_list[i_ID]['N_test'] += 1

        # Extract ground truth
        y_truth = Y_test_temp

        # If requested, first let the SearchCV object create an ensemble
        if ensemble:
            # NOTE: Added for backwards compatibility
            if not hasattr(SVMs[i], 'cv_iter'):
                cv_iter = list(SVMs[i].cv.split(X_train_temp, Y_train_temp))
                SVMs[i].cv_iter = cv_iter

            # Create the ensemble
            X_train_temp = [(x, feature_labels) for x in X_train_temp]
            SVMs[i].create_ensemble(X_train_temp, Y_train_temp,
                                    method=ensemble, verbose=verbose,
                                    scoring=ensemble_scoring)

        # Create prediction: use the regression output directly as score,
        # or the posterior of the positive class for classification
        y_prediction = SVMs[i].predict(X_test_temp)
        if regression:
            y_score = y_prediction
        else:
            y_score = SVMs[i].predict_proba(X_test_temp)[:, 1]

        print("Truth: " + str(y_truth))
        print("Prediction: " + str(y_prediction))

        # Count whether each patient was classified correctly or not
        for i_truth, i_predict, i_test_ID in zip(y_truth, y_prediction,
                                                 test_patient_IDs):
            if modus == 'multilabel':
                success = (i_truth == i_predict).all()
            else:
                success = i_truth == i_predict

            if success:
                patient_classification_list[i_test_ID]['N_correct'] += 1
            else:
                patient_classification_list[i_test_ID]['N_wrong'] += 1

        if output in ['decision', 'scores']:
            # Output the posteriors
            y_scores.append(y_score)
            y_truths.append(y_truth)
            y_predictions.append(y_prediction)
            PIDs.append(test_patient_IDs)
        elif output == 'stats':
            # Compute statistics
            # Compute confusion matrix and use for sensitivity/specificity
            if modus == 'singlelabel':
                # Compute singlelabel performance metrics
                if not regression:
                    accuracy_temp, sensitivity_temp, specificity_temp,\
                        precision_temp, f1_score_temp, auc_temp =\
                        metrics.performance_singlelabel(y_truth,
                                                        y_prediction,
                                                        y_score,
                                                        regression)
                else:
                    r2score, MSE, coefICC, PearsonC, PearsonP, SpearmanC,\
                        SpearmanP =\
                        metrics.performance_singlelabel(y_truth,
                                                        y_prediction,
                                                        y_score,
                                                        regression)
            elif modus == 'multilabel':
                # Convert class objects to a single label per patient
                y_truth_temp = list()
                y_prediction_temp = list()
                for yt, yp in zip(y_truth, y_prediction):
                    label = np.where(yt == 1)
                    if len(label) > 1:
                        raise ae.WORCNotImplementedError(
                            'Multiclass classification evaluation is not supported in WORC.')

                    y_truth_temp.append(label[0][0])
                    label = np.where(yp == 1)
                    y_prediction_temp.append(label[0][0])

                y_truth = y_truth_temp
                y_prediction = y_prediction_temp

                # Compute multilabel performance metrics
                accuracy_temp, sensitivity_temp, specificity_temp,\
                    precision_temp, f1_score_temp, auc_temp =\
                    metrics.performance_multilabel(y_truth,
                                                   y_prediction,
                                                   y_score)
            else:
                raise ae.WORCKeyError('{} is not a valid modus!'.format(modus))

            # NOTE: the regression metrics computed above are currently not
            # aggregated; only aggregate the classification metrics
            if not regression:
                # Print AUC to keep you up to date
                print('AUC: ' + str(auc_temp))

                # Append performance to lists for all cross-validations
                accuracy.append(accuracy_temp)
                sensitivity.append(sensitivity_temp)
                specificity.append(specificity_temp)
                auc.append(auc_temp)
                f1_score_list.append(f1_score_temp)
                precision.append(precision_temp)

    if output in ['scores', 'decision']:
        # Return the scores and true values of all patients
        return y_truths, y_scores, y_predictions, PIDs
    elif output == 'stats':
        # Compute statistics
        # Extract the sample sizes of the train and test sets
        N_1 = float(len(train_patient_IDs))
        N_2 = float(len(test_patient_IDs))

        # Compute the alpha confidence intervals; compute each interval once
        # and reuse it for both the stats object and the printed output
        stats = dict()
        stats["Accuracy 95%:"] = str(compute_CI.compute_confidence(accuracy, N_1, N_2, alpha))
        stats["AUC 95%:"] = str(compute_CI.compute_confidence(auc, N_1, N_2, alpha))
        stats["F1-score 95%:"] = str(compute_CI.compute_confidence(f1_score_list, N_1, N_2, alpha))
        stats["Precision 95%:"] = str(compute_CI.compute_confidence(precision, N_1, N_2, alpha))
        stats["Sensitivity 95%:"] = str(compute_CI.compute_confidence(sensitivity, N_1, N_2, alpha))
        stats["Specificity 95%:"] = str(compute_CI.compute_confidence(specificity, N_1, N_2, alpha))

        for metric, confidence_interval in stats.items():
            print(metric + ' ' + confidence_interval)

        # Extract statistics on how often patients were classified correctly
        alwaysright = dict()
        alwayswrong = dict()
        percentages = dict()
        for i_ID in patient_classification_list:
            percentage_right = patient_classification_list[i_ID]['N_correct'] / float(patient_classification_list[i_ID]['N_test'])

            if i_ID in patient_IDs:
                label = labels[0][np.where(i_ID == patient_IDs)]
            else:
                # Multiple instances of one patient
                label = labels[0][np.where(i_ID.split('_')[0] == patient_IDs)]

            label = label[0][0]
            # NOTE: round after multiplying, so e.g. 0.875 becomes 87.5%
            percentages[i_ID] = str(label) + ': ' + str(round(percentage_right * 100, 2)) + '%'
            if percentage_right == 1.0:
                alwaysright[i_ID] = label
                print(("Always Right: {}, label {}").format(i_ID, label))
            elif percentage_right == 0:
                alwayswrong[i_ID] = label
                print(("Always Wrong: {}, label {}").format(i_ID, label))

        stats["Always right"] = alwaysright
        stats["Always wrong"] = alwayswrong
        stats['Percentages'] = percentages

        if show_plots:
            # Plot some characteristics in boxplots
            import matplotlib.pyplot as plt

            # One boxplot per performance metric over the cross-validation
            # iterations; the five blocks are identical apart from the data
            # and label, so loop over them
            boxplots = [('Accuracy', accuracy),
                        ('AUC', auc),
                        ('Precision', precision),
                        ('Sensitivity', sensitivity),
                        ('Specificity', specificity)]
            for name, values in boxplots:
                plt.figure()
                plt.boxplot(values)
                plt.ylim([-0.05, 1.05])
                plt.ylabel(name)
                plt.tick_params(
                    axis='x',           # changes apply to the x-axis
                    which='both',       # both major and minor ticks are affected
                    bottom=False,       # ticks along the bottom edge are off
                    top=False,          # ticks along the top edge are off
                    labelbottom=False)  # labels along the bottom edge are off
                plt.tight_layout()
                plt.show()

        return stats
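
# Example usage of plot_SVM: a minimal, hypothetical sketch. The file names
# 'estimator.hdf5' and 'patient_labels.txt' and the label name 'label_1' are
# placeholders, not files shipped with WORC; in practice the HDF5 file is the
# output of the trainclassifier step.
def example_plot_svm():
    stats = plot_SVM(prediction='estimator.hdf5',
                     label_data='patient_labels.txt',
                     label_type='label_1',
                     alpha=0.95,
                     output='stats')
    for name, value in stats.items():
        print(name, value)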
def execute(self):
    # Check if minimal input is supplied
    if not self.FixedImage:
        message = "You need to supply a fixed image for registration."
        raise WORCexceptions.WORCNotImplementedError(message)

    if not self.MovingImage:
        message = "You need to supply a moving image for registration."
        raise WORCexceptions.WORCNotImplementedError(message)

    if len(self.ParameterMaps) == 0:
        message = "You need to supply at least one parameter map for registration."
        raise WORCexceptions.WORCNotImplementedError(message)

    # Set moving and fixed image sources
    self.source_data = dict()
    self.source_data['FixedImage'] = self.FixedImage
    self.source_data['MovingImage'] = self.MovingImage

    # Create a temporary directory to use
    tempdir = os.path.join(fastr.config.mounts['tmp'], 'WORC_Elastix')
    if not os.path.exists(tempdir):
        os.makedirs(tempdir)

    # Set the parameter map sources
    if type(self.ParameterMaps) is list:
        # Files, thus just provide them to the elastix node
        self.source_data['ParameterMaps'] = self.ParameterMaps
    else:
        # Use SimpleTransformix to save the maps and add them
        SimpleElastix = sitk.SimpleElastix()
        self.source_data['ParameterMaps'] = list()
        for num, f in enumerate(self.ParameterMaps):
            filename = ('ParameterMap{}.txt').format(str(num))
            fname = os.path.join(tempdir, filename)
            SimpleElastix.WriteParameterFile(f, fname)
            sourcename = 'vfs://tmp/WORC_Elastix/' + filename
            self.source_data['ParameterMaps'].append(sourcename)

    # Based on the number of parameter files, add nodes to change the
    # FinalBSplineInterpolationOrder
    self.addchangeorder()

    # Set the mask sources if provided
    # if self.FixedMask is not None:
    self.source_data['FixedMask'] = self.FixedMask
    # if self.MovingMask is not None:
    self.source_data['MovingMask'] = self.MovingMask

    # Add other images to transform if given
    # if self.ToTransform is not None:
    self.source_data['ToTransform'] = self.ToTransform

    # Set the network sinks
    self.sink_data = dict()
    self.sink_data['sink_trans'] = self.TransformParameters
    self.sink_data['sink_image'] = self.TransformedImage
    self.sink_data['sink_seg'] = self.TransformedSeg

    # Set output folder if given
    # if self.OutputFolder:
    #     self.sink_data['Out'] = self.OutputFolder
    # else:
    #     self.sink_data['Out'] = 'vfs://tmp/WORC_Elastix/output'
    # print self.sink_data['Out']

    # Draw and dump the network, then execute it
    self.network.draw_network('WORC_Elastix', img_format='svg', draw_dimension=True)
    self.network.dumpf('{}.json'.format(self.network.id), indent=2)
    self.network.execute(self.source_data, self.sink_data, tmpdir=self.fastr_tmpdir)
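
# Illustrative sketch of how the execute() method above might be driven.
# The attribute assignments mirror the checks at the top of execute(); the
# 'elastix_tool' object and the vfs:// paths are assumptions for this example,
# since in WORC the wrapper is normally configured by the WORC object itself.
def example_run_registration(elastix_tool):
    elastix_tool.FixedImage = 'vfs://tmp/fixed_image.nii.gz'
    elastix_tool.MovingImage = 'vfs://tmp/moving_image.nii.gz'
    # A list of files is passed through to the elastix node unaltered
    elastix_tool.ParameterMaps = ['vfs://tmp/parameters_rigid.txt',
                                  'vfs://tmp/parameters_bspline.txt']
    elastix_tool.execute()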
def compute_statistics(y_truth, y_score, y_prediction, modus, regression):
    """Compute statistics on predictions."""
    if modus == 'singlelabel':
        # Compute singlelabel performance metrics;
        # performance_singlelabel itself distinguishes between the
        # classification and the regression metrics
        return metrics.performance_singlelabel(y_truth, y_prediction,
                                               y_score, regression)
    elif modus == 'multilabel':
        # Convert class objects to a single label per patient
        y_truth_temp = list()
        y_prediction_temp = list()
        for yt, yp in zip(y_truth, y_prediction):
            label = np.where(yt == 1)
            if len(label) > 1:
                raise ae.WORCNotImplementedError(
                    'Multiclass classification evaluation is not supported in WORC.')

            y_truth_temp.append(label[0][0])
            label = np.where(yp == 1)
            y_prediction_temp.append(label[0][0])

        y_truth = y_truth_temp
        y_prediction = y_prediction_temp

        # Compute multilabel performance metrics
        predictions_multilabel =\
            metrics.performance_multilabel(y_truth, y_prediction, y_score)

        # Compute all single label performance metrics as well
        n_labels = len(np.unique(y_truth))
        for i_label in range(n_labels):
            y_truth_single = [i == i_label for i in y_truth]
            y_prediction_single = [i == i_label for i in y_prediction]
            y_score_single = y_score[:, i_label]

            predictions_singlelabel_temp =\
                metrics.performance_singlelabel(y_truth_single,
                                                y_prediction_single,
                                                y_score_single,
                                                regression)

            if i_label == 0:
                predictions_singlelabel =\
                    [[i] for i in predictions_singlelabel_temp]
            else:
                for num, metric in enumerate(predictions_singlelabel_temp):
                    predictions_singlelabel[num].append(metric)

        # Cast to list so the concatenation also works when
        # performance_multilabel returns a tuple
        output = list(predictions_multilabel) + predictions_singlelabel
        return output
    else:
        raise ae.WORCKeyError(f'{modus} is not a valid modus!')
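
# Minimal sketch of compute_statistics on synthetic single-label data. The
# labels and scores below are fabricated purely for illustration; the returned
# values follow whatever metrics.performance_singlelabel produces.
def example_compute_statistics():
    y_truth = [0, 1, 1, 0, 1, 0]
    y_prediction = [0, 1, 0, 0, 1, 1]
    y_score = [0.1, 0.9, 0.4, 0.2, 0.8, 0.6]  # e.g. posterior probabilities
    return compute_statistics(y_truth, y_score, y_prediction,
                              modus='singlelabel', regression=False)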