Example #1

# NOTE: Imports added so the example reads self-contained; the WORC module
# paths are assumptions based on the package layout and may differ between
# versions.
import os

import numpy as np
import pandas as pd
from sklearn.base import is_regressor

import WORC.addexceptions as ae
import WORC.classification.metrics as metrics
import WORC.processing.label_processing as lp
from WORC.plotting import compute_CI


def plot_SVM(prediction, label_data, label_type, show_plots=False,
             alpha=0.95, ensemble=False, verbose=True,
             ensemble_scoring=None, output='stats',
             modus='singlelabel'):
    '''
    Plot the output of a single binary estimator, e.g. an SVM.

    Parameters
    ----------
    prediction: pandas dataframe or string, mandatory
        Output of the trainclassifier function: either a pandas dataframe
        or a path to an HDF5 file.

    label_data: string, mandatory
        Contains the path referring to a .txt file containing the
        patient label(s) and value(s) to be used for learning. See
        the GitHub wiki for the format.

    label_type: string, mandatory
        Name of the label to extract from the label data to test the
        estimator on.

    show_plots: boolean, default False
        Determine whether matplotlib performance plots are made.

    alpha: float, default 0.95
        Significance of confidence intervals.

    ensemble: False, integer, or 'Caruana', default False
        Determine whether an ensemble will be created. If so,
        either provide an integer to determine how many of the
        top performing classifiers should be in the ensemble, or use
        the string "Caruana" to use smart ensembling based on
        Caruana et al. 2004.

    verbose: boolean, default True
        Print intermediate messages.

    ensemble_scoring: string, default None
        Metric to be used for evaluating the ensemble. If None,
        the option set in the prediction object will be used.

    output: string, default 'stats'
        Determine which results are returned. If 'stats', the statistics
        of the estimator are returned. If 'scores', the scores are returned.

    modus: string, default 'singlelabel'
        Determine whether the estimator performed a singlelabel or a
        multilabel classification, as multilabel outputs need to be
        converted to a single label per patient.

    Returns
    ----------
    Depending on the output parameter, the following outputs are returned
    (output='decision' returns the same objects as 'scores'):

    If output == 'stats':
    stats: dictionary
        Contains the confidence intervals of the performance metrics
        and the number of times each patient was classified correctly
        or incorrectly.

    If output == 'scores':
    y_truths: list
        Contains the true label for each object.

    y_scores: list
        Contains the score (e.g. posterior) for each object.

    y_predictions: list
        Contains the predicted label for each object.

    PIDs: list
        Contains the patient ID/name for each object.
    '''

    # Load the prediction object if it's an HDF5 file
    if type(prediction) is not pd.core.frame.DataFrame:
        if os.path.isfile(prediction):
            prediction = pd.read_hdf(prediction)
        else:
            raise ae.WORCIOError(('{} is not an existing file!').format(str(prediction)))

    # Select the estimator from the pandas dataframe to use
    keys = prediction.keys()
    if label_type is None:
        label_type = keys[0]

    # Load the label data
    if type(label_data) is not dict:
        if os.path.isfile(label_data):
            if type(label_type) is not list:
                # Singlelabel: convert to list
                label_type = [[label_type]]
            label_data = lp.load_labels(label_data, label_type)

    patient_IDs = label_data['patient_IDs']
    labels = label_data['label']

    if type(label_type) is list:
        # FIXME: Multiple label types are not supported yet.
        print('[WORC Warning] Multiple label types are not supported yet. Taking the first label for plot_SVM.')
        label_type = keys[0]

    # Extract the estimators, features and labels
    SVMs = prediction[label_type]['classifiers']
    regression = is_regressor(SVMs[0].best_estimator_)
    Y_test = prediction[label_type]['Y_test']
    X_test = prediction[label_type]['X_test']
    X_train = prediction[label_type]['X_train']
    Y_train = prediction[label_type]['Y_train']
    feature_labels = prediction[label_type]['feature_labels']

    # Create lists for performance measures
    sensitivity = list()
    specificity = list()
    precision = list()
    accuracy = list()
    auc = list()
    f1_score_list = list()
    patient_classification_list = dict()
    if output in ['scores', 'decision']:
        # Keep track of all ground truths and scores
        y_truths = list()
        y_scores = list()
        y_predictions = list()
        PIDs = list()

    # Loop over the test sets, which correspond to the cross-validation
    # iterations
    for i in range(len(Y_test)):
        print("\n")
        print(("Cross validation {} / {}.").format(str(i + 1), str(len(Y_test))))
        test_patient_IDs = prediction[label_type]['patient_ID_test'][i]
        train_patient_IDs = prediction[label_type]['patient_ID_train'][i]
        X_test_temp = X_test[i]
        X_train_temp = X_train[i]
        Y_train_temp = Y_train[i]
        Y_test_temp = Y_test[i]
        test_indices = list()

        # Check which patients are in the test set.
        for i_ID in test_patient_IDs:
            test_indices.append(np.where(patient_IDs == i_ID)[0][0])

            # Initiate counting how many times a patient is classified correctly
            if i_ID not in patient_classification_list:
                patient_classification_list[i_ID] = dict()
                patient_classification_list[i_ID]['N_test'] = 0
                patient_classification_list[i_ID]['N_correct'] = 0
                patient_classification_list[i_ID]['N_wrong'] = 0

            patient_classification_list[i_ID]['N_test'] += 1

        # Extract ground truth
        y_truth = Y_test_temp

        # If requested, first let the SearchCV object create an ensemble
        if ensemble:
            # NOTE: Added for backwards compatibility
            if not hasattr(SVMs[i], 'cv_iter'):
                cv_iter = list(SVMs[i].cv.split(X_train_temp, Y_train_temp))
                SVMs[i].cv_iter = cv_iter

            # Create the ensemble
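            # NOTE: the SearchCV objects presumably expect each sample as a
            # (feature_values, feature_labels) tuple, hence the rewrapping below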
            X_train_temp = [(x, feature_labels) for x in X_train_temp]
            SVMs[i].create_ensemble(X_train_temp, Y_train_temp,
                                    method=ensemble, verbose=verbose,
                                    scoring=ensemble_scoring)

        # Create prediction
        y_prediction = SVMs[i].predict(X_test_temp)

        if regression:
            y_score = y_prediction
        else:
            y_score = SVMs[i].predict_proba(X_test_temp)[:, 1]

        print("Truth: " + str(y_truth))
        print("Prediction: " + str(y_prediction))

        # Add if patient was classified correctly or not to counting
        for i_truth, i_predict, i_test_ID in zip(y_truth, y_prediction, test_patient_IDs):
            if modus == 'multilabel':
                success = (i_truth == i_predict).all()
            else:
                success = i_truth == i_predict

            if success:
                patient_classification_list[i_test_ID]['N_correct'] += 1
            else:
                patient_classification_list[i_test_ID]['N_wrong'] += 1

        if output in ['decision', 'scores']:
            # Output the posteriors, ground truths, predictions and IDs
            y_scores.append(y_score)
            y_truths.append(y_truth)
            y_predictions.append(y_prediction)
            PIDs.append(test_patient_IDs)

        elif output == 'stats':
            # Compute statistics
            # Compute confusion matrix and use for sensitivity/specificity
            if modus == 'singlelabel':
                # Compute singlelabel performance metrics
                if not regression:
                    accuracy_temp, sensitivity_temp, specificity_temp,\
                        precision_temp, f1_score_temp, auc_temp =\
                        metrics.performance_singlelabel(y_truth,
                                                        y_prediction,
                                                        y_score,
                                                        regression)
                else:
                    r2score, MSE, coefICC, PearsonC, PearsonP, SpearmanC,\
                        SpearmanP =\
                        metrics.performance_singlelabel(y_truth,
                                                        y_prediction,
                                                        y_score,
                                                        regression)

            elif modus == 'multilabel':
                # Convert class objects to single label per patient
                y_truth_temp = list()
                y_prediction_temp = list()
                for yt, yp in zip(y_truth, y_prediction):
                    label = np.where(yt == 1)
                    if len(label) > 1:
                        raise ae.WORCNotImplementedError('Multiclass classification evaluation is not supported in WORC.')

                    y_truth_temp.append(label[0][0])
                    label = np.where(yp == 1)
                    y_prediction_temp.append(label[0][0])

                y_truth = y_truth_temp
                y_prediction = y_prediction_temp

                # Compute multilabel performance metrics
                accuracy_temp, sensitivity_temp, specificity_temp,\
                    precision_temp, f1_score_temp, auc_temp =\
                    metrics.performance_multilabel(y_truth,
                                                   y_prediction,
                                                   y_score)

            else:
                raise ae.WORCKeyError('{} is not a valid modus!'.format(modus))

            # Print AUC to keep you up to date
            print('AUC: ' + str(auc_temp))

            # Append performance to lists for all cross validations
            accuracy.append(accuracy_temp)
            sensitivity.append(sensitivity_temp)
            specificity.append(specificity_temp)
            auc.append(auc_temp)
            f1_score_list.append(f1_score_temp)
            precision.append(precision_temp)

    if output in ['scores', 'decision']:
        # Return the scores and true values of all patients
        return y_truths, y_scores, y_predictions, PIDs
    elif output == 'stats':
        # Compute statistics
        # Extract sample size
        N_1 = float(len(train_patient_IDs))
        N_2 = float(len(test_patient_IDs))
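
        # N_1 and N_2 are the training and test sample sizes of a single
        # cross-validation iteration; compute_confidence presumably uses them
        # for the corrected resampled t-test interval (Nadeau & Bengio, 2003),
        # which corrects for the overlap between cross-validation training sets.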

        # Compute the alpha confidence intervals for each metric
        stats = dict()
        stats["Accuracy 95%:"] = str(compute_CI.compute_confidence(accuracy, N_1, N_2, alpha))
        stats["AUC 95%:"] = str(compute_CI.compute_confidence(auc, N_1, N_2, alpha))
        stats["F1-score 95%:"] = str(compute_CI.compute_confidence(f1_score_list, N_1, N_2, alpha))
        stats["Precision 95%:"] = str(compute_CI.compute_confidence(precision, N_1, N_2, alpha))
        stats["Sensitivity 95%:"] = str(compute_CI.compute_confidence(sensitivity, N_1, N_2, alpha))
        stats["Specificity 95%:"] = str(compute_CI.compute_confidence(specificity, N_1, N_2, alpha))

        # Print the confidence intervals to keep the user up to date
        for metric, CI in stats.items():
            print(metric + ' ' + CI)

        # Extract statistics on how often patients got classified correctly
        alwaysright = dict()
        alwayswrong = dict()
        percentages = dict()
        for i_ID in patient_classification_list:
            percentage_right = patient_classification_list[i_ID]['N_correct'] / float(patient_classification_list[i_ID]['N_test'])

            if i_ID in patient_IDs:
                label = labels[0][np.where(i_ID == patient_IDs)]
            else:
                # Multiple instances of one patient
                label = labels[0][np.where(i_ID.split('_')[0] == patient_IDs)]

            label = label[0][0]
            percentages[i_ID] = str(label) + ': ' + str(round(percentage_right * 100, 2)) + '%'
            if percentage_right == 1.0:
                alwaysright[i_ID] = label
                print(("Always Right: {}, label {}").format(i_ID, label))

            elif percentage_right == 0:
                alwayswrong[i_ID] = label
                print(("Always Wrong: {}, label {}").format(i_ID, label))

        stats["Always right"] = alwaysright
        stats["Always wrong"] = alwayswrong
        stats['Percentages'] = percentages

        if show_plots:
            # Plot the distribution of each performance metric over the
            # cross-validation iterations in a boxplot
            import matplotlib.pyplot as plt

            metrics_to_plot = [('Accuracy', accuracy),
                               ('AUC', auc),
                               ('Precision', precision),
                               ('Sensitivity', sensitivity),
                               ('Specificity', specificity)]
            for name, values in metrics_to_plot:
                plt.figure()
                plt.boxplot(values)
                plt.ylim([-0.05, 1.05])
                plt.ylabel(name)
                # Hide all x-axis ticks and labels: there is only one group
                plt.tick_params(axis='x', which='both', bottom=False,
                                top=False, labelbottom=False)
                plt.tight_layout()
                plt.show()

        return stats
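
A minimal usage sketch, with hypothetical file paths and label name; it assumes a prediction HDF5 produced by WORC's trainclassifier function and a label file in the WORC label format:

# Hypothetical usage sketch: the paths and the label name are placeholders.
stats = plot_SVM(prediction='/tmp/classification.hdf5',
                 label_data='/tmp/pinfo.txt',
                 label_type='imaginary_label_1',
                 alpha=0.95,
                 output='stats')
print(stats['AUC 95%:'])
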
Example #2

# NOTE: This method is a fragment of a class (presumably WORC's Elastix tool
# wrapper); it relies on os, fastr, SimpleITK (as sitk) and the WORC
# exceptions module (as WORCexceptions) being imported at module level.
    def execute(self):
        # Check if minimal input is supplied
        if not self.FixedImage:
            message = "You need to supply a fixed image for registration."
            raise WORCexceptions.WORCNotImplementedError(message)

        if not self.MovingImage:
            message = "You need to supply a moving image for registration."
            raise WORCexceptions.WORCNotImplementedError(message)

        if len(self.ParameterMaps) == 0:
            message = "You need to supply at leat one parameter map for registration."
            raise WORCexceptions.WORCNotImplementedError(message)

        # Set moving and fixed image sources
        self.source_data = dict()
        self.source_data['FixedImage'] = self.FixedImage
        self.source_data['MovingImage'] = self.MovingImage

        # Create a temporary directory to use
        tempdir = os.path.join(fastr.config.mounts['tmp'], 'WORC_Elastix')
        if not os.path.exists(tempdir):
            os.makedirs(tempdir)

        # Set the parameter map sources
        if type(self.ParameterMaps) is list:
            # Files, thus just provide them to the elastix node
            self.source_data['ParameterMaps'] = self.ParameterMaps
        else:
            # Use SimpleElastix to write the maps to files and add those
            SimpleElastix = sitk.SimpleElastix()
            self.source_data['ParameterMaps'] = list()
            for num, f in enumerate(self.ParameterMaps):
                filename = ('ParameterMap{}.txt').format(str(num))
                fname = os.path.join(tempdir, filename)
                SimpleElastix.WriteParameterFile(f, fname)
                sourcename = 'vfs://tmp/WORC_Elastix/' + filename
                self.source_data['ParameterMaps'].append(sourcename)

        # Based on the number of parameter files, add nodes to change the FinalBSplineInterpolationOrder
        self.addchangeorder()

        # Set the mask sources if provided
        self.source_data['FixedMask'] = self.FixedMask
        self.source_data['MovingMask'] = self.MovingMask

        # Add other images to transform if given
        self.source_data['ToTransform'] = self.ToTransform

        # Set the network sinks
        self.sink_data = dict()
        self.sink_data['sink_trans'] = self.TransformParameters
        self.sink_data['sink_image'] = self.TransformedImage
        self.sink_data['sink_seg'] = self.TransformedSeg

        # Set outputfolder if given
        # if self.OutputFolder:
        #     self.sink_data['Out'] = self.OutputFolder
        # else:
        #     self.sink_data['Out'] = 'vfs://tmp/WORC_Elastix/output'

        # Execute the network
        self.network.draw_network('WORC_Elastix',
                                  img_format='svg',
                                  draw_dimension=True)
        self.network.dumpf('{}.json'.format(self.network.id), indent=2)
        self.network.execute(self.source_data,
                             self.sink_data,
                             tmpdir=self.fastr_tmpdir)
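
A minimal usage sketch, assuming this execute() belongs to WORC's Elastix tool wrapper; the class name, constructor, and all attribute values below are hypothetical placeholders:

# Hypothetical usage sketch: only the attributes read by execute() above are
# set, and all names and paths are placeholders.
reg = Elastix()
reg.FixedImage = ['vfs://tmp/fixed_image.nii.gz']
reg.MovingImage = ['vfs://tmp/moving_image.nii.gz']
reg.ParameterMaps = ['vfs://tmp/parameters_rigid.txt']
reg.FixedMask = None
reg.MovingMask = None
reg.ToTransform = None
reg.TransformParameters = ['vfs://tmp/WORC_Elastix/transform.txt']
reg.TransformedImage = ['vfs://tmp/WORC_Elastix/image.nii.gz']
reg.TransformedSeg = ['vfs://tmp/WORC_Elastix/seg.nii.gz']
reg.execute()
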
Example #3

# NOTE: Imports added so the example reads self-contained; the WORC module
# paths are assumptions and may differ between versions.
import numpy as np

import WORC.addexceptions as ae
import WORC.classification.metrics as metrics


def compute_statistics(y_truth, y_score, y_prediction, modus, regression):
    """Compute performance statistics on a set of predictions."""
    if modus == 'singlelabel':
        # Compute singlelabel performance metrics; performance_singlelabel
        # returns regression metrics when regression is True and
        # classification metrics otherwise, so a single call suffices
        return metrics.performance_singlelabel(y_truth, y_prediction,
                                               y_score, regression)

    elif modus == 'multilabel':
        # Convert class objects to single label per patient
        y_truth_temp = list()
        y_prediction_temp = list()
        for yt, yp in zip(y_truth, y_prediction):
            label = np.where(yt == 1)
            if len(label) > 1:
                raise ae.WORCNotImplementedError(
                    'Multiclass classification evaluation is not supported in WORC.'
                )

            y_truth_temp.append(label[0][0])
            label = np.where(yp == 1)
            y_prediction_temp.append(label[0][0])

        y_truth = y_truth_temp
        y_prediction = y_prediction_temp

        # Compute multilabel performance metrics
        predictions_multilabel =\
            metrics.performance_multilabel(y_truth,
                                           y_prediction,
                                           y_score)

        # Compute all single label performance metrics as well
        n_labels = len(np.unique(y_truth))
        for i_label in range(n_labels):
            y_truth_single = [i == i_label for i in y_truth]
            y_prediction_single = [i == i_label for i in y_prediction]
            y_score_single = y_score[:, i_label]

            predictions_singlelabel_temp =\
                metrics.performance_singlelabel(y_truth_single,
                                                y_prediction_single,
                                                y_score_single,
                                                regression)

            if i_label == 0:
                predictions_singlelabel =\
                    [[i] for i in predictions_singlelabel_temp]
            else:
                for num, metric in enumerate(predictions_singlelabel_temp):
                    predictions_singlelabel[num].append(metric)

        # performance_multilabel returns a tuple; cast to list before
        # concatenating with the per-label metric lists
        output = list(predictions_multilabel) + predictions_singlelabel
        return output

    else:
        raise ae.WORCKeyError(f'{modus} is not a valid modus!')
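
A minimal usage sketch for the singlelabel classification case, on toy data; the unpacking order mirrors the non-regression singlelabel branch of plot_SVM above:

# Hypothetical usage sketch on toy data; the unpacking order mirrors the
# non-regression singlelabel branch in plot_SVM above.
y_truth = [0, 1, 1, 0, 1]
y_prediction = [0, 1, 0, 0, 1]
y_score = [0.1, 0.8, 0.4, 0.3, 0.9]
accuracy, sensitivity, specificity, precision, f1_score, auc = \
    compute_statistics(y_truth, y_score, y_prediction,
                       modus='singlelabel', regression=False)
print(f'Accuracy: {accuracy}, AUC: {auc}')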