    def _validate(self, simpleworc, *args, **kwargs):
        if simpleworc._add_evaluation:
            if not simpleworc._images_train:
                if hasattr(simpleworc, 'images_train'):
                    if not simpleworc.images_train:
                        raise ae.WORCValueError(
                            'You have added the evaluation pipeline, but have not provided images, which is currently required. We will work on this option in a future release.'
                        )
                else:
                    raise ae.WORCValueError(
                        'You have added the evaluation pipeline, but have not provided images, which is currently required. We will work on this option in a future release.'
                    )

    def _validate(self, simpleworc):
        errstr = None

        try:
            if simpleworc._labels_file_train:
                labels, subjects, _ = load_label_csv(
                    simpleworc._labels_file_train)
            elif simpleworc.labels_file_train:
                labels, subjects, _ = load_label_csv(
                    simpleworc.labels_file_train)
            else:
                raise ae.WORCValueError(
                    f'No labels, use SimpleWorc().labels_from_this_file(**) to add labels.'
                )

        except ae.WORCAssertionError as wae:
            if 'First column should be patient ID' in str(wae):
                # TODO: print wrong column name and file so that it is clear what needs to be replaced in which file
                raise ae.WORCValueError(
                    f'First column in the file given to SimpleWORC().labels_from_this_file(**) needs to be named Patient.'
                )

        # check labels for substrings of each other
        labels_matches = self._get_all_substrings_for_array(labels)

        if labels_matches:
            # if not empty we have a problem
            errstr = "Found label(s) that are a substring of other label(s). This is currently not allowed in WORC. Rename the following label(s):\n"
            for label, matches in labels_matches.items():
                for match in matches:
                    errstr += f"{label} is a substring of {match}\n"

        # check subject names for substrings of each other
        subjects_matches = self._get_all_substrings_for_array(subjects)
        if subjects_matches:
            # if not empty we have a problem
            errstr = "Found subject(s) that are a substring of other subject(s). This is currently not allowed in WORC. Rename the following subject(s):\n"
            for subject, matches in subjects_matches.items():
                for match in matches:
                    errstr += f"{subject} is a substring of {match}\n"

        if errstr:
            raise ae.WORCValueError(errstr)
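
# _get_all_substrings_for_array is used above but not shown in this excerpt.
# A minimal sketch of such a helper, assuming it returns a dict mapping each
# value to the other values that contain it as a substring (empty when there
# are no collisions); the name and return format here are inferred, not the
# actual WORC implementation:
def get_all_substrings_for_array_sketch(values):
    """Return {value: [other values containing value]} for a list of strings."""
    matches = dict()
    for value in values:
        hits = [other for other in values
                if other != value and str(value) in str(other)]
        if hits:
            matches[value] = hits
    return matches
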
def load_labels(label_file, label_type):
    """Loads the label data from a label file

    Args:
        label_file (string): The path to the label file
        label_type (list): List of the names of the labels to load

    Returns:
        dict: A dict containing 'patient_IDs', 'label' and
         'label_name'
    """
    if not os.path.exists(label_file):
        raise ae.WORCKeyError(f'File {label_file} does not exist!')

    _, extension = os.path.splitext(label_file)
    if extension == '.txt':
        label_names, patient_IDs, label_status = load_label_txt(
            label_file)
    elif extension == '.csv':
        label_names, patient_IDs, label_status = load_label_csv(
            label_file)
    elif extension == '.ini':
        label_names, patient_IDs, label_status = load_label_XNAT(
            label_file)
    else:
        raise ae.WORCIOError(extension + ' is not valid label file extension.')

    print("Label names to extract: " + str(label_type))
    labels = list()
    for i_label in label_type:
        label_index = np.where(label_names == i_label)[0]
        if label_index.size == 0:
            raise ae.WORCValueError('Could not find label: ' + str(i_label))
        else:
            labels.append(label_status[:, label_index])

    label_data = dict()
    label_data['patient_IDs'] = patient_IDs
    label_data['label'] = labels
    label_data['label_name'] = label_type

    return label_data
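
# A hypothetical usage sketch of load_labels. Based on the validation above,
# the label file is expected to have a first column named "Patient" followed
# by one column per label; the path, label name, and values here are
# placeholders, not files shipped with WORC:
#
#   Patient,Label1
#   patient_001,1
#   patient_002,0
#
# label_data = load_labels('/path/to/pinfo.csv', ['Label1'])
# print(label_data['patient_IDs'], label_data['label'], label_data['label_name'])
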
    def _validate(self, simpleworc, *args, **kwargs):
        if not simpleworc._labels_file_train:
            if hasattr(simpleworc, 'labels_file_train'):
                if not simpleworc.labels_file_train:
                    raise ae.WORCValueError(
                        f'No labels, use SimpleWorc().labels_from_this_file(**) to add labels.'
                    )
            else:
                raise ae.WORCValueError(
                    f'No labels, use SimpleWorc().labels_from_this_file(**) to add labels.'
                )

        if not simpleworc._label_names:
            if not simpleworc.label_names:
                raise ae.WORCValueError(
                    f'No label(s) to predict selected. Use SimpleWorc().predict_labels(**) to select labels.'
                )

        if not simpleworc._method:
            raise ae.WORCValueError(
                f'No method selected. Call function binary_classification(**) or regression(**) or survival(**) on SimpleWorc().'
            )

        if simpleworc._images_train:
            for num, (ims, segs) in enumerate(
                    zip(simpleworc._images_train,
                        simpleworc._segmentations_train)):
                if ims.keys() != segs.keys():
                    raise ae.WORCValueError(
                        f'Subjects in images_train and segmentations_train are not the same for modality {num}.'
                    )

        if hasattr(simpleworc, 'images_train'):
            if simpleworc.images_train:
                for num, (ims, segs) in enumerate(
                        zip(simpleworc.images_train,
                            simpleworc.segmentations_train)):
                    if ims.keys() != segs.keys():
                        raise ae.WORCValueError(
                            f'Subjects in images_train and segmentations_train are not the same for modality {num}.'
                        )
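
# The check above assumes images_train and segmentations_train are lists with
# one dict per modality, keyed by subject ID. A hypothetical minimal example
# of data that passes the subject-consistency check (paths are placeholders):
images_train_example = [
    {'patient_001': '/data/patient_001/image.nii.gz',
     'patient_002': '/data/patient_002/image.nii.gz'},
]
segmentations_train_example = [
    {'patient_001': '/data/patient_001/seg.nii.gz',
     'patient_002': '/data/patient_002/seg.nii.gz'},
]
for num, (ims, segs) in enumerate(zip(images_train_example,
                                      segmentations_train_example)):
    assert ims.keys() == segs.keys(), f'Subject mismatch in modality {num}'
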
    def _validate(self, simpleworc, *args, **kwargs):
        if simpleworc._num_subjects < min_subjects:
            raise ae.WORCValueError(
                f'Less than {min_subjects} subjects (you have {simpleworc._num_subjects}) will probably make WORC crash due to a split in the test/validation set having only one subject. Use at least {min_subjects} subjects or more.'
            )

def random_split_cross_validation(image_features,
                                  feature_labels,
                                  classes,
                                  patient_ids,
                                  n_iterations,
                                  param_grid,
                                  config,
                                  modus,
                                  test_size,
                                  start=0,
                                  save_data=None,
                                  tempsave=False,
                                  tempfolder=None,
                                  fixedsplits=None,
                                  fixed_seed=False,
                                  use_fastr=None,
                                  fastr_plugin=None):
    """Cross-validation in which data is randomly split in each iteration.

    Due to options of doing single-label and multi-label classification,
    stratified splitting, and regression, we use a manual loop instead
    of the default scikit-learn object.

    Parameters
    ------------

    Returns
    ------------

    """
    print('Starting random-split cross-validation.')
    logging.debug('Starting random-split cross-validation.')
    if save_data is None:
        # Start from zero, thus an empty list of previous data
        save_data = list()

    for i in range(start, n_iterations):
        print(('Cross-validation iteration {} / {} .').format(
            str(i + 1), str(n_iterations)))
        logging.debug(('Cross-validation iteration {} / {} .').format(
            str(i + 1), str(n_iterations)))
        timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
        print(f'\t Time: {timestamp}.')
        logging.debug(f'\t Time: {timestamp}.')
        if fixed_seed:
            random_seed = i**2
        else:
            random_seed = np.random.randint(5000)

        t = time.time()

        # Split into test and training set, where the percentage of each
        # label is maintained
        if any(clf in regressors for clf in param_grid['classifiers']):
            # We cannot do a stratified shuffle split with regression
            stratify = None
        else:
            if modus == 'singlelabel':
                classes_temp = stratify = classes.ravel()
            elif modus == 'multilabel':
                # Create a stratification array from the labels:
                # a value of 0 means that none of the labels equals 1,
                # other values are the (1-based) index of the label that is 1
                stratify = list()
                for pnum in range(0, len(classes[0])):
                    plabel = 0
                    for lnum, slabel in enumerate(classes):
                        if slabel[pnum] == 1:
                            plabel = lnum + 1
                    stratify.append(plabel)

                # Sklearn multiclass requires rows to be objects/patients
                classes_temp = np.zeros((classes.shape[1], classes.shape[0]))
                for n_patient in range(0, classes.shape[1]):
                    for n_label in range(0, classes.shape[0]):
                        classes_temp[n_patient, n_label] = classes[n_label,
                                                                   n_patient]
            else:
                raise ae.WORCKeyError('{} is not a valid modus!'.format(modus))

        if fixedsplits is None:
            # Use Random Split. Split per patient, not per sample
            unique_patient_ids, unique_indices =\
                np.unique(np.asarray(patient_ids), return_index=True)
            if any(clf in regressors for clf in param_grid['classifiers']):
                unique_stratify = None
            else:
                unique_stratify = [stratify[i] for i in unique_indices]

            try:
                unique_PID_train, indices_PID_test\
                    = train_test_split(unique_patient_ids,
                                       test_size=test_size,
                                       random_state=random_seed,
                                       stratify=unique_stratify)
            except ValueError as e:
                e = str(e) + ' Increase the size of your validation set.'
                raise ae.WORCValueError(e)

            # Check for all ids if they are in test or training
            indices_train = list()
            indices_test = list()
            patient_ID_train = list()
            patient_ID_test = list()
            for num, pid in enumerate(patient_ids):
                if pid in unique_PID_train:
                    indices_train.append(num)

                    # Make sure we get a unique ID
                    if pid in patient_ID_train:
                        n = 1
                        while str(pid + '_' + str(n)) in patient_ID_train:
                            n += 1
                        pid = str(pid + '_' + str(n))
                    patient_ID_train.append(pid)
                else:
                    indices_test.append(num)

                    # Make sure we get a unique ID
                    if pid in patient_ID_test:
                        n = 1
                        while str(pid + '_' + str(n)) in patient_ID_test:
                            n += 1
                        pid = str(pid + '_' + str(n))
                    patient_ID_test.append(pid)

            # Split features and labels accordingly
            X_train = [image_features[i] for i in indices_train]
            X_test = [image_features[i] for i in indices_test]
            if modus == 'singlelabel':
                Y_train = classes_temp[indices_train]
                Y_test = classes_temp[indices_test]
            elif modus == 'multilabel':
                Y_train = classes_temp[indices_train, :]
                Y_test = classes_temp[indices_test, :]
            else:
                raise ae.WORCKeyError('{} is not a valid modus!'.format(modus))

        else:
            # Use predefined splits
            train = fixedsplits[str(i) + '_train'].values
            test = fixedsplits[str(i) + '_test'].values

            # Convert the numbers to the correct indices
            ind_train = list()
            for j in train:
                success = False
                for num, p in enumerate(patient_ids):
                    if j == p:
                        ind_train.append(num)
                        success = True
                if not success:
                    raise ae.WORCIOError("Patient " + str(j).zfill(3) +
                                         " is not included!")

            ind_test = list()
            for j in test:
                success = False
                for num, p in enumerate(patient_ids):
                    if j == p:
                        ind_test.append(num)
                        success = True
                if not success:
                    raise ae.WORCIOError("Patient " + str(j).zfill(3) +
                                         " is not included!")

            X_train = [image_features[i] for i in ind_train]
            X_test = [image_features[i] for i in ind_test]

            patient_ID_train = patient_ids[ind_train]
            patient_ID_test = patient_ids[ind_test]

            if modus == 'singlelabel':
                Y_train = classes_temp[ind_train]
                Y_test = classes_temp[ind_test]
            elif modus == 'multilabel':
                Y_train = classes_temp[ind_train, :]
                Y_test = classes_temp[ind_test, :]
            else:
                raise ae.WORCKeyError('{} is not a valid modus!'.format(modus))

        # Find best hyperparameters and construct classifier
        config['HyperOptimization']['use_fastr'] = use_fastr
        config['HyperOptimization']['fastr_plugin'] = fastr_plugin
        n_cores = config['General']['Joblib_ncores']
        trained_classifier = random_search_parameters(
            features=X_train,
            labels=Y_train,
            param_grid=param_grid,
            n_cores=n_cores,
            random_seed=random_seed,
            **config['HyperOptimization'])

        # We only want to save the feature values and one label array
        X_train = [x[0] for x in X_train]
        X_test = [x[0] for x in X_test]

        temp_save_data = (trained_classifier, X_train, X_test, Y_train, Y_test,
                          patient_ID_train, patient_ID_test, random_seed)

        save_data.append(temp_save_data)

        # Create a temporary save
        if tempsave:
            panda_labels = [
                'trained_classifier', 'X_train', 'X_test', 'Y_train', 'Y_test',
                'config', 'patient_ID_train', 'patient_ID_test', 'random_seed',
                'feature_labels'
            ]

            panda_data_temp =\
                pd.Series([trained_classifier, X_train, X_test, Y_train,
                           Y_test, config, patient_ID_train,
                           patient_ID_test, random_seed, feature_labels],
                          index=panda_labels,
                          name='Constructed crossvalidation')

            panda_data = pd.DataFrame(panda_data_temp)
            n = 0
            filename = os.path.join(tempfolder, 'tempsave_' + str(i) + '.hdf5')
            while os.path.exists(filename):
                n += 1
                filename = os.path.join(tempfolder,
                                        'tempsave_' + str(i + n) + '.hdf5')

            panda_data.to_hdf(filename, 'EstimatorData')
            del panda_data, panda_data_temp

        # Print elapsed time
        elapsed = int((time.time() - t) / 60.0)
        print(f'\t Fitting took {elapsed} minutes.')
        logging.debug(f'\t Fitting took {elapsed} minutes.')

    return save_data
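
# A hypothetical sketch of how random_split_cross_validation might be called.
# The synthetic features, parameter grid, and config values below are
# placeholder assumptions; in WORC they come from feature extraction and the
# configuration file, and 'SVM' is only used here as an example classifier.
# The call is left commented out because it also relies on module-level names
# (regressors, random_search_parameters) not shown in this excerpt:
#
#   import numpy as np
#
#   n_patients, n_features = 30, 10
#   feature_labels = [f'feat_{k}' for k in range(n_features)]
#   image_features = [(np.random.rand(n_features), feature_labels)
#                     for _ in range(n_patients)]
#   classes = np.random.randint(0, 2, size=(1, n_patients))  # one binary label
#   patient_ids = [f'patient_{k:03d}' for k in range(n_patients)]
#
#   param_grid = {'classifiers': ['SVM']}
#   config = {'HyperOptimization': {}, 'General': {'Joblib_ncores': 1}}
#
#   save_data = random_split_cross_validation(
#       image_features, feature_labels, classes, patient_ids,
#       n_iterations=2, param_grid=param_grid, config=config,
#       modus='singlelabel', test_size=0.2)
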
def fit_and_score(X,
                  y,
                  scoring,
                  train,
                  test,
                  parameters,
                  fit_params=None,
                  return_train_score=True,
                  return_n_test_samples=True,
                  return_times=True,
                  return_parameters=False,
                  return_estimator=False,
                  error_score='raise',
                  verbose=True,
                  return_all=True):
    """Fit an estimator to a dataset and score the performance.

    The following
    methods can currently be applied as preprocessing before fitting, in
    this order:
    0. Apply OneHotEncoder
    1. Apply feature imputation
    2. Select features based on feature type group (e.g. shape, histogram).
    3. Scale features with e.g. z-scoring.
    4. Apply feature selection based on variance of feature among patients.
    5. Univariate statistical testing (e.g. t-test, Wilcoxon).
    6. Use Relief feature selection.
    7. Select features based on a fit with a LASSO model.
    8. Select features using PCA.
    9. Resampling
    10. If a SingleLabel classifier is used for a MultiLabel problem,
        a OneVsRestClassifier is employed around it.

    All of the steps are optional.

    Parameters
    ----------
    estimator: sklearn estimator, mandatory
            Unfitted estimator which will be fit.

    X: array, mandatory
            Array containing for each object (rows) the feature values
            (1st column) and the associated feature labels (2nd column).

    y: list(?), mandatory
            List containing the labels of the objects.

    scorer: sklearn scorer, mandatory
            Function used as optimization criterion for the hyperparameter optimization.

    train: list, mandatory
            Indices of the objects to be used as training set.

    test: list, mandatory
            Indices of the objects to be used as testing set.

    parameters: dictionary, mandatory
            Contains the settings used for the above preprocessing functions
            and the fitting. TODO: Create a default object and show the
            fields.

    fit_params: dictionary, default None
            Parameters supplied to the estimator for fitting. See the SKlearn
            site for the parameters of the estimators.

    return_train_score: boolean, default True
            Save the training score to the final SearchCV object.

    return_n_test_samples: boolean, default True
            Save the number of times each sample was used in the test set
            to the final SearchCV object.

    return_times: boolean, default True
            Save the time spent on each fit to the final SearchCV object.

    return_parameters: boolean, default False
            Return the parameters used in the final fit to the final SearchCV
            object.

    return_estimator : bool, default=False
        Whether to return the fitted estimator.

    error_score: numeric or "raise" by default
            Value to assign to the score if an error occurs in estimator
            fitting. If set to "raise", the error is raised. If a numeric
            value is given, FitFailedWarning is raised. This parameter
            does not affect the refit step, which will always raise the error.

    verbose: boolean, default=True
            If True, print intermediate progress to command line. Warnings are
            always printed.

    return_all: boolean, default=True
            If False, only the ret object containing the performance will be
            returned. If True, the ret object plus all fitted objects will be
            returned.

    Returns
    ----------
    Depending on the return_all input parameter, either only ret or all objects
    below are returned.

    ret: list
        Contains optionally the train_scores and the test_scores,
        fit_time, score_time, parameters_est
        and parameters_all.

    GroupSel: WORC GroupSel Object
        Either None if the groupwise feature selection is not used, or
        the fitted object.

    VarSel: WORC VarSel Object
        Either None if the variance threshold feature selection is not used, or
        the fitted object.

    SelectModel: WORC SelectModel Object
        Either None if the feature selection based on a fitted model is not
        used, or the fitted object.

    feature_labels: list
        Labels of the features. Only one list is returned, not one per
        feature object, as we assume all samples have the same feature names.

    scaler: scaler object
        Either None if feature scaling is not used, or
        the fitted object.

    encoder: WORC Encoder Object
        Either None if feature OneHotEncoding is not used, or
        the fitted object.

    imputer: WORC Imputer Object
        Either None if feature imputation is not used, or
        the fitted object.

    pca: WORC PCA Object
        Either None if PCA based feature selection is not used, or
        the fitted object.

    StatisticalSel: WORC StatisticalSel Object
        Either None if the statistical test feature selection is not used, or
        the fitted object.

    ReliefSel: WORC ReliefSel Object
        Either None if the RELIEF feature selection is not used, or
        the fitted object.

    Sampler: WORC ObjectSampler Object
        Either None if no resampling is used, or an ObjectSampler object


    """
    # We copy the parameter object so we can alter it and keep the original
    if verbose:
        print("\n")
        print('#######################################')
        print('Starting fit and score of new workflow.')
    para_estimator = parameters.copy()
    estimator = cc.construct_classifier(para_estimator)

    # Check the scorer
    scorers, __ = check_multimetric_scoring(estimator, scoring=scoring)

    para_estimator = delete_cc_para(para_estimator)

    # Get random seed from parameters
    random_seed = para_estimator['random_seed']
    del para_estimator['random_seed']

    # X is a tuple: split in two arrays
    feature_values = np.asarray([x[0] for x in X])
    feature_labels = np.asarray([x[1] for x in X])

    # Split in train and testing
    X_train, y_train = _safe_split(estimator, feature_values, y, train)
    X_test, y_test = _safe_split(estimator, feature_values, y, test, train)
    train = np.arange(0, len(y_train))
    test = np.arange(len(y_train), len(y_train) + len(y_test))

    # Set some defaults for if a part fails and we return a dummy
    fit_time = np.inf
    score_time = np.inf
    Sampler = None
    encoder = None
    imputer = None
    scaler = None
    GroupSel = None
    SelectModel = None
    pca = None
    StatisticalSel = None
    VarSel = None
    ReliefSel = None
    if isinstance(scorers, dict):
        test_scores = {name: np.nan for name in scorers}
        if return_train_score:
            train_scores = test_scores.copy()
    else:
        test_scores = error_score
        if return_train_score:
            train_scores = error_score

    # Initialize a dummy return object for when fitting and scoring fails: sklearn defaults
    ret = [train_scores, test_scores] if return_train_score else [test_scores]

    if return_n_test_samples:
        ret.append(_num_samples(X_test))
    if return_times:
        ret.extend([fit_time, score_time])
    if return_parameters:
        ret.append(para_estimator)
    if return_estimator:
        ret.append(estimator)

    # Additional to sklearn defaults: return all parameters
    ret.append(parameters)

    # ------------------------------------------------------------------------
    # OneHotEncoder
    if 'OneHotEncoding' in para_estimator.keys():
        if para_estimator['OneHotEncoding'] == 'True':
            if verbose:
                print(f'Applying OneHotEncoding, will ignore unknowns.')
            feature_labels_tofit =\
                para_estimator['OneHotEncoding_feature_labels_tofit']
            encoder =\
                OneHotEncoderWrapper(handle_unknown='ignore',
                                     feature_labels_tofit=feature_labels_tofit,
                                     verbose=verbose)
            encoder.fit(X_train, feature_labels)

            if encoder.encoder is not None:
                # Encoder is fitted
                feature_labels = encoder.encoder.encoded_feature_labels
                X_train = encoder.transform(X_train)
                X_test = encoder.transform(X_test)

        del para_estimator['OneHotEncoding']
        del para_estimator['OneHotEncoding_feature_labels_tofit']

    # Delete the object if we do not need to return it
    if not return_all:
        del encoder

    # ------------------------------------------------------------------------
    # Feature imputation
    if 'Imputation' in para_estimator.keys():
        if para_estimator['Imputation'] == 'True':
            imp_type = para_estimator['ImputationMethod']
            if verbose:
                print(f'Imputing NaN with {imp_type}.')
            imp_nn = para_estimator['ImputationNeighbours']

            imputer = Imputer(missing_values=np.nan,
                              strategy=imp_type,
                              n_neighbors=imp_nn)
            imputer.fit(X_train)

            original_shape = X_train.shape
            X_train = imputer.transform(X_train)
            imputed_shape = X_train.shape
            X_test = imputer.transform(X_test)

            if original_shape != imputed_shape:
                removed_features = original_shape[1] - imputed_shape[1]
                raise ae.WORCValueError(
                    f'Several features ({removed_features}) were np.NaN for all objects. Hence, imputation was not possible. Either make sure this is correct and turn off imputation, or correct the feature.'
                )

        del para_estimator['Imputation']
        del para_estimator['ImputationMethod']
        del para_estimator['ImputationNeighbours']

    # Delete the object if we do not need to return it
    if not return_all:
        del imputer

    # Remove any NaN feature values if these are still left after imputation
    X_train = replacenan(X_train,
                         verbose=verbose,
                         feature_labels=feature_labels[0])
    X_test = replacenan(X_test,
                        verbose=verbose,
                        feature_labels=feature_labels[0])

    # ------------------------------------------------------------------------
    # Groupwise feature selection
    if 'SelectGroups' in para_estimator:
        if verbose:
            print("Selecting groups of features.")
        del para_estimator['SelectGroups']
        # TODO: more elegant way to solve this
        feature_groups = [
            'shape_features', 'histogram_features', 'orientation_features',
            'texture_gabor_features', 'texture_glcm_features',
            'texture_gldm_features', 'texture_glcmms_features',
            'texture_glrlm_features', 'texture_glszm_features',
            'texture_gldzm_features', 'texture_ngtdm_features',
            'texture_ngldm_features', 'texture_lbp_features', 'dicom_features',
            'semantic_features', 'coliage_features', 'vessel_features',
            'phase_features', 'fractal_features', 'location_features',
            'rgrd_features', 'original_features', 'wavelet_features',
            'log_features'
        ]

        # First take out the toolbox selection, which is a list
        toolboxes = para_estimator['toolbox']
        del para_estimator['toolbox']

        # Check per feature group if the parameter is present
        parameters_featsel = dict()
        for group in feature_groups:
            if group not in para_estimator:
                # Default: do use the group, except for texture features
                if group == 'texture_features':
                    value = 'False'
                else:
                    value = 'True'
            else:
                value = para_estimator[group]
                del para_estimator[group]

            parameters_featsel[group] = value

        # Fit groupwise feature selection object
        GroupSel = SelectGroups(parameters=parameters_featsel,
                                toolboxes=toolboxes)
        GroupSel.fit(feature_labels[0])
        if verbose:
            print("\t Original Length: " + str(len(X_train[0])))

        # Transform all objects accordingly
        X_train = GroupSel.transform(X_train)
        X_test = GroupSel.transform(X_test)
        if verbose:
            print("\t New Length: " + str(len(X_train[0])))
        feature_labels = GroupSel.transform(feature_labels)

    # Delete the object if we do not need to return it
    if not return_all:
        del GroupSel

    # Check whether there are any features left
    if len(X_train[0]) == 0:
        # TODO: Make a specific WORC exception for this warning.
        if verbose:
            print(
                '[WARNING]: No features are selected! Probably all feature groups were set to False. Parameters:'
            )
            print(parameters)

        # Delete the non-used fields
        para_estimator = delete_nonestimator_parameters(para_estimator)

        if return_all:
            return ret, GroupSel, VarSel, SelectModel, feature_labels[
                0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
        else:
            return ret

    # ------------------------------------------------------------------------
    # Feature scaling
    if verbose and para_estimator['FeatureScaling'] != 'None':
        print(f'Fitting scaler and transforming features, method ' +
              f'{para_estimator["FeatureScaling"]}.')

    scaling_method = para_estimator['FeatureScaling']
    if scaling_method == 'None':
        scaler = None
    else:
        skip_features = para_estimator['FeatureScaling_skip_features']
        n_skip_feat = len([
            i for i in feature_labels[0] if any(e in i for e in skip_features)
        ])
        if n_skip_feat == len(X_train[0]):
            # Don't need to scale any features
            if verbose:
                print(
                    '[WORC Warning] Skipping scaling, only skip features selected.'
                )
            scaler = None
        else:
            scaler = WORCScaler(method=scaling_method,
                                skip_features=skip_features)
            scaler.fit(X_train, feature_labels[0])

    if scaler is not None:
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

    del para_estimator['FeatureScaling']

    # Delete the object if we do not need to return it
    if not return_all:
        del scaler

    # --------------------------------------------------------------------
    # Feature selection based on variance
    if para_estimator['Featsel_Variance'] == 'True':
        if verbose:
            print("Selecting features based on variance.")
        if verbose:
            print("\t Original Length: " + str(len(X_train[0])))
        try:
            X_train, feature_labels, VarSel =\
                selfeat_variance(X_train, feature_labels)
            X_test = VarSel.transform(X_test)
        except ValueError:
            if verbose:
                print(
                    '[WARNING]: No features meet the selected Variance threshold! Skipping selection.'
                )
        if verbose:
            print("\t New Length: " + str(len(X_train[0])))

    del para_estimator['Featsel_Variance']

    # Delete the object if we do not need to return it
    if not return_all:
        del VarSel

    # Check whether there are any features left
    if len(X_train[0]) == 0:
        # TODO: Make a specific WORC exception for this warning.
        if verbose:
            print(
                '[WARNING]: No features are selected! Probably your features have too little variance. Parameters:'
            )
            print(parameters)
        para_estimator = delete_nonestimator_parameters(para_estimator)

        if return_all:
            return ret, GroupSel, VarSel, SelectModel, feature_labels[
                0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
        else:
            return ret

    # --------------------------------------------------------------------
    # Relief feature selection, possibly multiclass.
    # Needs to be done after scaling!
    # para_estimator['ReliefUse'] = 'True'
    if 'ReliefUse' in para_estimator.keys():
        if para_estimator['ReliefUse'] == 'True':
            if verbose:
                print("Selecting features using relief.")

            # Get parameters from para_estimator
            n_neighbours = para_estimator['ReliefNN']
            sample_size = para_estimator['ReliefSampleSize']
            distance_p = para_estimator['ReliefDistanceP']
            numf = para_estimator['ReliefNumFeatures']

            # Fit RELIEF object
            ReliefSel = SelectMulticlassRelief(n_neighbours=n_neighbours,
                                               sample_size=sample_size,
                                               distance_p=distance_p,
                                               numf=numf,
                                               random_state=random_seed)
            ReliefSel.fit(X_train, y)
            if verbose:
                print("\t Original Length: " + str(len(X_train[0])))

            # Transform all objects accordingly
            X_train = ReliefSel.transform(X_train)
            X_test = ReliefSel.transform(X_test)

            if verbose:
                print("\t New Length: " + str(len(X_train[0])))
            feature_labels = ReliefSel.transform(feature_labels)

        del para_estimator['ReliefUse']
        del para_estimator['ReliefNN']
        del para_estimator['ReliefSampleSize']
        del para_estimator['ReliefDistanceP']
        del para_estimator['ReliefNumFeatures']

    # Delete the object if we do not need to return it
    if not return_all:
        del ReliefSel

    # Check whether there are any features left
    if len(X_train[0]) == 0:
        # TODO: Make a specific WORC exception for this warning.
        if verbose:
            print(
                '[WARNING]: No features are selected! Probably RELIEF could not properly select features. Parameters:'
            )
            print(parameters)
        para_estimator = delete_nonestimator_parameters(para_estimator)

        if return_all:
            return ret, GroupSel, VarSel, SelectModel, feature_labels[
                0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
        else:
            return ret

    # ------------------------------------------------------------------------
    # Perform feature selection using a model
    # para_estimator['SelectFromModel'] = 'True'
    if 'SelectFromModel' in para_estimator.keys(
    ) and para_estimator['SelectFromModel'] == 'True':
        model = para_estimator['SelectFromModel_estimator']
        if verbose:
            print(f"Selecting features using model {model}.")

        if model == 'Lasso':
            # Use lasso model for feature selection
            alpha = para_estimator['SelectFromModel_lasso_alpha']
            selectestimator = Lasso(alpha=alpha)

        elif model == 'LR':
            # Use logistic regression model for feature selection
            selectestimator = LogisticRegression()

        elif model == 'RF':
            # Use random forest model for feature selection
            n_estimators = para_estimator['SelectFromModel_n_trees']
            selectestimator = RandomForestClassifier(n_estimators=n_estimators)
        else:
            raise ae.WORCKeyError(
                f'Model {model} is not known for SelectFromModel. Use Lasso, LR, or RF.'
            )

        # Prefit model
        selectestimator.fit(X_train, y_train)

        # Use fit to select optimal features
        SelectModel = SelectFromModel(selectestimator, prefit=True)
        if verbose:
            print("\t Original Length: " + str(len(X_train[0])))

        X_train_temp = SelectModel.transform(X_train)
        if len(X_train_temp[0]) == 0:
            if verbose:
                print(
                    '[WORC WARNING]: No features are selected! Probably your data is too noisy or the selection too strict. Skipping SelectFromModel.'
                )
            SelectModel = None
            parameters['SelectFromModel'] = 'False'
        else:
            X_train = SelectModel.transform(X_train)
            X_test = SelectModel.transform(X_test)
            feature_labels = SelectModel.transform(feature_labels)

            if verbose:
                print("\t New Length: " + str(len(X_train[0])))

    if 'SelectFromModel' in para_estimator.keys():
        del para_estimator['SelectFromModel']
        del para_estimator['SelectFromModel_lasso_alpha']
        del para_estimator['SelectFromModel_estimator']
        del para_estimator['SelectFromModel_n_trees']

    # Delete the object if we do not need to return it
    if not return_all:
        del SelectModel

    # Check whether there are any features left
    if len(X_train[0]) == 0:
        # TODO: Make a specific WORC exception for this warning.
        if verbose:
            print(
                '[WARNING]: No features are selected! Probably SelectFromModel could not properly select features. Parameters:'
            )
            print(parameters)
        para_estimator = delete_nonestimator_parameters(para_estimator)

        if return_all:
            return ret, GroupSel, VarSel, SelectModel, feature_labels[
                0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
        else:
            return ret

    # ----------------------------------------------------------------
    # PCA dimensionality reduction
    # Principal Component Analysis
    if 'UsePCA' in para_estimator.keys(
    ) and para_estimator['UsePCA'] == 'True':
        if verbose:
            print('Fitting PCA')
            print("\t Original Length: " + str(len(X_train[0])))
        if para_estimator['PCAType'] == '95variance':
            # Select first X components that describe 95 percent of the explained variance
            pca = PCA(n_components=None, random_state=random_seed)
            try:
                pca.fit(X_train)
            except (ValueError, LinAlgError) as e:
                if verbose:
                    print(
                        f'[WARNING]: skipping this setting due to PCA Error: {e}.'
                    )

                if return_all:
                    return ret, GroupSel, VarSel, SelectModel, feature_labels[
                        0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
                else:
                    return ret

            evariance = pca.explained_variance_ratio_
            num = 0
            sum = 0
            while sum < 0.95:
                sum += evariance[num]
                num += 1

            # Make a PCA based on the determined number of components
            pca = PCA(n_components=num, random_state=random_seed)
            try:
                pca.fit(X_train)
            except (ValueError, LinAlgError) as e:
                if verbose:
                    print(
                        f'[WARNING]: skipping this setting due to PCA Error: {e}.'
                    )

                if return_all:
                    return ret, GroupSel, VarSel, SelectModel, feature_labels[
                        0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
                else:
                    return ret

            X_train = pca.transform(X_train)
            X_test = pca.transform(X_test)

        else:
            # Assume a fixed number of components: cannot be larger than
            # n_samples
            n_components = min(len(X_train), int(para_estimator['PCAType']))

            if n_components >= len(X_train[0]):
                if verbose:
                    print(
                        f"[WORC WARNING] PCA n_components ({n_components})> n_features ({len(X_train[0])}): skipping PCA."
                    )
            else:
                pca = PCA(n_components=n_components, random_state=random_seed)
                pca.fit(X_train)
                X_train = pca.transform(X_train)
                X_test = pca.transform(X_test)

        if verbose:
            print("\t New Length: " + str(len(X_train[0])))

    # Delete the object if we do not need to return it
    if not return_all:
        del pca

    if 'UsePCA' in para_estimator.keys():
        del para_estimator['UsePCA']
        del para_estimator['PCAType']

    # --------------------------------------------------------------------
    # Feature selection based on a statistical test
    if 'StatisticalTestUse' in para_estimator.keys():
        if para_estimator['StatisticalTestUse'] == 'True':
            metric = para_estimator['StatisticalTestMetric']
            threshold = para_estimator['StatisticalTestThreshold']
            if verbose:
                print(
                    f"Selecting features based on statistical test. Method {metric}, threshold {round(threshold, 5)}."
                )
                print("\t Original Length: " + str(len(X_train[0])))

            StatisticalSel = StatisticalTestThreshold(metric=metric,
                                                      threshold=threshold)

            StatisticalSel.fit(X_train, y)
            X_train_temp = StatisticalSel.transform(X_train)
            if len(X_train_temp[0]) == 0:
                if verbose:
                    print(
                        '[WORC WARNING]: No features are selected! Probably your statistical test feature selection was too strict. Skipping thresholding.'
                    )
                StatisticalSel = None
                parameters['StatisticalTestUse'] = 'False'
            else:
                X_train = StatisticalSel.transform(X_train)
                X_test = StatisticalSel.transform(X_test)
                feature_labels = StatisticalSel.transform(feature_labels)

            if verbose:
                print("\t New Length: " + str(len(X_train[0])))

        del para_estimator['StatisticalTestUse']
        del para_estimator['StatisticalTestMetric']
        del para_estimator['StatisticalTestThreshold']

    # Delete the object if we do not need to return it
    if not return_all:
        del StatisticalSel

    # ------------------------------------------------------------------------
    # Use object resampling
    if 'Resampling_Use' in para_estimator.keys():
        if para_estimator['Resampling_Use'] == 'True':

            # Determine our starting balance
            pos_initial = int(np.sum(y_train))
            neg_initial = int(len(y_train) - pos_initial)
            len_in = len(y_train)

            # Fit ObjectSampler and transform dataset
            # NOTE: need to save random state for this one as well!
            Sampler =\
                ObjectSampler(method=para_estimator['Resampling_Method'],
                              sampling_strategy=para_estimator['Resampling_sampling_strategy'],
                              n_jobs=para_estimator['Resampling_n_cores'],
                              n_neighbors=para_estimator['Resampling_n_neighbors'],
                              k_neighbors=para_estimator['Resampling_k_neighbors'],
                              threshold_cleaning=para_estimator['Resampling_threshold_cleaning'],
                              verbose=verbose)

            try:
                Sampler.fit(X_train, y_train)
                X_train_temp, y_train_temp = Sampler.transform(
                    X_train, y_train)

            except ae.WORCValueError as e:
                message = str(e)
                if verbose:
                    print('[WORC WARNING] Skipping resampling: ' + message)
                Sampler = None
                parameters['Resampling_Use'] = 'False'

            except RuntimeError as e:
                if 'ADASYN is not suited for this specific dataset. Use SMOTE instead.' in str(
                        e):
                    # Seldomly occurs, therefore return performance dummy
                    if verbose:
                        print(
                            f'[WARNING]: {e}. Returning dummies. Parameters: ')
                        print(parameters)
                    para_estimator = delete_nonestimator_parameters(
                        para_estimator)

                    if return_all:
                        return ret, GroupSel, VarSel, SelectModel, feature_labels[
                            0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
                    else:
                        return ret
                else:
                    raise e
            else:
                pos = int(np.sum(y_train_temp))
                neg = int(len(y_train_temp) - pos)
                if pos < 10 or neg < 10:
                    if verbose:
                        print(
                            f'[WORC WARNING] Skipping resampling: too few objects returned in one or both classes (pos: {pos}, neg: {neg}).'
                        )
                    Sampler = None
                    parameters['Resampling_Use'] = 'False'
                else:
                    X_train = X_train_temp
                    y_train = y_train_temp

                    # Notify the user what the resampling did
                    pos = int(np.sum(y_train))
                    neg = int(len(y_train) - pos)
                    if verbose:
                        message = f"Resampling from {len_in} ({pos_initial} pos," +\
                                  f" {neg_initial} neg) to {len(y_train)} ({pos} pos, {neg} neg) patients."
                        print(message)

                    # Also reset train and test indices
                    train = np.arange(0, len(y_train))
                    test = np.arange(len(y_train), len(y_train) + len(y_test))

        del para_estimator['Resampling_Use']
        del para_estimator['Resampling_Method']
        del para_estimator['Resampling_sampling_strategy']
        del para_estimator['Resampling_n_neighbors']
        del para_estimator['Resampling_k_neighbors']
        del para_estimator['Resampling_threshold_cleaning']
        del para_estimator['Resampling_n_cores']

    # Delete the object if we do not need to return it
    if not return_all:
        del Sampler

    # ----------------------------------------------------------------
    # Fitting and scoring
    # Only when using fastr this is an entry
    if 'Number' in para_estimator.keys():
        del para_estimator['Number']

    # For certainty, we delete all parameters again
    para_estimator = delete_nonestimator_parameters(para_estimator)

    # NOTE: This should really be moved into the construct_classifier function,
    # although it is more convenient here due to the hyperparameter search.
    if type(y) is list:
        labellength = 1
    else:
        try:
            labellength = y.shape[1]
        except IndexError:
            labellength = 1

    if labellength > 1 and type(estimator) not in [
            RankedSVM, RandomForestClassifier
    ]:
        # Multiclass, hence employ a multiclass classifier for e.g. SVM, LR
        estimator.set_params(**para_estimator)
        estimator = OneVsRestClassifier(estimator)

    if verbose:
        print(f"Fitting ML method: {parameters['classifiers']}.")

    # Recombine feature values and label for train and test set
    feature_values = np.concatenate((X_train, X_test), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)
    para_estimator = None

    try:
        ret = _fit_and_score(estimator,
                             feature_values,
                             y,
                             scorers,
                             train,
                             test,
                             verbose,
                             para_estimator,
                             fit_params,
                             return_train_score=return_train_score,
                             return_parameters=return_parameters,
                             return_n_test_samples=return_n_test_samples,
                             return_times=return_times,
                             return_estimator=return_estimator,
                             error_score=error_score)
    except (ValueError, LinAlgError) as e:
        if type(estimator) == LDA:
            if verbose:
                print(
                    f'[WARNING]: skipping this setting due to LDA Error: {e}.')

            if return_all:
                return ret, GroupSel, VarSel, SelectModel, feature_labels[
                    0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
            else:
                return ret
        else:
            raise e

    # Add original parameters to return object
    ret.append(parameters)

    if return_all:
        return ret, GroupSel, VarSel, SelectModel, feature_labels[
            0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
    else:
        return ret
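
# fit_and_score expects X as a list with one (feature_values, feature_labels)
# tuple per object, matching the unpacking at the top of the function. A
# hypothetical sketch of that packing; the feature names and label values are
# placeholders, and the call itself is left commented out because `parameters`
# must be a full WORC hyperparameter dictionary drawn from the search space:
import numpy as np

example_feature_labels = ['sf_volume', 'hf_mean', 'tf_GLCM_contrast']
example_X = [(np.random.rand(len(example_feature_labels)), example_feature_labels)
             for _ in range(20)]
example_y = np.random.randint(0, 2, size=20)
example_train, example_test = list(range(15)), list(range(15, 20))
# ret = fit_and_score(example_X, example_y, 'f1_weighted',
#                     example_train, example_test, parameters)
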
def crossval(config, label_data, image_features,
             param_grid=None, use_fastr=False,
             fastr_plugin=None, tempsave=False,
             fixedsplits=None, ensemble={'Use': False}, outputfolder=None,
             modus='singlelabel'):
    """
    Constructs multiple individual classifiers based on the label settings

    Parameters
    ----------
    config: dict, mandatory
            Dictionary with config settings. See the Github Wiki for the
            available fields and formatting.

    label_data: dict, mandatory
            Should contain the following:
            patient_IDs (list): IDs of the patients, used to keep track of test and
                     training sets, and label data
            label (list): List of lists, where each list contains the
                                   label status for that patient for each
                                   label
            label_name (list): Contains the different names that are stored
                                  in the label object

    image_features: numpy array, mandatory
            Consists of a tuple of two lists for each patient:
            (feature_values, feature_labels)

    param_grid: dictionary, optional
            Contains the parameters and their values which are used in the
            grid or randomized search hyperparameter optimization. See the
            construct_classifier function for some examples.

    use_fastr: boolean, default False
            If False, parallel execution through Joblib is used for fast
            execution of the hyperparameter optimization. Especially suited
            for execution on multicore (H)PCs. The settings used are
            specified in the config.ini file in the IOparser folder, which you
            can adjust to your system.

            If True, fastr is used to split the hyperparameter optimization in
            separate jobs. Parameters for the splitting can be specified in the
            config file. Especially suited for clusters.

    fastr_plugin: string, default None
            Determines which plugin is used for fastr executions.
            When None, uses the default plugin from the fastr config.

    tempsave: boolean, default False
            If True, create a .hdf5 file after each cross-validation containing
            the classifier and results from that split. This is written to
            the GSOut folder in your fastr output mount. If False, only
            the result of all combined cross validations will be saved to a .hdf5
            file. This will also be done if set to True.

    fixedsplits: string, optional
            By default, random split cross validation is used to train and
            evaluate the machine learning methods. Optionally, you can provide
            a .xlsx file containing fixed splits to be used. See the Github Wiki
            for the format.

    ensemble: dictionary, optional
            Contains the configuration for constructing an ensemble.

    modus: string, default 'singlelabel'
            Determine whether one-vs-all classification (or regression) for
            each single label is used ('singlelabel') or if multilabel
            classification is performed ('multilabel').

    Returns
    ----------
    panda_data: pandas dataframe
            Contains all information on the trained classifier.

    """
    if tempsave:
        import fastr


    # Define all possible regressors
    regressors = ['SVR', 'RFR', 'SGDR', 'Lasso', 'ElasticNet']

    # Process input data
    patient_IDs = label_data['patient_IDs']
    label_value = label_data['label']
    label_name = label_data['label_name']

    if outputfolder is None:
        logfilename = os.path.join(os.getcwd(), 'classifier.log')
    else:
        logfilename = os.path.join(outputfolder, 'classifier.log')
    print("Logging to file " + str(logfilename))

    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)

    logging.basicConfig(filename=logfilename, level=logging.DEBUG)
    N_iterations = config['CrossValidation']['N_iterations']
    test_size = config['CrossValidation']['test_size']

    classifier_labelss = dict()
    logging.debug('Starting classifier')

    # We only need one instance of the feature labels, assuming they are all the same
    feature_labels = image_features[0][1]

    # Check if we need to use fixedsplits:
    if fixedsplits is not None and '.xlsx' in fixedsplits:
        # fixedsplits = '/home/mstarmans/Settings/RandomSufflingOfData.xlsx'
        wb = xlrd.open_workbook(fixedsplits)
        wb = wb.sheet_by_index(1)

    if modus == 'singlelabel':
        print('Performing Single class classification.')
        logging.debug('Performing Single class classification.')
    elif modus == 'multilabel':
        print('Performing Multi label classification.')
        logging.debug('Performing Multi label classification.')
        label_value = [label_value]
        label_name = [label_name]
    else:
        m = ('{} is not a valid modus!').format(modus)
        logging.debug(m)
        raise ae.WORCKeyError(m)

    for i_class, i_name in zip(label_value, label_name):
        if modus == 'singlelabel':
            i_class_temp = i_class.ravel()

        save_data = list()

        for i in range(0, N_iterations):
            print(('Cross validation iteration {} / {} .').format(str(i + 1), str(N_iterations)))
            logging.debug(('Cross validation iteration {} / {} .').format(str(i + 1), str(N_iterations)))
            random_seed = np.random.randint(5000)

            # Split into test and training set, where the percentage of each
            # label is maintained
            if any(clf in regressors for clf in param_grid['classifiers']):
                # We cannot do a stratified shuffle split with regression
                stratify = None
            else:
                if modus == 'singlelabel':
                    stratify = i_class_temp
                elif modus == 'multilabel':
                    # Create a stratification array from the labels:
                    # a value of 0 means that none of the labels equals 1,
                    # other values are the (1-based) index of the label that is 1
                    stratify = list()
                    for pnum in range(0, len(i_class[0])):
                        plabel = 0
                        for lnum, slabel in enumerate(i_class):
                            if slabel[pnum] == 1:
                                plabel = lnum + 1
                        stratify.append(plabel)

                    # Sklearn multiclass requires rows to be objects/patients
                    # i_class = i_class.reshape(i_class.shape[1], i_class.shape[0])
                    i_class_temp = np.zeros((i_class.shape[1], i_class.shape[0]))
                    for n_patient in range(0, i_class.shape[1]):
                        for n_label in range(0, i_class.shape[0]):
                            i_class_temp[n_patient, n_label] = i_class[n_label, n_patient]
                else:
                    raise ae.WORCKeyError(f'{modus} is not a valid modus!')

            if fixedsplits is None:
                # Use Random Split. Split per patient, not per sample
                unique_patient_IDs, unique_indices =\
                    np.unique(np.asarray(patient_IDs), return_index=True)
                if any(clf in regressors for clf in param_grid['classifiers']):
                    unique_stratify = None
                else:
                    unique_stratify = [stratify[i] for i in unique_indices]

                try:
                    unique_PID_train, unique_PID_test\
                        = train_test_split(unique_patient_IDs,
                                           test_size=test_size,
                                           random_state=random_seed,
                                           stratify=unique_stratify)
                except ValueError as e:
                    e = str(e) + ' Increase the size of your validation set.'
                    raise ae.WORCValueError(e)

                # Check for all IDs if they are in test or training
                indices_train = list()
                indices_test = list()
                patient_ID_train = list()
                patient_ID_test = list()
                for num, pid in enumerate(patient_IDs):
                    if pid in unique_PID_train:
                        indices_train.append(num)

                        # Make sure we get a unique ID
                        if pid in patient_ID_train:
                            n = 1
                            while str(pid + '_' + str(n)) in patient_ID_train:
                                n += 1
                            pid = str(pid + '_' + str(n))
                        patient_ID_train.append(pid)
                    else:
                        indices_test.append(num)

                        # Make sure we get a unique ID
                        if pid in patient_ID_test:
                            n = 1
                            while str(pid + '_' + str(n)) in patient_ID_test:
                                n += 1
                            pid = str(pid + '_' + str(n))
                        patient_ID_test.append(pid)

                # Split features and labels accordingly
                X_train = [image_features[i] for i in indices_train]
                X_test = [image_features[i] for i in indices_test]
                if modus == 'singlelabel':
                    Y_train = i_class_temp[indices_train]
                    Y_test = i_class_temp[indices_test]
                elif modus == 'multilabel':
                    Y_train = i_class_temp[indices_train, :]
                    Y_test = i_class_temp[indices_test, :]
                else:
                    raise ae.WORCKeyError(f'{modus} is not a valid modus!')

            else:
                # Use pre defined splits
                indices = wb.col_values(i)
                indices = [int(j) for j in indices[1:]]  # First element is "Iteration x"
                # Hard-coded split for the predefined splits file: the first 121
                # entries are training, the remainder are test
                train = indices[0:121]
                test = indices[121:]

                # Convert the numbers to the correct indices
                ind_train = list()
                for j in train:
                    success = False
                    for num, p in enumerate(patient_IDs):
                        if str(j).zfill(3) == p[0:3]:
                            ind_train.append(num)
                            success = True
                    if not success:
                        raise ae.WORCIOError("Patient " + str(j).zfill(3) + " is not included!")

                ind_test = list()
                for j in test:
                    success = False
                    for num, p in enumerate(patient_IDs):
                        if str(j).zfill(3) == p[0:3]:
                            ind_test.append(num)
                            success = True
                    if not success:
                        raise ae.WORCIOError("Patient " + str(j).zfill(3) + " is not included!")

                X_train = np.asarray(image_features)[ind_train].tolist()
                Y_train = np.asarray(i_class_temp)[ind_train].tolist()
                patient_ID_train = patient_IDs[ind_train]
                X_test = np.asarray(image_features)[ind_test].tolist()
                Y_test = np.asarray(i_class_temp)[ind_test].tolist()
                patient_ID_test = patient_IDs[ind_test]

            # Find best hyperparameters and construct classifier
            config['HyperOptimization']['use_fastr'] = use_fastr
            config['HyperOptimization']['fastr_plugin'] = fastr_plugin
            n_cores = config['General']['Joblib_ncores']
            trained_classifier = random_search_parameters(features=X_train,
                                                             labels=Y_train,
                                                             param_grid=param_grid,
                                                             n_cores=n_cores,
                                                             **config['HyperOptimization'])

            # Create an ensemble if required
            if ensemble['Use']:
                trained_classifier.create_ensemble(X_train, Y_train)

            # We only want to save the feature values and one label array
            X_train = [x[0] for x in X_train]
            X_test = [x[0] for x in X_test]

            temp_save_data = (trained_classifier, X_train, X_test, Y_train,
                              Y_test, patient_ID_train, patient_ID_test, random_seed)

            save_data.append(temp_save_data)

            # Create a temporary save
            if tempsave:
                panda_labels = ['trained_classifier', 'X_train', 'X_test', 'Y_train', 'Y_test',
                                'config', 'patient_ID_train', 'patient_ID_test',
                                'random_seed']

                panda_data_temp =\
                    pd.Series([trained_classifier, X_train, X_test, Y_train,
                               Y_test, config, patient_ID_train,
                               patient_ID_test, random_seed],
                              index=panda_labels,
                              name='Constructed crossvalidation')

                panda_data = pd.DataFrame(panda_data_temp)
                n = 0
                filename = os.path.join(fastr.config.mounts['tmp'], 'GSout', 'RS_' + str(i) + '.hdf5')
                while os.path.exists(filename):
                    n += 1
                    filename = os.path.join(fastr.config.mounts['tmp'], 'GSout', 'RS_' + str(i + n) + '.hdf5')

                if not os.path.exists(os.path.dirname(filename)):
                    os.makedirs(os.path.dirname(filename))

                panda_data.to_hdf(filename, 'SVMdata')
                del panda_data, panda_data_temp

        [classifiers, X_train_set, X_test_set, Y_train_set, Y_test_set,
         patient_ID_train_set, patient_ID_test_set, seed_set] =\
            zip(*save_data)

        panda_labels = ['classifiers', 'X_train', 'X_test', 'Y_train', 'Y_test',
                        'config', 'patient_ID_train', 'patient_ID_test',
                        'random_seed', 'feature_labels']

        panda_data_temp =\
            pd.Series([classifiers, X_train_set, X_test_set, Y_train_set,
                       Y_test_set, config, patient_ID_train_set,
                       patient_ID_test_set, seed_set, feature_labels],
                      index=panda_labels,
                      name='Constructed crossvalidation')

        if modus == 'singlelabel':
            i_name = ''.join(i_name)
        elif modus == 'multilabel':
            i_name = ','.join(i_name)

        classifier_labelss[i_name] = panda_data_temp

    panda_data = pd.DataFrame(classifier_labelss)

    return panda_data
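# A minimal, standalone sketch of the patient-level split strategy used in the
# cross-validation loop above: split the *unique* patient IDs (stratified on
# their label) and then map every sample back to train or test, so repeated
# samples of one patient never end up on both sides. Assumes numpy and
# scikit-learn are available; the toy IDs and labels are illustrative only.
import numpy as np
from sklearn.model_selection import train_test_split


def patient_level_split(patient_IDs, labels, test_size=0.2, random_seed=42):
    """Split sample indices such that all samples of a patient stay together."""
    patient_IDs = np.asarray(patient_IDs)
    unique_IDs, unique_indices = np.unique(patient_IDs, return_index=True)
    unique_stratify = [labels[i] for i in unique_indices]

    train_IDs, _ = train_test_split(unique_IDs,
                                    test_size=test_size,
                                    random_state=random_seed,
                                    stratify=unique_stratify)

    indices_train = [i for i, pid in enumerate(patient_IDs) if pid in train_IDs]
    indices_test = [i for i, pid in enumerate(patient_IDs) if pid not in train_IDs]
    return indices_train, indices_test


# Toy example: patient 'P1' has two samples; both land in the same partition.
print(patient_level_split(['P1', 'P1', 'P2', 'P3', 'P4', 'P5'],
                          [0, 0, 1, 0, 1, 1], test_size=0.4))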
def load_config(config_file_path):
    """ Parse a segmentix configuration file.

    Arguments:
        config_file_path: path to the configuration file to be parsed.

    Returns:
        settings_dict: dictionary containing all parsed settings.
    """
    if not os.path.exists(config_file_path):
        e = f'File {config_file_path} does not exist!'
        raise ae.WORCKeyError(e)

    settings = configparser.ConfigParser()
    settings.read(config_file_path)

    settings_dict = {'Segmentix': dict(), 'Preprocessing': dict()}

    # Segmentation settings
    settings_dict['Segmentix']['type'] =\
        str(settings['Segmentix']['segtype'])

    settings_dict['Segmentix']['mask'] =\
        str(settings['Segmentix']['mask'])

    settings_dict['Segmentix']['radius'] =\
        int(settings['Segmentix']['segradius'])

    settings_dict['Segmentix']['N_blobs'] =\
        int(settings['Segmentix']['N_blobs'])

    settings_dict['Segmentix']['fillholes'] =\
        settings['Segmentix'].getboolean('fillholes')

    settings_dict['Segmentix']['remove_small_objects'] =\
        settings['Segmentix'].getboolean('remove_small_objects')

    settings_dict['Segmentix']['min_object_size'] =\
        int(settings['Segmentix']['min_object_size'])

    settings_dict['Segmentix']['AssumeSameImageAndMaskMetadata'] =\
        settings['General'].getboolean('AssumeSameImageAndMaskMetadata')

    # Check spacing
    settings_dict['Preprocessing']['CheckSpacing'] =\
        settings['Preprocessing'].getboolean('CheckSpacing')

    # Re-orientation
    settings_dict['Preprocessing']['CheckOrientation'] =\
        settings['Preprocessing'].getboolean('CheckOrientation')

    settings_dict['Preprocessing']['OrientationPrimaryAxis'] =\
        str(settings['Preprocessing']['OrientationPrimaryAxis'])

    # Resampling
    settings_dict['Preprocessing']['Resampling'] =\
        settings['Preprocessing'].getboolean('Resampling')

    settings_dict['Preprocessing']['Resampling_spacing'] =\
        [float(item) for item in
         settings['Preprocessing']['Resampling_spacing'].split(',')]

    if len(settings_dict['Preprocessing']['Resampling_spacing']) != 3:
        s = settings_dict['Preprocessing']['Resampling_spacing']
        raise ae.WORCValueError(
            f'Resampling spacing should be three elements, got {s}')

    return settings_dict
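# Illustrative usage of the segmentix load_config above: write a minimal INI
# file containing only the keys this parser reads, then parse it. The option
# values are made-up defaults, not the package's shipped configuration, and the
# sketch assumes the segmentix load_config defined directly above is in scope.
import tempfile

_example_segmentix_ini = """
[General]
AssumeSameImageAndMaskMetadata = True

[Segmentix]
segtype = None
mask = subtract
segradius = 5
N_blobs = 1
fillholes = True
remove_small_objects = False
min_object_size = 2

[Preprocessing]
CheckSpacing = False
CheckOrientation = False
OrientationPrimaryAxis = axial
Resampling = False
Resampling_spacing = 1, 1, 1
"""

with tempfile.NamedTemporaryFile('w', suffix='.ini', delete=False) as f:
    f.write(_example_segmentix_ini)
    _example_path = f.name

print(load_config(_example_path)['Segmentix'])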
def load_config(config_file_path):
    """ Parse a WORC configuration file.

    Arguments:
        config_file_path: path to the configuration file to be parsed.

    Returns:
        settings_dict: dictionary containing all parsed settings.
    """
    if not os.path.exists(config_file_path):
        e = f'File {config_file_path} does not exist!'
        raise ae.WORCKeyError(e)

    settings = configparser.ConfigParser()
    settings.read(config_file_path)

    settings_dict = {
        'Preprocessing': dict(),
        'ImageFeatures': dict(),
        'General': dict()
    }

    # General settings
    settings_dict['ImageFeatures']['image_type'] =\
        [str(item).strip() for item in
         settings['ImageFeatures']['image_type'].split(',')]

    settings_dict['General']['AssumeSameImageAndMaskMetadata'] =\
        settings['General'].getboolean('AssumeSameImageAndMaskMetadata')

    # Detect incorrect spacing
    settings_dict['Preprocessing']['CheckSpacing'] =\
        settings['Preprocessing'].getboolean('CheckSpacing')

    # Clipping
    settings_dict['Preprocessing']['Clipping'] =\
        settings['Preprocessing'].getboolean('Clipping')

    settings_dict['Preprocessing']['Clipping_Range'] =\
        [float(item) for item in
         settings['Preprocessing']['Clipping_Range'].split(',')]

    if len(settings_dict['Preprocessing']['Clipping_Range']) != 2:
        raise ae.WORCValueError(
            f"Clipping range should be two floats split by a comma, got {settings['Preprocessing']['Clipping_Range']}."
        )

    # Normalization
    settings_dict['Preprocessing']['Normalize'] =\
        settings['Preprocessing'].getboolean('Normalize')

    settings_dict['Preprocessing']['Normalize_ROI'] =\
        str(settings['Preprocessing']['Normalize_ROI'])

    settings_dict['Preprocessing']['ROIdilate'] =\
        str(settings['Preprocessing']['ROIdilate'])

    settings_dict['Preprocessing']['ROIDetermine'] =\
        str(settings['Preprocessing']['ROIDetermine'])

    settings_dict['Preprocessing']['ROIdilateradius'] =\
        int(settings['Preprocessing']['ROIdilateradius'])

    settings_dict['Preprocessing']['Method'] =\
        str(settings['Preprocessing']['Method'])

    # Bias Correction
    settings_dict['Preprocessing']['BiasCorrection'] =\
        settings['Preprocessing'].getboolean('BiasCorrection')

    settings_dict['Preprocessing']['BiasCorrection_Mask'] =\
        settings['Preprocessing'].getboolean('BiasCorrection_Mask')

    # Re-orientation
    settings_dict['Preprocessing']['CheckOrientation'] =\
        settings['Preprocessing'].getboolean('CheckOrientation')

    settings_dict['Preprocessing']['OrientationPrimaryAxis'] =\
        str(settings['Preprocessing']['OrientationPrimaryAxis'])

    # Resampling
    settings_dict['Preprocessing']['Resampling'] =\
        settings['Preprocessing'].getboolean('Resampling')

    settings_dict['Preprocessing']['Resampling_spacing'] =\
        [float(item) for item in
         settings['Preprocessing']['Resampling_spacing'].split(',')]

    if len(settings_dict['Preprocessing']['Resampling_spacing']) != 3:
        s = settings_dict['Preprocessing']['Resampling_spacing']
        raise ae.WORCValueError(
            f'Resampling spacing should be three elements, got {s}')

    return settings_dict
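# The WORC load_config above repeats one pattern several times: split a
# comma-separated option into floats and check its length (Clipping_Range needs
# two values, Resampling_spacing needs three). A small helper sketch of that
# pattern; the helper name is hypothetical and it assumes ae.WORCValueError is
# the appropriate error type, as used above.
def parse_float_list(settings, section, key, expected_length):
    """Parse a comma-separated option into floats and validate its length."""
    values = [float(item) for item in settings[section][key].split(',')]
    if len(values) != expected_length:
        raise ae.WORCValueError(
            f'{section}/{key} should contain {expected_length} '
            f'comma-separated values, got {values}.')
    return values

# Hypothetical usage inside load_config:
#   settings_dict['Preprocessing']['Clipping_Range'] =\
#       parse_float_list(settings, 'Preprocessing', 'Clipping_Range', 2)
#   settings_dict['Preprocessing']['Resampling_spacing'] =\
#       parse_float_list(settings, 'Preprocessing', 'Resampling_spacing', 3)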
Ejemplo n.º 11
def findlabeldata(patientinfo, label_type, filenames=None,
                  objects=None, pids=None):
    """
    Load the label data and match it to the image features.

    Args:
        patientinfo (string): file with patient label data
        label_type (string): name of the label to read from patientinfo
        filenames (list): names of the patient feature files, used for matching
        objects (np.array or list): array of objects to order in the same way
        pids (list): patient IDs, used for matching when no filenames are given

    Returns:
        label_data (dict): contains patient IDs, their labels and the label name
        objects_out (list): the input objects, ordered to match label_data
    """
    # Get the labels and patient IDs
    label_data_temp = load_labels(patientinfo, label_type)
    label_data = dict()
    patient_IDs = list()
    label_value = list()
    for i_len in range(len(label_data_temp['label_name'])):
        label_value.append(list())

    # Check per feature file / pid if there is a match in the label data
    if filenames:
        iterator = filenames
    elif pids:
        iterator = pids
    else:
        raise ae.WORCValueError('Either input pids or filenames for label matching!')

    objects_out = list()
    for i_feat, feat in enumerate(iterator):
        ifound = 0
        matches = list()
        for i_num, i_patient in enumerate(label_data_temp['patient_IDs']):
            if i_patient.lower() in str(feat).lower():

                # Match: add the patient ID to the IDs and to the matches
                patient_IDs.append(i_patient)
                matches.append(i_patient)

                # If there are feature files given, add it to the list
                if objects is not None:
                    objects_out.append(objects[i_feat])

                # For each label that we have, add the value to the label list
                for i_len in range(len(label_data_temp['label_name'])):
                    label_value[i_len].append(label_data_temp['label'][i_len][i_num])

                # Calculate how many matches we found for this (feature) file: should be one
                ifound += 1

        if ifound > 1:
            message = ('Multiple matches ({}) found in labeling for feature file {}.').format(str(matches), str(feat))
            raise ae.WORCValueError(message)

        elif ifound == 0:
            message = ('No entry found in labeling for feature file {}.').format(str(feat))
            raise ae.WORCKeyError(message)

    # Convert to arrays
    for i_len in range(len(label_value)):
        label_value[i_len] = np.asarray(label_value[i_len])

    label_data['patient_IDs'] = np.asarray(patient_IDs)
    label_data['label'] = np.asarray(label_value)
    label_data['label_name'] = label_data_temp['label_name']

    return label_data, objects_out
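# A small runnable illustration of the matching rule findlabeldata relies on:
# a patient ID from the label file matches a feature file when the lower-cased
# ID is a substring of the file name, and exactly one match per file is
# required. The toy IDs and file names below are illustrative only.
toy_patient_IDs = ['Patient-001', 'Patient-002']
toy_filenames = ['features_patient-001_MR.hdf5', 'features_patient-002_MR.hdf5']

for fname in toy_filenames:
    matches = [pid for pid in toy_patient_IDs if pid.lower() in fname.lower()]
    # findlabeldata raises an error for zero matches or for multiple matches
    assert len(matches) == 1, f'Expected exactly one match for {fname}, got {matches}'
    print(f'{fname} -> {matches[0]}')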
def combine_multiple_estimators(predictions,
                                label_data,
                                multilabel_type,
                                label_types,
                                ensemble=1,
                                strategy='argmax',
                                alpha=0.95):
    '''
    Combine multiple estimators in a single model.

    Note: the multilabel_type labels should correspond to the ordering in label_types.
    Hence, if multilabel_type = 0, the prediction is label_type[0] etc.
    '''

    # Load the multilabel label data
    label_data = lp.load_labels(label_data, multilabel_type)
    patient_IDs = label_data['patient_IDs']
    labels = label_data['label']

    # Initialize some objects
    y_truths = list()
    y_scores = list()
    y_predictions = list()
    pids = list()

    y_truths_train = list()
    y_scores_train = list()
    y_predictions_train = list()
    pids_train = list()

    accuracy = list()
    sensitivity = list()
    specificity = list()
    auc = list()
    f1_score_list = list()
    precision = list()
    npv = list()
    acc_av = list()

    # Extract all the predictions from the estimators
    for prediction, label_type in zip(predictions, label_types):
        y_truth, y_score, y_prediction, pid,\
            y_truth_train, y_score_train, y_prediction_train, pid_train =\
            plot_estimator_performance(prediction, label_data, label_type,
                                       ensemble=ensemble, output='allscores')
        y_truths.append(y_truth)
        y_scores.append(y_score)
        y_predictions.append(y_prediction)
        pids.append(pid)

        y_truths_train.append(y_truth_train)
        y_scores_train.append(y_score_train)
        y_predictions_train.append(y_prediction_train)
        pids_train.append(pid_train)

    # Combine the predictions
    for i_crossval in range(0, len(y_truths[0])):
        # Extract all values for this cross validation iteration from all objects
        y_truth = [t[i_crossval] for t in y_truths]
        y_score = [t[i_crossval] for t in y_scores]
        pid = [t[i_crossval] for t in pids]

        if strategy == 'argmax':
            # For each patient, take the maximum posterior
            y_prediction = np.argmax(y_score, axis=0)
            y_score = np.max(y_score, axis=0)
        elif strategy == 'decisiontree':
            # Fitting a decision tree on the training set is not implemented yet
            raise NotImplementedError(
                "The 'decisiontree' combining strategy is not implemented yet.")
        else:
            raise ae.WORCValueError(
                f"{strategy} is not a valid estimation combining strategy! Should be one of [argmax]."
            )

        # Compute multilabel performance metrics
        y_truth = np.argmax(y_truth, axis=0)
        accuracy_temp, sensitivity_temp, specificity_temp, \
            precision_temp, npv_temp, f1_score_temp, auc_temp, acc_av_temp = \
            metrics.performance_multilabel(y_truth,
                                           y_prediction,
                                           y_score)

        print("Truth: " + str(y_truth))
        print("Prediction: " + str(y_prediction))
        print('AUC: ' + str(auc_temp))

        # Append performance to lists for all cross validations
        accuracy.append(accuracy_temp)
        sensitivity.append(sensitivity_temp)
        specificity.append(specificity_temp)
        auc.append(auc_temp)
        f1_score_list.append(f1_score_temp)
        precision.append(precision_temp)
        npv.append(npv_temp)
        acc_av.append(acc_av_temp)

    # Extract sample size from the first estimator's first cross-validation
    # split (assumes pids_train / pids are nested per estimator, per iteration)
    N_1 = float(len(pids_train[0][0]))
    N_2 = float(len(pids[0][0]))

    # Compute confidence intervals
    stats = dict()
    stats[
        "Accuracy 95%:"] = f"{np.nanmean(accuracy)} {str(compute_confidence(accuracy, N_1, N_2, alpha))}"
    stats[
        "Average Accuracy 95%:"] = f"{np.nanmean(acc_av)} {str(compute_confidence(acc_av, N_1, N_2, alpha))}"
    stats[
        "AUC 95%:"] = f"{np.nanmean(auc)} {str(compute_confidence(auc, N_1, N_2, alpha))}"
    stats[
        "F1-score 95%:"] = f"{np.nanmean(f1_score_list)} {str(compute_confidence(f1_score_list, N_1, N_2, alpha))}"
    stats[
        "Precision 95%:"] = f"{np.nanmean(precision)} {str(compute_confidence(precision, N_1, N_2, alpha))}"
    stats[
        "NPV 95%:"] = f"{np.nanmean(npv)} {str(compute_confidence(npv, N_1, N_2, alpha))}"
    stats[
        "Sensitivity 95%:"] = f"{np.nanmean(sensitivity)} {str(compute_confidence(sensitivity, N_1, N_2, alpha))}"
    stats[
        "Specificity 95%:"] = f"{np.nanmean(specificity)} {str(compute_confidence(specificity, N_1, N_2, alpha))}"

    # Print all CI's
    stats = OrderedDict(sorted(stats.items()))
    for k, v in stats.items():
        print(f"{k} : {v}.")

    return stats
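# A runnable toy illustration of the 'argmax' combining strategy used above:
# each row holds one estimator's posteriors for all patients, and the combined
# prediction per patient is the estimator (i.e. label) with the highest
# posterior. The numbers are made up for illustration.
import numpy as np

toy_scores = np.array([[0.90, 0.20, 0.10, 0.40],   # posteriors of estimator/label 0
                       [0.05, 0.70, 0.30, 0.50],   # posteriors of estimator/label 1
                       [0.05, 0.10, 0.60, 0.10]])  # posteriors of estimator/label 2

combined_prediction = np.argmax(toy_scores, axis=0)  # label index per patient
combined_score = np.max(toy_scores, axis=0)          # winning posterior per patient
print(combined_prediction)  # [0 1 2 1]
print(combined_score)       # [0.9 0.7 0.6 0.5]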
def plot_estimator_performance(prediction,
                               label_data,
                               label_type,
                               crossval_type=None,
                               alpha=0.95,
                               ensemble=None,
                               verbose=True,
                               ensemble_scoring=None,
                               output=None,
                               modus=None,
                               thresholds=None,
                               survival=False,
                               shuffle_estimators=False,
                               bootstrap=None,
                               bootstrap_N=None,
                               overfit_scaler=None):
    """Plot the output of a single estimator, e.g. a SVM.

    Parameters
    ----------
    prediction: pandas dataframe or string, mandatory
        output of trainclassifier function, either a pandas dataframe
        or a HDF5 file

    label_data: string, mandatory
        Contains the path referring to a .txt file containing the
        patient label(s) and value(s) to be used for learning. See
        the Github Wiki for the format.

    label_type: string, mandatory
        Name of the label to extract from the label data to test the
        estimator on.

    alpha: float, default 0.95
        Significance of confidence intervals.

    ensemble: False, integer or 'Caruana'
        Determine whether an ensemble will be created. If so,
        either provide an integer to determine how many of the
        top performing classifiers should be in the ensemble, or use
        the string "Caruana" to use smart ensembling based on
        Caruana et al. 2004.

    verbose: boolean, default True
        Print intermediate messages.

    ensemble_scoring: string, default None
        Metric to be used for evaluating the ensemble. If None,
        the option set in the prediction object will be used.

    output: string, default stats
        Determine which results are put out. If stats, the statistics of the
        estimator will be returned. If scores, the scores will be returned.

    thresholds: list of integer(s), default None
        If None, use the default sklearn threshold (0.5) on the posteriors to
        convert them to a binary prediction. If one value is provided, use that one.
        If two values are provided, posterior < thresh[0] = 0, posterior > thresh[1] = 1.

    Returns
    ----------
    Depending on the output parameters, the following outputs are returned:

    If output == 'stats':
    stats: dictionary
        Contains the confidence intervals of the performance metrics
        and the number of times each patient was classified correctly
        or incorrectly.

    If output == 'scores':
    y_truths: list
        Contains the true label for each object.

    y_scores: list
        Contains the score (e.g. posterior) for each object.

    y_predictions: list
        Contains the predicted label for each object.

    pids: list
        Contains the patient ID/name for each object.

    """
    # Load the prediction object if it's a hdf5 file
    if type(prediction) is not pd.core.frame.DataFrame:
        if os.path.isfile(prediction):
            prediction = pd.read_hdf(prediction)
        else:
            raise ae.WORCIOError(
                ('{} is not an existing file!').format(str(prediction)))

    # Select the estimator from the pandas dataframe to use
    keys = prediction.keys()
    if label_type is None:
        label_type = keys[0]

    # Load the label data
    if type(label_data) is not dict:
        if os.path.isfile(label_data):
            if type(label_type) is not list:
                # Singlelabel: convert to list
                label_type = [[label_type]]
            label_data = lp.load_labels(label_data, label_type)
        else:
            raise ae.WORCValueError(
                f"Label data {label_data} incorrect: not a dictionary, or file does not exist."
            )

    n_labels = len(label_type)
    patient_IDs = label_data['patient_IDs']
    labels = label_data['label']

    if type(label_type) is list:
        # FIXME: Support for multiple label types not supported yet.
        print(
            '[WORC Warning] Multiple label types are not supported yet. Taking the first label for plot_estimator_performance.'
        )
        label_type = keys[0]

    # Extract the estimators, features and labels
    regression = is_regressor(
        prediction[label_type]['classifiers'][0].best_estimator_)
    feature_labels = prediction[label_type]['feature_labels']

    # Get some configuration variables if present in the prediction
    config = prediction[label_type].config
    if ensemble is None:
        ensemble = int(config['Ensemble']['Use'])

    if modus is None:
        modus = config['Labels']['modus']

    if crossval_type is None:
        crossval_type = config['CrossValidation']['Type']

    if bootstrap is None:
        bootstrap = config['Bootstrap']['Use']

    if bootstrap_N is None:
        bootstrap_N = int(config['Bootstrap']['N_iterations'])

    if overfit_scaler is None:
        overfit_scaler = config['Evaluation']['OverfitScaler']

    ensemble_metric = config['Ensemble']['Metric']

    # Create lists for performance measures
    if not regression:
        sensitivity = list()
        specificity = list()
        precision = list()
        npv = list()
        accuracy = list()
        bca = list()
        auc = list()
        f1_score_list = list()

        if modus == 'multilabel':
            acc_av = list()

            # Also add scoring measures for all single label scores
            sensitivity_single = [list() for _ in range(n_labels)]
            specificity_single = [list() for _ in range(n_labels)]
            precision_single = [list() for _ in range(n_labels)]
            npv_single = [list() for _ in range(n_labels)]
            accuracy_single = [list() for _ in range(n_labels)]
            bca_single = [list() for _ in range(n_labels)]
            auc_single = [list() for _ in range(n_labels)]
            f1_score_list_single = [list() for _ in range(n_labels)]

    else:
        r2score = list()
        MSE = list()
        coefICC = list()
        PearsonC = list()
        PearsonP = list()
        SpearmanC = list()
        SpearmanP = list()

    patient_classification_list = dict()
    percentages_selected = list()

    if output in ['scores', 'decision'] or crossval_type == 'LOO':
        # Keep track of all ground truths and scores
        y_truths = list()
        y_scores = list()
        y_predictions = list()
        pids = list()

    # Extract sample size
    N_1 = float(len(prediction[label_type]['patient_ID_train'][0]))
    N_2 = float(len(prediction[label_type]['patient_ID_test'][0]))

    # Convert tuples to lists if required
    if type(prediction[label_type]['X_test']) is tuple:
        prediction[label_type]['X_test'] = list(
            prediction[label_type]['X_test'])
        prediction[label_type]['X_train'] = list(
            prediction[label_type]['X_train'])
        prediction[label_type]['Y_train'] = list(
            prediction[label_type]['Y_train'])
        prediction[label_type]['Y_test'] = list(
            prediction[label_type]['Y_test'])
        prediction[label_type]['patient_ID_test'] = list(
            prediction[label_type]['patient_ID_test'])
        prediction[label_type]['patient_ID_train'] = list(
            prediction[label_type]['patient_ID_train'])
        prediction[label_type]['classifiers'] = list(
            prediction[label_type]['classifiers'])

    # Loop over the test sets, which correspond to cross-validation
    # or bootstrapping iterations
    n_iter = len(prediction[label_type]['Y_test'])
    if bootstrap:
        iterobject = range(0, bootstrap_N)
    else:
        iterobject = range(0, n_iter)

    for i in iterobject:
        print("\n")
        if bootstrap:
            print(f"Bootstrap {i + 1} / {bootstrap_N}.")
        else:
            print(f"Cross-validation {i + 1} / {n_iter}.")

        test_indices = list()

        # When bootstrapping, there is only a single train/test set.
        if bootstrap:
            if i == 0:
                X_test_temp_or = prediction[label_type]['X_test'][0]
                X_train_temp = prediction[label_type]['X_train'][0]
                Y_train_temp = prediction[label_type]['Y_train'][0]
                Y_test_temp_or = prediction[label_type]['Y_test'][0]
                test_patient_IDs_or = prediction[label_type][
                    'patient_ID_test'][0]
                train_patient_IDs = prediction[label_type]['patient_ID_train'][
                    0]
                fitted_model = prediction[label_type]['classifiers'][0]

                # Objects required for first iteration
                test_patient_IDs = test_patient_IDs_or[:]
                X_test_temp = X_test_temp_or[:]
                Y_test_temp = Y_test_temp_or[:]
        else:
            X_test_temp = prediction[label_type]['X_test'][i]
            X_train_temp = prediction[label_type]['X_train'][i]
            Y_train_temp = prediction[label_type]['Y_train'][i]
            Y_test_temp = prediction[label_type]['Y_test'][i]
            test_patient_IDs = prediction[label_type]['patient_ID_test'][i]
            train_patient_IDs = prediction[label_type]['patient_ID_train'][i]
            fitted_model = prediction[label_type]['classifiers'][i]

        # Check which patients are in the test set.
        if output == 'stats' and crossval_type != 'LOO':
            for i_ID in test_patient_IDs:
                # Initiate counting how many times a patient is classified correctly
                if i_ID not in patient_classification_list:
                    patient_classification_list[i_ID] = dict()
                    patient_classification_list[i_ID]['N_test'] = 0
                    patient_classification_list[i_ID]['N_correct'] = 0
                    patient_classification_list[i_ID]['N_wrong'] = 0

                patient_classification_list[i_ID]['N_test'] += 1

                # Check if this is exactly the label of the patient within the label file
                if i_ID not in patient_IDs:
                    print(
                        f'[WORC WARNING] Patient {i_ID} is not found in the patient labels; retrying without the underscore suffix.'
                    )
                    i_ID = i_ID.split("_")[0]
                    if i_ID not in patient_IDs:
                        print(
                            f'[WORC WARNING] Did not help, excluding patient {i_ID}.'
                        )
                        continue

                test_indices.append(np.where(patient_IDs == i_ID)[0][0])

        # Extract ground truth
        y_truth = Y_test_temp

        # If required, shuffle estimators for "Random" ensembling
        if shuffle_estimators:
            # Randomly shuffle the estimators
            print('Shuffling estimators for random ensembling.')
            shuffle(fitted_model.cv_results_['params'])

        # If requested, first let the SearchCV object create an ensemble
        if bootstrap and i > 0:
            # For bootstrapping, only do this at the first iteration
            pass
        elif not fitted_model.ensemble:
            # If required, rank according to generalization score instead of mean_validation_score
            if ensemble_metric == 'generalization':
                print('Using generalization score for estimator ranking.')
                indices = fitted_model.cv_results_['rank_generalization_score']
                fitted_model.cv_results_['params'] = [
                    fitted_model.cv_results_['params'][i]
                    for i in indices[::-1]
                ]
            elif ensemble_metric != 'Default':
                raise ae.WORCKeyError(
                    f'Metric {ensemble_metric} is not known: use Default or generalization.'
                )

            # NOTE: Added for backwards compatibility
            if not hasattr(fitted_model, 'cv_iter'):
                cv_iter = list(
                    fitted_model.cv.split(X_train_temp, Y_train_temp))
                fitted_model.cv_iter = cv_iter

            # Create the ensemble
            X_train_temp = [(x, feature_labels) for x in X_train_temp]
            fitted_model.create_ensemble(X_train_temp,
                                         Y_train_temp,
                                         method=ensemble,
                                         verbose=verbose,
                                         scoring=ensemble_scoring,
                                         overfit_scaler=overfit_scaler)

        # If bootstrap, generate a bootstrapped sample
        if bootstrap and i > 0:
            y_truth, y_prediction, y_score, test_patient_IDs =\
                resample(y_truth_all, y_prediction_all,
                         y_score_all, test_patient_IDs_or)
        else:
            # Create prediction
            y_prediction = fitted_model.predict(X_test_temp)

            if regression:
                y_score = y_prediction
            elif modus == 'multilabel':
                y_score = fitted_model.predict_proba(X_test_temp)
            else:
                y_score = fitted_model.predict_proba(X_test_temp)[:, 1]

            # Create a new binary score based on the thresholds if given
            if thresholds is not None:
                if len(thresholds) == 1:
                    y_prediction = y_score >= thresholds[0]
                elif len(thresholds) == 2:
                    # X_train_temp = [x[0] for x in X_train_temp]

                    y_score_temp = list()
                    y_prediction_temp = list()
                    y_truth_temp = list()
                    test_patient_IDs_temp = list()

                    thresholds_val = fit_thresholds(thresholds, fitted_model,
                                                    X_train_temp, Y_train_temp,
                                                    ensemble, ensemble_scoring)
                    for pnum in range(len(y_score)):
                        if y_score[pnum] <= thresholds_val[0] or y_score[
                                pnum] > thresholds_val[1]:
                            y_score_temp.append(y_score[pnum])
                            y_prediction_temp.append(y_prediction[pnum])
                            y_truth_temp.append(y_truth[pnum])
                            test_patient_IDs_temp.append(
                                test_patient_IDs[pnum])

                    perc = float(len(y_prediction_temp)) / float(
                        len(y_prediction))
                    percentages_selected.append(perc)
                    print(
                        f"Selected {len(y_prediction_temp)} from {len(y_prediction)} ({perc * 100:.1f}%) patients using two thresholds."
                    )
                    y_score = y_score_temp
                    y_prediction = y_prediction_temp
                    y_truth = y_truth_temp
                    test_patient_IDs = test_patient_IDs_temp
                else:
                    raise ae.WORCValueError(
                        f"Need None, one or two thresholds on the posterior; got {len(thresholds)}."
                    )

            # If all scores are NaN, the classifier cannot do probabilities, thus
            # use hard predictions
            if np.sum(np.isnan(y_score)) == len(y_prediction):
                print(
                    '[WORC Warning] All scores NaN, replacing with prediction.'
                )
                y_score = y_prediction

        if bootstrap and i == 0:
            # Save objects for re-use
            y_truth_all = y_truth[:]
            y_prediction_all = y_prediction[:]
            y_score_all = y_score[:]

        print("Truth: " + str(y_truth))
        print("Prediction: " + str(y_prediction))
        print("Score: " + str(y_score))

        if output == 'stats' and crossval_type != 'LOO':
            # Add if patient was classified correctly or not to counting
            for i_truth, i_predict, i_test_ID in zip(y_truth, y_prediction,
                                                     test_patient_IDs):
                if modus == 'multilabel':
                    success = (i_truth == i_predict).all()
                else:
                    success = i_truth == i_predict

                if success:
                    patient_classification_list[i_test_ID]['N_correct'] += 1
                else:
                    patient_classification_list[i_test_ID]['N_wrong'] += 1

        if output in ['decision', 'scores'] or crossval_type == 'LOO':
            # Output the posteriors
            y_scores.append(y_score)
            y_truths.append(y_truth)
            y_predictions.append(y_prediction)
            pids.append(test_patient_IDs)

        elif output == 'stats':
            # Compute statistics
            print('Computing performance statistics.')
            # Compute confusion matrix and use for sensitivity/specificity
            performances = compute_statistics(y_truth, y_score, y_prediction,
                                              modus, regression)

            # Print AUC to keep you up to date
            if not regression:
                if modus == 'singlelabel':
                    accuracy_temp, bca_temp, sensitivity_temp,\
                        specificity_temp, precision_temp, npv_temp,\
                        f1_score_temp, auc_temp = performances
                else:
                    accuracy_temp, sensitivity_temp,\
                        specificity_temp, precision_temp, npv_temp,\
                        f1_score_temp, auc_temp, acc_av_temp,\
                        accuracy_temp_single,\
                        bca_temp_single, sensitivity_temp_single,\
                        specificity_temp_single, precision_temp_single,\
                        npv_temp_single, f1_score_temp_single,\
                        auc_temp_single = performances

                print('AUC: ' + str(auc_temp))

                # Append performance to lists for all cross validations
                accuracy.append(accuracy_temp)
                bca.append(bca_temp)
                sensitivity.append(sensitivity_temp)
                specificity.append(specificity_temp)
                auc.append(auc_temp)
                f1_score_list.append(f1_score_temp)
                precision.append(precision_temp)
                npv.append(npv_temp)

                if modus == 'multilabel':
                    acc_av.append(acc_av_temp)
                    for j in range(n_labels):
                        accuracy_single[j].append(accuracy_temp_single[j])
                        bca_single[j].append(bca_temp_single[j])
                        sensitivity_single[j].append(
                            sensitivity_temp_single[j])
                        specificity_single[j].append(
                            specificity_temp_single[j])
                        auc_single[j].append(auc_temp_single[j])
                        f1_score_list_single[j].append(f1_score_temp_single[j])
                        precision_single[j].append(precision_temp_single[j])
                        npv_single[j].append(npv_temp_single[j])

            else:
                r2score_temp, MSE_temp, coefICC_temp, PearsonC_temp,\
                    PearsonP_temp, SpearmanC_temp,\
                    SpearmanP_temp = performances

                print('R2 Score: ' + str(r2score_temp))
                r2score.append(r2score_temp)
                MSE.append(MSE_temp)
                coefICC.append(coefICC_temp)
                PearsonC.append(PearsonC_temp)
                PearsonP.append(PearsonP_temp)
                SpearmanC.append(SpearmanC_temp)
                SpearmanP.append(SpearmanP_temp)

        # Delete some objects to save memory in cross-validation
        if not bootstrap:
            del fitted_model, X_test_temp, X_train_temp, Y_train_temp
            del Y_test_temp, test_patient_IDs, train_patient_IDs
            prediction[label_type]['X_test'][i] = None
            prediction[label_type]['X_train'][i] = None
            prediction[label_type]['Y_train'][i] = None
            prediction[label_type]['Y_test'][i] = None
            prediction[label_type]['patient_ID_test'][i] = None
            prediction[label_type]['patient_ID_train'][i] = None
            prediction[label_type]['classifiers'][i] = None

    if output in ['scores', 'decision']:
        # Return the scores and true values of all patients
        return y_truths, y_scores, y_predictions, pids

    elif output == 'stats':
        # Compute statistics
        stats = dict()
        output = dict()
        if crossval_type == 'LOO':
            performances = compute_statistics(y_truths, y_scores,
                                              y_predictions, modus, regression)

            if not regression:
                metric_names_single = [
                    'Accuracy', 'BCA', 'Sensitivity', 'Specificity',
                    'Precision', 'NPV', 'F1-score', 'AUC'
                ]
                if modus == 'singlelabel':
                    metric_names = metric_names_single
                elif modus == 'multilabel':
                    metric_names_multi = [
                        'Accuracy', 'Sensitivity', 'Specificity', 'Precision',
                        'NPV', 'F1-score', 'AUC', 'Average Accuracy'
                    ]
                    metric_names = metric_names_multi + metric_names_single

            else:
                # Regression
                metric_names = [
                    'R2-score', 'MSE', 'ICC', 'PearsonC', 'PearsonP',
                    'SpearmanC', 'SpearmanP'
                ]

            # Put all metrics with their names in the statistics dict
            for k, v in zip(metric_names, performances):
                stats[k] = str(v)

            if thresholds is not None:
                if len(thresholds) == 2:
                    # Compute percentage of patients that was selected
                    stats["Percentage Selected"] = str(percentages_selected[0])

            output['Statistics'] = stats

        else:
            # Compute alpha confidence intervals (CIs)
            # FIXME: multilabel performance per single label not included
            # FIXME: multilabel not working in bootstrap
            # FIXME: bootstrap not done in regression
            if not regression:
                metric_names_single = [
                    'Accuracy', 'BCA', 'Sensitivity', 'Specificity',
                    'Precision', 'NPV', 'F1-score', 'AUC'
                ]

                if bootstrap:
                    # Compute once for the real test set the performance
                    X_test_temp = prediction[label_type]['X_test'][0]
                    y_truth = prediction[label_type]['Y_test'][0]
                    y_prediction = fitted_model.predict(X_test_temp)
                    y_score = fitted_model.predict_proba(X_test_temp)[:, 1]

                    performances_test =\
                        metrics.performance_singlelabel(y_truth,
                                                        y_prediction,
                                                        y_score,
                                                        regression)
                    # Aggregate bootstrapped performances
                    performances_bootstrapped =\
                        [accuracy, bca, sensitivity, specificity, precision,
                         npv, f1_score_list, auc]

                    # Compute confidence intervals for all metrics
                    for p in range(len(metric_names_single)):
                        k = metric_names_single[p] + ' 95%'
                        perf = performances_bootstrapped[p]
                        perf_test = performances_test[p]
                        stats[
                            k] = f"{perf_test} {str(compute_confidence_bootstrap(perf, perf_test, N_1, alpha))}"

                else:
                    stats[
                        "Accuracy 95%:"] = f"{np.nanmean(accuracy)} {str(compute_confidence(accuracy, N_1, N_2, alpha))}"
                    stats[
                        "BCA 95%:"] = f"{np.nanmean(bca)} {str(compute_confidence(bca, N_1, N_2, alpha))}"
                    stats[
                        "AUC 95%:"] = f"{np.nanmean(auc)} {str(compute_confidence(auc, N_1, N_2, alpha))}"
                    stats[
                        "F1-score 95%:"] = f"{np.nanmean(f1_score_list)} {str(compute_confidence(f1_score_list, N_1, N_2, alpha))}"
                    stats[
                        "Precision 95%:"] = f"{np.nanmean(precision)} {str(compute_confidence(precision, N_1, N_2, alpha))}"
                    stats[
                        "NPV 95%:"] = f"{np.nanmean(npv)} {str(compute_confidence(npv, N_1, N_2, alpha))}"
                    stats[
                        "Sensitivity 95%:"] = f"{np.nanmean(sensitivity)} {str(compute_confidence(sensitivity, N_1, N_2, alpha))}"
                    stats[
                        "Specificity 95%:"] = f"{np.nanmean(specificity)} {str(compute_confidence(specificity, N_1, N_2, alpha))}"

                    if modus == 'multilabel':
                        stats[
                            "Average Accuracy 95%:"] = f"{np.nanmean(acc_av)} {str(compute_confidence(acc_av, N_1, N_2, alpha))}"

                if thresholds is not None:
                    if len(thresholds) == 2:
                        # Compute percentage of patients that was selected
                        stats[
                            "Percentage Selected 95%:"] = f"{np.nanmean(percentages_selected)} {str(compute_confidence(percentages_selected, N_1, N_2, alpha))}"

                # Extract statistics on how often patients got classified correctly
                rankings = dict()
                alwaysright = dict()
                alwayswrong = dict()
                percentages = dict()
                timesintestset = dict()
                for i_ID in patient_classification_list:
                    percentage_right = patient_classification_list[i_ID][
                        'N_correct'] / float(
                            patient_classification_list[i_ID]['N_test'])

                    if i_ID in patient_IDs:
                        label = labels[0][np.where(i_ID == patient_IDs)]
                    else:
                        # Multiple instances of one patient
                        label = labels[0][np.where(
                            i_ID.split('_')[0] == patient_IDs)]

                    label = label[0][0]
                    percentages[i_ID] = f'{label}: {percentage_right * 100:.1f}%'
                    if percentage_right == 1.0:
                        alwaysright[i_ID] = label
                        print(f"Always Right: {i_ID}, label {label}.")

                    elif percentage_right == 0:
                        alwayswrong[i_ID] = label
                        print(f"Always Wrong: {i_ID}, label {label}.")

                    timesintestset[i_ID] = patient_classification_list[i_ID][
                        'N_test']

                rankings["Always right"] = alwaysright
                rankings["Always wrong"] = alwayswrong
                rankings['Percentages'] = percentages
                rankings['timesintestset'] = timesintestset

                output['Rankings'] = rankings

            else:
                # Regression
                stats[
                    'R2-score 95%: '] = f"{np.nanmean(r2score)} {str(compute_confidence(r2score, N_1, N_2, alpha))}"
                stats[
                    'MSE 95%: '] = f"{np.nanmean(MSE)} {str(compute_confidence(MSE, N_1, N_2, alpha))}"
                stats[
                    'ICC 95%: '] = f"{np.nanmean(coefICC)} {str(compute_confidence(coefICC, N_1, N_2, alpha))}"
                stats[
                    'PearsonC 95%: '] = f"{np.nanmean(PearsonC)} {str(compute_confidence(PearsonC, N_1, N_2, alpha))}"
                stats[
                    'PearsonP 95%: '] = f"{np.nanmean(PearsonP)} {str(compute_confidence(PearsonP, N_1, N_2, alpha))}"
                stats[
                    'SpearmanC 95%: '] = f"{np.nanmean(SpearmanC)} {str(compute_confidence(SpearmanC, N_1, N_2, alpha))}"
                stats[
                    'SpearmanP 95%: '] = f"{np.nanmean(SpearmanP)} {str(compute_confidence(SpearmanP, N_1, N_2, alpha))}"

        # Print all CI's and add to output
        stats = OrderedDict(sorted(stats.items()))
        for k, v in stats.items():
            print(f"{k} : {v}.")

        output['Statistics'] = stats
        return output
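# A hedged usage sketch for plot_estimator_performance: the HDF5 and label file
# paths below are hypothetical placeholders, but the call pattern follows the
# signature above. With output='stats' the returned dictionary contains the
# confidence intervals under 'Statistics' (and patient rankings outside LOO).
if __name__ == '__main__':
    example_prediction = '/path/to/estimator_all_0.hdf5'   # hypothetical path
    example_labels = '/path/to/pinfo.csv'                  # hypothetical path
    results = plot_estimator_performance(example_prediction,
                                         example_labels,
                                         label_type='imaginary_label_1',
                                         output='stats')
    for name, value in results['Statistics'].items():
        print(name, value)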