Example #1
    def run(self):
        """ 
        """

        # Load model
        output = pickle.load(open(self.model_file, "rb"))
        preprocessor = output["preprocessor"]
        best_model = output["model"]
        if hasattr(best_model, "best_estimator_"):
            best_model = best_model.best_estimator_

        # Get data
        data_loader = DataLoader(configuration_file=self.data_file)
        data_loader.load_data()
        feature = data_loader.features_  
        target = data_loader.targets_ 
        
        # Predict
        if not isinstance(best_model, list):  # Ensure the model is iterable as a list
            best_model = [best_model]

        if not isinstance(preprocessor, list):  # Ensure the preprocessor is a list as well
            preprocessor = [preprocessor]
        
        predict_label = []
        pred_prob = []
        for prep, model_ in zip(preprocessor, best_model):

            # Feature preprocessing (keep the original features intact
            # so the next model gets untransformed input)
            feature_ = prep.transform(feature)

            # Predict
            predict_label.append(model_.predict(feature_))

            if hasattr(model_, 'predict_proba'):
                pred_prob.append(model_.predict_proba(feature_))
            elif hasattr(model_, 'decision_function'):
                # Min-max scale decision scores into [0, 1] so they can
                # stand in for probabilities
                prob_pos = model_.decision_function(feature_)
                prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
                pred_prob.append(prob_pos)
            else:
                # No probability estimate available: fall back to hard labels
                pred_prob.append(predict_label[-1])
        
        # Get voted predict label
        final_label = self.vote(predict_label)
        final_prob = np.mean(pred_prob, axis=0)  # Average probabilities across models
        
        # Evaluation
        acc, sens, spec, _, _ = ModelEvaluator().binary_evaluator(
            target, final_label, final_prob,
            verbose=1, is_showfig=False, is_savefig=False
        )

        return acc, sens, spec
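The `vote` method called above is not part of this snippet. A minimal majority-vote sketch of what it plausibly does, assuming non-negative integer class labels (this is inferred from the name, not eslearn's actual implementation):

import numpy as np

def vote(predict_label):
    # Stack the per-model label vectors into shape (n_models, n_samples),
    # then keep the most frequent label for each sample (majority vote)
    stacked = np.asarray(predict_label)
    return np.apply_along_axis(lambda col: np.bincount(col).argmax(),
                               axis=0, arr=stacked)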
Example #2
    def main_run(self):
        # Get all inputs
        self.load_data()
        self.get_all_inputs()

        # Make pipeline
        self.make_pipeline_()
        
        # Get training and test datasets        
        cv = self.method_model_evaluation_ 
        target_test_all = []
        for train_index, test_index in cv.split(self.features_, self.targets_):
            feature_train = self.features_[train_index, :]
            feature_test = self.features_[test_index, :]
            target_train = self.targets_[train_index]
            target_test = self.targets_[test_index]
            target_test_all.extend(target_test)

            # Resample
            imbalance_resample = self.method_unbalance_treatment_
            if imbalance_resample:
                feature_train, target_train = imbalance_resample.fit_resample(feature_train, target_train)
                print(f"After re-sampling, the sample size are: {sorted(Counter(target_train).items())}")
            
            # Fit
            self.fit_(feature_train, target_train)
            
            # Get weights
            self.get_weights_(feature_train, target_train)
            
            # Predict
            y_pred, y_prob = self.predict(feature_test)
            
            # Eval performances
            acc, sens, spec, auc = ModelEvaluator().binary_evaluator(
                target_test, y_pred, y_prob,
                accuracy_kfold=None, sensitivity_kfold=None, specificity_kfold=None, AUC_kfold=None,
                verbose=1, is_showfig=False, is_savefig=False
            )

        return y_pred, y_prob
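`self.method_unbalance_treatment_` is presumably an imbalanced-learn resampler; they all share the `fit_resample` interface used above. A standalone illustration with `RandomOverSampler` (the choice of sampler here is an assumption):

from collections import Counter
import numpy as np
from imblearn.over_sampling import RandomOverSampler

X = np.random.rand(30, 4)
y = np.array([0] * 25 + [1] * 5)           # imbalanced 25-vs-5 labels

sampler = RandomOverSampler(random_state=0)
X_res, y_res = sampler.fit_resample(X, y)  # minority class oversampled to 25
print(f"After re-sampling, the sample sizes are: {sorted(Counter(y_res).items())}")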
Example #3
    def main_run(self):

        self.preprocessing()

        # Get training and test datasets
        self.target_test_all = []
        self.pred_prob = []
        self.real_score = []
        models = []
        preprocessor = []  # fitted preprocessors, one per fold (saved in the outputs below)
        weights = []
        subname = []
        for train_index, test_index in self.method_model_evaluation_.split(
                self.features_, self.targets_):
            feature_train = self.features_[train_index, :]
            feature_test = self.features_[test_index, :]
            target_train = self.targets_[train_index]
            target_test = self.targets_[test_index]

            # Preprocessing
            self.prep_ = Denan(how='median')
            feature_train = self.prep_.fit_transform(feature_train)
            feature_test = self.prep_.transform(feature_test)
            preprocessor.append(self.prep_)

            self.target_test_all.extend(target_test)

            subname_ = self.id_[test_index]
            subname.extend(subname_)

            # Fit
            self.fit_(self.model_, feature_train, target_train, self.memory)
            models.append(self.model_)

            # Get weights
            _, weights_ = self.get_weights_(feature_train, target_train)

            # Predict
            y_prob = self.predict_(self.model_, feature_test)

            # Eval performances
            score = self.metric(target_test, y_prob)
            self.real_score.append(score)
            self.pred_prob.extend(y_prob)

            weights.append(weights_)

        # Eval performances for all fold
        out_name_perf = os.path.join(self.out_dir,
                                     "regression_performances.pdf")
        all_score = ModelEvaluator().regression_evaluator(
            self.target_test_all,
            self.pred_prob,
            self.real_score,
            is_showfig=False,
            is_savefig=True,
            out_name=out_name_perf)

        # Save weight
        self.save_weight(weights, self.out_dir)

        # Save outputs
        self.outputs = {
            "preprocessor": preprocessor,
            "model": models,
            "subname": subname,
            "test_targets": self.target_test_all,
            "test_probability": self.pred_prob,
            "score": self.real_score
        }

        pickle.dump(self.outputs,
                    open(os.path.join(self.out_dir, "outputs.pickle"), "wb"))
        return self
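`Denan(how='median')` is an eslearn preprocessing step; its fit/transform usage above suggests median imputation of missing values. A rough scikit-learn equivalent (this substitution is an assumption, not eslearn's code):

import numpy as np
from sklearn.impute import SimpleImputer

X_train = np.array([[1.0, np.nan], [3.0, 4.0], [5.0, 6.0]])
X_test = np.array([[np.nan, 2.0]])

prep = SimpleImputer(strategy='median')  # median imputation, like Denan(how='median')
X_train = prep.fit_transform(X_train)    # learn column medians on the training fold only
X_test = prep.transform(X_test)          # reuse them on the test fold (no leakage)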
Example #4
    def main_run(self):
        # Get all inputs
        self.load_data()
        self.get_all_inputs()

        # Make pipeline
        self.make_pipeline_()
        
        # Get training and test datasets        
        cv = self.method_model_evaluation_ 
        accuracy = []
        sensitivity = []
        specificity = []
        auc = []
        pred_test = []
        decision = []
        weights = []
        target_test_all = []
        for train_index, test_index in cv.split(self.features_, self.targets_):
            feature_train = self.features_[train_index, :]
            feature_test = self.features_[test_index, :]
            target_train = self.targets_[train_index]
            target_test = self.targets_[test_index]
            target_test_all.extend(target_test)

            # Resample
            imbalance_resample = self.method_unbalance_treatment_
            if imbalance_resample:
                feature_train, target_train = imbalance_resample.fit_resample(feature_train, target_train)
                print(f"After re-sampling, the sample size are: {sorted(Counter(target_train).items())}")
            
            # Fit
            self.fit_(feature_train, target_train)
            
            # Get weights
            self.get_weights_(feature_train, target_train)
            
            # Predict
            y_pred, y_prob = self.predict(feature_test)
            
            # Eval performances
            acc, sens, spec, auc_ = ModelEvaluator().binary_evaluator(
                target_test, y_pred, y_prob,
                accuracy_kfold=None, sensitivity_kfold=None, specificity_kfold=None, AUC_kfold=None,
                verbose=False, is_showfig=False, is_savefig=False
            )
            
            accuracy.append(acc)
            sensitivity.append(sens)
            specificity.append(spec)
            auc.append(auc_)
            pred_test.extend(y_pred)
            decision.extend(y_prob)
            weights.append(self.weights_)
         
        # Eval performances for all fold
        acc, sens, spec, auc = ModelEvaluator().binary_evaluator(
            target_test_all, pred_test, decision,
            accuracy_kfold=accuracy, sensitivity_kfold=sensitivity,
            specificity_kfold=specificity, AUC_kfold=auc,
            verbose=1, is_showfig=False, is_savefig=False,
            legend1='EMCI', legend2='AD',
            out_name=r"D:\悦影科技\数据处理业务1\data_variance_22_30_z\分类结果\adVSemci.pdf")

        return y_pred, y_prob
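`ModelEvaluator().binary_evaluator` is eslearn's helper; the core metrics it reports can be reproduced with scikit-learn. A sketch of the standard definitions (not eslearn's implementation):

import numpy as np
from sklearn.metrics import confusion_matrix, roc_auc_score

def binary_metrics(y_true, y_pred, y_score):
    # confusion_matrix rows are true classes, columns are predicted classes
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    sensitivity = tp / (tp + fn)          # true positive rate (recall)
    specificity = tn / (tn + fp)          # true negative rate
    auc = roc_auc_score(y_true, y_score)  # needs positive-class scores, not labels
    return accuracy, sensitivity, specificity, auc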
Example #5
    def main_run(self):
        self.preprocessing()

        # Get training and test datasets
        self.real_accuracy = []
        self.real_sensitivity = []
        self.real_specificity = []
        self.real_auc = []
        self.pred_label = []
        pred_prob = []
        weights = []
        self.target_test_all = []
        subname = []
        for train_index, test_index in self.method_model_evaluation_.split(
                self.features_, self.targets_):
            feature_train = self.features_[train_index, :]
            feature_test = self.features_[test_index, :]
            target_train = self.targets_[train_index]
            target_test = self.targets_[test_index]

            subname_ = self.id_[test_index]
            subname.extend(subname_)

            # Preprocessing
            self.prep_ = Denan(how='median')
            feature_train = self.prep_.fit_transform(feature_train)
            feature_test = self.prep_.transform(feature_test)

            # Extend sorted real target of test data
            self.target_test_all.extend(target_test)

            # Resample
            imbalance_resample = self.method_unbalance_treatment_
            if imbalance_resample:
                print(
                    f"Before re-sampling, the sample sizes are: {sorted(Counter(target_train).items())}"
                )
                feature_train, target_train = imbalance_resample.fit_resample(
                    feature_train, target_train)
                print(
                    f"After re-sampling, the sample sizes are: {sorted(Counter(target_train).items())}"
                )

            # Fit
            self.fit_(self.model_, feature_train, target_train, self.memory)

            # Weights
            weights_, _ = self.get_weights_(feature_train, target_train)

            # Predict
            y_pred, y_prob = self.predict_(self.model_, feature_test)

            # Eval performances
            acc, sens, spec, auc_, _ = ModelEvaluator().binary_evaluator(
                target_test,
                y_pred,
                y_prob,
                accuracy_kfold=None,
                sensitivity_kfold=None,
                specificity_kfold=None,
                AUC_kfold=None,
                verbose=False,
                is_showfig=False,
                is_savefig=False)

            self.real_accuracy.append(acc)
            self.real_sensitivity.append(sens)
            self.real_specificity.append(spec)
            self.real_auc.append(auc_)
            self.pred_label.extend(y_pred)
            pred_prob.extend(y_prob)
            weights.append(weights_)

        # Save weight
        self.save_weight(weights, self.out_dir)

        # Eval performances for all fold
        out_name_perf = os.path.join(self.out_dir,
                                     "classification_performances.pdf")
        if os.path.exists(out_name_perf):
            time_ = time.strftime('%Y%m%d%H%M%S')
            out_name_perf = os.path.join(
                self.out_dir, f"classification_performances_{time_}.pdf")

        acc, sens, spec, auc, _ = ModelEvaluator().binary_evaluator(
            self.target_test_all,
            self.pred_label,
            pred_prob,
            accuracy_kfold=self.real_accuracy,
            sensitivity_kfold=self.real_sensitivity,
            specificity_kfold=self.real_specificity,
            AUC_kfold=self.real_auc,
            verbose=1,
            is_showfig=False,
            is_savefig=True,
            legend1='Controls',
            legend2='Patients',
            out_name=out_name_perf)

        # Save outputs
        self.outputs = {
            "preprocessor": self.prep_,
            "model": self.model_,
            "subname": subname,
            "test_targets": self.target_test_all,
            "test_prediction": self.pred_label,
            "test_probability": pred_prob,
            "accuracy": self.real_accuracy,
            "sensitivity": self.real_sensitivity,
            "specificity": self.real_specificity,
            "auc": self.real_auc
        }

        pickle.dump(self.outputs,
                    open(os.path.join(self.out_dir, "outputs.pickle"), "wb"))

        return self
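The dictionary pickled at the end of this example is what Example #1's `run` method loads back. A minimal round-trip sketch (paths are illustrative):

import pickle

with open("outputs.pickle", "rb") as f:  # saved under self.out_dir above
    outputs = pickle.load(f)

prep = outputs["preprocessor"]           # fitted preprocessing step(s)
model = outputs["model"]                 # fitted model(s)
# X_new = prep.transform(X_new)          # apply the same preprocessing
# y_pred = model.predict(X_new)          # then predict on new data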
Example #6

import os
import tensorflow
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import numpy as np
import pickle
from sklearn.metrics import accuracy_score
from eslearn.model_evaluator import ModelEvaluator
import matplotlib.pyplot as plt
from eslearn.machine_learning.neural_network.eeg.el_eeg_prep_data import parse_configuration
from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model
meval = ModelEvaluator()



class Trainer():
    def __init__(self, out_dir=None):
        self.out_dir = out_dir
        self._model_file = "eegModel.h5"
        self._modelSaveName = os.path.join(self.out_dir, self._model_file)
        self._historySaveName = os.path.join(self.out_dir, "trainHistoryDict.json")
        self._lossSaveName = os.path.join(self.out_dir, "loss.pdf")

    def prep_data(self, x, y, num_classes):
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4,
                                                            shuffle=True,
                                                            random_state=666)
        # One-hot encode labels (assumed completion; the snippet was truncated here)
        y_train = to_categorical(y_train, num_classes)
        y_test = to_categorical(y_test, num_classes)
        return x_train, x_test, y_train, y_test
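The snippet is cut off before the network itself is defined, but the imports above (Sequential, Conv2D, MaxPooling2D, Dense, Dropout, Flatten) point to a small CNN classifier. An illustrative builder using those imports; the layer sizes are placeholders, not the original architecture:

def build_model(input_shape, num_classes):
    # Conv -> pool -> dense softmax classifier; compiled for one-hot labels
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model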
Example #7
    def permutation_test(self):
        print(f"Permutation test: {self.time_permutation} times...\n")

        self.permuted_accuracy = []
        self.permuted_sensitivity = []
        self.permuted_specificity = []
        self.permuted_auc = []
        widgets = [
            'Permutation testing',
            Percentage(), ' ',
            Bar('='), ' ',
            Timer(), ' ',
            ETA()
        ]
        pbar = ProgressBar(widgets=widgets,
                           maxval=self.time_permutation).start()

        for i in range(self.time_permutation):
            # Get training and test datasets
            accuracy = []
            sensitivity = []
            specificity = []
            AUC = []
            for train_index, test_index in self.method_model_evaluation.split(
                    self.feature, self.label):
                feature_train = self.feature[train_index, :]
                feature_test = self.feature[test_index, :]
                permuted_target_train = self.label[train_index][
                    np.random.permutation(len(train_index))]

                target_test = self.label[test_index]

                # Preprocessing
                feature_train = self.prep_.fit_transform(feature_train)
                feature_test = self.prep_.transform(feature_test)

                # Resample
                imbalance_resample = self.method_unbalance_treatment_
                if imbalance_resample:
                    feature_train, permuted_target_train = imbalance_resample.fit_resample(
                        feature_train, permuted_target_train)

                # Fit
                self.fit_(self.model, feature_train, permuted_target_train,
                          self.memory)

                # Predict
                y_pred, y_prob = self.predict_(self.model, feature_test)

                # Eval performances
                acc, sens, spec, auc_, _ = ModelEvaluator().binary_evaluator(
                    target_test,
                    y_pred,
                    y_prob,
                    accuracy_kfold=None,
                    sensitivity_kfold=None,
                    specificity_kfold=None,
                    AUC_kfold=None,
                    verbose=False,
                    is_showfig=False,
                    is_savefig=False)

                accuracy.append(acc)
                sensitivity.append(sens)
                specificity.append(spec)
                AUC.append(auc_)

            # Average performances of one permutation
            self.permuted_accuracy.append(np.mean(accuracy))
            self.permuted_sensitivity.append(np.mean(sensitivity))
            self.permuted_specificity.append(np.mean(specificity))
            self.permuted_auc.append(np.mean(AUC))

            # Progress bar
            pbar.update(i + 1)

        pbar.finish()

        # Get p values
        pvalue_acc = self.calc_pvalue(self.permuted_accuracy,
                                      np.mean(self.real_accuracy))
        pvalue_sens = self.calc_pvalue(self.permuted_sensitivity,
                                       np.mean(self.real_sensitivity))
        pvalue_spec = self.calc_pvalue(self.permuted_specificity,
                                       np.mean(self.real_specificity))
        pvalue_auc = self.calc_pvalue(self.permuted_auc,
                                      np.mean(self.real_auc))

        print(f"p value for acc = {pvalue_acc:.3f}")
        return pvalue_acc, pvalue_sens, pvalue_spec, pvalue_auc
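`calc_pvalue` is not shown; the standard permutation-test estimate it presumably computes is the proportion of permuted scores at least as good as the real score. A sketch with the common +1 smoothing (the exact eslearn formula may differ):

import numpy as np

def calc_pvalue(permuted_scores, real_score):
    # +1 in numerator and denominator keeps p strictly above zero
    permuted_scores = np.asarray(permuted_scores)
    return (np.sum(permuted_scores >= real_score) + 1) / (len(permuted_scores) + 1)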
Example #8
    def loop(self):
        self.get_configuration_(
            configuration_file=r'D:\My_Codes\virtualenv_eslearn\Lib\site-packages\eslearn\GUI\test\configuration_file.json')
        self.get_preprocessing_parameters()
        self.get_dimension_reduction_parameters()
        self.get_feature_selection_parameters()
        self.get_unbalance_treatment_parameters()
        self.get_machine_learning_parameters()
        self.get_model_evaluation_parameters()

        method_feature_preprocessing = self.method_feature_preprocessing
        param_feature_preprocessing = self.param_feature_preprocessing

        method_dim_reduction = self.method_dim_reduction
        param_dim_reduction = self.param_dim_reduction

        method_feature_selection = self.method_feature_selection
        param_feature_selection = self.param_feature_selection

        method_machine_learning = self.method_machine_learning
        param_machine_learning = self.param_machine_learning

        # Load
        self._load_data_infolder()

        # Split data into training and test datasets
        accuracy = []
        sensitivity = []
        specificity = []
        auc = []
        pred_test = []
        decision = []
        weights = []
        label_test_all = []
        cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=666)
        for train_index, test_index in cv.split(self.data, self.label):
            data_train = self.data[train_index, :]
            data_test = self.data[test_index, :]
            label_train = self.label[train_index]
            label_test = self.label[test_index]
            label_test_all.extend(label_test)

            # Resample
            ros = RandomOverSampler(random_state=0)
            data_train, label_train = ros.fit_resample(data_train, label_train)

            print(f"After re-sampling, the sample size are: {sorted(Counter(label_train).items())}")

            acc, sens, spec, auc_, pred_test_, dec, wei = self.pipeline_grid(
                method_feature_preprocessing=method_feature_preprocessing,
                param_feature_preprocessing=param_feature_preprocessing,
                method_dim_reduction=method_dim_reduction,
                param_dim_reduction=param_dim_reduction,
                method_feature_selection=method_feature_selection,
                param_feature_selection=param_feature_selection,
                method_machine_learning=method_machine_learning,
                param_machine_learning=param_machine_learning,
                data_train=data_train, data_test=data_test, label_train=label_train, label_test=label_test
            )

            accuracy.append(acc)
            sensitivity.append(sens)
            specificity.append(spec)
            auc.append(auc_)
            pred_test.extend(pred_test_)
            decision.extend(dec)
            weights.append(wei)
            
        # Eval performances
        acc, sens, spec, auc = ModelEvaluator().binary_evaluator(
            label_test_all, pred_test, decision,
            accuracy_kfold=accuracy, sensitivity_kfold=sensitivity, specificity_kfold=specificity, AUC_kfold=auc,
            verbose=1, is_showfig=True, legend1=self.legend1, legend2=self.legend2, is_savefig=False, out_name=self.performances_save_name
        )

        # save weight to nii
        # self._weight2nii(weights)
        return accuracy, sensitivity, specificity, auc, weights
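`pipeline_grid` (shown in the next examples) wraps scikit-learn's Pipeline and grid-search machinery. A minimal standalone version of that pattern; the step names and parameter grid are illustrative only:

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

pipe = Pipeline([
    ('scale', StandardScaler()),    # feature preprocessing
    ('reduce', PCA()),              # dimension reduction
    ('clf', SVC(probability=True))  # machine learning step
])
param_grid = {'reduce__n_components': [0.8, 0.9], 'clf__C': [1, 10]}
grid = GridSearchCV(pipe, param_grid, cv=3)
# grid.fit(data_train, label_train); pred = grid.predict(data_test)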
Example #9
    def pipeline_grid(self,
                      method_feature_preprocessing=None,
                      param_feature_preprocessing=None,
                      method_dim_reduction=None,
                      param_dim_reduction=None,
                      method_feature_selection=None,
                      param_feature_selection=None,
                      method_machine_learning=None,
                      param_machine_learning=None):

        self.make_pipeline_(
            method_feature_preprocessing=method_feature_preprocessing,
            param_feature_preprocessing=param_feature_preprocessing,
            method_dim_reduction=method_dim_reduction,
            param_dim_reduction=param_dim_reduction,
            method_feature_selection=method_feature_selection,
            param_feature_selection=param_feature_selection,
            method_machine_learning=method_machine_learning,
            param_machine_learning=param_machine_learning)

        print(self.param_search_)
        # Train
        self.fit_pipeline_(self.data_train, self.label_train)

        # Get weights
        self.get_weights_(self.data_train, self.label_train)
        self._weight2nii(self.weights_)

        # Predict
        pred_train, dec_train = self.predict(self.data_train)
        self.predict_validation, self.decision = self.predict(
            self.data_validation)

        # Eval performances
        print("Evaluating training data...")
        bi_evaluator = ModelEvaluator().binary_evaluator
        acc, sens, spec, auc = bi_evaluator(self.label_train,
                                            pred_train,
                                            dec_train,
                                            accuracy_kfold=None,
                                            sensitivity_kfold=None,
                                            specificity_kfold=None,
                                            AUC_kfold=None,
                                            verbose=1,
                                            is_showfig=False,
                                            is_savefig=True,
                                            out_name=os.path.join(
                                                self.save_directory,
                                                "performances_train.pdf"))

        print("Evaluating test data...")
        val_label = np.loadtxt(self.val_label)  # Load validation labels without overwriting the path
        acc, sens, spec, auc = bi_evaluator(val_label,
                                            self.predict_validation,
                                            self.decision,
                                            accuracy_kfold=None,
                                            sensitivity_kfold=None,
                                            specificity_kfold=None,
                                            AUC_kfold=None,
                                            verbose=1,
                                            is_showfig=False,
                                            is_savefig=True,
                                            out_name=os.path.join(
                                                self.save_directory,
                                                "performances_test.pdf"))
Example #10
    def pipeline_grid(self,
                      method_feature_preprocessing=None,
                      param_feature_preprocessing=None,
                      method_dim_reduction=None,
                      param_dim_reduction=None,
                      method_feature_selection=None,
                      param_feature_selection=None,
                      method_machine_learning=None,
                      param_machine_learning=None):

        self.make_pipeline_(
            method_feature_preprocessing=method_feature_preprocessing,
            param_feature_preprocessing=param_feature_preprocessing,
            method_dim_reduction=method_dim_reduction,
            param_dim_reduction=param_dim_reduction,
            method_feature_selection=method_feature_selection,
            param_feature_selection=param_feature_selection,
            method_machine_learning=method_machine_learning,
            param_machine_learning=param_machine_learning)

        accuracy_train = np.zeros([self.n_perm, 1])
        sensitivity_train = np.zeros([self.n_perm, 1])
        specificity_train = np.zeros([self.n_perm, 1])
        auc_train = np.zeros([self.n_perm, 1])

        accuracy_validation = np.zeros([self.n_perm, 1])
        sensitivity_validation = np.zeros([self.n_perm, 1])
        specificity_validation = np.zeros([self.n_perm, 1])
        auc_validation = np.zeros([self.n_perm, 1])

        for i in range(self.n_perm):
            print(f"Permutation {i+1}/{self.n_perm}\n")
            label_train_perm = np.random.permutation(self.label_train)

            # Train
            self.fit_pipeline_(self.data_train, label_train_perm)

            # Predict
            pred_train, dec_train = self.predict(self.data_train)
            self.predict_validation, self.decision = self.predict(
                self.data_validation)

            # Eval performances
            bi_evaluator = ModelEvaluator().binary_evaluator
            acc, sens, spec, auc = bi_evaluator(self.label_train,
                                                pred_train,
                                                dec_train,
                                                accuracy_kfold=None,
                                                sensitivity_kfold=None,
                                                specificity_kfold=None,
                                                AUC_kfold=None,
                                                verbose=False,
                                                is_showfig=False,
                                                is_savefig=False,
                                                out_name=os.path.join(
                                                    self.save_directory,
                                                    "performances_train.pdf"))
            accuracy_train[i] = acc
            sensitivity_train[i] = sens
            specificity_train[i] = spec
            auc_train[i] = auc

            acc, sens, spec, auc = bi_evaluator(self.label_validation,
                                                self.predict_validation,
                                                self.decision,
                                                accuracy_kfold=None,
                                                sensitivity_kfold=None,
                                                specificity_kfold=None,
                                                AUC_kfold=None,
                                                verbose=False,
                                                is_showfig=False,
                                                is_savefig=False,
                                                out_name=os.path.join(
                                                    self.save_directory,
                                                    "performances_test.pdf"))
            accuracy_validation[i] = acc
            sensitivity_validation[i] = sens
            specificity_validation[i] = spec
            auc_validation[i] = auc

        np.save(
            os.path.join(self.save_directory,
                         "permutation_test_results_train"),
            [accuracy_train, sensitivity_train, specificity_train, auc_train])

        np.save(
            os.path.join(self.save_directory,
                         "permutation_test_results_validation"), [
                             accuracy_validation, sensitivity_validation,
                             specificity_validation, auc_validation
                         ])
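A note on reading these results back: because the four arrays passed to `np.save` all have shape (n_perm, 1), NumPy stacks the list into a single (4, n_perm, 1) array and appends the .npy extension, so loading and unpacking looks like this:

import numpy as np

results = np.load("permutation_test_results_train.npy")  # shape (4, n_perm, 1)
accuracy_train, sensitivity_train, specificity_train, auc_train = results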