    def main_function(self, i, label_all_perm, feature_all):
        """The training, validation and test data are randomly split."""
        print(f"Permutation {i}...\n")
        # KFold Cross Validation
        accuracy, sensitivity, specificity, AUC = np.array([]), np.array(
            []), np.array([]), np.array([])
        kf = KFold(n_splits=self.cv, shuffle=True, random_state=0)
        for tr_ind, te_ind in kf.split(feature_all):
            feature_train = feature_all[tr_ind, :]
            label_train = label_all_perm[tr_ind]
            feature_test = feature_all[te_ind, :]
            label_test = label_all_perm[te_ind]

            # normalization
            prep = el_preprocessing.Preprocessing(
                data_preprocess_method='StandardScaler',
                data_preprocess_level='group')
            feature_train, feature_test = prep.data_preprocess(
                feature_train, feature_test)

            # dimension reduction
            if self.is_dim_reduction:
                feature_train, feature_test, model_dim_reduction = el_dimreduction.pca_apply(
                    feature_train, feature_test, self.components)

            # train
            model = self.training(feature_train, label_train)

            # test
            pred, dec = self.testing(model, feature_test)

            # Evaluating classification performances
            acc, sens, spec, auc = eval_performance(label_test,
                                                    pred,
                                                    dec,
                                                    accuracy_kfold=None,
                                                    sensitivity_kfold=None,
                                                    specificity_kfold=None,
                                                    AUC_kfold=None,
                                                    verbose=0,
                                                    is_showfig=0)
            accuracy = np.append(accuracy, acc)
            sensitivity = np.append(sensitivity, sens)
            specificity = np.append(specificity, spec)
            AUC = np.append(AUC, auc)

        # return np.mean(accuracy),np.mean(sensitivity), np.mean(specificity), np.mean(AUC)
        return accuracy, sensitivity, specificity, AUC
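
# --- Illustrative sketch, not part of the original example ---
# A minimal, self-contained version of the fold loop above using plain
# scikit-learn: StandardScaler fitted on the training fold only, PCA for
# dimension reduction, and a linear SVC. The el_preprocessing and
# el_dimreduction helpers are assumed to wrap equivalent steps; the names
# kfold_sketch and n_components are hypothetical.
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_auc_score

def kfold_sketch(X, y, n_splits=5, n_components=0.95):
    accs, aucs = [], []
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=0)
    for tr, te in kf.split(X):
        scaler = StandardScaler().fit(X[tr])            # fit on training fold only
        X_tr, X_te = scaler.transform(X[tr]), scaler.transform(X[te])
        pca = PCA(n_components=n_components).fit(X_tr)  # keep 95% of the variance
        X_tr, X_te = pca.transform(X_tr), pca.transform(X_te)
        clf = SVC(kernel='linear').fit(X_tr, y[tr])
        accs.append(accuracy_score(y[te], clf.predict(X_te)))
        aucs.append(roc_auc_score(y[te], clf.decision_function(X_te)))  # assumes binary labels
    return np.array(accs), np.array(aucs)
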
Example #2
    def main_svc_rfe_cv(sel):
        print('Training model and testing...\n')
        # Load data
        uid_550, feature_550, label_550 = sel._load_data(sel.dataset_our_center_550)
        uid_206, feature_206, label_206 = sel._load_data(sel.dataset_206)
        uid_COBRE, feature_COBRE, label_COBRE = sel._load_data(sel.data_COBRE)
        uid_UCAL, feature_UCAL, label_UCAL = sel._load_data(sel.data_UCAL)
        uid_all = np.concatenate([uid_550, uid_206, uid_COBRE, uid_UCAL])
        feature_all = [feature_550, feature_206, feature_COBRE, feature_UCAL]
        sel.label_all = [label_550, label_206, label_COBRE, label_UCAL]
        name = ['550','206','COBRE','UCLA']

        # Leave one site CV
        n_site = len(sel.label_all)
        test_index = np.array([], dtype=np.int16)
        sel.decision = np.array([], dtype=np.int16)
        sel.prediction = np.array([], dtype=np.int16)
        sel.accuracy = np.array([], dtype=np.float16)
        sel.sensitivity = np.array([], dtype=np.float16)
        sel.specificity = np.array([], dtype=np.float16)
        sel.AUC = np.array([], dtype=np.float16)
        sel.coef = []
        for i in range(n_site):
            print('-'*40)
            print(f'{i+1}/{n_site}: test dataset is {name[i]}...')
            feature_train, label_train = feature_all.copy(), sel.label_all.copy()
            feature_test, label_test = feature_train.pop(i), label_train.pop(i)
            feature_train = np.concatenate(feature_train, axis=0)
            label_train = np.concatenate(label_train, axis=0)

            # Resampling training data
            # feature_train, label_train = sel.re_sampling(feature_train, label_train)
            # Normalization
            prep = el_preprocessing.Preprocessing(data_preprocess_method='StandardScaler', data_preprocess_level='subject')
            feature_train, feature_test = prep.data_preprocess(feature_train, feature_test)

            # Dimension reduction
            if sel.is_dim_reduction:
                feature_train, feature_test, model_dim_reduction = el_dimreduction.pca_apply(
                    feature_train, feature_test, sel.components
                )

                print(f'After dimension reduction, the feature number is {feature_train.shape[1]}')
            else:
                print('No dimension reduction performed\n')

            # Train and test
            print('training and testing...\n')
            model = sel.training(feature_train, label_train, sel.cv)
        
            if sel.is_dim_reduction:
                sel.coef.append(model_dim_reduction.inverse_transform(model.coef_))  # save coef
            else:
                sel.coef.append(model.coef_)  # save coef

            pred, dec = sel.testing(model, feature_test)
            sel.prediction = np.append(sel.prediction, np.array(pred))
            sel.decision = np.append(sel.decision, np.array(dec))
            
            # Evaluating classification performances
            acc, sens, spec, auc = eval_performance(label_test, pred, dec, 
                accuracy_kfold=None, sensitivity_kfold=None, specificity_kfold=None, AUC_kfold=None,
                 verbose=1, is_showfig=0)
            sel.accuracy = np.append(sel.accuracy, acc)
            sel.sensitivity = np.append(sel.sensitivity, sens)
            sel.specificity = np.append(sel.specificity, spec)
            sel.AUC = np.append(sel.AUC, auc)
            print(f'performances = {acc, sens, spec, auc}')
        
        sel.label_all = np.concatenate(sel.label_all)
        sel.special_result = np.concatenate( [uid_all, sel.label_all, sel.decision, sel.prediction], axis=0).reshape(4, -1).T
        return sel
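
# --- Illustrative sketch, not part of the original example ---
# The leave-one-site-out loop above can also be expressed with scikit-learn's
# LeaveOneGroupOut, passing each subject's site membership as `groups`. This
# simplified version standardizes at the group level (the example above uses
# its own Preprocessing helper at the 'subject' level) and fits a linear SVM
# per held-out site; leave_one_site_out_sketch and site_ids are hypothetical names.
import numpy as np
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

def leave_one_site_out_sketch(X, y, site_ids):
    logo = LeaveOneGroupOut()
    accuracies = []
    for tr, te in logo.split(X, y, groups=site_ids):
        scaler = StandardScaler().fit(X[tr])
        clf = LinearSVC(C=1.0, max_iter=5000).fit(scaler.transform(X[tr]), y[tr])
        accuracies.append(clf.score(scaler.transform(X[te]), y[te]))
    return np.array(accuracies)  # one accuracy per held-out site
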
    def main_function(self):
        """
        This function is the main function.
        """

        # Load data and mask
        data_all, label_all, self.orig_shape, self.mask_obj, self.mask_all = \
            self._load_nii_and_gen_label()

        # KFold Cross Validation
        self.label_test_all = np.array([], dtype=np.int16)
        train_index = np.array([], dtype=np.int16)
        test_index = np.array([], dtype=np.int16)
        self.decision = np.array([], dtype=np.int16)
        self.prediction = np.array([], dtype=np.int16)
        self.accuracy = np.array([], dtype=np.float16)
        self.sensitivity = np.array([], dtype=np.float16)
        self.specificity = np.array([], dtype=np.float16)
        self.AUC = np.array([], dtype=np.float16)
        self.coef = []
        kf = KFold(n_splits=self.num_of_fold_outer,
                   shuffle=True,
                   random_state=0)
        for i, (tr_ind, te_ind) in enumerate(kf.split(data_all)):
            print(f'------{i+1}/{self.num_of_fold_outer}...------\n')
            train_index = np.int16(np.append(train_index, tr_ind))
            test_index = np.int16(np.append(test_index, te_ind))
            feature_train = data_all[tr_ind, :]
            label_train = label_all[tr_ind]
            feature_test = data_all[te_ind, :]
            label_test = label_all[te_ind]
            self.label_test_all = np.int16(
                np.append(self.label_test_all, label_test))

            # Resampling training data
            feature_train, label_train = self.re_sampling(
                feature_train, label_train)

            # data_preprocess
            prep = elprep.Preprocessing(self.data_preprocess_method,
                                        self.data_preprocess_level)
            feature_train, feature_test = prep.data_preprocess(
                feature_train, feature_test)

            # dimension reduction using univariate feature selection
            # feature_train, feature_test, mask_selected = self.dimReduction_filter(
            #         feature_train, label_train, feature_test, 0.05)

            # Dimension reduction using PCA
            if self.is_dim_reduction:
                feature_train, feature_test, model_dim_reduction = self.dimReduction_PCA(
                    feature_train, feature_test, self.components)
                print(
                    f'After dimension reduction, the feature number is {feature_train.shape[1]}'
                )
            else:
                print('No dimension reduction performed\n')
                print(f'The feature number is {feature_train.shape[1]}')

            # Train: inner feature selection using RFECV
            print('Training...\n')
            model, weight = self.rfeCV_training(feature_train, label_train,
                                                self.step,
                                                self.num_fold_of_inner_rfeCV,
                                                self.n_jobs)

            if self.is_dim_reduction:
                self.coef.append(model_dim_reduction.inverse_transform(weight))
            else:
                self.coef.append(weight)

            # Testing
            print('Testing...\n')
            pred, dec = self.testing(model, feature_test)
            self.prediction = np.append(self.prediction, np.array(pred))
            self.decision = np.append(self.decision, np.array(dec))

            # Evaluating classification performances
            acc, sens, spec, auc = eval_performance(
                label_test,
                pred,
                dec,
                accuracy_kfold=None,
                sensitivity_kfold=None,
                specificity_kfold=None,
                AUC_kfold=None,
                verbose=1,
                is_showfig=self.is_showfig_in_each_fold)

            self.accuracy = np.append(self.accuracy, acc)
            self.sensitivity = np.append(self.sensitivity, sens)
            self.specificity = np.append(self.specificity, spec)
            self.AUC = np.append(self.AUC, auc)

        # Save results and fig to local path
        self.save_results()
        self._weight2nii(dimension_nii_data=(61, 73, 61))
        self.save_fig()

        print("--" * 10 + "Done!" + "--" * 10)
        return self
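
# --- Illustrative sketch, not part of the original example ---
# A minimal version of the inner feature-selection step above: RFECV with a
# linear SVM inside each outer training fold, returning the selector and a
# weight vector mapped back to the full feature space (eliminated features
# stay zero), which mirrors the `weight` consumed above. rfeCV_training is
# assumed to wrap something similar; rfecv_sketch is a hypothetical name.
import numpy as np
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import LinearSVC

def rfecv_sketch(X_train, y_train, step=0.1, inner_folds=5, n_jobs=1):
    selector = RFECV(estimator=LinearSVC(max_iter=5000),
                     step=step,
                     cv=StratifiedKFold(n_splits=inner_folds),
                     scoring='accuracy',
                     n_jobs=n_jobs)
    selector.fit(X_train, y_train)
    weight = np.zeros(X_train.shape[1])
    weight[selector.support_] = selector.estimator_.coef_.ravel()
    return selector, weight
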