コード例 #1
0
ファイル: Pipelines.py プロジェクト: salan668/FAE
    def MergeCvResult(self, store_folder):
        """Merge the stored cross-validation predictions into metric tables.

        Generator. For every (normalizer, dimension reduction, feature
        selector, feature number, classifier) combination, the CV-train and
        CV-validation ``*_prediction.csv`` files are read from the
        classifier folder returned by ``MakeFolder``. Their ``Pred`` and
        ``Label`` columns are handed to ``EstimatePrediction``; the resulting
        AUC and AUC-std entries are stored in the private AUC tables, and the
        metrics are passed to ``self._AddOneMetric`` (together with the
        folder's ``metrics.csv`` path) and ``self._MergeOneMetric``.

        Once all combinations are consumed, ``self.total_metric`` for the
        train and validation keys is written to ``<key>_results.csv`` inside
        ``store_folder``.

        Args:
            store_folder: Root folder that holds the per-model sub-folders.

        Yields:
            ``(self.total_num, num)``, where ``num`` counts (from 1) the
            combination that is about to be merged.
        """
        num = 0
        for norm_index, normalizer in enumerate(self.normalizer_list):
            norm_store_folder = MakeFolder(store_folder, normalizer.GetName())
            for dr_index, dr in enumerate(self.dimension_reduction_list):
                dr_store_folder = MakeFolder(norm_store_folder, dr.GetName())
                for fs_index, fs in enumerate(self.feature_selector_list):
                    for fn_index, fn in enumerate(self.feature_selector_num_list):
                        fs_store_folder = MakeFolder(dr_store_folder, '{}_{}'.format(fs.GetName(), fn))
                        for cls_index, cls in enumerate(self.classifier_list):
                            cls_store_folder = MakeFolder(fs_store_folder, cls.GetName())
                            model_name = self.GetStoreName(normalizer.GetName(),
                                                           dr.GetName(),
                                                           fs.GetName(),
                                                           str(fn),
                                                           cls.GetName())
                            num += 1
                            # Progress is reported before the files are read.
                            yield self.total_num, num

                            # Position of this model in the 5-D AUC tables.
                            metric_index = (norm_index, dr_index, fs_index, fn_index, cls_index)
                            metric_path = os.path.join(cls_store_folder, 'metrics.csv')

                            # Same treatment for CV train first, then CV validation.
                            for cv_key in (CV_TRAIN, CV_VAL):
                                prediction_path = os.path.join(cls_store_folder,
                                                               '{}_prediction.csv'.format(cv_key))
                                cv_info = pd.read_csv(prediction_path, index_col=0)
                                cv_metric = EstimatePrediction(cv_info['Pred'], cv_info['Label'],
                                                               key_word=cv_key)
                                self.__auc_dict[cv_key][metric_index] = \
                                    cv_metric['{}_{}'.format(cv_key, AUC)]
                                self.__auc_std_dict[cv_key][metric_index] = \
                                    cv_metric['{}_{}'.format(cv_key, AUC_STD)]
                                self._AddOneMetric(cv_metric, metric_path)
                                self._MergeOneMetric(cv_metric, cv_key, model_name)

        # Dump the merged tables: train first, validation second.
        for cv_key in (CV_TRAIN, CV_VAL):
            self.total_metric[cv_key].to_csv(os.path.join(store_folder, '{}_results.csv'.format(cv_key)))
コード例 #2
0
ファイル: Pipelines.py プロジェクト: zhangjingcode/FAE
    def RunWithCV(self, train_container, store_folder=''):
        """Fit and evaluate every pipeline combination on each CV split.

        Generator. ``self.cv.Generate(train_container)`` supplies
        (train, validation) container pairs. For each pair the training part
        is passed through ``self.balance.Run`` and then through every
        normalizer, dimension reduction, feature selector / feature number
        and classifier combination. Each classifier is fitted on the
        processed training container and used to predict both the training
        and the validation container. When ``store_folder`` is non-empty the
        two prediction tables are handed to ``self._AddOneCvPrediction``.

        Args:
            train_container: Container passed to ``self.cv.Generate``.
            store_folder: Root output folder; a false value (the default
                ``''``) skips the ``_AddOneCvPrediction`` calls.

        Yields:
            ``(self.total_num, num, group)``, where ``num`` counts (from 1)
            the combinations within the current split and ``group`` is the
            split index. The tuple is yielded before the classifier is
            fitted.
        """

        def _predict_to_frame(classifier, container, fold):
            # One row per case: prediction, label and the split index.
            prediction = classifier.Predict(container.GetArray())
            labels = container.GetLabel()
            return pd.DataFrame(
                {
                    'Pred': prediction,
                    'Label': labels,
                    'Group': [fold for _ in labels],
                },
                index=container.GetCaseName())

        for group, (cv_train_container, cv_val_container) in enumerate(
                self.cv.Generate(train_container)):
            # Only the training part of the split is balanced.
            balanced_train = self.balance.Run(cv_train_container)
            num = 0

            for norm_index, normalizer in enumerate(self.normalizer_list):
                norm_store_folder = MakeFolder(store_folder,
                                               normalizer.GetName())
                norm_train = normalizer.Run(balanced_train)
                norm_val = normalizer.Transform(cv_val_container)

                for dr_index, dr in enumerate(self.dimension_reduction_list):
                    dr_store_folder = MakeFolder(norm_store_folder,
                                                 dr.GetName())
                    if dr:
                        dr_train = dr.Run(norm_train)
                        dr_val = dr.Transform(norm_val)
                    else:
                        # A false-valued reducer passes the data through.
                        dr_train, dr_val = norm_train, norm_val

                    for fs_index, fs in enumerate(self.feature_selector_list):
                        for fn_index, fn in enumerate(
                                self.feature_selector_num_list):
                            if fs:
                                fs_store_folder = MakeFolder(
                                    dr_store_folder,
                                    '{}_{}'.format(fs.GetName(), fn))
                                fs.SetSelectedFeatureNumber(fn)
                                fs_train = fs.Run(dr_train)
                                fs_val = fs.Transform(dr_val)
                            else:
                                # A false-valued selector passes the data through.
                                fs_store_folder = dr_store_folder
                                fs_train, fs_val = dr_train, dr_val

                            for cls_index, cls in enumerate(
                                    self.classifier_list):
                                cls_store_folder = MakeFolder(
                                    fs_store_folder, cls.GetName())
                                # NOTE(review): model_name is not used further
                                # in this method; the call is kept unchanged.
                                model_name = self.GetStoreName(
                                    normalizer.GetName(), dr.GetName(),
                                    fs.GetName(), str(fn), cls.GetName())
                                num += 1
                                yield self.total_num, num, group

                                cls.SetDataContainer(fs_train)
                                cls.Fit()

                                cv_train_info = _predict_to_frame(
                                    cls, fs_train, group)
                                cv_val_info = _predict_to_frame(
                                    cls, fs_val, group)

                                if not store_folder:
                                    continue
                                for cv_key, cv_info in (
                                        (CV_TRAIN, cv_train_info),
                                        (CV_VAL, cv_val_info)):
                                    self._AddOneCvPrediction(
                                        os.path.join(
                                            cls_store_folder,
                                            '{}_prediction.csv'.format(
                                                cv_key)), cv_info)
コード例 #3
0
ファイル: Pipelines.py プロジェクト: zhangjingcode/FAE
    def RunWithoutCV(self,
                     train_container,
                     test_container=None,
                     store_folder=''):
        """Fit every pipeline combination on the balanced training data.

        Generator. The pipeline description is saved with
        ``self.SavePipelineInfo``, the training container is balanced with
        ``self.balance.Run``, and then every normalizer, dimension reduction,
        feature selector / feature number and classifier combination is
        processed. Each classifier is fitted on the processed balanced
        training container, saved to its folder, and used to predict the
        balanced training data, the original training data and (when the
        test container is not empty) the test data. Every prediction set is
        handed to ``self.SaveOneResult``. After the last combination the
        merged ``self.total_metric`` tables are written to
        ``<key>_results.csv`` inside ``store_folder``.

        Args:
            train_container: Training data container.
            test_container: Optional test data container. When ``None`` a
                fresh ``DataContainer()`` is created for this call.
            store_folder: Root output folder.

        Yields:
            ``(self.total_num, num)``, where ``num`` counts (from 1) the
            combination about to be fitted.
        """
        # A default written in the signature is evaluated once, when the
        # function is defined, and would be shared by all calls. Build the
        # placeholder container per call instead.
        if test_container is None:
            test_container = DataContainer()

        self.SavePipelineInfo(store_folder)
        num = 0

        # TODO: balancing could later be turned into a loop as well.
        balance_train_container = self.balance.Run(train_container,
                                                   store_folder)

        for norm_index, normalizer in enumerate(self.normalizer_list):
            norm_store_folder = MakeFolder(store_folder, normalizer.GetName())
            # The normalizer is run on the balanced data and then applied to
            # the original train and test containers via Transform.
            norm_balance_train_container = normalizer.Run(
                balance_train_container,
                norm_store_folder,
                store_key=BALANCE_TRAIN)
            norm_train_container = normalizer.Transform(train_container,
                                                        norm_store_folder,
                                                        store_key=TRAIN)
            norm_test_container = normalizer.Transform(test_container,
                                                       norm_store_folder,
                                                       store_key=TEST)

            for dr_index, dr in enumerate(self.dimension_reduction_list):
                dr_store_folder = MakeFolder(norm_store_folder, dr.GetName())
                if dr:
                    dr_balance_train_container = dr.Run(
                        norm_balance_train_container, dr_store_folder,
                        BALANCE_TRAIN)
                    dr_train_container = dr.Transform(norm_train_container,
                                                      dr_store_folder, TRAIN)
                    # The test set is only transformed when it holds data.
                    if not test_container.IsEmpty():
                        dr_test_container = dr.Transform(
                            norm_test_container, dr_store_folder, TEST)
                    else:
                        dr_test_container = norm_test_container
                else:
                    # A false-valued reducer passes the data through.
                    dr_balance_train_container = norm_balance_train_container
                    dr_train_container = norm_train_container
                    dr_test_container = norm_test_container

                for fs_index, fs in enumerate(self.feature_selector_list):
                    for fn_index, fn in enumerate(
                            self.feature_selector_num_list):
                        if fs:
                            fs_store_folder = MakeFolder(
                                dr_store_folder,
                                '{}_{}'.format(fs.GetName(), fn))
                            fs.SetSelectedFeatureNumber(fn)
                            fs_balance_train_container = fs.Run(
                                dr_balance_train_container, fs_store_folder,
                                BALANCE_TRAIN)
                            fs_train_container = fs.Transform(
                                dr_train_container, fs_store_folder, TRAIN)
                            fs_test_container = fs.Transform(
                                dr_test_container, fs_store_folder, TEST)
                        else:
                            # A false-valued selector passes the data through.
                            fs_store_folder = dr_store_folder
                            fs_balance_train_container = dr_balance_train_container
                            fs_train_container = dr_train_container
                            fs_test_container = dr_test_container

                        for cls_index, cls in enumerate(self.classifier_list):
                            cls_store_folder = MakeFolder(
                                fs_store_folder, cls.GetName())
                            model_name = self.GetStoreName(
                                normalizer.GetName(), dr.GetName(),
                                fs.GetName(), str(fn), cls.GetName())
                            # Position of this model along the five pipeline
                            # axes, forwarded to SaveOneResult.
                            metrics_index = (norm_index, dr_index, fs_index,
                                             fn_index, cls_index)
                            num += 1
                            # Progress is reported before the fit starts.
                            yield self.total_num, num

                            cls.SetDataContainer(fs_balance_train_container)
                            cls.Fit()
                            cls.Save(cls_store_folder)

                            # Balanced training data (what the model was fitted on).
                            balance_train_pred = cls.Predict(
                                fs_balance_train_container.GetArray())
                            balance_train_label = fs_balance_train_container.GetLabel(
                            )
                            self.SaveOneResult(
                                balance_train_pred, balance_train_label,
                                BALANCE_TRAIN,
                                fs_balance_train_container.GetCaseName(),
                                metrics_index, model_name, store_folder,
                                cls_store_folder)

                            # Original (unbalanced) training data.
                            train_data = fs_train_container.GetArray()
                            train_label = fs_train_container.GetLabel()
                            train_pred = cls.Predict(train_data)
                            self.SaveOneResult(
                                train_pred, train_label, TRAIN,
                                fs_train_container.GetCaseName(),
                                metrics_index, model_name, store_folder,
                                cls_store_folder)

                            # Test data, only when a test set was supplied.
                            if not test_container.IsEmpty():
                                test_data = fs_test_container.GetArray()
                                test_label = fs_test_container.GetLabel()
                                test_pred = cls.Predict(test_data)
                                self.SaveOneResult(
                                    test_pred, test_label, TEST,
                                    fs_test_container.GetCaseName(),
                                    metrics_index, model_name, store_folder,
                                    cls_store_folder)

        # Write the merged per-dataset metric tables.
        self.total_metric[BALANCE_TRAIN].to_csv(
            os.path.join(store_folder, '{}_results.csv'.format(BALANCE_TRAIN)))
        self.total_metric[TRAIN].to_csv(
            os.path.join(store_folder, '{}_results.csv'.format(TRAIN)))
        if not test_container.IsEmpty():
            self.total_metric[TEST].to_csv(
                os.path.join(store_folder, '{}_results.csv'.format(TEST)))