def MergeCvResult(self, store_folder):
    """Turn the stored cross-validation predictions into metrics.

    This is a generator. For every normalizer / dimension reduction /
    feature selector / feature number / classifier combination it:

    * yields ``(self.total_num, num)`` where ``num`` is the 1-based count
      of combinations reached so far;
    * reads ``<CV_TRAIN>_prediction.csv`` and ``<CV_VAL>_prediction.csv``
      from the classifier folder and evaluates their ``Pred`` / ``Label``
      columns with ``EstimatePrediction``;
    * stores the AUC and AUC-std entries of the resulting metric in the
      private AUC tables at the combination's index;
    * hands the metric to ``self._AddOneMetric`` (together with the path
      of ``metrics.csv`` in the classifier folder) and to
      ``self._MergeOneMetric``.

    Once every combination has been processed, ``self.total_metric`` for
    CV_TRAIN and CV_VAL is written to ``<key>_results.csv`` in
    ``store_folder``.

    :param store_folder: Root folder that holds the per-pipeline folders.
    """
    finished = 0
    for norm_index, normalizer in enumerate(self.normalizer_list):
        norm_folder = MakeFolder(store_folder, normalizer.GetName())
        for dr_index, dr in enumerate(self.dimension_reduction_list):
            dr_folder = MakeFolder(norm_folder, dr.GetName())
            for fs_index, fs in enumerate(self.feature_selector_list):
                for fn_index, fn in enumerate(self.feature_selector_num_list):
                    fs_folder = MakeFolder(
                        dr_folder, '{}_{}'.format(fs.GetName(), fn))
                    for cls_index, cls in enumerate(self.classifier_list):
                        cls_folder = MakeFolder(fs_folder, cls.GetName())
                        model_name = self.GetStoreName(
                            normalizer.GetName(), dr.GetName(),
                            fs.GetName(), str(fn), cls.GetName())
                        finished += 1
                        yield self.total_num, finished

                        # Position of this combination in the AUC tables.
                        cell = (norm_index, dr_index, fs_index,
                                fn_index, cls_index)
                        metric_path = os.path.join(cls_folder, 'metrics.csv')

                        # CV train first, then CV validation.
                        for key in (CV_TRAIN, CV_VAL):
                            prediction_path = os.path.join(
                                cls_folder, '{}_prediction.csv'.format(key))
                            prediction = pd.read_csv(prediction_path,
                                                     index_col=0)
                            metric = EstimatePrediction(prediction['Pred'],
                                                        prediction['Label'],
                                                        key_word=key)
                            self.__auc_dict[key][cell] = \
                                metric['{}_{}'.format(key, AUC)]
                            self.__auc_std_dict[key][cell] = \
                                metric['{}_{}'.format(key, AUC_STD)]
                            self._AddOneMetric(metric, metric_path)
                            self._MergeOneMetric(metric, key, model_name)

    for key in (CV_TRAIN, CV_VAL):
        self.total_metric[key].to_csv(
            os.path.join(store_folder, '{}_results.csv'.format(key)))
def RunWithCV(self, train_container, store_folder=''):
    """Fit and evaluate every pipeline combination on each CV split.

    This is a generator. For each ``(train, validation)`` pair produced
    by ``self.cv.Generate(train_container)`` the training part is
    balanced with ``self.balance.Run`` and then pushed through every
    normalizer / dimension reduction / feature selector / feature number
    / classifier combination. Each step is run on the training part and
    applied to the validation part with ``Transform``.

    Before a classifier is fitted the generator yields
    ``(self.total_num, num, group)``; ``num`` restarts from 1 for every
    split and ``group`` is the index of the split.

    The train and validation predictions of each classifier are passed
    to ``self._AddOneCvPrediction`` only when ``store_folder`` is
    non-empty. ``MakeFolder`` is called for every level regardless.

    :param train_container: Data container that is split by ``self.cv``.
    :param store_folder: Root folder for the per-pipeline folders.
    """
    splits = self.cv.Generate(train_container)
    for group, (cv_train_container, cv_val_container) in enumerate(splits):
        balanced_train = self.balance.Run(cv_train_container)
        finished = 0

        for normalizer in self.normalizer_list:
            norm_folder = MakeFolder(store_folder, normalizer.GetName())
            norm_train = normalizer.Run(balanced_train)
            norm_val = normalizer.Transform(cv_val_container)

            for dr in self.dimension_reduction_list:
                dr_folder = MakeFolder(norm_folder, dr.GetName())
                # Pass the normalized data through unless the reducer is truthy.
                dr_train, dr_val = norm_train, norm_val
                if dr:
                    dr_train = dr.Run(norm_train)
                    dr_val = dr.Transform(norm_val)

                for fs in self.feature_selector_list:
                    for fn in self.feature_selector_num_list:
                        # Same pass-through rule for the feature selector.
                        fs_folder = dr_folder
                        fs_train, fs_val = dr_train, dr_val
                        if fs:
                            fs_folder = MakeFolder(
                                dr_folder, '{}_{}'.format(fs.GetName(), fn))
                            fs.SetSelectedFeatureNumber(fn)
                            fs_train = fs.Run(dr_train)
                            fs_val = fs.Transform(dr_val)

                        for cls in self.classifier_list:
                            cls_folder = MakeFolder(fs_folder, cls.GetName())
                            # The name is not used in this method; the call
                            # is kept so the sequence of calls is unchanged.
                            self.GetStoreName(
                                normalizer.GetName(), dr.GetName(),
                                fs.GetName(), str(fn), cls.GetName())
                            finished += 1
                            yield self.total_num, finished, group

                            cls.SetDataContainer(fs_train)
                            cls.Fit()

                            train_pred = cls.Predict(fs_train.GetArray())
                            train_label = fs_train.GetLabel()
                            train_columns = {
                                'Pred': train_pred,
                                'Label': train_label,
                                'Group': [group for _ in train_label],
                            }
                            train_info = pd.DataFrame(
                                train_columns, index=fs_train.GetCaseName())

                            val_pred = cls.Predict(fs_val.GetArray())
                            val_label = fs_val.GetLabel()
                            val_columns = {
                                'Pred': val_pred,
                                'Label': val_label,
                                'Group': [group for _ in val_label],
                            }
                            val_info = pd.DataFrame(
                                val_columns, index=fs_val.GetCaseName())

                            if not store_folder:
                                continue
                            self._AddOneCvPrediction(
                                os.path.join(
                                    cls_folder,
                                    '{}_prediction.csv'.format(CV_TRAIN)),
                                train_info)
                            self._AddOneCvPrediction(
                                os.path.join(
                                    cls_folder,
                                    '{}_prediction.csv'.format(CV_VAL)),
                                val_info)
def RunWithoutCV(self, train_container, test_container=None, store_folder=''):
    """Fit every pipeline combination on the whole training set.

    This is a generator. The training data is balanced once with
    ``self.balance.Run`` and then, for every normalizer / dimension
    reduction / feature selector / feature number / classifier
    combination, the classifier is fitted on the balanced training data,
    saved with ``cls.Save`` and used to predict the balanced training
    data, the original training data and (when it is not empty) the test
    data. Every prediction is handed to ``self.SaveOneResult``.

    Before a classifier is fitted the generator yields
    ``(self.total_num, num)`` where ``num`` is the 1-based count of
    combinations reached so far.

    After all combinations are done, ``self.total_metric`` for
    BALANCE_TRAIN, TRAIN and (when the test data is not empty) TEST is
    written to ``<key>_results.csv`` in ``store_folder``.

    :param train_container: Training data container.
    :param test_container: Optional test data container. When omitted
        (or ``None``) a fresh empty ``DataContainer()`` is created for
        this call, so no instance is shared between calls.
    :param store_folder: Root folder for the per-pipeline folders.
    """
    # A default built in the signature would be evaluated once at
    # definition time and shared by every call; build it per call instead.
    if test_container is None:
        test_container = DataContainer()

    self.SavePipelineInfo(store_folder)
    num = 0

    # TODO: balancing could also be turned into a loop later on.
    balance_train_container = self.balance.Run(train_container, store_folder)

    for norm_index, normalizer in enumerate(self.normalizer_list):
        norm_store_folder = MakeFolder(store_folder, normalizer.GetName())
        norm_balance_train_container = normalizer.Run(
            balance_train_container, norm_store_folder,
            store_key=BALANCE_TRAIN)
        norm_train_container = normalizer.Transform(
            train_container, norm_store_folder, store_key=TRAIN)
        norm_test_container = normalizer.Transform(
            test_container, norm_store_folder, store_key=TEST)

        for dr_index, dr in enumerate(self.dimension_reduction_list):
            dr_store_folder = MakeFolder(norm_store_folder, dr.GetName())
            if dr:
                dr_balance_train_container = dr.Run(
                    norm_balance_train_container, dr_store_folder,
                    BALANCE_TRAIN)
                dr_train_container = dr.Transform(
                    norm_train_container, dr_store_folder, TRAIN)
                # NOTE(review): only this step skips Transform for an empty
                # test container; the normalizer and the feature selector
                # call Transform unconditionally.
                if not test_container.IsEmpty():
                    dr_test_container = dr.Transform(
                        norm_test_container, dr_store_folder, TEST)
                else:
                    dr_test_container = norm_test_container
            else:
                dr_balance_train_container = norm_balance_train_container
                dr_train_container = norm_train_container
                dr_test_container = norm_test_container

            for fs_index, fs in enumerate(self.feature_selector_list):
                for fn_index, fn in enumerate(self.feature_selector_num_list):
                    if fs:
                        fs_store_folder = MakeFolder(
                            dr_store_folder,
                            '{}_{}'.format(fs.GetName(), fn))
                        fs.SetSelectedFeatureNumber(fn)
                        fs_balance_train_container = fs.Run(
                            dr_balance_train_container, fs_store_folder,
                            BALANCE_TRAIN)
                        fs_train_container = fs.Transform(
                            dr_train_container, fs_store_folder, TRAIN)
                        fs_test_container = fs.Transform(
                            dr_test_container, fs_store_folder, TEST)
                    else:
                        fs_store_folder = dr_store_folder
                        fs_balance_train_container = dr_balance_train_container
                        fs_train_container = dr_train_container
                        fs_test_container = dr_test_container

                    for cls_index, cls in enumerate(self.classifier_list):
                        cls_store_folder = MakeFolder(
                            fs_store_folder, cls.GetName())
                        model_name = self.GetStoreName(
                            normalizer.GetName(), dr.GetName(),
                            fs.GetName(), str(fn), cls.GetName())
                        # Index of this combination, passed on to
                        # SaveOneResult with every prediction.
                        metrics_index = (norm_index, dr_index, fs_index,
                                         fn_index, cls_index)
                        num += 1
                        yield self.total_num, num

                        cls.SetDataContainer(fs_balance_train_container)
                        cls.Fit()
                        cls.Save(cls_store_folder)

                        balance_train_pred = cls.Predict(
                            fs_balance_train_container.GetArray())
                        balance_train_label = \
                            fs_balance_train_container.GetLabel()
                        self.SaveOneResult(
                            balance_train_pred, balance_train_label,
                            BALANCE_TRAIN,
                            fs_balance_train_container.GetCaseName(),
                            metrics_index, model_name, store_folder,
                            cls_store_folder)

                        train_data = fs_train_container.GetArray()
                        train_label = fs_train_container.GetLabel()
                        train_pred = cls.Predict(train_data)
                        self.SaveOneResult(
                            train_pred, train_label, TRAIN,
                            fs_train_container.GetCaseName(),
                            metrics_index, model_name, store_folder,
                            cls_store_folder)

                        if not test_container.IsEmpty():
                            test_data = fs_test_container.GetArray()
                            test_label = fs_test_container.GetLabel()
                            test_pred = cls.Predict(test_data)
                            self.SaveOneResult(
                                test_pred, test_label, TEST,
                                fs_test_container.GetCaseName(),
                                metrics_index, model_name, store_folder,
                                cls_store_folder)

    self.total_metric[BALANCE_TRAIN].to_csv(
        os.path.join(store_folder, '{}_results.csv'.format(BALANCE_TRAIN)))
    self.total_metric[TRAIN].to_csv(
        os.path.join(store_folder, '{}_results.csv'.format(TRAIN)))
    if not test_container.IsEmpty():
        self.total_metric[TEST].to_csv(
            os.path.join(store_folder, '{}_results.csv'.format(TEST)))