def TestNewData(NewDataCsv, model_folder, result_save_path=''):
    '''
    Apply a previously trained pipeline to a new feature matrix and score it.

    :param NewDataCsv: New radiomics feature matrix csv file path
    :param model_folder: The trained model path (handed to LoadTrainInfo)
    :param result_save_path: Optional output folder. When non-empty it is
        created if missing and receives the metric summary (through
        CrossValidation.SaveResult), 'test_predict.npy', 'test_label.npy'
        and 'test_info.csv'. When empty nothing is written.
    :return: the metric object returned by EstimateMetirc for the new data
    '''
    train_info = LoadTrainInfo(model_folder)

    new_data_container = DataContainer()
    new_data_container.Load(NewDataCsv)

    # Normalization: reuse the normalizer stored with the trained model.
    # NOTE(review): no feature selection is applied before prediction here;
    # confirm the stored normalizer/classifier handle that themselves.
    new_data_container = train_info['normalizer'].Transform(new_data_container)

    # Model: attach the data to the stored classifier, then query the
    # underlying model directly.
    classifier = train_info['classifier']
    classifier.SetDataContainer(new_data_container)
    model = classifier.GetModel()
    # Only column 1 of predict_proba is kept as the prediction.
    predict = model.predict_proba(new_data_container.GetArray())[:, 1]

    label = new_data_container.GetLabel()
    case_name = new_data_container.GetCaseName()

    metric = EstimateMetirc(predict, label)
    print(metric)
    print('\t')

    if result_save_path:
        # Create the folder up front so none of the writers below fail on a
        # missing directory.
        os.makedirs(result_save_path, exist_ok=True)

        info = {}
        info.update(metric)
        CrossValidation().SaveResult(info, result_save_path)

        np.save(os.path.join(result_save_path, 'test_predict.npy'), predict)
        np.save(os.path.join(result_save_path, 'test_label.npy'), label)

        # One row per case, preceded by a header row.
        test_result_info = [['CaseName', 'Pred', 'Label']]
        test_result_info.extend(
            [case_name[index], predict[index], label[index]]
            for index in range(len(label)))
        with open(os.path.join(result_save_path, 'test_info.csv'), 'w',
                  newline='') as csvfile:
            csv.writer(csvfile).writerows(test_result_info)

    return metric
def Run(self, data_container, test_data_container=DataContainer(), store_folder=''):
    '''
    Cross-validate the classifier, refit it on the whole training set and
    optionally evaluate a test set and store the results.

    :param data_container: training data; GetArray() and GetLabel() are used
    :param test_data_container: optional test data; it is only evaluated when
        its GetArray() is non-empty. NOTE(review): the default instance is
        created once at definition time and shared by every call; this
        method only reads from it.
    :param store_folder: output folder; nothing is written when empty. The
        folder (including missing parents) is created on demand.
    :return: (train_metric, val_metric, test_metric); test_metric is an empty
        dict when no test data was supplied
    '''
    data = data_container.GetArray()
    label = data_container.GetLabel()

    train_pred_list, train_label_list = [], []
    val_pred_list, val_label_list = [], []
    # Validation indices in the order the folds produced them.
    val_index_store = []

    for train_index, val_index in self.__cv.split(data, label):
        val_index_store.extend(val_index)

        train_data, train_label = data[train_index, :], label[train_index]
        val_data, val_label = data[val_index, :], label[val_index]

        self.__classifier.SetData(train_data, train_label)
        self.__classifier.Fit()

        train_prob = self.__classifier.Predict(train_data)
        val_prob = self.__classifier.Predict(val_data)

        train_pred_list.extend(train_prob)
        train_label_list.extend(train_label)
        val_pred_list.extend(val_prob)
        val_label_list.extend(val_label)

    # Metrics are computed on the predictions pooled over all folds.
    total_train_label = np.asarray(train_label_list, dtype=np.uint8)
    total_train_pred = np.asarray(train_pred_list, dtype=np.float32)
    train_metric = EstimateMetirc(total_train_pred, total_train_label, 'train')

    total_label = np.asarray(val_label_list, dtype=np.uint8)
    total_pred = np.asarray(val_pred_list, dtype=np.float32)
    val_metric = EstimateMetirc(total_pred, total_label, 'val')

    # Final model: refit on the complete training container.
    self.__classifier.SetDataContainer(data_container)
    self.__classifier.Fit()

    test_metric = {}
    has_test = test_data_container.GetArray().size > 0
    if has_test:
        test_data = test_data_container.GetArray()
        test_label = test_data_container.GetLabel()
        test_pred = self.__classifier.Predict(test_data)
        test_metric = EstimateMetirc(test_pred, test_label, 'test')

    if store_folder:
        # makedirs also creates missing parent folders and does not fail if
        # the folder already exists.
        os.makedirs(store_folder, exist_ok=True)

        info = {}
        info.update(train_metric)
        info.update(val_metric)

        np.save(os.path.join(store_folder, 'train_predict.npy'), total_train_pred)
        np.save(os.path.join(store_folder, 'val_predict.npy'), total_pred)
        np.save(os.path.join(store_folder, 'train_label.npy'), total_train_label)
        np.save(os.path.join(store_folder, 'val_label.npy'), total_label)

        cv_info_path = os.path.join(store_folder, 'cv_info.csv')
        pd.DataFrame(data=val_index_store).to_csv(cv_info_path)

        DrawROCList(total_train_pred, total_train_label,
                    store_path=os.path.join(store_folder, 'train_ROC.jpg'),
                    is_show=False)
        DrawROCList(total_pred, total_label,
                    store_path=os.path.join(store_folder, 'val_ROC.jpg'),
                    is_show=False)

        if has_test:
            info.update(test_metric)
            np.save(os.path.join(store_folder, 'test_predict.npy'), test_pred)
            np.save(os.path.join(store_folder, 'test_label.npy'), test_label)
            DrawROCList(test_pred, test_label,
                        store_path=os.path.join(store_folder, 'test_ROC.jpg'),
                        is_show=False)

        self.__classifier.Save(store_folder)
        self.SaveResult(info, store_folder)

    return train_metric, val_metric, test_metric
def Run(self, data_container, test_data_container=DataContainer(), store_folder=''):
    '''
    Run cross validation, refit on all training data and score optional test data.

    :param data_container: training data; its array, labels and feature names
        are read
    :param test_data_container: optional test data. When its array is
        non-empty it is first reduced to the training feature names through
        FeatureSelector.SelectFeatureByName and then scored.
    :param store_folder: results are written only when this is non-empty and
        already exists as a directory; the folder is not created here
    :return: (train_metric, val_metric, test_metric); test_metric stays an
        empty dict when there is no test data
    '''
    features = data_container.GetArray()
    targets = data_container.GetLabel()

    pooled_train_pred, pooled_train_label = [], []
    pooled_val_pred, pooled_val_label = [], []

    for train_index, val_index in self.__cv.split(features, targets):
        fold_train_x, fold_train_y = features[train_index, :], targets[train_index]
        fold_val_x, fold_val_y = features[val_index, :], targets[val_index]

        self.__classifier.SetData(fold_train_x, fold_train_y)
        self.__classifier.Fit()

        fold_train_prob = self.__classifier.Predict(fold_train_x)
        fold_val_prob = self.__classifier.Predict(fold_val_x)

        pooled_train_pred.extend(fold_train_prob)
        pooled_train_label.extend(fold_train_y)
        pooled_val_pred.extend(fold_val_prob)
        pooled_val_label.extend(fold_val_y)

    # Pool every fold before computing the metrics.
    total_train_label = np.asarray(pooled_train_label, dtype=np.uint8)
    total_train_pred = np.asarray(pooled_train_pred, dtype=np.float32)
    train_metric = EstimateMetirc(total_train_pred, total_train_label, 'train')

    total_val_label = np.asarray(pooled_val_label, dtype=np.uint8)
    total_val_pred = np.asarray(pooled_val_pred, dtype=np.float32)
    val_metric = EstimateMetirc(total_val_pred, total_val_label, 'val')

    # Final fit on the complete training set.
    self.__classifier.SetData(features, targets)
    self.__classifier.Fit()

    test_metric = {}
    if test_data_container.GetArray().size > 0:
        # Align the test features with the ones used for training.
        training_feature_names = data_container.GetFeatureName()
        selector = FeatureSelector()
        test_data_container = selector.SelectFeatureByName(
            test_data_container, training_feature_names)

        test_data = test_data_container.GetArray()
        test_label = test_data_container.GetLabel()
        test_pred = self.__classifier.Predict(test_data)
        test_metric = EstimateMetirc(test_pred, test_label, 'test')

    # Nothing is stored unless an existing directory was given.
    if not (store_folder and os.path.isdir(store_folder)):
        return train_metric, val_metric, test_metric

    info = {}
    info.update(train_metric)
    info.update(val_metric)

    cv_arrays = (
        ('train_predict.npy', total_train_pred),
        ('val_predict.npy', total_val_pred),
        ('train_label.npy', total_train_label),
        ('val_label.npy', total_val_label),
    )
    for file_name, array in cv_arrays:
        np.save(os.path.join(store_folder, file_name), array)

    if test_data_container.GetArray().size > 0:
        info.update(test_metric)
        np.save(os.path.join(store_folder, 'test_predict.npy'), test_pred)
        np.save(os.path.join(store_folder, 'test_label.npy'), test_label)

    self.__classifier.Save(store_folder)
    self.SaveCVInfo(info, store_folder)

    return train_metric, val_metric, test_metric
def Run(self, data_container, test_data_container=DataContainer(), store_folder='', is_hyper_parameter=False):
    '''
    Cross-validate every classifier parameter candidate, keep the one with the
    highest validation AUC, refit it on all training data and optionally
    evaluate a test set and store the results.

    :param data_container: training data; array, labels and case names are read
    :param test_data_container: optional test data; only evaluated when its
        GetArray() is non-empty. NOTE(review): the default instance is created
        once at definition time and shared by every call; this method only
        reads from it.
    :param store_folder: output folder; nothing is written when empty. The
        folder (including missing parents) is created on demand.
    :param is_hyper_parameter: when True and exactly one parameter candidate
        is configured, the candidate list is reloaded through
        AutoLoadClassifierParameterList before the search
    :return: (train_cv_metric, val_cv_metric, test_metric, all_train_metric)
        for the selected candidate; test_metric is an empty dict when no test
        data was supplied
    '''
    data = data_container.GetArray()
    label = data_container.GetLabel()
    case_name = data_container.GetCaseName()

    if len(self.classifier_parameter_list) == 1 and is_hyper_parameter:
        # NOTE(review): the backslash makes this relative path
        # Windows-specific; left unchanged because how the callee resolves it
        # is not visible here.
        self.AutoLoadClassifierParameterList(
            relative_path=r'HyperParameters\Classifier')

    param_metric_train_auc = []
    param_metric_val_auc = []
    param_all = []

    for parameter in self.classifier_parameter_list:
        self.SetDefaultClassifier()
        self.classifier.SetModelParameter(parameter)

        # The accumulators must start empty for every candidate; otherwise
        # the predictions of earlier candidates leak into the metrics of
        # later ones and corrupt the parameter selection.
        train_pred_list, train_label_list = [], []
        val_pred_list, val_label_list = [], []
        train_cv_info = [['CaseName', 'Group', 'Pred', 'Label']]
        val_cv_info = [['CaseName', 'Group', 'Pred', 'Label']]

        # Folds are numbered from 1 in the per-case CSV output.
        for group_index, (train_index, val_index) in enumerate(
                self.__cv.split(data, label), start=1):
            train_data, train_label = data[train_index, :], label[train_index]
            val_data, val_label = data[val_index, :], label[val_index]

            self.classifier.SetData(train_data, train_label)
            self.classifier.Fit()

            train_prob = self.classifier.Predict(train_data)
            val_prob = self.classifier.Predict(val_data)

            for position, case_index in enumerate(train_index):
                train_cv_info.append([
                    case_name[case_index], str(group_index),
                    train_prob[position], train_label[position]
                ])
            for position, case_index in enumerate(val_index):
                val_cv_info.append([
                    case_name[case_index], str(group_index),
                    val_prob[position], val_label[position]
                ])

            train_pred_list.extend(train_prob)
            train_label_list.extend(train_label)
            val_pred_list.extend(val_prob)
            val_label_list.extend(val_label)

        # Metrics of this candidate, pooled over its folds.
        total_train_label = np.asarray(train_label_list, dtype=np.uint8)
        total_train_pred = np.asarray(train_pred_list, dtype=np.float32)
        train_cv_metric = EstimateMetirc(total_train_pred, total_train_label, 'train')

        total_val_label = np.asarray(val_label_list, dtype=np.uint8)
        total_val_pred = np.asarray(val_pred_list, dtype=np.float32)
        val_cv_metric = EstimateMetirc(total_val_pred, total_val_label, 'val')

        param_metric_train_auc.append(float(train_cv_metric['train_auc']))
        param_metric_val_auc.append(float(val_cv_metric['val_auc']))
        # The row lists are rebuilt for every candidate, so they can be
        # stored directly without copying.
        param_all.append({
            'total_train_label': total_train_label,
            'total_train_pred': total_train_pred,
            'train_metric': train_cv_metric,
            'train_cv_info': train_cv_info,
            'total_val_label': total_val_label,
            'total_val_pred': total_val_pred,
            'val_metric': val_cv_metric,
            'val_cv_info': val_cv_info
        })

    # find the best parameter (highest validation AUC; first one on ties)
    index = np.argmax(param_metric_val_auc)
    best = param_all[index]
    total_train_label = best['total_train_label']
    total_train_pred = best['total_train_pred']
    train_cv_metric = best['train_metric']
    train_cv_info = best['train_cv_info']
    total_val_label = best['total_val_label']
    total_val_pred = best['total_val_pred']
    val_cv_metric = best['val_metric']
    val_cv_info = best['val_cv_info']

    # Final model: the selected candidate refit on the complete training set.
    self.SetDefaultClassifier()
    self.classifier.SetModelParameter(self.classifier_parameter_list[index])
    self.classifier.SetDataContainer(data_container)
    self.classifier.Fit()

    all_train_pred = self.classifier.Predict(data_container.GetArray())
    all_train_label = data_container.GetLabel()
    all_train_metric = EstimateMetirc(all_train_pred, all_train_label, 'all_train')

    test_metric = {}
    has_test = test_data_container.GetArray().size > 0
    if has_test:
        test_data = test_data_container.GetArray()
        test_label = test_data_container.GetLabel()
        test_case_name = test_data_container.GetCaseName()
        test_pred = self.classifier.Predict(test_data)
        test_metric = EstimateMetirc(test_pred, test_label, 'test')

    if store_folder:
        # makedirs also creates missing parent folders and does not fail if
        # the folder already exists.
        os.makedirs(store_folder, exist_ok=True)

        # Save the Parameter: skipped when the first candidate is an empty dict.
        if self.classifier_parameter_list[0] != {}:
            with open(os.path.join(store_folder, 'Classifier_Param_Result.csv'),
                      'w', newline='') as csvfile:
                writer = csv.writer(csvfile)
                writer.writerow(['Param', 'Train AUC', 'Val AUC'])
                for param_index, param in enumerate(self.classifier_parameter_list):
                    writer.writerow([
                        self._GetNameOfParamDict(param),
                        param_metric_train_auc[param_index],
                        param_metric_val_auc[param_index]
                    ])

        info = {}
        info.update(train_cv_metric)
        info.update(val_cv_metric)
        info.update(all_train_metric)

        np.save(os.path.join(store_folder, 'train_predict.npy'), total_train_pred)
        np.save(os.path.join(store_folder, 'train_label.npy'), total_train_label)
        np.save(os.path.join(store_folder, 'val_predict.npy'), total_val_pred)
        np.save(os.path.join(store_folder, 'val_label.npy'), total_val_label)
        np.save(os.path.join(store_folder, 'all_train_predict.npy'), all_train_pred)
        np.save(os.path.join(store_folder, 'all_train_label.npy'), all_train_label)

        with open(os.path.join(store_folder, 'train_cv5_info.csv'), 'w',
                  newline='') as csvfile:
            csv.writer(csvfile).writerows(train_cv_info)
        with open(os.path.join(store_folder, 'val_cv5_info.csv'), 'w',
                  newline='') as csvfile:
            csv.writer(csvfile).writerows(val_cv_info)

        if has_test:
            info.update(test_metric)
            np.save(os.path.join(store_folder, 'test_predict.npy'), test_pred)
            np.save(os.path.join(store_folder, 'test_label.npy'), test_label)

            test_result_info = [['CaseName', 'Pred', 'Label']]
            test_result_info.extend(
                [test_case_name[position], test_pred[position], test_label[position]]
                for position in range(len(test_label)))
            with open(os.path.join(store_folder, 'test_info.csv'), 'w',
                      newline='') as csvfile:
                csv.writer(csvfile).writerows(test_result_info)

        self.classifier.Save(store_folder)
        self.SaveResult(info, store_folder)

    return train_cv_metric, val_cv_metric, test_metric, all_train_metric
def Run(self, data_container, test_data_container=DataContainer(), store_folder=''):
    '''
    Cross-validate the classifier while recording per-case predictions, refit
    it on the whole training set and optionally evaluate a test set and store
    the results.

    :param data_container: training data; array, labels and case names are read
    :param test_data_container: optional test data; only evaluated when its
        GetArray() is non-empty. NOTE(review): the default instance is created
        once at definition time and shared by every call; this method only
        reads from it.
    :param store_folder: output folder; nothing is written when empty. The
        folder (including missing parents) is created on demand.
    :return: (train_metric, val_metric, test_metric); test_metric is an empty
        dict when no test data was supplied
    '''
    data = data_container.GetArray()
    label = data_container.GetLabel()
    case_name = data_container.GetCaseName()

    train_pred_list, train_label_list = [], []
    val_pred_list, val_label_list = [], []
    # Per-case rows for the CSV files, each preceded by a header row.
    train_cv_info = [['CaseName', 'Pred', 'Label']]
    val_cv_info = [['CaseName', 'Pred', 'Label']]

    for train_index, val_index in self.__cv.split(data, label):
        train_data, train_label = data[train_index, :], label[train_index]
        val_data, val_label = data[val_index, :], label[val_index]

        self.classifier.SetData(train_data, train_label)
        self.classifier.Fit()

        train_prob = self.classifier.Predict(train_data)
        val_prob = self.classifier.Predict(val_data)

        for position, case_index in enumerate(train_index):
            train_cv_info.append(
                [case_name[case_index], train_prob[position], train_label[position]])
        for position, case_index in enumerate(val_index):
            val_cv_info.append(
                [case_name[case_index], val_prob[position], val_label[position]])

        train_pred_list.extend(train_prob)
        train_label_list.extend(train_label)
        val_pred_list.extend(val_prob)
        val_label_list.extend(val_label)

    # Metrics are computed on the predictions pooled over all folds.
    total_train_label = np.asarray(train_label_list, dtype=np.uint8)
    total_train_pred = np.asarray(train_pred_list, dtype=np.float32)
    train_metric = EstimateMetirc(total_train_pred, total_train_label, 'train')

    total_label = np.asarray(val_label_list, dtype=np.uint8)
    total_pred = np.asarray(val_pred_list, dtype=np.float32)
    val_metric = EstimateMetirc(total_pred, total_label, 'val')

    # Final model: refit on the complete training container.
    self.classifier.SetDataContainer(data_container)
    self.classifier.Fit()

    test_metric = {}
    has_test = test_data_container.GetArray().size > 0
    if has_test:
        test_data = test_data_container.GetArray()
        test_label = test_data_container.GetLabel()
        test_case_name = test_data_container.GetCaseName()
        test_pred = self.classifier.Predict(test_data)
        test_metric = EstimateMetirc(test_pred, test_label, 'test')

    if store_folder:
        # makedirs also creates missing parent folders and does not fail if
        # the folder already exists.
        os.makedirs(store_folder, exist_ok=True)

        info = {}
        info.update(train_metric)
        info.update(val_metric)

        np.save(os.path.join(store_folder, 'train_predict.npy'), total_train_pred)
        np.save(os.path.join(store_folder, 'val_predict.npy'), total_pred)
        np.save(os.path.join(store_folder, 'train_label.npy'), total_train_label)
        np.save(os.path.join(store_folder, 'val_label.npy'), total_label)

        with open(os.path.join(store_folder, 'train_cvloo_info.csv'), 'w',
                  newline='') as csvfile:
            csv.writer(csvfile).writerows(train_cv_info)
        with open(os.path.join(store_folder, 'val_cvloo_info.csv'), 'w',
                  newline='') as csvfile:
            csv.writer(csvfile).writerows(val_cv_info)

        if has_test:
            info.update(test_metric)
            np.save(os.path.join(store_folder, 'test_predict.npy'), test_pred)
            np.save(os.path.join(store_folder, 'test_label.npy'), test_label)

            test_result_info = [['CaseName', 'Pred', 'Label']]
            test_result_info.extend(
                [test_case_name[position], test_pred[position], test_label[position]]
                for position in range(len(test_label)))
            with open(os.path.join(store_folder, 'test_info.csv'), 'w',
                      newline='') as csvfile:
                csv.writer(csvfile).writerows(test_result_info)

        self.classifier.Save(store_folder)
        self.SaveResult(info, store_folder)

    return train_metric, val_metric, test_metric