def TestNewData(NewDataCsv, model_folder, result_save_path=''):
    """Run a previously trained pipeline on a new feature matrix and score it.

    :param NewDataCsv: Path of the csv file with the new radiomics feature matrix.
    :param model_folder: Folder of the trained model; handed to LoadTrainInfo.
    :param result_save_path: Optional output folder. When it is non-empty the
        metric, the predictions, the labels and a per-case csv table
        ('test_info.csv') are written into it.
    :return: The value returned by EstimateMetirc(predict, label).
    """
    train_info = LoadTrainInfo(model_folder)

    # Normalization: load the raw features and apply the stored normalizer.
    # NOTE: selecting features by train_info['selected_features'] used to sit
    # around this step but is disabled; the loaded container goes straight to
    # the normalizer.
    loaded_container = DataContainer()
    loaded_container.Load(NewDataCsv)
    new_data_container = train_info['normalizer'].Transform(loaded_container)

    # Model: attach the data to the stored classifier and predict.
    classifier = train_info['classifier']
    classifier.SetDataContainer(new_data_container)
    model = classifier.GetModel()
    # Keep only the second probability column
    # (presumably the positive class -- TODO confirm against the classifier).
    probabilities = model.predict_proba(new_data_container.GetArray())
    predict = probabilities[:, 1]

    label = new_data_container.GetLabel()
    case_name = new_data_container.GetCaseName()

    # One header row followed by one row per case.
    header = ['CaseName', 'Pred', 'Label']
    rows = [[case_name[i], predict[i], label[i]] for i in range(len(label))]
    test_result_info = [header] + rows

    metric = EstimateMetirc(predict, label)
    info = dict(metric)
    cv = CrossValidation()

    print(metric)
    print('\t')

    # Nothing to persist when no output folder was given.
    if not result_save_path:
        return metric

    cv.SaveResult(info, result_save_path)
    predict_file = os.path.join(result_save_path, 'test_predict.npy')
    np.save(predict_file, predict)
    label_file = os.path.join(result_save_path, 'test_label.npy')
    np.save(label_file, label)

    table_file = os.path.join(result_save_path, 'test_info.csv')
    with open(table_file, 'w', newline='') as csvfile:
        csv.writer(csvfile).writerows(test_result_info)

    return metric
return "To Remove the unbalance of the training data set, we applied an Tomek link after the " \ "Synthetic Minority Oversampling TEchnique (SMOTE) to make positive/negative samples balance. " def Run(self, data_container, store_path=''): data, label, feature_name, label_name = data_container.GetData() data_resampled, label_resampled = self._model.fit_sample(data, label) new_case_name = [ 'Generate' + str(index) for index in range(data_resampled.shape[0]) ] new_data_container = DataContainer(data_resampled, label_resampled, data_container.GetFeatureName(), new_case_name) if store_path != '': if os.path.isdir(store_path): new_data_container.Save( os.path.join(store_path, '{}_features.csv'.format(self._name))) else: new_data_container.Save(store_path) return new_data_container if __name__ == '__main__': dc = DataContainer() dc.Load(r'..\..\Example\numeric_feature.csv') print(dc.GetArray().shape, np.sum(dc.GetLabel())) b = SmoteTomekSampling() new = b.Run(dc) print(new.GetArray().shape, np.sum(new.GetLabel()))