def TestNewData(NewDataCsv, model_folder, result_save_path=''):
    '''
    Apply a previously trained model to a new feature matrix and evaluate it.

    The training information loaded from ``model_folder`` supplies the
    normalizer and the classifier. The score kept for every case is column 1
    of the model's ``predict_proba`` output.

    :param NewDataCsv: New radiomics feature matrix csv file path
    :param model_folder: The trained model path
    :param result_save_path: Folder to store the results in. When empty (the
        default) nothing is written. The folder is created if it is missing.
        Besides whatever ``CrossValidation.SaveResult`` stores, the files
        ``test_predict.npy``, ``test_label.npy`` and ``test_info.csv`` are
        written there.
    :return: The metric computed by ``EstimateMetirc`` from the predictions
        and the labels of the new data
    '''
    train_info = LoadTrainInfo(model_folder)

    # Normalization
    new_data_container = DataContainer()
    new_data_container.Load(NewDataCsv)
    # NOTE: restricting the data to train_info['selected_features'] is
    # currently disabled here; the normalizer receives the container exactly
    # as it was loaded from the csv file.
    new_data_container = train_info['normalizer'].Transform(new_data_container)

    # Model
    train_info['classifier'].SetDataContainer(new_data_container)
    model = train_info['classifier'].GetModel()
    predict = model.predict_proba(new_data_container.GetArray())[:, 1]

    label = new_data_container.GetLabel()
    case_name = new_data_container.GetCaseName()

    # One header row followed by one row per case.
    test_result_info = [['CaseName', 'Pred', 'Label']]
    test_result_info.extend(
        [name, score, truth]
        for name, score, truth in zip(case_name, predict, label)
    )

    metric = EstimateMetirc(predict, label)
    print(metric)
    print('\t')

    if result_save_path:
        # np.save and open(..., 'w') do not create missing folders, so make
        # sure the target exists before anything is written into it.
        os.makedirs(result_save_path, exist_ok=True)

        # Hand SaveResult a copy so the returned metric stays untouched.
        cv = CrossValidation()
        cv.SaveResult(dict(metric), result_save_path)

        np.save(os.path.join(result_save_path, 'test_predict.npy'), predict)
        np.save(os.path.join(result_save_path, 'test_label.npy'), label)
        with open(os.path.join(result_save_path, 'test_info.csv'), 'w', newline='') as csvfile:
            csv.writer(csvfile).writerows(test_result_info)

    return metric
vif_dict[exog] = vif # calculate tolerance tolerance = 1 - r_squared tolerance_dict[exog] = tolerance # return VIF DataFrame df_vif = pd.DataFrame({'VIF': vif_dict, 'Tolerance': tolerance_dict}) return df_vif if __name__ == '__main__': data_path = r'..\..\Demo\train_numeric_feature.csv' from BC.DataContainer.DataContainer import DataContainer from BC.FeatureAnalysis.Normalizer import NormalizerZeroCenter pca = DimensionReductionByPCA() dc = DataContainer() dc.Load(data_path) dc = NormalizerZeroCenter.Run(dc) # dc = pca.Run(dc) df = pd.DataFrame(dc.GetArray(), index=dc.GetCaseName(), columns=dc.GetFeatureName()) dr = DimensionReductionByVIF() new_df = dr.CalculateVIF(df) print(dc.GetArray().shape, new_df.shape)