def main():
    """Train one model per fold on the stone dataset."""
    # Load dataset splits and metadata (dataset.get_df_stone).
    df_train, df_test, meta_features, n_meta_features, target_idx = get_df_stone(
        k_fold=args.k_fold,
        out_dim=args.out_dim,
        data_dir=args.data_dir,
        data_folder=args.data_folder,
        use_meta=args.use_meta,
        use_ext=args.use_ext)

    # Build the train/validation transforms for the configured image size.
    transforms_train, transforms_val = get_transforms(args.image_size)

    # Train every fold in turn (folds 0 .. k_fold-1).
    folds = range(args.k_fold)
    for fold in folds:
        run(fold, df_train, meta_features, n_meta_features,
            transforms_train, transforms_val, target_idx)
def main():
    """Sweep motion-blur attack strengths on fold 0 of the stone dataset.

    For each strength the image transforms are rebuilt, run() is invoked
    on fold 0, and the resulting best AUC is appended to a log file;
    the collected AUCs are plotted at the end.
    """
    # Load dataset splits and metadata (dataset.get_df_stone).
    df_train, df_test, meta_features, n_meta_features, target_idx = get_df_stone(
        k_fold=args.k_fold,
        out_dim=args.out_dim,
        data_dir=args.data_dir,
        data_folder=args.data_folder,
        use_meta=args.use_meta,
        use_ext=args.use_ext)

    auc_save = []
    # Blur strengths to probe (an earlier sweep used [0, 3, 5, 7, 9, 11]).
    attack_strengths = [13, 17, 21, 25, 29]
    for attack_strength in attack_strengths:
        # Rebuild the transforms with the current blur strength
        # (e.g. albumentations.MotionBlur(blur_limit=attack_strength, p=1.0)).
        transforms_train, transforms_val = get_transforms(
            args.image_size, attack_strength)
        auc_max = run(0, df_train, meta_features, n_meta_features,
                      transforms_train, transforms_val, target_idx)
        print(auc_max)
        auc_save.append(auc_max)
        with open(os.path.join("./noise_watch",
                               f'log_{args.kernel_type}.txt'), 'a') as appender:
            appender.write(
                f'attack_strengths : {attack_strength} auc : {auc_max}\n')

    plt.plot(auc_save)
    plt.show()
def main():
    """Run TTA inference on the stone test set with every fold's model.

    For each fold, loads the checkpoint selected by ``args.eval``,
    averages softmax probabilities over ``args.n_test`` test-time
    augmentations, and writes one submission CSV per fold.

    Raises:
        ValueError: if ``args.eval`` is not one of
            'best' / 'best_no_ext' / 'final'.
    """
    # Load dataset splits and metadata (dataset.get_df_stone).
    df_train, df_test, meta_features, n_meta_features, target_idx = get_df_stone(
        k_fold=args.k_fold,
        out_dim=args.out_dim,
        data_dir=args.data_dir,
        data_folder=args.data_folder,
        use_meta=args.use_meta,
        use_ext=args.use_ext)

    transforms_train, transforms_val = get_transforms(args.image_size)

    # https://discuss.pytorch.org/t/error-expected-more-than-1-value-per-channel-when-training/26274
    # Batch normalization can fail on a batch of size 1; if that happens,
    # adjust the batch size.
    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MMC_ClassificationDataset(df_test, 'test', meta_features,
                                             transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test,
                                              batch_size=args.batch_size,
                                              num_workers=args.num_workers)

    folds = range(args.k_fold)
    for fold in folds:
        # Fix: single elif chain + fail fast on an unknown eval mode
        # (the original mixed if/elif and left model_file unbound for
        # unrecognised values, causing a NameError later).
        if args.eval == 'best':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_no_ext':
            model_file = os.path.join(
                args.model_dir,
                f'{args.kernel_type}_best_no_ext_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')
        else:
            raise ValueError(f'unknown eval mode: {args.eval}')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim)
        model = model.to(device)

        try:
            # Checkpoint saved from a single-GPU model.
            model.load_state_dict(torch.load(model_file), strict=True)
        except RuntimeError:
            # Checkpoint saved from a DataParallel model: strip the
            # 'module.' prefix from every key, then retry.
            # (Fix: was a bare `except:` which also swallowed KeyboardInterrupt.)
            state_dict = torch.load(model_file)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        # NOTE(review): this tests the *string length* of
        # CUDA_VISIBLE_DEVICES, not the device count — a single device id
        # like '12' would also trigger DataParallel. Confirm intent.
        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)
        model.eval()

        PROBS = []
        with torch.no_grad():
            for data in tqdm(test_loader):
                if args.use_meta:
                    data, meta = data
                    data, meta = data.to(device), meta.to(device)
                    probs = torch.zeros(
                        (data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        logits = model(get_trans(data, I), meta)
                        probs += logits.softmax(1)
                else:
                    data = data.to(device)
                    probs = torch.zeros(
                        (data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        logits = model(get_trans(data, I))
                        probs += logits.softmax(1)

                # Average over the n_test TTA variants.
                probs /= args.n_test
                PROBS.append(probs.detach().cpu())

        PROBS = torch.cat(PROBS).numpy()
        df_test['target'] = PROBS[:, target_idx]
        # Write one submission file per fold.
        df_test[['image_name', 'target']].to_csv(os.path.join(
            args.sub_dir, f'sub_{args.kernel_type}_{args.eval}_{fold}.csv'),
            index=False)
def main():
    """Out-of-fold evaluation of the stone-data models.

    For every fold, loads the checkpoint selected by ``args.eval``, runs
    TTA validation on that fold's hold-out split, then reports overall
    accuracy and ROC AUC (raw and per-fold rank-normalised). Results are
    appended to the log file and OOF predictions are saved to
    ``args.oof_dir`` as .npy and .csv.

    Raises:
        ValueError: if ``args.eval`` is not one of
            'best' / 'best_no_ext' / 'final'.
    """
    # Load dataset splits and metadata (dataset.get_df_stone).
    df_train, df_test, meta_features, n_meta_features, target_idx = get_df_stone(
        k_fold=args.k_fold,
        out_dim=args.out_dim,
        data_dir=args.data_dir,
        data_folder=args.data_folder,
        use_meta=args.use_meta,
        use_ext=args.use_ext)

    transforms_train, transforms_val = get_transforms(args.image_size)

    LOGITS = []
    PROBS = []
    TARGETS = []
    dfs = []

    folds = range(args.k_fold)
    for fold in folds:
        print(f'Evaluate data fold{str(fold)}')
        df_valid = df_train[df_train['fold'] == fold]

        # Batch normalization can fail on a batch of size 1, so drop one
        # row when the split would leave a trailing batch of exactly 1.
        if len(df_valid) % args.batch_size == 1:
            df_valid = df_valid.sample(len(df_valid) - 1)

        if args.DEBUG:
            df_valid = pd.concat([
                df_valid[df_valid['target'] == target_idx].sample(
                    args.batch_size * 3),
                df_valid[df_valid['target'] != target_idx].sample(
                    args.batch_size * 3)
            ])

        dataset_valid = MMC_ClassificationDataset(df_valid, 'valid',
                                                  meta_features,
                                                  transform=transforms_val)
        valid_loader = torch.utils.data.DataLoader(
            dataset_valid,
            batch_size=args.batch_size,
            num_workers=args.num_workers)

        # Fix: single elif chain + fail fast on an unknown eval mode
        # (the original mixed if/elif and could leave model_file unbound).
        if args.eval == 'best':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_no_ext':
            model_file = os.path.join(
                args.model_dir,
                f'{args.kernel_type}_best_no_ext_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')
        else:
            raise ValueError(f'unknown eval mode: {args.eval}')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim)
        model = model.to(device)

        # Model summary, printed once (first fold only).
        if args.use_meta:
            pass
            # NOTE(review): summary() with meta inputs needs verification:
            # summary(model, [(3, args.image_size, args.image_size), n_meta_features])
        else:
            if fold == 0:
                summary(model, (3, args.image_size, args.image_size))

        try:
            # Checkpoint saved from a single-GPU model.
            model.load_state_dict(torch.load(model_file), strict=True)
        except RuntimeError:
            # Checkpoint saved from a DataParallel model: strip the
            # 'module.' prefix from every key, then retry.
            # (Fix: was a bare `except:` which also swallowed KeyboardInterrupt.)
            state_dict = torch.load(model_file)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        # NOTE(review): this tests the *string length* of
        # CUDA_VISIBLE_DEVICES, not the device count — confirm intent.
        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)
        model.eval()

        # Stone-data evaluation helper: val_epoch_stonedata.
        this_LOGITS, this_PROBS, this_TARGETS = val_epoch_stonedata(
            model,
            valid_loader,
            target_idx,
            is_ext=df_valid['is_ext'].values,
            n_test=8,
            get_output=True)
        LOGITS.append(this_LOGITS)
        PROBS.append(this_PROBS)
        TARGETS.append(this_TARGETS)
        dfs.append(df_valid)

    dfs = pd.concat(dfs).reset_index(drop=True)
    dfs['pred'] = np.concatenate(PROBS).squeeze()[:, target_idx]

    Accuracy = (round(dfs['pred']) == dfs['target']).mean() * 100.
    auc_all_raw = roc_auc_score(dfs['target'] == target_idx, dfs['pred'])

    # Rank-normalise predictions within each fold before computing AUC,
    # so folds with different calibration are comparable.
    dfs2 = dfs.copy()
    for i in folds:
        dfs2.loc[dfs2['fold'] == i, 'pred'] = dfs2.loc[dfs2['fold'] == i,
                                                       'pred'].rank(pct=True)
    auc_all_rank = roc_auc_score(dfs2['target'] == target_idx, dfs2['pred'])

    if args.use_ext:
        # When external data was used, additionally evaluate the model on
        # the internal (non-external) samples only.
        dfs3 = dfs[dfs.is_ext == 0].copy().reset_index(drop=True)
        auc_no_ext_raw = roc_auc_score(dfs3['target'] == target_idx,
                                       dfs3['pred'])
        for i in folds:
            dfs3.loc[dfs3['fold'] == i,
                     'pred'] = dfs3.loc[dfs3['fold'] == i,
                                        'pred'].rank(pct=True)
        auc_no_ext_rank = roc_auc_score(dfs3['target'] == target_idx,
                                        dfs3['pred'])
        content = time.ctime() + ' ' + f'Eval {args.eval}:\nAccuracy : {Accuracy:.5f}\n' \
            f'auc_all_raw : {auc_all_raw:.5f}\nauc_all_rank : {auc_all_rank:.5f}\n' \
            f'auc_no_ext_raw : {auc_no_ext_raw:.5f}\nauc_no_ext_rank : {auc_no_ext_rank:.5f}\n'
    else:
        content = time.ctime() + ' ' + f'Eval {args.eval}:\nAccuracy : {Accuracy:.5f}\n' \
            f'AUC_all_raw : {auc_all_raw:.5f}\nAUC_all_rank : {auc_all_rank:.5f}\n'

    # Append the results to the end of the log file.
    print(content)
    with open(os.path.join(args.log_dir, f'log_{args.kernel_type}.txt'),
              'a') as appender:
        appender.write(content + '\n')

    np.save(
        os.path.join(args.oof_dir, f'{args.kernel_type}_{args.eval}_oof.npy'),
        dfs['pred'].values)

    # Save the OOF results as CSV.
    dfs[['filepath', 'patient_id', 'target',
         'pred']].to_csv(os.path.join(
             args.oof_dir, f'{args.kernel_type}_{args.eval}_oof.csv'),
                         index=True)
def predict_model(model, attack_strength):
    """Evaluate *model* on the test set blurred at *attack_strength*.

    Rebuilds the validation transform with the given blur strength, runs
    TTA inference over the test set, writes a submission CSV named with
    the resulting accuracy/AUC, and returns the ROC AUC.

    Args:
        model: an already-loaded, device-placed model in eval mode.
        attack_strength: blur strength forwarded to get_transforms().

    Returns:
        float: ROC AUC of the target-class probabilities.
    """
    df_train, df_test, meta_features, n_meta_features, target_idx = get_df_stone(
        k_fold=args.k_fold,
        out_dim=args.out_dim,
        data_dir=args.data_dir,
        data_folder=args.data_folder,
        use_meta=args.use_meta,
        use_ext=args.use_ext)

    # Only the validation transform is needed here; the train transform
    # is discarded (fix: was bound to an unused local).
    _, transforms_val = get_transforms(args.image_size, attack_strength)

    # https://discuss.pytorch.org/t/error-expected-more-than-1-value-per-channel-when-training/26274
    # Batch normalization can fail on a batch of size 1; if that happens,
    # adjust the batch size.
    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MMC_ClassificationDataset(df_test, 'test', meta_features,
                                             transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test,
                                              batch_size=args.batch_size,
                                              num_workers=args.num_workers)

    PROBS = []
    TARGETS = []
    with torch.no_grad():
        # NOTE(review): the dataset is built in 'test' mode yet the loader
        # is unpacked as (data, target) — confirm the dataset returns
        # targets in this mode.
        for (data, target) in tqdm(test_loader):
            if args.use_meta:
                data, meta = data
                data, meta = data.to(device), meta.to(device)
                probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                for I in range(args.n_test):
                    logits = model(get_trans(data, I), meta)
                    probs += logits.softmax(1)
            else:
                data = data.to(device)
                probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                for I in range(args.n_test):
                    logits = model(get_trans(data, I))
                    probs += logits.softmax(1)

            # Average over the n_test TTA variants.
            probs /= args.n_test
            PROBS.append(probs.detach().cpu())
            TARGETS.append(target.detach().cpu())

    PROBS = torch.cat(PROBS).numpy()
    TARGETS = torch.cat(TARGETS).numpy()

    df_test['target'] = PROBS[:, target_idx]
    acc = (PROBS.argmax(1) == TARGETS).mean() * 100.
    auc = roc_auc_score((TARGETS == target_idx).astype(float),
                        PROBS[:, target_idx])
    df_test[['image_name', 'target']].to_csv(os.path.join(
        args.sub_dir,
        f'sub_{args.kernel_type}_{args.eval}_{0}_{acc:.2f}_{auc:.4f}.csv'),
        index=False)
    # df_test[['image_name', 'target']].to_csv(os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}_{fold}.csv'),index=False)

    # Drop references so the loader/dataset can be collected before the
    # next sweep iteration.
    test_loader = None
    dataset_test = None
    return auc
def main():
    """Measure how motion blur degrades a trained fold-0 model.

    Loads the fold-0 checkpoint selected by ``args.eval``, then records
    the ROC AUC from predict_model() at increasing blur strengths.

    Raises:
        ValueError: if ``args.eval`` is not one of
            'best' / 'best_no_ext' / 'final'.
    """
    # Load dataset splits and metadata (dataset.get_df_stone).
    df_train, df_test, meta_features, n_meta_features, target_idx = get_df_stone(
        k_fold=args.k_fold,
        out_dim=args.out_dim,
        data_dir=args.data_dir,
        data_folder=args.data_folder,
        use_meta=args.use_meta,
        use_ext=args.use_ext)

    folds = range(1)  # fold 0 only; use range(args.k_fold) for all folds
    for fold in folds:
        # Fix: single elif chain + fail fast on an unknown eval mode
        # (the original mixed if/elif and could leave model_file unbound).
        if args.eval == 'best':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_no_ext':
            model_file = os.path.join(
                args.model_dir,
                f'{args.kernel_type}_best_no_ext_fold{fold}.pth')
        elif args.eval == 'final':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')
        else:
            raise ValueError(f'unknown eval mode: {args.eval}')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim)
        model = model.to(device)

        try:
            # Checkpoint saved from a single-GPU model.
            model.load_state_dict(torch.load(model_file), strict=True)
        except RuntimeError:
            # Checkpoint saved from a DataParallel model: strip the
            # 'module.' prefix from every key, then retry.
            # (Fix: was a bare `except:` which also swallowed KeyboardInterrupt.)
            state_dict = torch.load(model_file)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        # NOTE(review): this tests the *string length* of
        # CUDA_VISIBLE_DEVICES, not the device count — confirm intent.
        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)
        model.eval()

        # ROC AUC vs. blur-strength sweep. Fix: size the result array
        # from the strength list and use enumerate instead of a manual
        # counter coupled to a hard-coded np.zeros(7).
        attack_strengths = [7, 11, 15, 21, 25, 27, 31]
        result = np.zeros(len(attack_strengths))
        for cnt, attack_strength in enumerate(attack_strengths):
            result[cnt] = predict_model(model, attack_strength)
            print(f'as: {attack_strength}, auc: {result[cnt]}')
        print("blur Result : ", result)