def main():
    df, _ = get_df(args.kernel_type, args.out_dim, args.data_dir)

    transforms_train, transforms_val = get_transforms(args.image_size)

    folds = [int(i) for i in args.train_fold.split(',')]
    run(folds, df, transforms_train, transforms_val)
def main():
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder, args.use_meta
    )

    transforms_train, transforms_val = get_transforms(args.image_size)

    folds = [int(i) for i in args.fold.split(',')]
    for fold in folds:
        run(fold, df, meta_features, n_meta_features, transforms_train, transforms_val, mel_idx)
def main():
    # Load the data
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder, args.use_meta
    )

    # Get the augmentations we need to apply to the images
    transforms_train, transforms_val = get_transforms(args.image_size)

    # Train and validate our neural network using K-fold cross-validation
    folds = [int(i) for i in args.fold.split(',')]
    for fold in folds:
        run(fold, df, meta_features, n_meta_features, transforms_train, transforms_val, mel_idx)
def main():
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir_2020, args.data_dir_2019, args.data_dir_2018, args.use_meta
    )

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test, 'test', meta_features, transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=args.batch_size, num_workers=args.num_workers)

    # load model
    models = []
    for fold in range(1):
        if args.eval == 'best':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        if args.eval == 'final':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim,
            pretrained=True,
            # meta_model=args.meta_model
        )
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except:  # multi GPU model_file
            state_dict = torch.load(model_file)
            state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()
        models.append(model)

    # predict
    PROBS = []
    with torch.no_grad():
        for (data) in tqdm(test_loader):
            if args.use_meta:
                data, meta = data
                data, meta = data.to(device), meta.to(device)
                probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                for model in models:
                    for I in range(args.n_test):
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
            else:
                data = data.to(device)
                probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                for model in models:
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)

            probs /= args.n_test
            probs /= len(models)
            PROBS.append(probs.detach().cpu())

    PROBS = torch.cat(PROBS).numpy()

    # save csv
    df_test['target'] = PROBS[:, mel_idx]
    df_test['image_name'] = df_test['image']
    df_test[['image_name', 'target']].to_csv(
        os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}.csv'), index=False)
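# NOTE (added): the prediction loops in these scripts average probabilities over
# args.n_test test-time augmentations via get_trans(data, I), which is defined
# elsewhere in the repo. The sketch below is an assumption about what get_trans
# does (horizontal/vertical flips plus a transpose, i.e. the 8 symmetries of a
# square image tensor of shape [B, C, H, W]); it is illustrative, not the verified original.
def get_trans(img, I):
    if I >= 4:
        img = img.transpose(2, 3)
    if I % 4 == 0:
        return img
    elif I % 4 == 1:
        return img.flip(2)
    elif I % 4 == 2:
        return img.flip(3)
    elif I % 4 == 3:
        return img.flip(2).flip(3)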
def main():
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder, args.use_meta
    )

    transforms_train, transforms_val = get_transforms(args.image_size)

    LOGITS = []
    PROBS = []
    dfs = []
    for fold in range(5):
        df_valid = df[df['fold'] == fold]

        if args.DEBUG:
            df_valid = pd.concat([
                df_valid[df_valid['target'] == mel_idx].sample(args.batch_size * 3),
                df_valid[df_valid['target'] != mel_idx].sample(args.batch_size * 3)
            ])

        dataset_valid = MelanomaDataset(df_valid, 'valid', meta_features, transform=transforms_val)
        valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=args.batch_size, num_workers=args.num_workers)

        if args.eval == 'best':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        if args.eval == 'final':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim
        )
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except:  # multi GPU model_file
            state_dict = torch.load(model_file)
            state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()

        this_LOGITS, this_PROBS = val_epoch(model, valid_loader, mel_idx, is_ext=df_valid['is_ext'].values, n_test=8, get_output=True)
        LOGITS.append(this_LOGITS)
        PROBS.append(this_PROBS)
        dfs.append(df_valid)

    dfs = pd.concat(dfs).reset_index(drop=True)
    dfs['pred'] = np.concatenate(PROBS).squeeze()[:, mel_idx]

    auc_all_raw = roc_auc_score(dfs['target'] == mel_idx, dfs['pred'])

    dfs2 = dfs.copy()
    for i in range(5):
        dfs2.loc[dfs2['fold'] == i, 'pred'] = dfs2.loc[dfs2['fold'] == i, 'pred'].rank(pct=True)
    auc_all_rank = roc_auc_score(dfs2['target'] == mel_idx, dfs2['pred'])

    dfs3 = dfs[dfs.is_ext == 0].copy().reset_index(drop=True)
    auc_20_raw = roc_auc_score(dfs3['target'] == mel_idx, dfs3['pred'])

    for i in range(5):
        dfs3.loc[dfs3['fold'] == i, 'pred'] = dfs3.loc[dfs3['fold'] == i, 'pred'].rank(pct=True)
    auc_20_rank = roc_auc_score(dfs3['target'] == mel_idx, dfs3['pred'])

    content = (f'Eval {args.eval}:\n'
               f'auc_all_raw : {auc_all_raw:.5f}\n'
               f'auc_all_rank : {auc_all_rank:.5f}\n'
               f'auc_20_raw : {auc_20_raw:.5f}\n'
               f'auc_20_rank : {auc_20_rank:.5f}\n')
    print(content)
    with open(os.path.join(args.log_dir, f'log_{args.kernel_type}.txt'), 'a') as appender:
        appender.write(content + '\n')

    np.save(os.path.join(args.oof_dir, f'{args.kernel_type}_{args.eval}_oof.npy'), dfs['pred'].values)
def main():
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder, args.use_meta
    )

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test, 'test', meta_features, transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=args.batch_size, num_workers=args.num_workers)

    print(f'\nPredicting test set using {args.enet_type} ...')

    OUTPUTS = []
    for fold in range(5):
        if args.eval == 'best':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        if args.eval == 'final':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim
        )
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except:  # multi GPU model_file
            state_dict = torch.load(model_file)
            state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()

        PROBS = []
        with torch.no_grad():
            for (data) in tqdm(test_loader):
                if args.use_meta:
                    data, meta = data
                    data, meta = data.to(device), meta.to(device)
                    probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
                else:
                    data = data.to(device)
                    probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)

                probs /= args.n_test
                PROBS.append(probs.detach().cpu())

        PROBS = torch.cat(PROBS).numpy()
        OUTPUTS.append(PROBS[:, mel_idx])

    # Rank per fold (if you are predicting on your own moles, you don't need to rank the probability)
    pred = np.zeros(OUTPUTS[0].shape[0])
    for probs in OUTPUTS:
        pred += pd.Series(probs).rank(pct=True).values
    pred /= len(OUTPUTS)

    df_test['target'] = pred
    df_test[['image_name', 'target']].to_csv(
        os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}.csv'), index=False)

    print('\nSaved submission in -> ./subs')
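# NOTE (added): a small, self-contained illustration of the per-fold rank averaging
# used above. rank(pct=True) maps each fold's probabilities to their percentile rank,
# so folds with differently calibrated probability scales contribute equally to the
# blend. The arrays below are made-up example values, not outputs of the real models.
import numpy as np
import pandas as pd

fold_a = np.array([0.01, 0.90, 0.30, 0.05])   # fold A: well-spread probabilities
fold_b = np.array([0.48, 0.52, 0.50, 0.49])   # fold B: compressed around 0.5
blend = (pd.Series(fold_a).rank(pct=True).values
         + pd.Series(fold_b).rank(pct=True).values) / 2
print(blend)  # both folds agree on the ordering, so the blend preserves it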
def main():
    torch.cuda.set_device(1)

    # get dataframe
    df, out_dim = get_df(args.kernel_type, args.data_dir, args.train_step)
    print(f"out_dim = {out_dim}")

    # get adaptive margin
    tmp = np.sqrt(1 / np.sqrt(df['landmark_id'].value_counts().sort_index().values))
    margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05

    # get augmentations
    transforms_train, transforms_val = get_transforms(args.image_size)
    print("\ndata augmentation is done!\n")

    # extract images in folder 0 as demo
    df_demo_0 = df[df['filepath'].str.startswith('/mnt/data/sjx/CS498_DL_Project/data/train/0/0')]
    df_demo_1 = df[df['filepath'].str.startswith('/mnt/data/sjx/CS498_DL_Project/data/train/0/1')]
    df_demo_2 = df[df['filepath'].str.startswith('/mnt/data/sjx/CS498_DL_Project/data/train/0/2')]
    df_demo_3 = df[df['filepath'].str.startswith('/mnt/data/sjx/CS498_DL_Project/data/train/0/3')]
    df_demo = df_demo_0.append([df_demo_1, df_demo_2, df_demo_3])

    # get train and valid dataset
    df = df_demo
    df_train = df[df['fold'] != args.fold]
    df_valid = df[df['fold'] == args.fold].reset_index(drop=True).query("index % 15==0")

    dataset_train = LandmarkDataset(df_train, 'train', 'train', transform=transforms_train)
    dataset_valid = LandmarkDataset(df_valid, 'train', 'val', transform=transforms_val)
    valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=args.batch_size,
                                               num_workers=args.num_workers, drop_last=True)
    print("dataset has been prepared!\n")

    # model
    print(torch.cuda.current_device())
    model = ModelClass(args.enet_type, out_dim=out_dim)
    # move the model to the first device in device_ids; "cuda:1, 3" is not a valid device string
    model = nn.DataParallel(model, device_ids=[1, 3]).to("cuda:1")

    # loss func
    def criterion(logits_m, target):
        arc = ArcFaceLossAdaptiveMargin(margins=margins, s=80)
        loss_m = arc(logits_m, target, out_dim)
        return loss_m

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.init_lr)

    # load pretrained
    if len(args.load_from) > 0:
        # Todo:
        checkpoint = torch.load(args.load_from, map_location=lambda storage, loc: storage.cuda(3))
        state_dict = checkpoint['model_state_dict']
        state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
        if args.train_step == 1:
            del state_dict['metric_classify.weight']
            model.load_state_dict(state_dict, strict=False)
        else:
            model.load_state_dict(state_dict, strict=True)
        # if 'optimizer_state_dict' in checkpoint:
        #     optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        del checkpoint, state_dict
        torch.cuda.empty_cache()
        import gc
        gc.collect()

    # lr scheduler
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, args.n_epochs - 1)
    scheduler_warmup = GradualWarmupSchedulerV2(optimizer, multiplier=10, total_epoch=1, after_scheduler=scheduler_cosine)

    # train & valid loop
    gap_m_max = 0.
    model_file = os.path.join(args.model_dir, f'{args.kernel_type}_fold{args.fold}.pth')
    for epoch in range(args.start_from_epoch, args.n_epochs + 1):
        print(time.ctime(), 'Epoch:', epoch)

        train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=args.batch_size,
                                                   num_workers=args.num_workers, shuffle=True, drop_last=True)

        train_loss = train_epoch(model, train_loader, optimizer, criterion)
        val_loss, acc_m, gap_m = val_epoch(model, valid_loader, criterion)
        scheduler_warmup.step(epoch - 1)

        if args.local_rank == 0:
            content = time.ctime() + ' ' + f'Fold {args.fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {np.mean(train_loss):.5f}, valid loss: {(val_loss):.5f}, acc_m: {(acc_m):.6f}, gap_m: {(gap_m):.6f}.'
            print(content)
            with open(os.path.join(args.log_dir, f'{args.kernel_type}.txt'), 'a') as appender:
                appender.write(content + '\n')

            print('gap_m_max ({:.6f} --> {:.6f}). Saving model ...'.format(gap_m_max, gap_m))
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, model_file)
            gap_m_max = gap_m

        if epoch == args.stop_at_epoch:
            print(time.ctime(), 'Training Finished!')
            break

    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, os.path.join(args.model_dir, f'{args.kernel_type}_fold{args.fold}_final.pth'))
def main():
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder, args.use_meta
    )

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test, 'test', meta_features, transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=args.batch_size, num_workers=args.num_workers)

    PROBS = []  # ! this is defined twice?
    for fold in range(5):  # ! use model built from each fold

        if args.eval == 'best':  # ! default
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        if args.eval == 'final':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim
        )
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except:  # multi GPU model_file
            state_dict = torch.load(model_file)
            state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)

        model.eval()

        PROBS = []  # ! this is defined twice?
        with torch.no_grad():
            for (data) in tqdm(test_loader):

                if args.use_meta:
                    data, meta = data
                    data, meta = data.to(device), meta.to(device)
                    probs = torch.zeros((data.shape[0], args.out_dim)).to(device)  # batch x label
                    for I in range(args.n_test):  # ! flipping images 8 times
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
                else:
                    data = data.to(device)
                    probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)

                probs /= args.n_test  # ! average over all the flips
                PROBS.append(probs.detach().cpu())  # append prediction for this batch

    PROBS = torch.cat(PROBS).numpy()  # put in numpy format; PROBS is total_obs_size x num_labels

    df_test['target'] = PROBS[:, mel_idx]  # ! take the @mel_idx column
    df_test[['image_name', 'target']].to_csv(
        os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}.csv'), index=False)
def main():
    # get dataframe
    df, out_dim = get_df(args.kernel_type, args.data_dir, args.train_step)
    print(f"out_dim = {out_dim}")

    # get adaptive margin
    tmp = np.sqrt(1 / np.sqrt(df['landmark_id'].value_counts().sort_index().values))
    margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05

    # get augmentations
    transforms_train, transforms_val = get_transforms(args.image_size)

    # get train and valid dataset
    df_train = df[df['fold'] != args.fold]
    df_valid = df[df['fold'] == args.fold].reset_index(drop=True).query("index % 15==0")

    dataset_train = LandmarkDataset(df_train, 'train', 'train', transform=transforms_train)
    dataset_valid = LandmarkDataset(df_valid, 'train', 'val', transform=transforms_val)
    valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=args.batch_size, num_workers=args.num_workers)

    # model
    model = ModelClass(args.enet_type, out_dim=out_dim)
    model = model.cuda()
    model = apex.parallel.convert_syncbn_model(model)

    # loss func
    def criterion(logits_m, target):
        arc = ArcFaceLossAdaptiveMargin(margins=margins, s=80)
        loss_m = arc(logits_m, target, out_dim)
        return loss_m

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.init_lr)
    if args.use_amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

    # load pretrained
    if len(args.load_from) > 0:
        checkpoint = torch.load(args.load_from, map_location='cuda:{}'.format(args.local_rank))
        state_dict = checkpoint['model_state_dict']
        state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
        if args.train_step == 1:
            del state_dict['metric_classify.weight']
            model.load_state_dict(state_dict, strict=False)
        else:
            model.load_state_dict(state_dict, strict=True)
        # if 'optimizer_state_dict' in checkpoint:
        #     optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        del checkpoint, state_dict
        torch.cuda.empty_cache()
        import gc
        gc.collect()

    model = DistributedDataParallel(model, delay_allreduce=True)

    # lr scheduler
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, args.n_epochs - 1)
    scheduler_warmup = GradualWarmupSchedulerV2(optimizer, multiplier=10, total_epoch=1, after_scheduler=scheduler_cosine)

    # train & valid loop
    gap_m_max = 0.
    model_file = os.path.join(args.model_dir, f'{args.kernel_type}_fold{args.fold}.pth')
    for epoch in range(args.start_from_epoch, args.n_epochs + 1):
        print(time.ctime(), 'Epoch:', epoch)
        scheduler_warmup.step(epoch - 1)

        train_sampler = torch.utils.data.distributed.DistributedSampler(dataset_train)
        train_sampler.set_epoch(epoch)
        train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=args.batch_size,
                                                   num_workers=args.num_workers, shuffle=train_sampler is None,
                                                   sampler=train_sampler, drop_last=True)

        train_loss = train_epoch(model, train_loader, optimizer, criterion)
        val_loss, acc_m, gap_m = val_epoch(model, valid_loader, criterion)

        if args.local_rank == 0:
            content = time.ctime() + ' ' + f'Fold {args.fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {np.mean(train_loss):.5f}, valid loss: {(val_loss):.5f}, acc_m: {(acc_m):.6f}, gap_m: {(gap_m):.6f}.'
            print(content)
            with open(os.path.join(args.log_dir, f'{args.kernel_type}.txt'), 'a') as appender:
                appender.write(content + '\n')

            print('gap_m_max ({:.6f} --> {:.6f}). Saving model ...'.format(gap_m_max, gap_m))
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, model_file)
            gap_m_max = gap_m

        if epoch == args.stop_at_epoch:
            print(time.ctime(), 'Training Finished!')
            break

    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, os.path.join(args.model_dir, f'{args.kernel_type}_fold{args.fold}_final.pth'))
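# NOTE (added): standalone illustration of the adaptive ArcFace margin formula used
# above: rarer classes get larger margins, scaled into [0.05, 0.50]. The class counts
# below are invented for the example; the real counts come from
# df['landmark_id'].value_counts().
import numpy as np

class_counts = np.array([10, 100, 1000, 10000])
tmp = np.sqrt(1 / np.sqrt(class_counts))
margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05
print(margins)  # ~[0.50, 0.26, 0.13, 0.05] -- margin shrinks as a class gains samples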
def main():
    df_train, df_test = get_df(args.kernel_type, args.out_dim, args.data_dir)

    folds = [int(i) for i in args.val_fold.split(',')]
    if args.mode == 'val':
        df_test = df_train[df_train['fold'].isin(folds)]

    _, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = RetinalDataset(df_test, 'val', transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=args.batch_size, num_workers=args.num_workers)
    # nums = dataset_test.get_num()
    # content = f'total num of test:{len(dataset_test)},class nums:{nums}'
    # print(content)

    # load model
    models = []
    for fold in range(1):
        if args.eval == 'best':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best.pth')
        elif args.eval == 'final':
            model_file = os.path.join(args.model_dir, f'{args.kernel_type}_final.pth')

        model = ModelClass(args.enet_type, out_dim=args.out_dim, pretrained=True, freeze_cnn=args.freeze_cnn)
        model = model.to(device)

        try:  # single GPU model_file
            model.load_state_dict(torch.load(model_file), strict=True)
        except:  # multi GPU model_file
            state_dict = torch.load(model_file)
            state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
            model.load_state_dict(state_dict, strict=True)

        # if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
        #     model = torch.nn.DataParallel(model)

        model.eval()
        models.append(model)

    LOGITS = []
    with torch.no_grad():
        for (data, target) in tqdm(test_loader):
            data, target = data.to(device), target.to(device)
            logits = torch.zeros((data.shape[0], args.out_dim)).to(device)
            for model in models:
                for I in range(args.n_test):
                    l = model(get_trans(data, I))
                    logits += l

            logits /= args.n_test
            logits /= len(models)
            logits = F.sigmoid(logits)
            LOGITS.append(logits.detach().cpu())

    LOGITS = torch.cat(LOGITS).numpy()

    PREDICT = []
    for logit in LOGITS:
        predicted_label = []
        for i in range(logit.size):
            if logit[i].item() > 0.5:  # threshold is 0.5
                predicted_label.append(i)
        if len(predicted_label) == 0:
            predicted_label.append(0)
        PREDICT.append(" ".join([str(label) for label in predicted_label]))

    df_test['predicted'] = PREDICT
    df_test[['filename', 'predicted']].to_csv(
        os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}.csv'), index=False)
def main(args):
    # get dataframe
    df = get_df(args.groups)

    # get adaptive margin
    tmp = np.sqrt(1 / np.sqrt(df['label_group'].value_counts().sort_index().values))
    margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05

    # get augmentations
    transforms_train, transforms_val = get_transforms(args.image_size, args.stage)

    # get train and valid dataset
    df_train = df[df['fold'] != args.fold] if not args.full else df
    df_train['label_group'] = LabelEncoder().fit_transform(df_train.label_group)
    df_valid = df[df['fold'] == args.fold]

    out_dim = df_train.label_group.nunique()
    print(f"out_dim = {out_dim}")

    dataset_train = ShoppeDataset(df_train, 'train', transform=transforms_train)
    dataset_valid = ShoppeDataset(df_valid, 'val', transform=transforms_val)
    print(f'Train on {len(df_train)} images, validate on {len(df_valid)} images')

    train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=args.batch_size, num_workers=args.num_workers,
                                               pin_memory=True, shuffle=True, drop_last=True)
    valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=args.batch_size, num_workers=args.num_workers)

    loss_config = decode_config(args.loss_config)

    # model
    if args.enet_type == 'resnest50':
        model = Resnest50(out_dim=out_dim, loss_config=loss_config, args=args)
    else:
        model = Model(args.enet_type, out_dim=out_dim, loss_config=loss_config, args=args)
    model = model.cuda()

    # loss func
    criterion = get_criterion(args, out_dim, margins)

    # optimizer
    optimizer = optim.AdamW(model.parameters(), lr=args.init_lr)

    # load pretrained
    if args.load_from and args.load_from != 'none':
        checkpoint = torch.load(args.load_from, map_location='cuda:0')
        state_dict = checkpoint['model_state_dict']
        state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}
        model.load_state_dict(state_dict, strict=True)
        del checkpoint, state_dict
        torch.cuda.empty_cache()
        gc.collect()
        print(f"Loaded weight from {args.load_from}")

    # lr scheduler
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, args.n_epochs - 1)
    warmup_epochs = args.warmup_epochs if args.stage == 1 else 1
    print(warmup_epochs)
    scheduler_warmup = GradualWarmupSchedulerV2(optimizer, multiplier=10, total_epoch=warmup_epochs,
                                                after_scheduler=scheduler_cosine)

    # train & valid loop
    best_score = -1
    model_file = os.path.join(args.model_dir,
                              weight_file(args.kernel_type, args.fold, args.stage, loss_config.loss_type, out_dim))
    for epoch in range(args.start_from_epoch, args.n_epochs + 1):
        print(time.ctime(), f'Epoch: {epoch}/{args.n_epochs}')
        scheduler_warmup.step(epoch - 1)

        train_loss, acc_list = train_epoch(model, train_loader, optimizer, criterion)
        f1score = val_epoch(model, valid_loader, criterion, df_valid, args)

        content = time.ctime() + ' ' + (
            f'Fold {args.fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {np.mean(train_loss):.5f},'
            f' train acc {np.mean(acc_list):.5f}, f1score: {(f1score):.6f}.')
        print(content)
        with open(os.path.join(args.log_dir, f'{args.kernel_type}.txt'), 'a') as appender:
            appender.write(content + '\n')

        if f1score > best_score:
            print('best f1 score ({:.6f} --> {:.6f}). Saving model ...'.format(best_score, f1score))
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, model_file)
            best_score = f1score

        if epoch == args.stop_at_epoch:
            print(time.ctime(), 'Training Finished!')
            break

    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, model_file)