def get_datasets(data):
    """Build the train/valid dataset pair from a pre-split data tuple.

    Args:
        data: 4-tuple ``(X_train, y_train, X_val, y_val)``.

    Returns:
        dict with keys ``"train"`` and ``"valid"`` mapping to
        :class:`MelanomaDataset` instances wired with the matching transforms.
    """
    X_tr, y_tr, X_va, y_va = data
    return {
        "train": MelanomaDataset(X_tr, y_tr, istrain=True,
                                 transforms=get_train_transforms()),
        "valid": MelanomaDataset(X_va, y_va, istrain=False,
                                 transforms=get_valid_transforms()),
    }
def GetDataLoader():
    """Create train/validation DataLoaders from ``train.csv``.

    Reads the training csv, splits it with ``get_train_val_split``, optionally
    augments the training split (controlled by ``parser['augmentations']``) and
    wraps both splits in DataLoaders.

    Returns:
        tuple: ``(trainloader, validloader)``.
    """
    df = pd.read_csv('train.csv')
    train_df, valid_df = get_train_val_split(df)

    # ImageNet normalization, shared by every pipeline below (Normalize is stateless)
    normalize = transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                     std=(0.229, 0.224, 0.225))

    if parser['augmentations']:
        train_transforms = transforms.Compose([
            transforms.ColorJitter(brightness=0.7, contrast=0.3),
            transforms.RandomRotation(degrees=75),
            transforms.RandomHorizontalFlip(p=0.6),
            transforms.RandomVerticalFlip(p=0.7),
            transforms.ToTensor(),
            normalize])
    else:
        train_transforms = transforms.Compose([transforms.ToTensor(), normalize])

    train_dataset = MelanomaDataset(df=train_df, path=parser['train_path'],
                                    transformations=train_transforms,
                                    is_train=True)
    trainloader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=parser['bs'],
                                              shuffle=True)

    valid_transforms = transforms.Compose([transforms.ToTensor(), normalize])
    # NOTE(review): is_train=True on the validation set mirrors the original;
    # presumably it just controls whether labels are returned — confirm.
    valid_dataset = MelanomaDataset(df=valid_df, path=parser['train_path'],
                                    transformations=valid_transforms,
                                    is_train=True)
    # BUG FIX: this loader read parser['batchsize'] while the train loader read
    # parser['bs'] — one of the two keys must be missing from the config.
    # Since the train loader above already requires 'bs', both now use it.
    validloader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=parser['bs'],
                                              shuffle=False)
    return trainloader, validloader
def GetPreds(model_path, img_path, backbone):
    """Run inference on the test set and return sigmoid probabilities.

    Args:
        model_path: path to a saved ``state_dict`` checkpoint.
        img_path: directory containing the test images.
        backbone: backbone name forwarded to :class:`MelanomaModel`.

    Returns:
        np.ndarray of per-image probabilities.
    """
    df = pd.read_csv('test.csv')
    test_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])
    test_data = MelanomaDataset(df=df, path=img_path,
                                transformations=test_transforms,
                                is_train=False)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=16)

    net = MelanomaModel(backbone=backbone)
    # map_location lets a GPU-trained checkpoint load on a CPU-only host.
    net.load_state_dict(torch.load(model_path, map_location='cpu'))
    if torch.cuda.is_available():
        # BUG FIX: the inputs were moved to the GPU below but the model was
        # not, which raises a device-mismatch error at the first forward pass.
        net = net.cuda()
    # BUG FIX: the model was left in train mode, so dropout/batch-norm
    # behaved stochastically during inference.
    net.eval()

    preds = []
    with torch.no_grad():
        for img in test_loader:
            if torch.cuda.is_available():
                img = img.cuda()
            logits = net(img)
            preds.extend(logits.cpu().detach().numpy().tolist())
    preds = np.array(preds)
    preds = sigmoid(preds)
    return preds
def main():
    """Predict the test set and write ``sub_<kernel>_<eval>.csv``.

    Loads the fold-0 checkpoint selected by ``args.eval``, averages softmax
    probabilities over ``args.n_test`` test-time-augmentation views (and over
    the loaded models), and stores the melanoma-class probability per image.
    """
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir_2020,
        args.data_dir_2019, args.data_dir_2018, args.use_meta)

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test, 'test', meta_features,
                                   transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(
        dataset_test, batch_size=args.batch_size, num_workers=args.num_workers)

    # load model(s)
    models = []
    for fold in range(1):
        if args.eval == 'best':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            # BUG FIX: this branch was a separate `if`, so an unrecognized
            # args.eval fell through with model_file undefined (NameError).
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')
        else:
            raise ValueError(f'unknown args.eval value: {args.eval!r}')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim,
            pretrained=True,
            # meta_model=args.meta_model
        )
        model = model.to(device)

        try:  # checkpoint saved from a single-GPU model
            model.load_state_dict(torch.load(model_file), strict=True)
        except RuntimeError:
            # checkpoint saved from DataParallel: strip the 'module.' prefix
            # (was a bare `except:`; only a state-dict mismatch should retry)
            state_dict = torch.load(model_file)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)
        model.eval()
        models.append(model)

    # predict: average softmax probabilities over TTA views and models
    PROBS = []
    with torch.no_grad():
        for (data) in tqdm(test_loader):
            if args.use_meta:
                data, meta = data
                data, meta = data.to(device), meta.to(device)
                probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                for model in models:
                    for I in range(args.n_test):
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
            else:
                data = data.to(device)
                probs = torch.zeros((data.shape[0], args.out_dim)).to(device)
                for model in models:
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)
            probs /= args.n_test
            probs /= len(models)
            PROBS.append(probs.detach().cpu())
    PROBS = torch.cat(PROBS).numpy()

    # save csv
    df_test['target'] = PROBS[:, mel_idx]
    df_test['image_name'] = df_test['image']
    df_test[['image_name', 'target']].to_csv(
        os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}.csv'),
        index=False)
def main():
    """Evaluate out-of-fold predictions for all 5 folds and log AUCs.

    Computes raw and per-fold rank-normalized AUC over all data and over the
    2020-only subset (``is_ext == 0``), appends a summary line to the log file
    and saves the OOF predictions as a ``.npy`` file.
    """
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder,
        args.use_meta)

    transforms_train, transforms_val = get_transforms(args.image_size)

    LOGITS = []
    PROBS = []
    dfs = []
    for fold in range(5):
        df_valid = df[df['fold'] == fold]
        if args.DEBUG:
            # keep some positives and negatives so AUC is computable
            df_valid = pd.concat([
                df_valid[df_valid['target'] == mel_idx].sample(args.batch_size * 3),
                df_valid[df_valid['target'] != mel_idx].sample(args.batch_size * 3)
            ])

        dataset_valid = MelanomaDataset(df_valid, 'valid', meta_features,
                                        transform=transforms_val)
        valid_loader = torch.utils.data.DataLoader(
            dataset_valid, batch_size=args.batch_size,
            num_workers=args.num_workers)

        if args.eval == 'best':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            # BUG FIX: was a bare `if`; an unrecognized args.eval left
            # model_file undefined (NameError on first use).
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')
        else:
            raise ValueError(f'unknown args.eval value: {args.eval!r}')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim
        )
        model = model.to(device)

        try:  # single-GPU checkpoint
            model.load_state_dict(torch.load(model_file), strict=True)
        except RuntimeError:
            # DataParallel checkpoint: strip the 'module.' prefix
            # (was a bare `except:`; only a state-dict mismatch should retry)
            state_dict = torch.load(model_file)
            state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k]
                          for k in state_dict.keys()}
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)
        model.eval()

        this_LOGITS, this_PROBS = val_epoch(
            model, valid_loader, mel_idx,
            is_ext=df_valid['is_ext'].values, n_test=8, get_output=True)
        LOGITS.append(this_LOGITS)
        PROBS.append(this_PROBS)
        dfs.append(df_valid)

    dfs = pd.concat(dfs).reset_index(drop=True)
    dfs['pred'] = np.concatenate(PROBS).squeeze()[:, mel_idx]

    auc_all_raw = roc_auc_score(dfs['target'] == mel_idx, dfs['pred'])

    # rank-normalize per fold so folds are comparable despite calibration drift
    dfs2 = dfs.copy()
    for i in range(5):
        dfs2.loc[dfs2['fold'] == i, 'pred'] = \
            dfs2.loc[dfs2['fold'] == i, 'pred'].rank(pct=True)
    auc_all_rank = roc_auc_score(dfs2['target'] == mel_idx, dfs2['pred'])

    # 2020-competition-only subset (is_ext == 0)
    dfs3 = dfs[dfs.is_ext == 0].copy().reset_index(drop=True)
    auc_20_raw = roc_auc_score(dfs3['target'] == mel_idx, dfs3['pred'])
    for i in range(5):
        dfs3.loc[dfs3['fold'] == i, 'pred'] = \
            dfs3.loc[dfs3['fold'] == i, 'pred'].rank(pct=True)
    auc_20_rank = roc_auc_score(dfs3['target'] == mel_idx, dfs3['pred'])

    content = (f'Eval {args.eval}:\n'
               f'auc_all_raw : {auc_all_raw:.5f}\n'
               f'auc_all_rank : {auc_all_rank:.5f}\n'
               f'auc_20_raw : {auc_20_raw:.5f}\n'
               f'auc_20_rank : {auc_20_rank:.5f}\n')
    print(content)
    with open(os.path.join(args.log_dir, f'log_{args.kernel_type}.txt'),
              'a') as appender:
        appender.write(content + '\n')

    np.save(os.path.join(args.oof_dir,
                         f'{args.kernel_type}_{args.eval}_oof.npy'),
            dfs['pred'].values)
def run(fold, df, meta_features, n_meta_features, transforms_train, transforms_val, mel_idx):
    """Train one cross-validation fold and checkpoint the best models.

    Three checkpoints are written per fold: best overall AUC (model_file),
    best AUC on the 2020-only subset (model_file2), and the final-epoch
    weights (model_file3).

    Args:
        fold: fold id; rows with df['fold'] == fold form the validation split.
        df: full dataframe with 'fold', 'target' and 'is_ext' columns.
        meta_features / n_meta_features: metadata column names and their count.
        transforms_train / transforms_val: image transforms for each split.
        mel_idx: index of the melanoma class in the model output.
    """
    if args.DEBUG:
        # tiny run: few epochs, small random samples of each split
        args.n_epochs = 5
        df_train = df[df['fold'] != fold].sample(args.batch_size * 5)
        df_valid = df[df['fold'] == fold].sample(args.batch_size * 5)
    else:
        df_train = df[df['fold'] != fold]
        df_valid = df[df['fold'] == fold]

    dataset_train = MelanomaDataset(df_train, 'train', meta_features, transform=transforms_train)
    dataset_valid = MelanomaDataset(df_valid, 'valid', meta_features, transform=transforms_val)
    train_loader = torch.utils.data.DataLoader(
        dataset_train, batch_size=args.batch_size,
        sampler=RandomSampler(dataset_train),
        num_workers=args.num_workers)  # random sampling without replacement
    valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=args.batch_size, num_workers=args.num_workers)

    model = ModelClass(
        args.enet_type,
        n_meta_features=n_meta_features,
        n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
        out_dim=args.out_dim,
        pretrained=True)
    if DP:
        # multi-GPU path: convert BatchNorm to apex synchronized BatchNorm
        model = apex.parallel.convert_syncbn_model(model)
    model = model.to(device)

    # best-so-far metric trackers
    auc_max = 0.
    auc_20_max = 0.
    model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
    model_file2 = os.path.join(args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
    model_file3 = os.path.join(args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

    optimizer = optim.Adam(model.parameters(), lr=args.init_lr)
    if args.use_amp:
        # NOTE(review): apex amp O1 mixed precision — model/optimizer are wrapped in place
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    if DP:
        model = nn.DataParallel(model)

    # scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.n_epochs - 1)
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, args.n_epochs - 1)
    # one warm-up epoch ramping up to 10x the base lr, then the cosine schedule
    scheduler_warmup = GradualWarmupSchedulerV2(
        optimizer, multiplier=10, total_epoch=1, after_scheduler=scheduler_cosine)

    print(len(dataset_train), len(dataset_valid))

    for epoch in range(1, args.n_epochs + 1):
        print(time.ctime(), f'Epoch {epoch}', f'Fold {fold}')
        # scheduler_warmup.step(epoch - 1)

        train_loss = train_epoch(model, train_loader, optimizer)
        val_loss, acc, auc, auc_20 = val_epoch(
            model, valid_loader, mel_idx, is_ext=df_valid['is_ext'].values)

        content = time.ctime() + ' ' + f'Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {train_loss:.5f}, valid loss: {(val_loss):.5f}, acc: {(acc):.4f}, auc: {(auc):.6f}, auc_20: {(auc_20):.6f}.'
        print(content)
        with open(os.path.join(args.log_dir, f'log_{args.kernel_type}.txt'), 'a') as appender:
            appender.write(content + '\n')

        scheduler_warmup.step()
        if epoch == 2:
            scheduler_warmup.step()  # bug workaround

        # checkpoint whenever either validation metric improves
        if auc > auc_max:
            print('auc_max ({:.6f} --> {:.6f}). Saving model ...'.format(
                auc_max, auc))
            torch.save(model.state_dict(), model_file)
            auc_max = auc
        if auc_20 > auc_20_max:
            print('auc_20_max ({:.6f} --> {:.6f}). Saving model ...'.format(
                auc_20_max, auc_20))
            torch.save(model.state_dict(), model_file2)
            auc_20_max = auc_20

    # always save the last-epoch weights as well
    torch.save(model.state_dict(), model_file3)
def main(model_type='resnet', n_epochs=20, lr=0.0005, batch_size=32):
    """Train a melanoma classifier and plot loss/AUC curves.

    Args:
        model_type: 'resnet' or 'efficientnet' backbone wrapper.
        n_epochs: number of training epochs.
        lr: initial Adam learning rate.
        batch_size: mini-batch size for both loaders.

    Raises:
        ValueError: if ``model_type`` is not one of the two supported values.
    """
    # set file paths
    train_img_path = '/Users/emmarydholm/Documents/code/melanoma_classification/data_added_melanoma/train/train_resized'  # '/data/train_resized/'  # path to resized train images
    test_img_path = '/Users/emmarydholm/Documents/code/melanoma_classification/data_added_melanoma/test/test_resized'  # '/data/test_resized/'  # path to resized test images

    data_train = pd.read_csv('data/train_processed.csv')  # processed csv for train data
    data_test = pd.read_csv('data/test_processed.csv')  # processed csv for test data

    # split data_train into train (last 80%) and validation (first 20%)
    n_data_train = len(data_train)
    split = int(0.2 * n_data_train)
    data_train, data_valid = data_train.iloc[split:], data_train.iloc[0:split]

    # transformation for test and validation data
    transform_valid = Compose([
        CenterCrop(224),  # crop out the center; resulting image shape is 224x224
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # augmentations for the training data
    transform_train = Compose([
        CenterCrop(224),
        RandomPerspective(distortion_scale=0.5, p=0.5, interpolation=3, fill=0),
        RandomVerticalFlip(p=0.5),
        RandomHorizontalFlip(p=0.5),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # create the datasets
    dataset_train = MelanomaDataset(data_train, train_img_path, transform=transform_train)
    dataset_valid = MelanomaDataset(data_valid, train_img_path, transform=transform_valid)
    dataset_test = MelanomaTestDataset(data_test, test_img_path, transform=transform_valid)

    # create the batches with dataloader
    training_loader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
    validation_loader = DataLoader(dataset_valid, batch_size=batch_size, shuffle=True)
    # test_loader = DataLoader(dataset_test, batch_size=32, shuffle=False)

    print('There is ', len(dataset_train), 'images in train set and ',
          len(dataset_valid), 'in dev set.')

    # define device
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    # define model and freeze the deepest layers
    if model_type == 'resnet':
        model = ResnetModel(9)
        no_train_layers = [model.cnn.layer1, model.cnn.layer2, model.cnn.layer3]
        for layer in no_train_layers:
            # BUG FIX: `for param in layer:` iterated *sub-modules*, and
            # assigning .requires_grad on a Module does not freeze anything.
            # Iterate the actual parameters instead.
            for param in layer.parameters():
                param.requires_grad = False
    elif model_type == 'efficientnet':
        model = EfficientNetModel(9)
        # BUG FIX: same issue — Module.requires_grad_() propagates to the
        # module's parameters, while a plain attribute assignment does not.
        model.cnn._conv_stem.requires_grad_(False)
        no_train_layers = model.cnn._blocks[:28]
        for layer in no_train_layers:
            layer.requires_grad_(False)
    else:
        # BUG FIX: an unknown model_type previously fell through to a
        # NameError on `model`; fail fast with a clear message instead.
        raise ValueError(f"unknown model_type: {model_type!r}")

    model = model.to(device)

    # define loss function
    loss_function = torch.nn.BCEWithLogitsLoss()
    # define optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # define scheduler: halve the lr when the validation AUC plateaus
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5,
                                                     patience=1)

    train_loss = []
    validation_loss = []
    train_auc = []
    val_auc = []
    best_auc = 0.0

    # training loop
    for i in range(n_epochs):
        t1, v1, t_auc, v_auc = train_epoch(training_loader, validation_loader,
                                           model, loss_function, optimizer,
                                           device)
        print(f"\r Epoch {i+1}: Training loss = {t1}, Validation loss = {v1}, \
 \n Train auc = {t_auc}, Validation_auc = {v_auc}")
        print('lr = ', optimizer.param_groups[0]['lr'])
        train_loss.append(t1)
        validation_loss.append(v1)
        train_auc.append(t_auc)
        val_auc.append(v_auc)
        scheduler.step(v_auc)

        # save best model
        if v_auc > best_auc:
            # BUG FIX: '/best_model.pt' wrote to the filesystem root, which
            # normally fails with PermissionError; save to the working dir.
            torch.save(model, 'best_model.pt')
            best_auc = v_auc
            print('model saved')

    # plot the result
    epochs = np.arange(n_epochs)
    fig, ax = plt.subplots()
    ax.set_title('Training and Validation losses')
    ax.plot(epochs, train_loss, label='Train')
    ax.plot(epochs, validation_loss, label='Dev')
    plt.legend()

    fig, ax = plt.subplots()
    ax.set_title('Training and Validation ROC AUC')
    ax.plot(epochs, train_auc, label='Train')
    ax.plot(epochs, val_auc, label='Dev')
    plt.legend()
def predict_image(image_path):
    """Predict the melanoma probability for a single image.

    Runs the image through the 5 fold checkpoints with 8-view test-time
    augmentation each, averages the per-fold softmax probabilities, and
    returns the melanoma-class probability rounded to 8 decimals.

    Args:
        image_path: path to the image to classify.

    Returns:
        float: averaged melanoma probability.
    """
    OUTPUTS = []
    n_test = 8  # number of TTA views per image
    transforms_train, transforms_val = get_transforms(config.image_size)

    dataset_test = MelanomaDataset(None, 'test', None,
                                   transform=transforms_val,
                                   image_path=image_path)
    test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=1,
                                              num_workers=0)

    for fold in range(5):
        model_file = os.path.join(
            config.model_dir, f'{config.kernel_type}_best_o_fold{fold}.pth')

        ModelClass = Effnet_Melanoma
        model = ModelClass(config.enet_type, out_dim=config.out_dim)
        model = model.to(config.device)

        try:  # single-GPU checkpoint
            model.load_state_dict(
                torch.load(model_file, map_location=config.device),
                strict=True)
        except RuntimeError:
            # BUG FIX: was a bare `except:` (it even swallowed
            # KeyboardInterrupt); only retry the DataParallel
            # ('module.'-prefixed) layout on a state-dict mismatch.
            state_dict = torch.load(model_file, map_location=config.device)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)
        model.eval()

        LOGITS = []
        PROBS = []
        with torch.no_grad():
            for (data) in tqdm(test_loader):
                if config.use_meta:
                    data, meta = data
                    data, meta = data.to(config.device), meta.to(config.device)
                    logits = torch.zeros(
                        (data.shape[0], config.out_dim)).to(config.device)
                    probs = torch.zeros(
                        (data.shape[0], config.out_dim)).to(config.device)
                    for I in range(n_test):
                        l = model(get_trans(data, I), meta)
                        logits += l
                        probs += l.softmax(1)
                else:
                    data = data.to(config.device)
                    logits = torch.zeros(
                        (data.shape[0], config.out_dim)).to(config.device)
                    probs = torch.zeros(
                        (data.shape[0], config.out_dim)).to(config.device)
                    for I in range(n_test):
                        l = model(get_trans(data, I))
                        logits += l
                        probs += l.softmax(1)
                logits /= n_test
                probs /= n_test
                LOGITS.append(logits.detach().cpu())
                PROBS.append(probs.detach().cpu())

        LOGITS = torch.cat(LOGITS).numpy()
        PROBS = torch.cat(PROBS).numpy()
        OUTPUTS.append(PROBS[:, config.mel_idx])

    # When predicting on your own moles there is no need to rank the
    # probabilities; a plain mean over the folds is returned.
    pred = np.zeros(OUTPUTS[0].shape[0])
    for probs in OUTPUTS:
        pred += probs
    pred /= len(OUTPUTS)
    return round(pred[0], 8)
def main():
    """Predict the test set with all 5 fold models and write a submission csv.

    Each fold's melanoma probabilities are rank-normalized and then averaged
    before being written to ``sub_<kernel>_<eval>.csv``.
    """
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder,
        args.use_meta)

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test, 'test', meta_features,
                                   transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(
        dataset_test, batch_size=args.batch_size,
        num_workers=args.num_workers)

    print(f'\nPredicting test set using {args.enet_type} ...')

    OUTPUTS = []
    for fold in range(5):
        if args.eval == 'best':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            # BUG FIX: was a bare `if`; an unrecognized args.eval left
            # model_file undefined (NameError on first use).
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')
        else:
            raise ValueError(f'unknown args.eval value: {args.eval!r}')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim)
        model = model.to(device)

        try:  # single-GPU checkpoint
            model.load_state_dict(torch.load(model_file), strict=True)
        except RuntimeError:
            # DataParallel checkpoint: strip the 'module.' prefix
            # (was a bare `except:`; only a state-dict mismatch should retry)
            state_dict = torch.load(model_file)
            state_dict = {
                k[7:] if k.startswith('module.') else k: state_dict[k]
                for k in state_dict.keys()
            }
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)
        model.eval()

        PROBS = []
        with torch.no_grad():
            for (data) in tqdm(test_loader):
                if args.use_meta:
                    data, meta = data
                    data, meta = data.to(device), meta.to(device)
                    probs = torch.zeros(
                        (data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
                else:
                    data = data.to(device)
                    probs = torch.zeros(
                        (data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)
                probs /= args.n_test
                PROBS.append(probs.detach().cpu())

        PROBS = torch.cat(PROBS).numpy()
        OUTPUTS.append(PROBS[:, mel_idx])

    # Rank per fold (when predicting your own moles the ranking is unnecessary)
    pred = np.zeros(OUTPUTS[0].shape[0])
    for probs in OUTPUTS:
        pred += pd.Series(probs).rank(pct=True).values
    pred /= len(OUTPUTS)

    df_test['target'] = pred
    df_test[['image_name', 'target']].to_csv(
        os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}.csv'),
        index=False)
    print('\nSaved submission in -> ./subs')
def main():
    """Predict the test set with all 5 fold models and write a submission csv.

    Per-fold probabilities are averaged (simple mean across folds) before the
    melanoma-class column is written out.
    """
    df, df_test, meta_features, n_meta_features, mel_idx = get_df(
        args.kernel_type, args.out_dim, args.data_dir, args.data_folder,
        args.use_meta
    )

    transforms_train, transforms_val = get_transforms(args.image_size)

    if args.DEBUG:
        df_test = df_test.sample(args.batch_size * 3)
    dataset_test = MelanomaDataset(df_test, 'test', meta_features,
                                   transform=transforms_val)
    test_loader = torch.utils.data.DataLoader(
        dataset_test, batch_size=args.batch_size,
        num_workers=args.num_workers)

    # BUG FIX: PROBS was initialized both here and again inside the fold loop
    # (the inline comments already flagged the duplication), so folds 0-3 were
    # computed and then discarded — only the last fold reached the submission.
    # Per-fold predictions are now collected and averaged across folds.
    fold_probs = []
    for fold in range(5):  # use the model built from each fold
        if args.eval == 'best':  # default
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
        elif args.eval == 'best_20':
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
        elif args.eval == 'final':
            # BUG FIX: was a bare `if`; an unrecognized args.eval left
            # model_file undefined (NameError on first use).
            model_file = os.path.join(
                args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')
        else:
            raise ValueError(f'unknown args.eval value: {args.eval!r}')

        model = ModelClass(
            args.enet_type,
            n_meta_features=n_meta_features,
            n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
            out_dim=args.out_dim
        )
        model = model.to(device)

        try:  # single-GPU checkpoint
            model.load_state_dict(torch.load(model_file), strict=True)
        except RuntimeError:
            # DataParallel checkpoint: strip the 'module.' prefix
            # (was a bare `except:`; only a state-dict mismatch should retry)
            state_dict = torch.load(model_file)
            state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k]
                          for k in state_dict.keys()}
            model.load_state_dict(state_dict, strict=True)

        if len(os.environ['CUDA_VISIBLE_DEVICES']) > 1:
            model = torch.nn.DataParallel(model)
        model.eval()

        PROBS = []
        with torch.no_grad():
            for (data) in tqdm(test_loader):
                if args.use_meta:
                    data, meta = data
                    data, meta = data.to(device), meta.to(device)
                    # batch x label
                    probs = torch.zeros(
                        (data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):  # flipping images 8 times (TTA)
                        l = model(get_trans(data, I), meta)
                        probs += l.softmax(1)
                else:
                    data = data.to(device)
                    probs = torch.zeros(
                        (data.shape[0], args.out_dim)).to(device)
                    for I in range(args.n_test):
                        l = model(get_trans(data, I))
                        probs += l.softmax(1)
                probs /= args.n_test  # average over all the flips
                PROBS.append(probs.detach().cpu())  # prediction for this batch

        # total_obs_size x num_labels for this fold
        fold_probs.append(torch.cat(PROBS).numpy())

    # mean over folds, then take the melanoma column
    df_test['target'] = np.mean(fold_probs, axis=0)[:, mel_idx]
    df_test[['image_name', 'target']].to_csv(
        os.path.join(args.sub_dir, f'sub_{args.kernel_type}_{args.eval}.csv'),
        index=False)
def run(fold, df, meta_features, n_meta_features, transforms_train, transforms_val, mel_idx):
    """Train one k-fold split and checkpoint the best/final model weights.

    Following the k-fold method: rows with df['fold'] == fold form the
    validation split; all remaining folds are used for training.
    """
    if args.DEBUG:
        args.n_epochs = 5
        # validation uses the packet whose id equals `fold`;
        # the remaining packets are used for training
        df_train = df[df['fold'] != fold].sample(args.batch_size * 5)
        df_valid = df[df['fold'] == fold].sample(args.batch_size * 5)
    else:
        df_train = df[df['fold'] != fold]
        df_valid = df[df['fold'] == fold]

    # instantiate our dataset objects (training + validation)
    dataset_train = MelanomaDataset(df_train, 'train', meta_features, transform=transforms_train)
    dataset_valid = MelanomaDataset(df_valid, 'valid', meta_features, transform=transforms_val)

    # instantiate our data loaders (training + validation)
    train_loader = torch.utils.data.DataLoader(
        dataset_train, batch_size=args.batch_size,
        sampler=RandomSampler(dataset_train), num_workers=args.num_workers)
    valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=args.batch_size, num_workers=args.num_workers)

    # instantiate our model
    model = ModelClass(
        args.enet_type,  # e.g. Resnet
        n_meta_features=n_meta_features,  # e.g. ['sex', 'age_approx', 'n_images', 'image_size']
        n_meta_dim=[int(nd) for nd in args.n_meta_dim.split(',')],
        out_dim=args.out_dim,
        pretrained=True)
    if DP:
        # multi-GPU path: convert BatchNorm to apex synchronized BatchNorm
        model = apex.parallel.convert_syncbn_model(model)
    model = model.to(device)

    # instantiate our best-metric trackers
    auc_max = 0.
    auc_20_max = 0.

    # define the files in which the model parameters are stored
    model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
    model_file2 = os.path.join(args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
    model_file3 = os.path.join(args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

    optimizer = optim.Adam(model.parameters(), lr=args.init_lr)
    if args.use_amp:
        # NOTE(review): apex amp O1 mixed precision — wraps model/optimizer in place
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    if DP:
        model = nn.DataParallel(model)

    # scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.n_epochs - 1)
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, args.n_epochs - 1)
    # one warm-up epoch ramping up to 10x the base lr, then the cosine schedule
    scheduler_warmup = GradualWarmupSchedulerV2(
        optimizer, multiplier=10, total_epoch=1, after_scheduler=scheduler_cosine)

    print(len(dataset_train), len(dataset_valid))

    for epoch in range(1, args.n_epochs + 1):
        print(time.ctime(), f'Fold {fold}, Epoch {epoch}')
        # scheduler_warmup.step(epoch - 1)

        # train loss
        train_loss = train_epoch(model, train_loader, optimizer)
        # validation loss
        val_loss, acc, auc, auc_20 = val_epoch(
            model, valid_loader, mel_idx, is_ext=df_valid['is_ext'].values)

        content = time.ctime() + ' ' + f'Fold {fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {train_loss:.5f}, valid loss: {(val_loss):.5f}, acc: {(acc):.4f}, auc: {(auc):.6f}, auc_20: {(auc_20):.6f}.'
        print(content)
        with open(os.path.join(args.log_dir, f'log_{args.kernel_type}.txt'), 'a') as appender:
            appender.write(content + '\n')

        scheduler_warmup.step()
        if epoch == 2:
            scheduler_warmup.step()  # bug workaround

        # store the model parameters in the matching checkpoint files
        if auc > auc_max:
            print('auc_max ({:.6f} --> {:.6f}). Saving model ...'.format(
                auc_max, auc))
            torch.save(model.state_dict(), model_file)
            auc_max = auc
        if auc_20 > auc_20_max:
            print('auc_20_max ({:.6f} --> {:.6f}). Saving model ...'.format(
                auc_20_max, auc_20))
            torch.save(model.state_dict(), model_file2)
            auc_20_max = auc_20

    # store the final-epoch model parameters in model_file3
    torch.save(model.state_dict(), model_file3)
# 0.229, 0.224, 0.225])]) test_transform = A.Compose([ A.JpegCompression(p=0.5), A.RandomSizedCrop(min_max_height=(int(resolution*0.9), int(resolution*1.1)), height=resolution, width=resolution, p=1.0), A.HorizontalFlip(p=0.5), A.VerticalFlip(p=0.5), A.Transpose(p=0.5), A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ToTensorV2(), ], p=1.0) t_dataset=MelanomaDataset(df=df, imfolder=test, train=False, transforms=test_transform, meta_features=meta_features) print('Length of test set is {}'.format(len(t_dataset))) testloader=DataLoader(t_dataset, batch_size=8, shuffle=False, num_workers=8) """Testing""" # model = ResNetModel()() # model = EfficientModel() # model = EfficientModel(n_meta_features=len(meta_features)) model = Model(arch='efficientnet-b1') # model.load_state_dict(torch.load("../checkpoint/fold_1/efficient_256/efficientb0_256_14_0.9212.pth", map_location=torch.device(device))) model.load_state_dict(torch.load("..//checkpoint/fold_1/efficient_320/efficientb1_320_14_0.9293.pth", map_location=torch.device(device))) model.to(device) model.eval()
def run(fold, df, meta_features, n_meta_features, transforms_train, transforms_val, mel_idx):
    """Train one fold, resuming from the best checkpoint when available.

    Saves the best-AUC weights (model_file), the best 2020-subset-AUC weights
    (model_file2), and — after the last epoch — a full checkpoint containing
    both model and optimizer state (model_file3).
    """
    if args.DEBUG:
        args.n_epochs = 5
        df_train = df[df['fold'] != fold].sample(args.batch_size * 5)
        df_valid = df[df['fold'] == fold].sample(args.batch_size * 5)
    else:
        df_train = df[df['fold'] != fold]
        df_valid = df[df['fold'] == fold]

    dataset_train = MelanomaDataset(df_train, 'train', meta_features,
                                    transform=transforms_train)
    dataset_valid = MelanomaDataset(df_valid, 'valid', meta_features,
                                    transform=transforms_val)
    train_loader = torch.utils.data.DataLoader(
        dataset_train, batch_size=args.batch_size,
        sampler=RandomSampler(dataset_train), num_workers=args.num_workers)
    valid_loader = torch.utils.data.DataLoader(
        dataset_valid, batch_size=args.batch_size,
        num_workers=args.num_workers)

    model = ModelClass()
    model = model.to(device)

    # best-so-far metric trackers
    auc_max = 0.
    auc_20_max = 0.
    model_file = os.path.join(args.model_dir, f'{args.kernel_type}_best_fold{fold}.pth')
    model_file2 = os.path.join(args.model_dir, f'{args.kernel_type}_best_20_fold{fold}.pth')
    model_file3 = os.path.join(args.model_dir, f'{args.kernel_type}_final_fold{fold}.pth')

    optimizer = optim.AdamW(model.parameters(), lr=args.init_lr,
                            weight_decay=args.weight_decay)
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, args.n_epochs - 1)
    # one warm-up epoch ramping up to 10x the base lr, then the cosine schedule
    scheduler_warmup = GradualWarmupSchedulerV2(
        optimizer, multiplier=10, total_epoch=1,
        after_scheduler=scheduler_cosine)

    print(len(dataset_train), len(dataset_valid))

    # BUG FIX: the message claimed to resume from model_file3 while the code
    # actually loads model_file; the message now names the file being loaded.
    print('Continuing with model from ' + model_file)
    try:
        checkpoint = torch.load(model_file)
        model.load_state_dict(checkpoint, strict=False)
    except (FileNotFoundError, RuntimeError) as exc:
        # BUG FIX: the bare `except: print('error')` hid the reason the resume
        # failed; report it and fall through to training from scratch.
        print(f'could not resume from {model_file}: {exc}')

    for epoch in range(1, args.n_epochs + 1):
        print(time.ctime(), f'Fold {fold}, Epoch {epoch}')
        # scheduler_warmup.step(epoch - 1)

        train_loss = train_epoch(model, train_loader, optimizer)
        val_loss, acc, auc, auc_20 = val_epoch(
            model, valid_loader, mel_idx, is_ext=df_valid['is_ext'].values)

        content = time.ctime() + ' ' + f'Fold {fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {train_loss:.5f}, valid loss: {(val_loss):.5f}, acc: {(acc):.4f}, auc: {(auc):.6f}, auc_20: {(auc_20):.6f}.'
        print(content)
        with open(os.path.join(args.log_dir, f'log_{args.kernel_type}.txt'),
                  'a') as appender:
            appender.write(content + '\n')

        scheduler_warmup.step()
        if epoch == 2:
            scheduler_warmup.step()  # bug workaround

        # checkpoint whenever either validation metric improves
        if auc > auc_max:
            print('auc_max ({:.6f} --> {:.6f}). Saving model ...'.format(auc_max, auc))
            torch.save(model.state_dict(), model_file)
            auc_max = auc
        if auc_20 > auc_20_max:
            print('auc_20_max ({:.6f} --> {:.6f}). Saving model ...'.format(auc_20_max, auc_20))
            torch.save(model.state_dict(), model_file2)
            auc_20_max = auc_20

    # final checkpoint with optimizer state for later resumption
    torch.save({
        'net': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, model_file3)
# Per-fold data preparation and model setup for the current `fold`
# (fold, df, train_transform, valid_transform etc. come from enclosing scope).
df_train = df[df['fold'] != fold]
df_valid = df[df['fold'] == fold]

# per-class sample counts of the training split — diagnostic print only,
# since the weighted-sampler code below is commented out
class_sample_count = np.array([len(np.where(df_train["target"] == t)[0]) for t in np.unique(df_train["target"])])
print(class_sample_count)
# weight = 1. / class_sample_count
# samples_weight = np.array([weight[t] for t in df_train["target"]])
# samples_weight = torch.from_numpy(samples_weight)
# sampler = WeightedRandomSampler(samples_weight.type('torch.DoubleTensor'), len(samples_weight))
# print(samples_weight)

# NOTE(review): the validation dataset is built with train=True — presumably
# so labels are returned for scoring; confirm against MelanomaDataset.
t_dataset = MelanomaDataset(df=df_train, imfolder=train, train=True,
                            transforms=train_transform,
                            meta_features=meta_features)
v_dataset = MelanomaDataset(df=df_valid, imfolder=train, train=True,
                            transforms=valid_transform,
                            meta_features=meta_features)
print('Length of training and validation set are {} {}'.format(
    len(t_dataset), len(v_dataset)))
trainloader = DataLoader(t_dataset, batch_size=32, shuffle=True, num_workers=8)
validloader = DataLoader(v_dataset, batch_size=32, shuffle=False, num_workers=8)

"""
Training
"""
# model = ResNetModel()
# model = EfficientModelwithoutMeta()
model = Model(arch='efficientnet-b2')
# model = EfficientModel(n_meta_features=len(meta_features))
model.to(device)
# generate the meta data features for the train and tests sets train_feat, test_feat = gen_train_test_feat(train_csv, test_csv) # generate stratified splits using fixed random seed skf = StratifiedKFold(n_splits=args.folds, shuffle=True, random_state=408) dummy_X = np.zeros(len(train_csv)) train_y = train_csv['target'] # if this isn't a cropped experiment we can create a single test loader # but in the case of a cropped experiment the test set changes for each fold if not args.cropped: test_dset = MelanomaDataset(test_csv, test_imgs, test_feat, train=False, labels=False, transform=test_transform, chip=args.chipped) test_loader = DataLoader(dataset=test_dset, batch_size=args.test_batch_size, shuffle=False, num_workers=args.num_workers) # create arrays for final test set predictions and best fold performances final_preds = torch.zeros(len(test_csv)) fold_aucs = [] for fold, (train_idx, valid_idx) in enumerate(skf.split(X=dummy_X, y=train_y), 1):