def main():
    train_transforms = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
        ToTensorV2(),
    ])
    val_transforms = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
        ToTensorV2(),
    ])

    model = UNET(in_channels=3, out_channels=1).to(DEVICE)
    # ComboLoss replaces the plain nn.BCEWithLogitsLoss baseline; the weights sum to 1.0.
    loss_fn = ComboLoss({'bce': 0.4, 'dice': 0.5, 'focal': 0.1})
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    train_loader, val_loader = get_loaders(TRAIN_IMG_DIR, TRAIN_MASK_DIR, VAL_IMG_DIR,
                                           VAL_MASK_DIR, BATCH_SIZE, train_transforms,
                                           val_transforms)

    if LOAD_MODEL:
        load_checkpoint(torch.load('checkpoint.pth'), model)

    scaler = torch.cuda.amp.GradScaler()
    for epoch in range(NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        # save model
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        # save_checkpoint(checkpoint)

        # check accuracy
        check_accuracy(val_loader, model, DEVICE)

        # print some examples to a folder
        save_predictions_as_imgs(val_loader, model, device=DEVICE)
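# ComboLoss itself is never defined in these snippets. The sketch below is a
# minimal, assumed implementation consistent with the call sites in this file:
# a weighted sum of named component losses ('bce', 'dice', 'focal') computed on
# logits. `per_image` and `channel_weights` are accepted only for signature
# compatibility here; the real implementations presumably honor them.
import torch
import torch.nn as nn
import torch.nn.functional as F


def dice_loss(logits, targets, eps=1e-6):
    # soft dice over all non-batch dims, averaged across the batch
    probs = torch.sigmoid(logits)
    dims = tuple(range(1, logits.dim()))
    intersection = (probs * targets).sum(dims)
    union = probs.sum(dims) + targets.sum(dims)
    return (1.0 - (2.0 * intersection + eps) / (union + eps)).mean()


def focal_loss(logits, targets, gamma=2.0):
    # standard binary focal loss built on top of BCE-with-logits
    bce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
    pt = torch.exp(-bce)  # probability assigned to the true class
    return ((1.0 - pt) ** gamma * bce).mean()


class ComboLoss(nn.Module):
    """Weighted sum of named losses, e.g. ComboLoss({'bce': 1, 'dice': 1, 'focal': 1})."""

    def __init__(self, weights, per_image=False, channel_weights=None):
        super().__init__()
        # per_image / channel_weights: accepted for call-site compatibility,
        # not implemented in this sketch
        self.weights = weights
        self.mapping = {
            'bce': F.binary_cross_entropy_with_logits,
            'dice': dice_loss,
            'focal': focal_loss,
        }

    def forward(self, logits, targets):
        return sum(w * self.mapping[name](logits, targets)
                   for name, w in self.weights.items())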
val_data_loader = DataLoader(val_train, batch_size=batch_size, num_workers=2,
                             shuffle=False, pin_memory=True)

model = nn.DataParallel(Dpn92_9ch_Unet()).cuda()
params = model.parameters()
optimizer = AdamW(params, lr=1e-4, weight_decay=1e-4)
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[4, 12, 22], gamma=0.5)

loss_function = ComboLoss({'dice': 1.0, 'focal': 10.0}, per_image=True).cuda()
l1_loss = torch.nn.SmoothL1Loss().cuda()

best_score = 0
for epoch in range(25):
    train_epoch(epoch, loss_function, l1_loss, model, optimizer, scheduler, train_data_loader)
    torch.save({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_score': best_score,
    }, path.join(models_folder, last_snapshot_name))
    torch.cuda.empty_cache()
train_data_loader = DataLoader(data_train, batch_size=batch_size, num_workers=12,
                               shuffle=True, pin_memory=True, drop_last=True)
val_data_loader = DataLoader(val_train, batch_size=val_batch_size, num_workers=12,
                             shuffle=False, pin_memory=False)

model = SeResNext50_Unet_9ch()  # .cuda()
params = model.parameters()
optimizer = AdamW(params, lr=0.0004, weight_decay=1e-4)
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[6, 12, 18, 24, 26], gamma=0.5)
model = nn.DataParallel(model).cuda()

seg_loss = ComboLoss({'dice': 1.0, 'focal': 4.0}, per_image=True).cuda()
ce_loss = nn.CrossEntropyLoss().cuda()
mse_loss = nn.MSELoss().cuda()

best_score = 0
_cnt = -1
for epoch in range(27):
    train_epoch(epoch, seg_loss, ce_loss, mse_loss, model, optimizer, scheduler, train_data_loader)
    if epoch % 2 == 0:
        _cnt += 1
        # torch.save({
        #     'epoch': epoch + 1,
        #     'state_dict': model.state_dict(),
        #     'best_score': best_score,
        # }, path.join(models_folder, snapshot_name + '_{}'.format(_cnt % 3)))
    torch.cuda.empty_cache()
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID]

        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ], p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100, 140), p=0.5),
                RandomBrightnessContrast(p=0.5),
                RandomBrightness(p=0.5),
                RandomContrast(p=0.5)
            ], p=0.5),
            OneOf([
                GaussNoise(p=0.5),
                Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5)
            ], p=0.5)
        ])
        val_augmentation = None

        train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS,
                                     transforms=train_augmentation, crop_rate=1.0)
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

        del train_df, val_df, df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('se_resnext50_32x4d', encoder_weights="imagenet", classes=N_CLASSES,
                         encoder_se_module=True, decoder_semodule=True, h_columns=False, skip=True)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = ComboLoss({'bce': 1, 'dice': 1, 'focal': 1}, channel_weights=[1, 1, 1, 1])
        optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
        if base_model is None:
            scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5)
            scheduler = GradualWarmupScheduler(optimizer, multiplier=1.1, total_epoch=CLR_CYCLE * 2,
                                               after_scheduler=scheduler_cosine)
        else:
            scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5)

        model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        train_losses = []
        valid_losses = []
        best_model_loss = 999
        best_model_ep = 0
        checkpoint = base_ckpt + 1

        for epoch in range(1, EPOCHS + 1):
            seed = seed + epoch
            seed_torch(seed)

            if epoch % (CLR_CYCLE * 2) == 0:
                LOGGER.info('Best valid loss: {} on epoch={}'.format(round(best_model_loss, 5), best_model_ep))
                checkpoint += 1
                best_model_loss = 999

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device, cutmix_prob=0.0)
            train_losses.append(tr_loss)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            valid_loss = validate(model, val_loader, criterion, device)
            valid_losses.append(valid_loss)
            LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))
            scheduler.step()

            if valid_loss < best_model_loss:
                torch.save(model.module.state_dict(),
                           'models/{}_fold{}_ckpt{}.pth'.format(EXP_ID, FOLD_ID, checkpoint))
                best_model_loss = valid_loss
                best_model_ep = epoch

            # np.save("val_pred.npy", val_pred)
            # del val_pred
            gc.collect()

    LOGGER.info('Best valid loss: {} on epoch={}'.format(round(best_model_loss, 5), best_model_ep))

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.plot(xs, valid_losses, label='Val loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Epochs')
    plt.savefig("loss.png")
checkpoint = torch.load(path.join(models_folder, snap_to_load), map_location='cpu')
loaded_dict = checkpoint['state_dict']
sd = model.state_dict()
# copy only tensors whose name and shape match the current model, so a
# checkpoint from a slightly different architecture can still be reused
for k in model.state_dict():
    if k in loaded_dict and sd[k].size() == loaded_dict[k].size():
        sd[k] = loaded_dict[k]
loaded_dict = sd
model.load_state_dict(loaded_dict)
print("loaded checkpoint '{}' (epoch {}, best_score {})".format(
    snap_to_load, checkpoint['epoch'], checkpoint['best_score']))
del loaded_dict
del sd
del checkpoint
gc.collect()
torch.cuda.empty_cache()

seg_loss = ComboLoss({'dice': 0.5, 'focal': 2.0}, per_image=False).cuda()
ce_loss = nn.CrossEntropyLoss().cuda()

best_score = 0
torch.cuda.empty_cache()
for epoch in range(2):
    train_epoch(epoch, seg_loss, ce_loss, model, optimizer, scheduler, train_data_loader)
    torch.cuda.empty_cache()
    best_score = evaluate_val(val_data_loader, best_score, model, snapshot_name, epoch)

elapsed = timeit.default_timer() - t0
print('Time: {:.3f} min'.format(elapsed / 60))
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        y1 = (df.EncodedPixels_1 != "-1").astype("float32").values.reshape(-1, 1)
        y2 = (df.EncodedPixels_2 != "-1").astype("float32").values.reshape(-1, 1)
        y3 = (df.EncodedPixels_3 != "-1").astype("float32").values.reshape(-1, 1)
        y4 = (df.EncodedPixels_4 != "-1").astype("float32").values.reshape(-1, 1)
        y = np.concatenate([y1, y2, y3, y4], axis=1)

    with timer('preprocessing'):
        train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID]
        y_train, y_val = y[df.fold_id != FOLD_ID], y[df.fold_id == FOLD_ID]

        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ], p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100, 140), p=0.5),
                RandomBrightnessContrast(p=0.5),
            ], p=0.5),
            OneOf([
                GaussNoise(p=0.5),
            ], p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
        ])
        val_augmentation = None

        train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS,
                                     transforms=train_augmentation, crop_rate=1.0, class_y=y_train)
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=8)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

        del train_df, val_df, df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES,
                         encoder_se_module=True, decoder_semodule=True, h_columns=False,
                         skip=True, act="swish", freeze_bn=True, classification=CLASSIFICATION,
                         attention_type="cbam", center=True)
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = ComboLoss({'bce': 4, 'dice': 1, 'focal': 3}, channel_weights=[1, 1, 1, 1])
        optimizer = torch.optim.Adam([
            {'params': model.decoder.parameters(), 'lr': 3e-3},
            {'params': model.encoder.parameters(), 'lr': 3e-4},
        ])
        if base_model is None:
            scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5)
            scheduler = GradualWarmupScheduler(optimizer, multiplier=1.1, total_epoch=CLR_CYCLE * 2,
                                               after_scheduler=scheduler_cosine)
        else:
            scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5)

        model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        train_losses = []
        valid_losses = []
        best_model_loss = 999
        best_model_ep = 0
        checkpoint = base_ckpt + 1

        for epoch in range(54, EPOCHS + 1):
            seed = seed + epoch
            seed_torch(seed)

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device,
                                      cutmix_prob=0.0, classification=CLASSIFICATION)
            train_losses.append(tr_loss)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            valid_loss = validate(model, val_loader, criterion, device, classification=CLASSIFICATION)
            valid_losses.append(valid_loss)
            LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))
            scheduler.step()

            if valid_loss < best_model_loss:
                torch.save(model.module.state_dict(),
                           'models/{}_fold{}_ckpt{}.pth'.format(EXP_ID, FOLD_ID, checkpoint))
                best_model_loss = valid_loss
                best_model_ep = epoch
            # np.save("val_pred.npy", val_pred)

            if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1:
                torch.save(model.module.state_dict(),
                           'models/{}_fold{}_latest.pth'.format(EXP_ID, FOLD_ID))
                LOGGER.info('Best valid loss: {} on epoch={}'.format(round(best_model_loss, 5), best_model_ep))
                checkpoint += 1
                best_model_loss = 999

            # del val_pred
            gc.collect()

    LOGGER.info('Best valid loss: {} on epoch={}'.format(round(best_model_loss, 5), best_model_ep))

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.plot(xs, valid_losses, label='Val loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Epochs')
    plt.savefig("loss.png")
checkpoint = torch.load(path.join(models_folder, snap_to_load), map_location='cpu')
loaded_dict = checkpoint['state_dict']
sd = model.state_dict()
for k in model.state_dict():
    if k in loaded_dict and sd[k].size() == loaded_dict[k].size():
        sd[k] = loaded_dict[k]
loaded_dict = sd
model.load_state_dict(loaded_dict)
print("loaded checkpoint '{}' (epoch {}, best_score {})".format(
    snap_to_load, checkpoint['epoch'], checkpoint['best_score']))
del loaded_dict
del sd
del checkpoint
gc.collect()
torch.cuda.empty_cache()

seg_loss = ComboLoss({'dice': 0.5}, per_image=False).cuda()
ce_loss = nn.CrossEntropyLoss().cuda()

best_score = 0
torch.cuda.empty_cache()
for epoch in range(2):
    train_epoch(epoch, seg_loss, ce_loss, model, optimizer, scheduler, train_data_loader)
    torch.cuda.empty_cache()
    best_score = evaluate_val(val_data_loader, best_score, model, snapshot_name, epoch)

elapsed = timeit.default_timer() - t0
print('Time: {:.3f} min'.format(elapsed / 60))
params = model.parameters()
optimizer = AdamW(params, lr=0.00015, weight_decay=1e-6)
scheduler = lr_scheduler.MultiStepLR(optimizer,
                                     milestones=[5, 11, 17, 25, 33, 47, 50, 60, 70, 90,
                                                 110, 130, 150, 170, 180, 190],
                                     gamma=0.5)
model = nn.DataParallel(model).cuda()

seg_loss = ComboLoss({'dice': 1.0, 'focal': 10.0}, per_image=False).cuda()  # True

best_score = 0
_cnt = -1
torch.cuda.empty_cache()
for epoch in range(55):
    train_epoch(epoch, seg_loss, model, optimizer, scheduler, train_data_loader)
    if epoch % 2 == 0:
        _cnt += 1
        torch.cuda.empty_cache()
        best_score = evaluate_val(val_data_loader, best_score, model, snapshot_name, epoch)

elapsed = timeit.default_timer() - t0
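# evaluate_val is not shown in these snippets. Its contract, inferred from the
# call sites: run validation, compute a score, save a snapshot when the score
# improves, and return the (possibly updated) best score. A hypothetical
# sketch; validate_score is an assumed scoring helper, not from the source:
def evaluate_val(data_loader, best_score, model, snapshot_name, epoch):
    model.eval()
    with torch.no_grad():
        score = validate_score(data_loader, model)  # hypothetical helper
    if score > best_score:
        torch.save({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_score': score,
        }, path.join(models_folder, snapshot_name + '_best'))
        best_score = score
    print("score: {}\tbest_score: {}".format(score, best_score))
    return best_score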
device = torch.device('cuda:0')
model = AlbuNet().to(device)
checkpoint_path = "../checkpoints/seg-9.pth"
model.load_state_dict(torch.load(checkpoint_path))

trainloader = DataLoader(
    SIIMDataset("../dataset"),
    batch_size=4,
    num_workers=4,
    pin_memory=True,
    shuffle=True,
)

optimizer = optim.Adam(model.parameters(), lr=0.00001)
criterion = ComboLoss({'bce': 3, 'dice': 1, 'focal': 4})

# for idx, batch in enumerate(trainloader):
#     images, masks = batch
#     images = images.to(device).type(torch.float32)
#     out = model(images)
#     out = torch.sigmoid(out)
#     print(images.shape, out.shape)
#     ms = np.reshape(out.cpu().detach().numpy(), (512, 512))
#     ms = np.where(ms > 0.3, 1, 0).astype(np.float32)
#     cv2.imshow("", ms)
#     cv2.imshow("original", np.reshape(masks.detach().numpy(), (512, 512)))
#     cv2.waitKey(0)

num_epochs = 10
grad_accum = 1
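# The snippet above defines num_epochs and grad_accum but omits the training
# loop itself (only a commented-out visual-debug loop remains). A minimal
# sketch of how they would plausibly be used, stepping the optimizer every
# grad_accum batches; the checkpoint naming mirrors the load path above and
# is an assumption, not from the original source:
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    for idx, (images, masks) in enumerate(trainloader):
        images = images.to(device).type(torch.float32)
        masks = masks.to(device).type(torch.float32)
        out = model(images)
        loss = criterion(out, masks) / grad_accum  # scale so accumulated grads average
        loss.backward()
        if (idx + 1) % grad_accum == 0:
            optimizer.step()  # apply the accumulated gradients
            optimizer.zero_grad()
    torch.save(model.state_dict(), "../checkpoints/seg-{}.pth".format(epoch))  # assumed naming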