def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        # Out-of-fold inference: keep only the validation fold.
        val_df = df[df.fold_id == FOLD_ID]
        val_augmentation = None
        val_dataset = SeverDatasetTest(val_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                       id_colname=ID_COLUMNS,
                                       transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                                shuffle=False, num_workers=8)

        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        models = []
        model = smp_old.Unet('resnet34',
                             encoder_weights="imagenet",
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION)
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)
        models.append(model)

    with timer('predict'):
        rles, sub_ids = predict(models, val_loader, device)
        sub_df = pd.DataFrame({'ImageId_ClassId': sub_ids,
                               'EncodedPixels': rles})
        LOGGER.info(sub_df.head())
        sub_df.to_csv('{}_{}.csv'.format(EXP_ID, FOLD_ID), index=False)
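# `timer` is used throughout these scripts but not defined in this snippet.
# A minimal sketch of what it plausibly looks like (an assumption, not the
# author's verified implementation): a context manager that logs the block
# name and elapsed wall-clock time through the module-level LOGGER.
from contextlib import contextmanager
import time

@contextmanager
def timer(name):
    t0 = time.time()
    yield
    LOGGER.info('[{}] done in {:.1f} s'.format(name, time.time() - t0))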
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        # Merge soft labels onto the fold table, then append pseudo-labeled rows.
        df_ = pd.read_csv(SOFT_PATH)
        df = df[[ID_COLUMNS, "fold_id"]].merge(df_, how="left", on=ID_COLUMNS)
        df = df.append(pd.read_csv(PSEUDO_PATH)).reset_index(drop=True)
        for c in ["EncodedPixels_1", "EncodedPixels_2",
                  "EncodedPixels_3", "EncodedPixels_4"]:
            df[c] = df[c].astype(str)
        # Pseudo-labeled rows have no fold_id; park them outside the validation fold.
        df["fold_id"] = df["fold_id"].fillna(FOLD_ID + 1)
        y = (df.sum_target != 0).astype("float32").values

    with timer('preprocessing'):
        train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID]
        y_train, y_val = y[df.fold_id != FOLD_ID], y[df.fold_id == FOLD_ID]

        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ], p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100, 140), p=0.5),
                RandomBrightnessContrast(p=0.5),
            ], p=0.5),
            OneOf([
                GaussNoise(p=0.5),
            ], p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
        ])
        val_augmentation = None

        train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                     id_colname=ID_COLUMNS,
                                     transforms=train_augmentation,
                                     crop_rate=1.0, class_y=y_train)
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                                  sampler=train_sampler, num_workers=8)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                                shuffle=False, num_workers=8)

        del train_df, val_df, df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp_old.Unet('resnet34',
                             encoder_weights="imagenet",
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION)
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        # Discriminative learning rates: decoder trains 10x faster than encoder.
        optimizer = torch.optim.Adam([
            {'params': model.decoder.parameters(), 'lr': 3e-3},
            {'params': model.encoder.parameters(), 'lr': 3e-4},
        ])
        # Fresh runs warm up before cosine annealing; fine-tuning runs anneal only.
        if base_model is None:
            scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE,
                                                 eta_min=3e-5)
            scheduler = GradualWarmupScheduler(optimizer, multiplier=1.1,
                                               total_epoch=CLR_CYCLE * 2,
                                               after_scheduler=scheduler_cosine)
        else:
            scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5)

        model, optimizer = amp.initialize(model, optimizer, opt_level="O1",
                                          verbosity=0)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        train_losses = []
        valid_losses = []
        best_model_loss = 999
        best_model_ep = 0
        checkpoint = base_ckpt + 1

        for epoch in range(1, EPOCHS + 1):
            seed = seed + epoch
            seed_torch(seed)
            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion, optimizer,
                                      device, cutmix_prob=0.0,
                                      classification=CLASSIFICATION)
            train_losses.append(tr_loss)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            valid_loss, val_score = validate(model, val_loader, criterion, device,
                                             classification=CLASSIFICATION)
            valid_losses.append(valid_loss)
            LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))
            LOGGER.info('Mean valid score: {}'.format(round(val_score, 5)))
            scheduler.step()

            if valid_loss < best_model_loss:
                torch.save(model.module.state_dict(),
                           'models/{}_fold{}_ckpt{}.pth'.format(EXP_ID, FOLD_ID,
                                                                checkpoint))
                best_model_loss = valid_loss
                best_model_ep = epoch
                #np.save("val_pred.npy", val_pred)

            # End of a CLR cycle: snapshot the weights and start a new checkpoint.
            if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1:
                torch.save(model.module.state_dict(),
                           'models/{}_fold{}_latest.pth'.format(EXP_ID, FOLD_ID))
                LOGGER.info('Best valid loss: {} on epoch={}'.format(
                    round(best_model_loss, 5), best_model_ep))
                checkpoint += 1
                best_model_loss = 999  # reset: best loss is tracked per cycle
                #del val_pred
            gc.collect()

    # Note: best_model_loss was reset at the last cycle boundary, so this logs
    # the best loss of the final cycle only.
    LOGGER.info('Best valid loss: {} on epoch={}'.format(
        round(best_model_loss, 5), best_model_ep))

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.plot(xs, valid_losses, label='Val loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Epochs')
    plt.savefig("loss.png")
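# `seed_torch` is called every epoch above but not defined in this snippet.
# A common implementation it is assumed to match: seed Python, NumPy and
# PyTorch (CPU and CUDA) RNGs and make cuDNN deterministic.
import os
import random

def seed_torch(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True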
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        if N_CLASSES == 3:
            # Drop defect class 2 and shift classes 3/4 down so columns stay contiguous.
            df.drop("EncodedPixels_2", axis=1, inplace=True)
            df = df.rename(columns={"EncodedPixels_3": "EncodedPixels_2"})
            df = df.rename(columns={"EncodedPixels_4": "EncodedPixels_3"})

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]
        val_augmentation = None
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                                shuffle=False, num_workers=8)

        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp_old.Unet('resnet34',
                             encoder_weights="imagenet",
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION)
        model.load_state_dict(torch.load(base_model))
        model.to(device)
        model.eval()
        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(model, val_loader, criterion,
                                                  device,
                                                  classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        all_scores = []
        # Per-class minimum connected-component size for post-processing.
        min_sizes = [300, 0, 600, 1600]
        for i in range(N_CLASSES):
            if i == 1:
                continue  # defect class 2 is skipped in the threshold search
            best = 0
            count = 0
            min_size = min_sizes[i]
            # Search thresholds 0.70..0.99; stop after 3 steps without improvement.
            for th in [0.7 + j * 0.01 for j in range(30)]:
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                scores_ = []
                all_scores_ = []
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    y_pred_ = post_process(y_pred_ > 0.5, y_pred_, min_size, th)
                    score = dice(y_val_, y_pred_)
                    if np.isnan(score):
                        scores_.append(1)  # both masks empty counts as a perfect score
                    else:
                        scores_.append(score)
                LOGGER.info('dice={} on {}'.format(np.mean(scores_), th))
                if np.mean(scores_) >= best:
                    best = np.mean(scores_)
                    count = 0
                else:
                    count += 1
                    if count == 3:
                        break
            scores.append(best)
            all_scores.append(all_scores_)
        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
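# `dice` and `post_process` are not shown in these snippets. Hedged sketches
# of the usual implementations for this kind of pipeline follow; the
# `post_process` signature is inferred from the call site above
# (post_process(mask, prob, min_size, threshold)), and the first argument may
# be unused in the author's version -- treat both as assumptions.
import cv2

def dice(y_true, y_pred, eps=1e-7):
    # Dice coefficient on binary masks. Returns NaN when both masks are
    # empty; the caller maps NaN to a perfect score of 1.
    intersection = (y_true * y_pred).sum()
    denom = y_true.sum() + y_pred.sum()
    if denom == 0:
        return np.nan
    return 2.0 * intersection / (denom + eps)

def post_process(mask, prob, min_size, threshold):
    # Threshold the probability map, then drop connected components smaller
    # than `min_size` pixels.
    num, labels = cv2.connectedComponents((prob > threshold).astype(np.uint8))
    out = np.zeros(prob.shape, dtype=np.uint8)
    for c in range(1, num):
        component = labels == c
        if component.sum() >= min_size:
            out[component] = 1
    return out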
        # Ensemble loading: CBAM-attention U-Nets first, then the older
        # smp_old variant (loop head and leading arguments reconstructed from
        # the parallel loop below).
        for model_path in model_pathes:
            model = smp.Unet('resnet34',
                             encoder_weights=None,
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION,
                             attention_type="cbam",
                             center=True)
            model.load_state_dict(torch.load(model_path))
            model.to(device)
            model.eval()
            models.append(model)
            del model
            torch.cuda.empty_cache()

        for model_path in model_pathes2:
            model = smp_old.Unet('resnet34',
                                 encoder_weights=None,
                                 classes=N_CLASSES,
                                 encoder_se_module=True,
                                 decoder_semodule=True,
                                 h_columns=False,
                                 skip=True,
                                 act="swish",
                                 freeze_bn=True,
                                 classification=CLASSIFICATION)
            model.load_state_dict(torch.load(model_path))
            model.to(device)
            model.eval()
            models.append(model)
            del model
            torch.cuda.empty_cache()

    with timer('predict'):
        rles, sub_ids = predict_dsv(models, test_loader, device)
        sub_df_ = pd.DataFrame({'ImageId_ClassId': sub_ids,
                                'EncodedPixels': rles})
        LOGGER.info(len(sub_df_))
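# The run-length encoder behind `EncodedPixels` is not included in these
# snippets. This is the standard competition-format encoder (column-major,
# 1-indexed) that `predict`/`predict_dsv` presumably rely on -- an
# assumption, not the author's verified code.
def mask2rle(img):
    pixels = img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)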
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]
        val_augmentation = None
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                                shuffle=False, num_workers=8)

        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        models = []
        # Ensemble: CBAM-attention U-Nets plus one model from the older smp fork.
        for p in base_model_res:
            model = smp.Unet('resnet34',
                             encoder_weights="imagenet",
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION,
                             attention_type="cbam",
                             center=True)
            model.load_state_dict(torch.load(p))
            model.to(device)
            model.eval()
            models.append(model)

        model = smp_old.Unet('resnet34',
                             encoder_weights="imagenet",
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION)
        model.load_state_dict(torch.load(base_model_res_old))
        model.to(device)
        model.eval()
        models.append(model)

        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(models, val_loader, criterion,
                                                  device,
                                                  classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        all_scores = []
        for i, th in enumerate(ths):
            # Above-threshold pixel count per image for this class.
            sum_val_preds = np.sum(
                y_pred[:, i, :, :].reshape(len(y_pred), -1) > th, axis=1)
            best = 0
            for n_th, remove_mask_pixel in enumerate(
                    [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800]):
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                # Zero out whole masks whose above-threshold area is too small.
                val_preds_[sum_val_preds < remove_mask_pixel] = 0
                scores_ = []
                all_scores_ = []
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    score = dice(y_val_, y_pred_ > 0.5)
                    if np.isnan(score):
                        scores_.append(1)
                    else:
                        scores_.append(score)
                LOGGER.info('dice={} on {}'.format(np.mean(scores_),
                                                   remove_mask_pixel))
                if np.mean(scores_) >= best:
                    best = np.mean(scores_)
                all_scores_.append(np.mean(scores_))
            scores.append(np.mean(scores_))
            all_scores.append(all_scores_)
        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
        np.save("all_scores_fold{}.npy".format(FOLD_ID), np.array(all_scores))
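# The ensemble `predict` used above is not defined in these snippets. A
# hedged sketch of the assumed behaviour: run every model on each batch,
# average the sigmoid outputs, and accumulate the BCE loss. The real version
# also returns per-image classification outputs (`cls`), stubbed here.
def predict(models, loader, criterion, device, classification=False):
    losses, preds, trues = [], [], []
    with torch.no_grad():
        for imgs, targets in loader:
            imgs, targets = imgs.to(device), targets.to(device)
            # Average logits across the ensemble before the loss/sigmoid.
            logits = torch.stack([m(imgs) for m in models]).mean(dim=0)
            losses.append(criterion(logits, targets).item())
            preds.append(torch.sigmoid(logits).cpu().numpy())
            trues.append(targets.cpu().numpy())
    cls = None  # classification-head outputs omitted in this sketch
    return np.mean(losses), np.concatenate(preds), np.concatenate(trues), cls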
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        y = (df.sum_target != 0).astype("float32").values

    with timer('preprocessing'):
        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ], p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100, 140), p=0.5),
                RandomBrightnessContrast(p=0.5),
                RandomBrightness(p=0.5),
                RandomContrast(p=0.5)
            ], p=0.5),
            OneOf([
                GaussNoise(p=0.5),
                Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5)
            ], p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
        ])
        val_augmentation = None

        # Full-data training: no validation fold is held out.
        train_dataset = SeverDataset(df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                     id_colname=ID_COLUMNS,
                                     transforms=train_augmentation,
                                     crop_rate=1.0, class_y=y)
        train_sampler = MaskProbSampler(df, demand_non_empty_proba=0.6)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                                  sampler=train_sampler, num_workers=8)

        del df, train_dataset
        gc.collect()

    with timer('create model'):
        model = smp_old.Unet('resnet34',
                             encoder_weights="imagenet",
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION)
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam([
            {'params': model.decoder.parameters(), 'lr': 3e-3},
            {'params': model.encoder.parameters(), 'lr': 3e-4},
        ])
        #if base_model is None:
        #    scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5)
        #    scheduler = GradualWarmupScheduler(optimizer, multiplier=1.1, total_epoch=CLR_CYCLE*2, after_scheduler=scheduler_cosine)
        #else:
        #    scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5)

        model, optimizer = amp.initialize(model, optimizer, opt_level="O1",
                                          verbosity=0)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        # Resumes from epoch 71 (earlier epochs were trained in a previous run).
        for epoch in range(71, EPOCHS + 1):
            seed = seed + epoch
            seed_torch(seed)
            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion, optimizer,
                                      device, cutmix_prob=0.0,
                                      classification=CLASSIFICATION)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))
            #scheduler.step()

            # Snapshot at the end of each CLR cycle.
            if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1:
                torch.save(model.module.state_dict(),
                           'models/{}_latest.pth'.format(EXP_ID))
            gc.collect()
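# `MaskProbSampler` is not defined in these snippets. A minimal sketch of the
# assumed behaviour: each drawn index points to an image with at least one
# non-empty defect mask with probability `demand_non_empty_proba`, using the
# `sum_target` column already present in the fold dataframe.
from torch.utils.data import Sampler

class MaskProbSampler(Sampler):
    def __init__(self, df, demand_non_empty_proba):
        self.positive_idx = np.where(df.sum_target.values != 0)[0]
        self.negative_idx = np.where(df.sum_target.values == 0)[0]
        self.p = demand_non_empty_proba
        self.length = len(df)

    def __len__(self):
        return self.length

    def __iter__(self):
        # For each slot, flip a biased coin, then draw from the matching pool.
        take_positive = np.random.rand(self.length) < self.p
        idx = np.where(take_positive,
                       np.random.choice(self.positive_idx, self.length),
                       np.random.choice(self.negative_idx, self.length))
        return iter(idx.tolist())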