def prepare(self):
    model_class = model_dict[self.config.model_class]
    self.model = model_class(**self.config.model_params.config_dict)

    if self.resume_path is not None:
        self.model, check_point = self._load_model(
            self.resume_path, model_dict[self.config.model_class],
            self.config.model_params.config_dict, self.model)
        self.best_model_loss = float('inf')
        if self.config.restart:
            self.start_epoch = 0
            self.best_model_loss = float('inf')
        else:
            self.start_epoch = check_point['epoch'] + 1
            self.best_model_loss = check_point['loss']

    if self.use_cuda:
        self.model = convert_model(self.model)
        self.model = nn.DataParallel(self.model)
        self.model.to("cuda")

    loader_class = loader_dict[self.config.loader_class]
    self.dataloader = loader_class(**self.config.loader_params.config_dict)
    self.train_loader = self.dataloader.train_loader
    self.valid_loader = self.dataloader.val_loader
    self.test_loader = self.dataloader.test_loader

    self.train_params = [{
        'params': self.model.parameters(),
        'lr': self.base_lr[0],
        'weight_decay': self.weight_decay
    }]
    if self.config.optim_key == 'sgd':
        self.optimizer = optim.SGD(self.train_params,
                                   nesterov=False,
                                   weight_decay=self.weight_decay,
                                   momentum=self.momentum)
    elif self.config.optim_key == 'adam':
        self.optimizer = optim.Adam(self.train_params)
    else:
        raise RuntimeError('optim error')

    self.scheduler = PolyScheduler(self.base_lr, self.total_epoch,
                                   len(self.train_loader), warmup_epochs=0)
    self.pred_loss = loss_dict[self.config.loss_class](
        **self.config.loss_params.config_dict)

    self.seg_metric_board = metric_dict[self.config.seg_metric_class]
    self.seg_evaluator = MetricEvaluator(self.seg_metric_board)
    self.cls_metric_board = metric_dict[self.config.cls_metric_class]
    self.cls_evaluator = MetricEvaluator(self.cls_metric_board)
    self.kf_metric_board = metric_dict[self.config.kf_metric_class]
    self.kf_evaluator = MetricEvaluator(self.kf_metric_board)
    self.best_rlt = None
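# `PolyScheduler` is not defined in this snippet; it receives the base learning
# rates, the epoch count and the number of batches per epoch, which suggests a
# per-iteration polynomial ("poly") decay. The sketch below is an assumed
# approximation built on torch.optim.lr_scheduler.LambdaLR, not the actual
# PolyScheduler implementation; the `power` value and warmup handling are guesses.
import torch
from torch.optim.lr_scheduler import LambdaLR


def make_poly_scheduler(optimizer, total_epochs, steps_per_epoch, power=0.9, warmup_epochs=0):
    total_steps = max(total_epochs * steps_per_epoch, 1)
    warmup_steps = warmup_epochs * steps_per_epoch

    def poly_lambda(step):
        if warmup_steps > 0 and step < warmup_steps:
            return (step + 1) / warmup_steps       # linear warmup
        progress = min(step, total_steps) / total_steps
        return (1.0 - progress) ** power           # polynomial decay towards zero

    return LambdaLR(optimizer, lr_lambda=poly_lambda)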
def init_net(net, init_type='normal', init_gain=0.02, gpu_ids=[], init_flag=True):
    if len(gpu_ids) > 0:
        assert torch.cuda.is_available()
        net = convert_model(net)
        net.to(gpu_ids[0])
        net = torch.nn.DataParallel(net, gpu_ids)
    if init_flag:
        init_weights(net, init_type, gain=init_gain)
    return net
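# `init_weights` is not included in this snippet. A common implementation (which
# this sketch only approximates) walks the module tree and initialises Conv/Linear
# weights according to `init_type`; the exact layer filtering used by the original
# project may differ.
import torch.nn as nn
from torch.nn import init


def init_weights(net, init_type='normal', gain=0.02):
    def init_func(m):
        classname = m.__class__.__name__
        if hasattr(m, 'weight') and ('Conv' in classname or 'Linear' in classname):
            if init_type == 'normal':
                init.normal_(m.weight.data, 0.0, gain)
            elif init_type == 'xavier':
                init.xavier_normal_(m.weight.data, gain=gain)
            elif init_type == 'kaiming':
                init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
            elif init_type == 'orthogonal':
                init.orthogonal_(m.weight.data, gain=gain)
            if hasattr(m, 'bias') and m.bias is not None:
                init.constant_(m.bias.data, 0.0)
        elif 'BatchNorm2d' in classname:
            init.normal_(m.weight.data, 1.0, gain)
            init.constant_(m.bias.data, 0.0)

    net.apply(init_func)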
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]
        val_augmentation = None
        val_dataset = SeverDatasetTest(val_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                       id_colname=ID_COLUMNS,
                                       transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                                shuffle=False, num_workers=8)

        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        models = []
        model = smp_old.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES,
                             encoder_se_module=True, decoder_semodule=True,
                             h_columns=False, skip=True, act="swish",
                             freeze_bn=True, classification=CLASSIFICATION)
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)
        models.append(model)

    with timer('predict'):
        rles, sub_ids = predict(models, val_loader, device)
        sub_df = pd.DataFrame({
            'ImageId_ClassId': sub_ids,
            'EncodedPixels': rles
        })
        LOGGER.info(sub_df.head())
        sub_df.to_csv('{}_{}.csv'.format(EXP_ID, FOLD_ID), index=False)
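# The `timer` context manager used throughout these scripts is not defined here.
# A minimal sketch of a timing context manager (the exact name and logging style
# of the original helper are assumed):
import time
from contextlib import contextmanager


@contextmanager
def timer(name):
    start = time.time()
    yield
    print('[{}] done in {:.1f} s'.format(name, time.time() - start))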
def __init__(self, mode, optim, scheduler, model, config, model_dir, device,
             device_ids, num_classes):
    assert mode in ['training', 'inference']
    self.mode = mode
    self.model = model
    self.cuda = torch.cuda.is_available()
    self.model_dir = model_dir
    self.optim = optim
    self.epoch = 0
    self.num_classes = num_classes
    self.gamma = 0.2
    self.config = config
    self.scheduler = scheduler
    self.set_log_dir()
    self.device = device
    self.device_ids = device_ids
    self.model = convert_model(self.model)
    self.model = self.model.to(self.device)
    self.model = nn.DataParallel(self.model, self.device_ids)
def load_from_file_model_optimizer_scheduler(
    filename,
    model,
    optimizer,
    scheduler,
    data_parallel=True,
    sync_batch=False,
):
    checkpoint = torch.load(filename)
    if data_parallel:
        model = nn.DataParallel(model)
    if sync_batch:
        model = convert_model(model)
    model.load_state_dict(checkpoint['state_dict_1'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    scheduler.load_state_dict(checkpoint['scheduler'])
    return model, optimizer, scheduler
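# Because the loader above wraps the model in nn.DataParallel before calling
# load_state_dict, the checkpoint is expected to hold 'module.'-prefixed keys.
# A matching save helper could look like the sketch below; the function name is
# assumed, only the 'state_dict_1' / 'optimizer' / 'scheduler' keys come from the
# loader itself.
import torch


def save_to_file_model_optimizer_scheduler(filename, model, optimizer, scheduler):
    torch.save(
        {
            'state_dict_1': model.state_dict(),   # wrapped model -> 'module.'-prefixed keys
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
        },
        filename,
    )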
def __init__(self, config):
    super().__init__()
    self.config = config
    self.model = EfficientNet.from_pretrained(config.backbone_name)
    self.model = convert_model(self.model)
    self.c = {
        'efficientnet-b0': 1280,
        'efficientnet-b1': 1280,
        'efficientnet-b2': 1408,
        'efficientnet-b3': 1536,
        'efficientnet-b4': 1792,
        'efficientnet-b5': 2048,
        'efficientnet-b6': 2304,
        'efficientnet-b7': 2560,
    }[config.backbone_name]
    self.dropout = nn.Dropout(config.dropout)
    self.out = nn.Linear(in_features=self.c, out_features=config.num_targets, bias=True)
    self.sub_1 = nn.Linear(in_features=self.c, out_features=3, bias=True)
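# The forward pass for this head is not part of the snippet. With the
# efficientnet_pytorch package the usual pattern is extract_features followed by
# global average pooling, dropout and the two linear heads; the sketch below is an
# assumption about how `out` and `sub_1` are used, not the original forward().
import torch.nn.functional as F


def forward(self, x):
    features = self.model.extract_features(x)               # (B, C, H, W)
    pooled = F.adaptive_avg_pool2d(features, 1).flatten(1)  # (B, C)
    pooled = self.dropout(pooled)
    return self.out(pooled), self.sub_1(pooled)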
def __init__(self, classes, lstm_units, pool='avgpool', pretrain=True, sync_bn=False,
             load_lstm=None, load_backbone=None, train_backbone=True):
    super().__init__()
    resnet = models.resnet50(pretrained=pretrain)
    in_planes = resnet.fc.in_features
    if pool == 'groupconv':
        resnet.avgpool = nn.Sequential(
            nn.Conv2d(in_planes, in_planes, (7, 7), groups=32, bias=False)
        )
        print('Using group conv for pooling')
    resnet.fc = nn.Sequential()
    if load_backbone is not None:
        load_network(resnet, load_backbone, 'module.backbone.')
    if sync_bn:
        print('Convert model using sync bn')
        resnet = convert_model(resnet)
    for param in resnet.parameters():
        param.requires_grad = train_backbone
    self.backbone = resnet
    self.lstm = nn.LSTM(in_planes, lstm_units)
    if load_lstm is not None:
        load_network(self.lstm, load_lstm, 'module.lstm.')
    self.fc = nn.Linear(lstm_units, classes)
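# The sequence forward pass for this CNN+LSTM module is not shown. A typical
# pattern folds the time dimension into the batch for the ResNet, then feeds the
# per-frame features to the LSTM (which here keeps the default seq-first layout).
# This is an assumed sketch, not the original forward().
def forward(self, x):
    # x: (batch, seq_len, 3, H, W)
    b, t = x.shape[:2]
    feats = self.backbone(x.flatten(0, 1))                       # (b*t, in_planes)
    feats = feats.view(b, t, -1).transpose(0, 1).contiguous()    # (t, b, in_planes)
    out, _ = self.lstm(feats)
    return self.fc(out[-1])                                      # predict from last step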
def main(seed): with timer('load data'): df = pd.read_csv(FOLD_PATH) soft_df = pd.read_csv(SOFT_PATH) df = df.append(pd.read_csv(PSEUDO_PATH)).reset_index(drop=True) soft_df = soft_df.append( pd.read_csv(PSEUDO_PATH)).reset_index(drop=True) soft_df = df[[ID_COLUMNS]].merge(soft_df, how="left", on=ID_COLUMNS) LOGGER.info(df.head()) LOGGER.info(soft_df.head()) for c in [ "EncodedPixels_1", "EncodedPixels_2", "EncodedPixels_3", "EncodedPixels_4" ]: df[c] = df[c].astype(str) soft_df[c] = soft_df[c].astype(str) df["fold_id"] = df["fold_id"].fillna(FOLD_ID + 1) y = (df.sum_target != 0).astype("float32").values y += (soft_df.sum_target != 0).astype("float32").values y = y / 2 with timer('preprocessing'): train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID] train_soft_df, val_soft_df = soft_df[df.fold_id != FOLD_ID], soft_df[ df.fold_id == FOLD_ID] y_train, y_val = y[df.fold_id != FOLD_ID], y[df.fold_id == FOLD_ID] train_augmentation = Compose([ Flip(p=0.5), OneOf([ GridDistortion(p=0.5), OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5) ], p=0.5), OneOf([ RandomGamma(gamma_limit=(100, 140), p=0.5), RandomBrightnessContrast(p=0.5), ], p=0.5), OneOf([ GaussNoise(p=0.5), ], p=0.5), ShiftScaleRotate(rotate_limit=20, p=0.5), ]) val_augmentation = None train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=train_augmentation, crop_rate=1.0, class_y=y_train, soft_df=train_soft_df) val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=val_augmentation, soft_df=val_soft_df) train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6) train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=8) val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8) del train_df, val_df, df, train_dataset, val_dataset gc.collect() with timer('create model'): model = smp_old.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES, encoder_se_module=True, decoder_semodule=True, h_columns=False, skip=True, act="swish", freeze_bn=True, classification=CLASSIFICATION) model = convert_model(model) if base_model is not None: model.load_state_dict(torch.load(base_model)) model.to(device) criterion = torch.nn.BCEWithLogitsLoss() optimizer = torch.optim.Adam([ { 'params': model.decoder.parameters(), 'lr': 3e-3 }, { 'params': model.encoder.parameters(), 'lr': 3e-4 }, ]) if base_model is None: scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5) scheduler = GradualWarmupScheduler( optimizer, multiplier=1.1, total_epoch=CLR_CYCLE * 2, after_scheduler=scheduler_cosine) else: scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5) model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0) model = torch.nn.DataParallel(model) with timer('train'): train_losses = [] valid_losses = [] best_model_loss = 999 best_model_ep = 0 checkpoint = base_ckpt + 1 for epoch in range(1, EPOCHS + 1): seed = seed + epoch seed_torch(seed) LOGGER.info("Starting {} epoch...".format(epoch)) tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device, cutmix_prob=0.0, classification=CLASSIFICATION) train_losses.append(tr_loss) LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5))) valid_loss, val_score = validate(model, val_loader, criterion, device, classification=CLASSIFICATION) valid_losses.append(valid_loss) LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 
5))) LOGGER.info('Mean valid score: {}'.format(round(val_score, 5))) scheduler.step() if valid_loss < best_model_loss: torch.save( model.module.state_dict(), 'models/{}_fold{}_ckpt{}.pth'.format( EXP_ID, FOLD_ID, checkpoint)) best_model_loss = valid_loss best_model_ep = epoch #np.save("val_pred.npy", val_pred) if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1: torch.save( model.module.state_dict(), 'models/{}_fold{}_latest.pth'.format(EXP_ID, FOLD_ID)) LOGGER.info('Best valid loss: {} on epoch={}'.format( round(best_model_loss, 5), best_model_ep)) checkpoint += 1 best_model_loss = 999 #del val_pred gc.collect() LOGGER.info('Best valid loss: {} on epoch={}'.format( round(best_model_loss, 5), best_model_ep)) xs = list(range(1, len(train_losses) + 1)) plt.plot(xs, train_losses, label='Train loss') plt.plot(xs, valid_losses, label='Val loss') plt.legend() plt.xticks(xs) plt.xlabel('Epochs') plt.savefig("loss.png")
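# `seed_torch`, called at the start of every epoch in the loop above, is not
# defined in this snippet. A common implementation (assumed here) seeds Python,
# NumPy and PyTorch for reproducibility:
import os
import random

import numpy as np
import torch


def seed_torch(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True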
def main(config): opts = config() path = opts.path train = pd.read_csv(f'{path}/train.csv') sub = pd.read_csv(f'{path}/sample_submission.csv') n_train = len(os.listdir(f'{path}/train_images')) n_test = len(os.listdir(f'{path}/test_images')) sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[1]) sub['im_id'] = sub['Image_Label'].apply(lambda x: x.split('_')[0]) train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(lambda x: x.split('_')[1]).value_counts() train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply( lambda x: x.split('_')[0]).value_counts().value_counts() train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1]) train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0]) valid_ids = pd.read_csv("csvs/valid_threshold.csv")["img_id"].values test_ids = sub['Image_Label'].apply( lambda x: x.split('_')[0]).drop_duplicates().values # print(valid_ids) ENCODER = opts.backborn ENCODER_WEIGHTS = opts.encoder_weights DEVICE = 'cuda' ACTIVATION = None model = get_model(model_type=opts.model_type, encoder=ENCODER, encoder_weights=ENCODER_WEIGHTS, activation=ACTIVATION, n_classes=opts.class_num, task=opts.task, attention_type=opts.attention_type, head='simple', center=opts.center, tta=opts.tta) if opts.refine: model = get_ref_model(infer_model=model, encoder=opts.ref_backborn, encoder_weights=ENCODER_WEIGHTS, activation=ACTIVATION, n_classes=opts.class_num, preprocess=opts.preprocess, tta=opts.tta) model = convert_model(model) preprocessing_fn = encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS) encoded_pixels = [] runner = SupervisedRunner() probabilities = np.zeros((2220, 350, 525)) for i in range(opts.fold_max): if opts.refine: logdir = f"{opts.logdir}_refine/fold{i}" else: logdir = f"{opts.logdir}/fold{i}" valid_dataset = CloudDataset( df=train, datatype='valid', img_ids=valid_ids, transforms=get_validation_augmentation(opts.img_size), preprocessing=get_preprocessing(preprocessing_fn)) valid_loader = DataLoader(valid_dataset, batch_size=opts.batchsize, shuffle=False, num_workers=opts.num_workers) loaders = {"infer": valid_loader} runner.infer( model=model, loaders=loaders, callbacks=[ CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"), InferCallback() ], ) valid_masks = [] for i, (batch, output) in enumerate( tqdm.tqdm( zip(valid_dataset, runner.callbacks[0].predictions["logits"]))): image, mask = batch for m in mask: if m.shape != (350, 525): m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR) valid_masks.append(m) for j, probability in enumerate(output): if probability.shape != (350, 525): probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR) probabilities[i * 4 + j, :, :] += sigmoid(probability) probabilities /= opts.fold_max if opts.tta: np.save( f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_tta_valid.npy', probabilities) else: np.save( f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_valid.npy', probabilities) torch.cuda.empty_cache() gc.collect() class_params = {} cv_d = [] for class_id in tqdm.trange(opts.class_num, desc='class_id', leave=False): # print(class_id) attempts = [] for tt in tqdm.trange(0, 100, 10, desc='top_threshold', leave=False): tt /= 100 for bt in tqdm.trange(0, 100, 10, desc='bot_threshold', leave=False): bt /= 100 for ms in tqdm.tqdm([ 0, 100, 1000, 5000, 10000, 11000, 14000, 15000, 16000, 18000, 19000, 20000, 21000, 23000, 25000, 27000, 30000, 50000 ], 
desc='min_size', leave=False): masks = [] for i in range(class_id, len(probabilities), 4): probability = probabilities[i] predict, num_predict = post_process( probability, tt, ms, bt) masks.append(predict) d = [] for i, j in zip(masks, valid_masks[class_id::4]): # print(i.shape, j.shape) if (i.sum() == 0) & (j.sum() == 0): d.append(1) else: d.append(dice(i, j)) attempts.append((tt, ms, bt, np.mean(d))) attempts_df = pd.DataFrame( attempts, columns=['top_threshold', 'size', 'bottom_threshold', 'dice']) attempts_df = attempts_df.sort_values('dice', ascending=False) print(attempts_df.head()) cv_d.append(attempts_df['dice'].values[0]) best_top_threshold = attempts_df['top_threshold'].values[0] best_size = attempts_df['size'].values[0] best_bottom_threshold = attempts_df['bottom_threshold'].values[0] class_params[class_id] = (best_top_threshold, best_size, best_bottom_threshold) cv_d = np.array(cv_d) print("CV Dice:", np.mean(cv_d)) pathlist = [ "../input/test_images/" + i.split("_")[0] for i in sub['Image_Label'] ] del masks del valid_masks del probabilities gc.collect() ############# predict ################### probabilities = np.zeros((n_test, 4, 350, 525)) for fold in tqdm.trange(opts.fold_max, desc='fold loop'): if opts.refine: logdir = f"{opts.logdir}_refine/fold{fold}" else: logdir = f"{opts.logdir}/fold{fold}" # loaders = {"test": test_loader} test_dataset = CloudDataset( df=sub, datatype='test', img_ids=test_ids, transforms=get_validation_augmentation(opts.img_size), preprocessing=get_preprocessing(preprocessing_fn)) test_loader = DataLoader(test_dataset, batch_size=opts.batchsize, shuffle=False, num_workers=opts.num_workers) runner_out = runner.predict_loader( model, test_loader, resume=f"{logdir}/checkpoints/best.pth", verbose=True) for i, batch in enumerate( tqdm.tqdm(runner_out, desc='probability loop')): for j, probability in enumerate(batch): if probability.shape != (350, 525): probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR) probabilities[i, j, :, :] += sigmoid(probability) gc.collect() probabilities /= opts.fold_max if opts.tta: np.save( f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_tta_test.npy', probabilities) else: np.save( f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_test.npy', probabilities) image_id = 0 print("##################### start post_process #####################") for i in tqdm.trange(n_test, desc='post porocess loop'): for probability in probabilities[i]: predict, num_predict = post_process(probability, class_params[image_id % 4][0], class_params[image_id % 4][1], class_params[image_id % 4][2]) if num_predict == 0: encoded_pixels.append('') else: black_mask = get_black_mask(pathlist[image_id]) predict = np.multiply(predict, black_mask) r = mask2rle(predict) encoded_pixels.append(r) image_id += 1 gc.collect() print("##################### Finish post_process #####################") ####################################### sub['EncodedPixels'] = encoded_pixels sub.to_csv( f'submissions/submission_{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}.csv', columns=['Image_Label', 'EncodedPixels'], index=False)
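# `post_process` is not included in this snippet. Judging from the call sites
# (probability map, top threshold, minimum size, bottom threshold), one plausible
# reading is a hysteresis-style cleanup: components are seeded at the strict top
# threshold, grown to the looser bottom threshold, and dropped if smaller than
# min_size. This is an assumed sketch, not the original implementation.
import cv2
import numpy as np


def post_process(probability, top_threshold, min_size, bottom_threshold):
    seed_mask = (probability > top_threshold).astype(np.uint8)
    grow_mask = (probability > bottom_threshold).astype(np.uint8)
    num_components, components = cv2.connectedComponents(grow_mask)

    predictions = np.zeros(probability.shape, np.float32)
    num_predict = 0
    for c in range(1, num_components):
        component = components == c
        # keep a component only if it is large enough and overlaps the seed mask
        if component.sum() > min_size and seed_mask[component].sum() > 0:
            predictions[component] = 1
            num_predict += 1
    return predictions, num_predict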
def main(seed): with timer('load data'): df = pd.read_csv(FOLD_PATH) y1 = (df.EncodedPixels_1 != "-1").astype("float32").values.reshape(-1, 1) y2 = (df.EncodedPixels_2 != "-1").astype("float32").values.reshape(-1, 1) y3 = (df.EncodedPixels_3 != "-1").astype("float32").values.reshape(-1, 1) y4 = (df.EncodedPixels_4 != "-1").astype("float32").values.reshape(-1, 1) y = np.concatenate([y1, y2, y3, y4], axis=1) with timer('preprocessing'): train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID] y_train, y_val = y[df.fold_id != FOLD_ID], y[df.fold_id == FOLD_ID] train_augmentation = Compose([ Flip(p=0.5), OneOf([ GridDistortion(p=0.5), OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5) ], p=0.5), OneOf([ RandomGamma(gamma_limit=(100,140), p=0.5), RandomBrightnessContrast(p=0.5), ], p=0.5), OneOf([ GaussNoise(p=0.5), ], p=0.5), ShiftScaleRotate(rotate_limit=20, p=0.5), ]) val_augmentation = None train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=train_augmentation, crop_rate=1.0, class_y=y_train, gamma=GAMMA) val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=val_augmentation, gamma=GAMMA) train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6) train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=8) val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8) del train_df, val_df, df, train_dataset, val_dataset gc.collect() with timer('create model'): model = smp.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES, encoder_se_module=True, decoder_semodule=True, h_columns=False, skip=True, act="swish", freeze_bn=True, classification=CLASSIFICATION, attention_type="cbam", center=True) model = convert_model(model) if base_model is not None: model.load_state_dict(torch.load(base_model)) model.to(device) criterion = torch.nn.BCEWithLogitsLoss() optimizer = torch.optim.SGD( model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0001, nesterov=False, ) scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=0) model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0) model = torch.nn.DataParallel(model) with timer('train'): train_losses = [] valid_losses = [] best_model_loss = 999 best_model_ep = 0 best_model_score = 0 checkpoint = base_ckpt+1 for epoch in range(84, EPOCHS + 1): seed = seed + epoch seed_torch(seed) LOGGER.info("Starting {} epoch...".format(epoch)) tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device, cutmix_prob=0.0, classification=CLASSIFICATION) train_losses.append(tr_loss) LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5))) valid_loss, val_score = validate(model, val_loader, criterion, device, classification=CLASSIFICATION) valid_losses.append(valid_loss) LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5))) LOGGER.info('Mean valid score: {}'.format(round(val_score, 5))) scheduler.step() if val_score > best_model_score: torch.save(model.module.state_dict(), 'models/{}_fold{}_ckpt{}_score.pth'.format(EXP_ID, FOLD_ID, checkpoint)) best_model_score = val_score best_model_ep_score = epoch if valid_loss < best_model_loss: torch.save(model.module.state_dict(), 'models/{}_fold{}_ckpt{}.pth'.format(EXP_ID, FOLD_ID, checkpoint)) best_model_loss = valid_loss best_model_ep = epoch if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1: torch.save(model.module.state_dict(), 
'models/{}_fold{}_latest.pth'.format(EXP_ID, FOLD_ID)) LOGGER.info('Best valid loss: {} on epoch={}'.format(round(best_model_loss, 5), best_model_ep)) LOGGER.info('Best valid score: {} on epoch={}'.format(round(best_model_score, 5), best_model_ep_score)) checkpoint += 1 best_model_loss = 999 best_model_score = 0 #del val_pred gc.collect() LOGGER.info('Best valid loss: {} on epoch={}'.format(round(best_model_loss, 5), best_model_ep)) xs = list(range(1, len(train_losses) + 1)) plt.plot(xs, train_losses, label='Train loss') plt.plot(xs, valid_losses, label='Val loss') plt.legend() plt.xticks(xs) plt.xlabel('Epochs') plt.savefig("loss.png")
)
# init model
model = MODEL(num_classes=N_CLASSES, dropout_p=DROPOUT)

# iterate through state dicts in specified folder and obtain predictions for each
submissions = []
for file in os.listdir(MODELS_FOLDER):
    model.load_state_dict(torch.load(f'{MODELS_FOLDER}/{file}')['model'])
    # if 'swa' in file:
    #     model.load_state_dict(torch.load(f'{MODELS_FOLDER}/{file}'))
    # else:
    #     model.load_state_dict(torch.load(f'{MODELS_FOLDER}/{file}')['model'])
    model.to(DEVICE)

    learner = AccGradLearner(data, model)
    learner.loss_func = None
    learner.model = convert_model(learner.model)
    learner.model = nn.DataParallel(learner.model).to(DEVICE)
    learner.model.eval()

    print(f'Inferring from model {file}')
    submissions.append(prepare_submit(learner))

# merge submissions and store into file
if len(submissions) == 1:
    save_submit(submissions[0])
else:
    save_submit(merge_submissions(submissions))
def main(seed): with timer('load data'): df = pd.read_csv(FOLD_PATH) with timer('preprocessing'): train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID] train_augmentation = Compose([ Flip(p=0.5), OneOf([ GridDistortion(p=0.5), OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5) ], p=0.5), OneOf([ RandomGamma(gamma_limit=(100, 140), p=0.5), RandomBrightnessContrast(p=0.5), RandomBrightness(p=0.5), RandomContrast(p=0.5) ], p=0.5), OneOf([ GaussNoise(p=0.5), Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5) ], p=0.5), ShiftScaleRotate(rotate_limit=20, p=0.5), ]) val_augmentation = None train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=train_augmentation, crop_rate=1.0) val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=val_augmentation) train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6) train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=8) val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8) del train_df, val_df, df, train_dataset, val_dataset gc.collect() with timer('create model'): model = smp.Unet('se_resnext50_32x4d', encoder_weights="imagenet", classes=N_CLASSES, encoder_se_module=True, decoder_semodule=True, h_columns=False, skip=True, act="swish") model = convert_model(model) if base_model is not None: model.load_state_dict(torch.load(base_model)) model.to(device) criterion = torch.nn.BCEWithLogitsLoss() optimizer = torch.optim.Adam(model.parameters(), lr=3e-4) if base_model is None: scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5) scheduler = GradualWarmupScheduler( optimizer, multiplier=1.1, total_epoch=CLR_CYCLE * 2, after_scheduler=scheduler_cosine) else: scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5) model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0) model = torch.nn.DataParallel(model) with timer('train'): train_losses = [] valid_losses = [] best_model_loss = 999 best_model_ep = 0 checkpoint = base_ckpt + 1 for epoch in range(1, EPOCHS + 1): seed = seed + epoch seed_torch(seed) LOGGER.info("Starting {} epoch...".format(epoch)) tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device, cutmix_prob=0.0) train_losses.append(tr_loss) LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5))) valid_loss = validate(model, val_loader, criterion, device) valid_losses.append(valid_loss) LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5))) scheduler.step() if valid_loss < best_model_loss: torch.save( model.module.state_dict(), 'models/{}_fold{}_ckpt{}.pth'.format( EXP_ID, FOLD_ID, checkpoint)) best_model_loss = valid_loss best_model_ep = epoch #np.save("val_pred.npy", val_pred) if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1: torch.save( model.module.state_dict(), 'models/{}_fold{}_latest.pth'.format(EXP_ID, FOLD_ID)) LOGGER.info('Best valid loss: {} on epoch={}'.format( round(best_model_loss, 5), best_model_ep)) checkpoint += 1 best_model_loss = 999 #del val_pred gc.collect() LOGGER.info('Best valid loss: {} on epoch={}'.format( round(best_model_loss, 5), best_model_ep)) xs = list(range(1, len(train_losses) + 1)) plt.plot(xs, train_losses, label='Train loss') plt.plot(xs, valid_losses, label='Val loss') plt.legend() plt.xticks(xs) plt.xlabel('Epochs') plt.savefig("loss.png")
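# `MaskProbSampler` is used in several of these training scripts but never shown.
# From its usage it appears to be a sampler that draws images containing at least
# one non-empty mask with probability `demand_non_empty_proba`. The sketch below,
# including the assumption that an empty mask is encoded as "-1" in the
# EncodedPixels_* columns, is illustrative only.
import numpy as np
from torch.utils.data import Sampler


class MaskProbSampler(Sampler):
    def __init__(self, df, demand_non_empty_proba):
        assert 0 < demand_non_empty_proba <= 1
        self.positive_proba = demand_non_empty_proba
        mask_cols = [c for c in df.columns if c.startswith("EncodedPixels")]
        non_empty = (df[mask_cols] != "-1").any(axis=1).values
        self.positive_idxs = np.where(non_empty)[0]
        self.negative_idxs = np.where(~non_empty)[0]
        self.n_positive = len(self.positive_idxs)
        self.n_negative = int(self.n_positive * (1 - self.positive_proba) / self.positive_proba)

    def __iter__(self):
        negative_sample = np.random.choice(self.negative_idxs, size=self.n_negative)
        shuffled = np.random.permutation(np.hstack((negative_sample, self.positive_idxs)))
        return iter(shuffled.tolist())

    def __len__(self):
        return self.n_positive + self.n_negative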
def main(config): opts = config() path = opts.path train = pd.read_csv(f'{path}/train.csv') pseudo_label = pd.read_csv( './submissions/submission_segmentation_and_classifier.csv') n_train = len(os.listdir(f'{path}/train_images')) n_test = len(os.listdir(f'{path}/test_images')) print(f'There are {n_train} images in train dataset') print(f'There are {n_test} images in test dataset') train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(lambda x: x.split('_')[1]).value_counts() train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply( lambda x: x.split('_')[0]).value_counts().value_counts() train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1]) train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0]) id_mask_count = train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(lambda x: x.split('_')[ 0]).value_counts().reset_index().rename( columns={ 'index': 'img_id', 'Image_Label': 'count' }) print(id_mask_count.head()) pseudo_label.loc[pseudo_label['EncodedPixels'].isnull() == False, 'Image_Label'].apply( lambda x: x.split('_')[1]).value_counts() pseudo_label.loc[pseudo_label['EncodedPixels'].isnull() == False, 'Image_Label'].apply(lambda x: x.split('_')[0] ).value_counts().value_counts() pseudo_label['label'] = pseudo_label['Image_Label'].apply( lambda x: x.split('_')[1]) pseudo_label['im_id'] = pseudo_label['Image_Label'].apply( lambda x: x.split('_')[0]) pseudo_label_ids = pseudo_label.loc[ pseudo_label['EncodedPixels'].isnull() == False, 'Image_Label'].apply( lambda x: x.split('_')[0]).value_counts().reset_index().rename( columns={ 'index': 'img_id', 'Image_Label': 'count' }) print(pseudo_label_ids.head()) if not os.path.exists("csvs/train_all.csv"): train_ids, valid_ids = train_test_split( id_mask_count, random_state=39, stratify=id_mask_count['count'], test_size=0.1) valid_ids.to_csv("csvs/valid_threshold.csv", index=False) train_ids.to_csv("csvs/train_all.csv", index=False) else: train_ids = pd.read_csv("csvs/train_all.csv") valid_ids = pd.read_csv("csvs/valid_threshold.csv") for fold, ((train_ids_new, valid_ids_new), (train_ids_pl, valid_ids_pl)) in enumerate( zip( stratified_groups_kfold(train_ids, target='count', n_splits=opts.fold_max, random_state=0), stratified_groups_kfold(pseudo_label_ids, target='count', n_splits=opts.fold_max, random_state=0))): train_ids_new.to_csv(f'csvs/train_fold{fold}.csv') valid_ids_new.to_csv(f'csvs/valid_fold{fold}.csv') train_ids_new = train_ids_new['img_id'].values valid_ids_new = valid_ids_new['img_id'].values train_ids_pl = train_ids_pl['img_id'].values valid_ids_pl = valid_ids_pl['img_id'].values ENCODER = opts.backborn ENCODER_WEIGHTS = opts.encoder_weights DEVICE = 'cuda' ACTIVATION = None model = get_model( model_type=opts.model_type, encoder=ENCODER, encoder_weights=ENCODER_WEIGHTS, activation=ACTIVATION, n_classes=opts.class_num, task=opts.task, center=opts.center, attention_type=opts.attention_type, head='simple', classification=opts.classification, ) model = convert_model(model) preprocessing_fn = encoders.get_preprocessing_fn( ENCODER, ENCODER_WEIGHTS) num_workers = opts.num_workers bs = opts.batchsize train_dataset = CloudDataset( df=train, label_smoothing_eps=opts.label_smoothing_eps, datatype='train', img_ids=train_ids_new, transforms=get_training_augmentation(opts.img_size), preprocessing=get_preprocessing(preprocessing_fn)) valid_dataset = CloudDataset( df=train, datatype='valid', img_ids=valid_ids_new, 
transforms=get_validation_augmentation(opts.img_size), preprocessing=get_preprocessing(preprocessing_fn)) ################# make pseudo label dataset ####################### train_dataset_pl = CloudPseudoLabelDataset( df=pseudo_label, datatype='train', img_ids=train_ids_pl, transforms=get_training_augmentation(opts.img_size), preprocessing=get_preprocessing(preprocessing_fn)) valid_dataset_pl = CloudPseudoLabelDataset( df=pseudo_label, datatype='train', img_ids=valid_ids_pl, transforms=get_validation_augmentation(opts.img_size), preprocessing=get_preprocessing(preprocessing_fn)) # train_dataset = ConcatDataset([train_dataset, train_dataset_pl]) # valid_dataset = ConcatDataset([valid_dataset, valid_dataset_pl]) train_dataset = ConcatDataset([train_dataset, valid_dataset_pl]) ################# make pseudo label dataset ####################### train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers, drop_last=True) valid_loader = DataLoader(valid_dataset, batch_size=bs, shuffle=False, num_workers=num_workers, drop_last=True) loaders = {"train": train_loader, "valid": valid_loader} num_epochs = opts.max_epoch logdir = f"{opts.logdir}/fold{fold}" optimizer = get_optimizer(optimizer=opts.optimizer, lookahead=opts.lookahead, model=model, separate_decoder=True, lr=opts.lr, lr_e=opts.lr_e) opt_level = 'O1' model.cuda() model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level) scheduler = opts.scheduler(optimizer) criterion = opts.criterion runner = SupervisedRunner() if opts.task == "segmentation": callbacks = [DiceCallback()] else: callbacks = [] if opts.early_stop: callbacks.append( EarlyStoppingCallback(patience=10, min_delta=0.001)) if opts.mixup: callbacks.append(MixupCallback(alpha=0.25)) if opts.accumeration is not None: callbacks.append(CriterionCallback()) callbacks.append( OptimizerCallback(accumulation_steps=opts.accumeration)) print( f"############################## Start training of fold{fold}! ##############################" ) runner.train(model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks, logdir=logdir, num_epochs=num_epochs, verbose=True) print( f"############################## Finish training of fold{fold}! ##############################" ) del model del loaders del runner torch.cuda.empty_cache() gc.collect()
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        y = (df.sum_target != 0).astype("float32").values

    with timer('preprocessing'):
        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ], p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100, 140), p=0.5),
                RandomBrightnessContrast(p=0.5),
                RandomBrightness(p=0.5),
                RandomContrast(p=0.5)
            ], p=0.5),
            OneOf([
                GaussNoise(p=0.5),
                Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5)
            ], p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
        ])
        val_augmentation = None

        train_dataset = SeverDataset(df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                     id_colname=ID_COLUMNS,
                                     transforms=train_augmentation,
                                     crop_rate=1.0, class_y=y)
        train_sampler = MaskProbSampler(df, demand_non_empty_proba=0.6)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                                  sampler=train_sampler, num_workers=8)

        del df, train_dataset
        gc.collect()

    with timer('create model'):
        model = smp_old.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES,
                             encoder_se_module=True, decoder_semodule=True,
                             h_columns=False, skip=True, act="swish",
                             freeze_bn=True, classification=CLASSIFICATION)
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam([
            {'params': model.decoder.parameters(), 'lr': 3e-3},
            {'params': model.encoder.parameters(), 'lr': 3e-4},
        ])
        #if base_model is None:
        #    scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5)
        #    scheduler = GradualWarmupScheduler(optimizer, multiplier=1.1, total_epoch=CLR_CYCLE*2, after_scheduler=scheduler_cosine)
        #else:
        #    scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5)

        model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        for epoch in range(71, EPOCHS + 1):
            seed = seed + epoch
            seed_torch(seed)

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device,
                                      cutmix_prob=0.0, classification=CLASSIFICATION)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            #scheduler.step()

            if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1:
                torch.save(model.module.state_dict(),
                           'models/{}_latest.pth'.format(EXP_ID))

            gc.collect()
def main(seed): with timer('load data'): df = pd.read_csv(FOLD_PATH) y1 = (df.EncodedPixels_1 != "-1").astype("float32").values.reshape( -1, 1) y2 = (df.EncodedPixels_2 != "-1").astype("float32").values.reshape( -1, 1) y3 = (df.EncodedPixels_3 != "-1").astype("float32").values.reshape( -1, 1) y4 = (df.EncodedPixels_4 != "-1").astype("float32").values.reshape( -1, 1) y = np.concatenate([y1, y2, y3, y4], axis=1) with timer('preprocessing'): train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID] y_train, y_val = y[df.fold_id != FOLD_ID], y[df.fold_id == FOLD_ID] train_augmentation = Compose([ Flip(p=0.5), OneOf([ GridDistortion(p=0.5), OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5) ], p=0.5), OneOf([ RandomGamma(gamma_limit=(100, 140), p=0.5), RandomBrightnessContrast(p=0.5), RandomBrightness(p=0.5), RandomContrast(p=0.5) ], p=0.5), OneOf([ GaussNoise(p=0.5), Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5) ], p=0.5), ShiftScaleRotate(rotate_limit=20, p=0.5), ]) val_augmentation = None train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=train_augmentation, crop_rate=1.0, class_y=y_train) val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=val_augmentation) train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6) train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=8) val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8) del train_df, val_df, df, train_dataset, val_dataset gc.collect() with timer('create model'): model = smp.Unet('se_resnext50_32x4d', encoder_weights="imagenet", classes=N_CLASSES, encoder_se_module=True, decoder_semodule=True, h_columns=False, skip=True, act="swish", freeze_bn=True, classification=CLASSIFICATION, attention_type="cbam", center=True) model = convert_model(model) if base_model is not None: model.load_state_dict(torch.load(base_model)) model.to(device) criterion = torch.nn.BCEWithLogitsLoss() optimizer = torch.optim.Adam([ { 'params': model.decoder.parameters(), 'lr': 3e-3 }, { 'params': model.encoder.parameters(), 'lr': 3e-4 }, ]) if base_model is None: scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5) scheduler = GradualWarmupScheduler( optimizer, multiplier=1.1, total_epoch=CLR_CYCLE * 2, after_scheduler=scheduler_cosine) else: scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5) model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0) if EMA: ema_model = copy.deepcopy(model) if base_model_ema is not None: ema_model.load_state_dict(torch.load(base_model_ema)) ema_model.to(device) else: ema_model = None model = torch.nn.DataParallel(model) ema_model = torch.nn.DataParallel(ema_model) with timer('train'): valid_loss = validate(model, val_loader, criterion, device, classification=CLASSIFICATION)
                 classes=4,
                 activation=ACTIVATION)
elif conf.seg_net == 'ocunet':
    model = Unet(encoder_name=ENCODER,
                 encoder_weights=ENCODER_WEIGHTS,
                 classes=4,
                 activation=ACTIVATION,
                 use_oc_module=True)
else:
    raise Exception('unsupported' + str(args.seg_net))

if conf.backbone_weights is not None:
    load_pretrained_weights(conf.backbone_weights, model)

if torch.cuda.device_count() > 1:
    model = convert_model(model)
if torch.cuda.is_available():
    model.cuda()

if conf.loss == 'bce-dice':
    loss = smp.utils.losses.BCEDiceLoss(eps=1.)
elif conf.loss == 'lovasz':
    loss = lovasz_hinge
elif conf.loss == 'weighted-bce':
    loss = weighted_bce
elif conf.loss == 'focal':
    loss = BinaryFocalLoss()
    loss.__name__ = 'bin_focal'
else:
    raise Exception('unsupported loss', args.loss)
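# `BinaryFocalLoss`, `weighted_bce` and `lovasz_hinge` are defined elsewhere in
# the project. A minimal sketch of a binary focal loss on logits is given below;
# the gamma/alpha values are assumptions, not taken from the original
# implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F


class BinaryFocalLoss(nn.Module):
    def __init__(self, gamma=2.0, alpha=0.25):
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, logits, targets):
        bce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
        p_t = torch.exp(-bce)                                    # prob. of the true class
        alpha_t = self.alpha * targets + (1 - self.alpha) * (1 - targets)
        return (alpha_t * (1 - p_t) ** self.gamma * bce).mean()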
    num_workers=NUM_WORKERS,
    normalization=STATISTICS,
)
# init model
swa_model = MODEL(num_classes=N_CLASSES, dropout_p=DROPOUT)
model = MODEL(num_classes=N_CLASSES, dropout_p=DROPOUT)

# nullify all swa model parameters
# materialise the parameter list once; a bare .parameters() generator would be
# exhausted by the zeroing loop and the averaging loop below would do nothing
swa_params = list(swa_model.parameters())
for swa_param in swa_params:
    swa_param.data = torch.zeros_like(swa_param.data)

# average model
n_swa = len(os.listdir(MODELS_FOLDER))
print(f"Averaging {n_swa} models")
for file in os.listdir(MODELS_FOLDER):
    model.load_state_dict(torch.load(f'{MODELS_FOLDER}/{file}')['model'])
    model_params = model.parameters()
    for model_param, swa_param in zip(model_params, swa_params):
        swa_param.data += model_param.data / n_swa

# fix batch norm
print("Fixing batch norm")
swa_model.to(DEVICE)
learn = Learner(data, model, model_dir=MODELS_FOLDER, loss_func=CRITERION,
                opt_func=OPTIMIZER, wd=WD)
learn.model = convert_model(learn.model)
learn.model = nn.DataParallel(learn.model).to(DEVICE)
fix_batchnorm(learn.model, learn.data.train_dl)
learn.save('swa_model')
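# The snippet above averages checkpoint weights by hand and then re-estimates
# BatchNorm statistics with `fix_batchnorm`. Recent PyTorch versions ship the same
# two steps in torch.optim.swa_utils; the sketch below shows that alternative. It
# is not the code used above, and `checkpoint_paths` / `train_loader` are assumed
# names.
import torch
from torch.optim.swa_utils import AveragedModel, update_bn

swa = AveragedModel(MODEL(num_classes=N_CLASSES, dropout_p=DROPOUT))
for ckpt_path in checkpoint_paths:
    snapshot = MODEL(num_classes=N_CLASSES, dropout_p=DROPOUT)
    snapshot.load_state_dict(torch.load(ckpt_path)['model'])
    swa.update_parameters(snapshot)   # running equal-weight average of the snapshots
update_bn(train_loader, swa)          # recompute BatchNorm running statistics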
L1_criterion = nn.L1Loss(reduction='sum')

""" Define generator/discriminator """
model = MMDenseNet(indim=2, outdim=2, drop_rate=0.25, bn_size=4,
                   k1=10, l1=3, k2=14, l2=4, attention='CBAM')
if mode == 'ddp':
    model = convert_model(model)
model.cuda()

optimizer = RAdam(model.parameters(), lr=learning_rate / TTUR,
                  betas=(beta1, beta2), eps=1e-8, weight_decay=weight_decay)
optimizer = Lookahead(optimizer, alpha=0.5, k=6)

if mixed:
    model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

scheduler = StepLR(optimizer, step_size=step_size, gamma=scheduler_gamma)

if mode == 'ddp':
    model = torch.nn.parallel.DistributedDataParallel(
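# In the DDP branch above, BatchNorm layers are converted with the third-party
# `convert_model` before the DistributedDataParallel wrap. With torch.distributed
# the built-in converter can be used instead; this is an alternative sketch, not
# the code from the snippet, and `local_rank` is an assumed variable.
import torch

model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
model = torch.nn.parallel.DistributedDataParallel(
    model.cuda(), device_ids=[local_rank], output_device=local_rank)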
def main(seed): with timer('load data'): df = pd.read_csv(FOLD_PATH) df.drop("EncodedPixels_2", axis=1, inplace=True) df = df.rename(columns={"EncodedPixels_3": "EncodedPixels_2"}) df = df.rename(columns={"EncodedPixels_4": "EncodedPixels_3"}) y1 = (df.EncodedPixels_1 != "-1").astype("float32").values.reshape( -1, 1) y2 = (df.EncodedPixels_2 != "-1").astype("float32").values.reshape( -1, 1) y3 = (df.EncodedPixels_3 != "-1").astype("float32").values.reshape( -1, 1) #y4 = (df.EncodedPixels_4 != "-1").astype("float32").values.reshape(-1, 1) y = np.concatenate([y1, y2, y3], axis=1) with timer('preprocessing'): train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID] y_train, y_val = y[df.fold_id != FOLD_ID], y[df.fold_id == FOLD_ID] train_augmentation = Compose([ Flip(p=0.5), OneOf([ GridDistortion(p=0.5), OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5) ], p=0.5), OneOf([ RandomGamma(gamma_limit=(100, 140), p=0.5), RandomBrightnessContrast(p=0.5), ], p=0.5), OneOf([ GaussNoise(p=0.5), ], p=0.5), ShiftScaleRotate(rotate_limit=20, p=0.5), ]) val_augmentation = None train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=train_augmentation, crop_rate=1.0, class_y=y_train) val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=val_augmentation) train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6) train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=8) val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8) del train_df, val_df, df, train_dataset, val_dataset gc.collect() with timer('create model'): model = smp.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES, encoder_se_module=True, decoder_semodule=True, h_columns=False, skip=True, act="swish", freeze_bn=True, classification=CLASSIFICATION, attention_type="cbam", center=True, mode="train") model = convert_model(model) if base_model is not None: model.load_state_dict(torch.load(base_model)) model.to(device) criterion = torch.nn.BCEWithLogitsLoss() optimizer = torch.optim.Adam([ { 'params': model.decoder.parameters(), 'lr': 3e-3 }, { 'params': model.encoder.parameters(), 'lr': 3e-4 }, ]) if base_model is None: scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5) scheduler = GradualWarmupScheduler( optimizer, multiplier=1.1, total_epoch=CLR_CYCLE * 2, after_scheduler=scheduler_cosine) else: scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5) model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0) if EMA: ema_model = copy.deepcopy(model) if base_model_ema is not None: ema_model.load_state_dict(torch.load(base_model_ema)) ema_model.to(device) ema_model = torch.nn.DataParallel(ema_model) else: ema_model = None model = torch.nn.DataParallel(model) with timer('train'): train_losses = [] valid_losses = [] best_model_loss = 999 best_model_ema_loss = 999 best_model_ep = 0 ema_decay = 0 checkpoint = base_ckpt + 1 for epoch in range(1, EPOCHS + 1): seed = seed + epoch seed_torch(seed) if epoch >= EMA_START: ema_decay = 0.99 LOGGER.info("Starting {} epoch...".format(epoch)) tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device, cutmix_prob=0.0, classification=CLASSIFICATION, ema_model=ema_model, ema_decay=ema_decay) train_losses.append(tr_loss) LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5))) valid_loss = validate(model, val_loader, 
criterion, device, classification=CLASSIFICATION) valid_losses.append(valid_loss) LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5))) if EMA and epoch >= EMA_START: ema_valid_loss = validate(ema_model, val_loader, criterion, device, classification=CLASSIFICATION) LOGGER.info('Mean EMA valid loss: {}'.format( round(ema_valid_loss, 5))) if ema_valid_loss < best_model_ema_loss: torch.save( ema_model.module.state_dict(), 'models/{}_fold{}_ckpt{}_ema.pth'.format( EXP_ID, FOLD_ID, checkpoint)) best_model_ema_loss = ema_valid_loss scheduler.step() if valid_loss < best_model_loss: torch.save( model.module.state_dict(), 'models/{}_fold{}_ckpt{}.pth'.format( EXP_ID, FOLD_ID, checkpoint)) best_model_loss = valid_loss best_model_ep = epoch #np.save("val_pred.npy", val_pred) if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1: torch.save( model.module.state_dict(), 'models/{}_fold{}_latest.pth'.format(EXP_ID, FOLD_ID)) LOGGER.info('Best valid loss: {} on epoch={}'.format( round(best_model_loss, 5), best_model_ep)) if EMA: torch.save( ema_model.module.state_dict(), 'models/{}_fold{}_latest_ema.pth'.format( EXP_ID, FOLD_ID)) LOGGER.info('Best ema valid loss: {}'.format( round(best_model_ema_loss, 5))) best_model_ema_loss = 999 checkpoint += 1 best_model_loss = 999 #del val_pred gc.collect() LOGGER.info('Best valid loss: {} on epoch={}'.format( round(best_model_loss, 5), best_model_ep)) xs = list(range(1, len(train_losses) + 1)) plt.plot(xs, train_losses, label='Train loss') plt.plot(xs, valid_losses, label='Val loss') plt.legend() plt.xticks(xs) plt.xlabel('Epochs') plt.savefig("loss.png")
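# In the training loop above, `train_one_epoch` receives `ema_model` and
# `ema_decay`, but the update rule itself is not shown. A typical exponential
# moving average step, applied after each optimizer step, is sketched here; it is
# an assumption about the helper, not the original code.
import torch


def update_ema(model, ema_model, decay):
    with torch.no_grad():
        for ema_p, p in zip(ema_model.parameters(), model.parameters()):
            ema_p.mul_(decay).add_(p, alpha=1 - decay)   # ema = decay*ema + (1-decay)*p
        for ema_b, b in zip(ema_model.buffers(), model.buffers()):
            ema_b.copy_(b)                               # copy BN running stats directly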
    save_top_k=4,
    verbose=True,
    monitor='val_auc',
    mode='max',
)
early_stop_callback = EarlyStopping(monitor='val_auc',
                                    min_delta=0.00,
                                    patience=6,
                                    verbose=True,
                                    mode='max')

# train
model = convert_model(
    MelanomaModel(model_name, ds_train, ds_val, ds_test, batch_size=batch_size))
trainer = pl.Trainer(max_epochs=100000,
                     logger=tb_log,
                     checkpoint_callback=checkpoint_callback,
                     early_stop_callback=early_stop_callback,
                     gpus=gpus,
                     distributed_backend='dp',
                     num_sanity_val_steps=0)
trainer.fit(model)

## inference
best_epoch = max(model.val_auc, key=model.val_auc.get)
oof_preds = pd.DataFrame(index=df_val.index).reset_index()