def generate_model_submission(snapshot_name: str, config_name: str, postprocess=morphology_postprocess,
                              mine_on_val=True, export_png=False):
    """Predict masks with a snapshot, mine a binarization threshold and write a submission.

    The function runs these steps in order:

    1. ``predict_masks_auto`` is run for the train and the test dataset; each
       set of predictions is stored next to the config file as
       ``<snapshot>_train_predictions.npz`` / ``<snapshot>_test_predictions.npz``
       (one array per image id).
    2. If ``export_png`` is set, both archives are passed to
       ``convert_predictions_to_images``.
    3. ``threshold_mining`` is run on thresholds 0.15..0.85 (step 0.005) and the
       threshold with the highest score is kept.
    4. Test predictions are binarized with that threshold, optionally
       post-processed, and written as a gzip-compressed CSV whose name embeds
       the score and the threshold.

    Args:
        snapshot_name: Snapshot file name; resolved with ``auto_file``.
        config_name: Session config file name; resolved with ``auto_file``.
        postprocess: Optional callable ``postprocess(image, mask)`` applied to
            every binarized test mask. ``None`` disables post-processing.
        mine_on_val: When true, the threshold is mined only on the ids listed
            under ``valid_set`` in the config; otherwise on all train ids.
        export_png: When true, also export both prediction archives as images.
    """
    print('Generating model submission for session', snapshot_name)

    snapshot_basename = os.path.splitext(os.path.basename(snapshot_name))[0]
    config_file = auto_file(config_name)
    save_file = auto_file(snapshot_name)
    working_dir = os.path.dirname(config_file)

    # Predictions for train dataset
    train_predictions, train_dataset = predict_masks_auto(config_file, save_file, test_or_train='train')
    train_predictions_file = os.path.join(working_dir, f'{snapshot_basename}_train_predictions.npz')
    np.savez_compressed(train_predictions_file, **dict(zip(train_dataset.ids, train_predictions)))

    # Predictions for test dataset
    test_predictions, test_dataset = predict_masks_auto(config_file, save_file, test_or_train='test')
    test_predictions_file = os.path.join(working_dir, f'{snapshot_basename}_test_predictions.npz')
    np.savez_compressed(test_predictions_file, **dict(zip(test_dataset.ids, test_predictions)))

    # Optionally dump both prediction archives as images
    if export_png:
        convert_predictions_to_images(train_predictions_file, os.path.join(working_dir, 'train_predictions'))
        convert_predictions_to_images(test_predictions_file, os.path.join(working_dir, 'test_predictions'))

    # Threshold mining
    if mine_on_val:
        # Use a context manager so the config file handle is closed deterministically.
        with open(config_file) as config_stream:
            config = json.load(config_stream)

        valid_ids = np.array(config['valid_set'])
        valid_mask = D.get_selection_mask(train_dataset.ids, valid_ids)
        # NOTE(review): the true masks are read in `valid_ids` order while the
        # predictions are selected in `train_dataset.ids` order - presumably both
        # are sorted the same way; verify against the training script.
        true_masks = D.read_train_masks(valid_ids)
        mined_predictions = train_predictions[valid_mask]
    else:
        true_masks = D.read_train_masks(train_dataset.ids)
        mined_predictions = train_predictions

    threshold, lb_score = threshold_mining(mined_predictions, true_masks,
                                           min_threshold=0.15, max_threshold=0.85, step=0.005)

    # Keep only the best-scoring threshold.
    i = np.argmax(lb_score)
    threshold, lb_score = float(threshold[i]), float(lb_score[i])

    suffix = '_mine_on_val' if mine_on_val else ''
    submit_file = os.path.join(working_dir,
                               '{}_LB{:.4f}_TH{:.4f}{}.csv.gz'.format(snapshot_basename, lb_score, threshold, suffix))

    test_predictions = test_predictions > threshold

    if postprocess is not None:
        test_images = D.read_test_images(test_dataset.ids)
        test_predictions = np.array([postprocess(image, mask)
                                     for image, mask in zip(test_images, test_predictions)])

    create_submission(test_dataset.ids, test_predictions).to_csv(submit_file, compression='gzip', index=False)
    print('Saved submission to ', working_dir)
def main():
    """Command-line entry point: generate a submission for one snapshot/config pair.

    Both file arguments are resolved with ``auto_file`` before being handed to
    ``generate_model_submission``.
    """
    import argparse

    parser = argparse.ArgumentParser()
    # `required=True` already guarantees a value, so no default is needed.
    parser.add_argument('-s', '--snapshot', required=True, type=str,
                        help='Model snapshot file used for prediction')
    parser.add_argument('-c', '--config', required=True, type=str,
                        help='Training session config file that belongs to the snapshot')
    parser.add_argument('-mov', '--mine-on-val', action='store_true',
                        help='Mine the binarization threshold on the validation set only')
    args = parser.parse_args()

    snapshot_file = auto_file(args.snapshot)
    config_file = auto_file(args.config)
    generate_model_submission(snapshot_file, config_file, mine_on_val=args.mine_on_val)
def test_rle_encode_decode():
    """Check that writing a submission and decoding it back leaves the metric unchanged.

    Stored train predictions are binarized at 0.45, written to
    ``test_rle_encode_decode.csv.gz`` via ``create_submission`` and read back
    with ``decode_submission``; ``do_kaggle_metric`` must give identical
    results for the original and the decoded masks.
    """
    archive_path = auto_file(
        'Oct10_20_28_dpn_128_medium_wonderful_goldberg_val_lb.pth_train_predictions.npz'
    )
    archive = np.load(archive_path)

    train_ids = D.all_train_ids()
    true_masks = D.read_train_masks(train_ids)

    stacked = np.array([archive[sample_id] for sample_id in train_ids])
    pred_masks = (stacked > 0.45).astype(np.uint8)

    csv_name = 'test_rle_encode_decode.csv.gz'
    submit = create_submission(train_ids, pred_masks)
    submit.to_csv(csv_name, compression='gzip', index=False)

    decoded_ids, decoded_list = decode_submission(csv_name)
    mask_by_id = dict(zip(decoded_ids, decoded_list))
    assert set(decoded_ids) == set(train_ids)

    # Re-order decoded masks so they line up with `train_ids`.
    decoded_masks = np.array([mask_by_id[sample_id] for sample_id in train_ids])

    p1, r1, _ = do_kaggle_metric(pred_masks, true_masks)
    p2, r2, _ = do_kaggle_metric(decoded_masks, true_masks)

    assert np.array_equal(p1, p2)
    assert np.array_equal(r1, r2)
    print(np.mean(p1), np.mean(p2))
def extract_oof_predictions(model) -> dict:
    """Extract the out-of-fold part of a model's train predictions.

    The session config is the alphabetically first ``*.json`` file found in the
    directory of ``<model>_test_predictions.npz``. Its ``stratify`` and
    ``fold`` entries are used to recompute the fold split; predictions of the
    held-out ids are collected and also saved as
    ``<model>_oof_predictions.npz`` in the same directory.

    Args:
        model: Prefix of the prediction archives (resolved with ``auto_file``).

    Returns:
        Mapping from held-out image id to its predicted array.

    Raises:
        IndexError: If the experiment directory contains no ``.json`` file.
    """
    test_predictions = auto_file(f'{model}_test_predictions.npz')
    train_predictions = auto_file(f'{model}_train_predictions.npz')
    experiment_dir = os.path.dirname(test_predictions)

    json_configs = [fname for fname in sorted(os.listdir(experiment_dir))
                    if os.path.splitext(fname)[1] == '.json']
    if not json_configs:
        # Same exception type as the original bare `[0]` lookup, but with a useful message.
        raise IndexError(f'No .json config found in {experiment_dir}')

    json_config = auto_file(json_configs[0])
    with open(json_config) as config_stream:
        config = json.load(config_stream)

    stratify = config['stratify']
    fold = config['fold']

    train_ids = D.all_train_ids()
    train_indexes, test_indexes = D.get_train_test_split_for_fold(stratify, fold, train_ids)
    valid_ids = train_ids[test_indexes]

    # Close the archive as soon as the needed arrays have been read.
    with np.load(train_predictions) as archive:
        valid_predictions = np.array([archive[sample_id] for sample_id in valid_ids])

    oof_predictions = dict(zip(valid_ids, valid_predictions))
    np.savez_compressed(os.path.join(experiment_dir, f'{model}_oof_predictions.npz'), **oof_predictions)
    return oof_predictions
def make_cv_submit(inputs, prefix, output_dir='submits'):
    """Build ensemble submissions from several models' stored predictions.

    The binarization threshold is mined on merged out-of-fold predictions
    (0.1..0.9, step 0.001). Test predictions are ensembled with
    ``ArithmeticMean``, binarized and written twice into ``output_dir``: once
    as-is and once after ``morphology_postprocess``.

    Args:
        inputs: Model prefixes; ``<model>_test_predictions.npz`` and
            ``<model>_oof_predictions.npz`` are resolved with ``auto_file``.
        prefix: File name prefix of the generated submissions.
        output_dir: Destination directory; created when missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    test_predictions = [auto_file(f'{model}_test_predictions.npz') for model in inputs]
    oof_predictions = [auto_file(f'{model}_oof_predictions.npz') for model in inputs]

    train_ids = D.all_train_ids()
    true_masks = D.read_train_masks(train_ids)
    test_ids = D.all_test_ids()

    pred_masks = merge_oof(oof_predictions, train_ids)
    thresholds, scores = threshold_mining(pred_masks, true_masks,
                                          min_threshold=0.1, max_threshold=0.9, step=0.001)

    best = np.argmax(scores)
    threshold = float(thresholds[best])
    lb_score = float(scores[best])
    print('Threshold', threshold, 'CV score', lb_score)

    # Arithmetic
    ensembled_test_pred = ensemble(test_predictions, test_ids, averaging=ArithmeticMean) > threshold

    submit_file = f'{prefix}_a_mean_CV_{lb_score:.4f}_TH{threshold:.4f}.csv.gz'
    plain_submission = create_submission(test_ids, ensembled_test_pred)
    plain_submission.to_csv(os.path.join(output_dir, submit_file), compression='gzip', index=False)
    print('Saved submission', submit_file)

    postprocess = morphology_postprocess
    if postprocess is None:
        return

    test_images = D.read_test_images(test_ids)
    test_predictions = np.array([postprocess(image, mask)
                                 for image, mask in zip(test_images, ensembled_test_pred)])

    submit_file = f'{prefix}_a_mean_PPC_CV_{lb_score:.4f}_TH{threshold:.4f}.csv.gz'
    processed_submission = create_submission(test_ids, test_predictions)
    processed_submission.to_csv(os.path.join(output_dir, submit_file), compression='gzip', index=False)
    print('Saved submission', submit_file)
def decode_submission(submission):
    """Decode a submission table back into image ids and masks.

    Args:
        submission: Either a table with ``id`` and ``rle_mask`` columns, or a
            ``str`` file name that is resolved with ``auto_file`` and read
            with ``pd.read_csv``.

    Returns:
        ``(ids, images)`` - two lists ordered by ascending ``id``; every image
        is the result of ``rle_decode`` on the row's ``rle_mask``.
    """
    table = pd.read_csv(auto_file(submission)) if isinstance(submission, str) else submission
    table = table.sort_values('id')

    images, ids = [], []
    for _, record in table.iterrows():
        images.append(rle_decode(record['rle_mask']))
        ids.append(record['id'])

    return ids, images
def average_predictions(predictions, dst_file=None):
    """Average several prediction archives entry by entry.

    Every archive is loaded with ``np.load``; the keys of the first archive
    define which entries are averaged, and each later archive must contain
    the same keys.

    Args:
        predictions: File names of the archives (resolved with ``auto_file``).
        dst_file: Optional output file name; when given, the averaged arrays
            are saved with ``np.savez_compressed`` into the directory of the
            first archive.

    Returns:
        Dict mapping each key of the first archive to the averaged array.
    """
    prediction_files = [auto_file(p) for p in predictions]
    dst_dir = os.path.dirname(prediction_files[0])
    scale = float(1. / len(prediction_files))

    # Context managers close each archive once its arrays have been read.
    with np.load(prediction_files[0]) as first:
        keys = list(first.keys())
        avg = {key: first[key] * scale for key in keys}

    for pred_file in prediction_files[1:]:
        with np.load(pred_file) as pred:
            for key in keys:
                avg[key] += pred[key] * scale

    if dst_file is not None:
        np.savez_compressed(os.path.join(dst_dir, dst_file), **avg)
        print(f'Saved {dst_file}')

    return avg
def main():
    """Train a segmentation model in three phases and generate submissions.

    Phases (epoch counts are 50/250/50, or 1 each with ``args.fast``):

    * warmup    - ``bce`` mask loss, optimizer from ``args.optimizer`` at
      ``args.learning_rate``, no scheduler;
    * main      - ``bce_jaccard`` mask loss, lr 1e-3, ``ReduceLROnPlateau``;
    * annealing - five rounds of SGD (lr 1e-2) with ``CosineAnnealingLR`` and
      ``bce_lovasz`` mask loss, each round writing its own snapshot.

    The session arguments (plus the train/valid split) are written as JSON into
    the experiment directory, and ``generate_model_submission`` is finally run
    for the best checkpoint and every annealing snapshot.
    """
    parser = U.get_argparser()
    args = parser.parse_args()
    U.set_manual_seed(args.seed)

    # vars() returns the namespace's own dict, so later updates also land on `args`.
    train_session_args = vars(args)
    train_session = U.get_random_name()
    current_time = datetime.now().strftime('%b%d_%H_%M')
    prefix = f'{current_time}_{args.model}_{args.prepare}_{args.augmentation}_{train_session}'
    if args.fold is not None:
        prefix += f'_fold_{args.stratify}_{args.fold}'

    log_dir = os.path.join('runs', prefix)
    exp_dir = os.path.join('experiments', args.model, args.prepare, args.augmentation, prefix)
    os.makedirs(exp_dir, exist_ok=True)

    train_ids = D.all_train_ids()
    depths = D.read_depths(train_ids)
    images = D.read_train_images(train_ids)
    masks = D.read_train_masks(train_ids)

    if args.fix_masks:
        masks, changed_ids = D.fix_masks(masks, train_ids)
        with open(os.path.join(exp_dir, 'fixed_masks.txt'), 'w') as f:
            for sample_id in changed_ids:
                f.write(sample_id)
                f.write('\n')
        print(f'Fixed {len(changed_ids)} masks')

    if args.fold is not None:
        train_indexes, test_indexes = D.get_train_test_split_for_fold(args.stratify, args.fold, train_ids)
    else:
        train_indexes, test_indexes = train_test_split(np.arange(len(train_ids)), shuffle=False,
                                                       random_state=args.split_seed, test_size=0.2)

    ids_train, ids_test = train_ids[train_indexes], train_ids[test_indexes]
    img_train, img_test = images[train_indexes], images[test_indexes]
    mask_train, mask_test = masks[train_indexes], masks[test_indexes]
    depth_train, depth_test = depths[train_indexes], depths[test_indexes]

    # Here we can exclude some images from training, but keep in validation
    train_mask = D.drop_some(img_train, mask_train, drop_black=True,
                             drop_vstrips=args.drop_vstrips, drop_few=args.drop_few)
    ids_train = ids_train[train_mask]
    img_train = img_train[train_mask]
    mask_train = mask_train[train_mask]
    depth_train = depth_train[train_mask]

    if not is_sorted(ids_train):
        raise RuntimeError("ids_train is not sorted")

    if not is_sorted(ids_test):
        raise RuntimeError("ids_test_sorted is not sorted")

    prepare_fn = D.get_prepare_fn(args.prepare, **train_session_args)

    # This line valid if we apply prepare_fn first and then do augmentation
    target_size = prepare_fn.target_size if prepare_fn is not None else D.ORIGINAL_SIZE

    build_augmentation_fn = D.AUGMENTATION_MODES[args.augmentation]
    aug = build_augmentation_fn(target_size, border_mode=args.border_mode)

    train_transform_list = []
    valid_transform_list = []
    if prepare_fn is not None:
        train_transform_list.append(prepare_fn.t_forward)
        valid_transform_list.append(prepare_fn.t_forward)
    # Augmentation is applied to the training set only.
    train_transform_list.append(aug)

    trainset = D.ImageAndMaskDataset(ids_train, img_train, mask_train, depth_train,
                                     augment=A.Compose(train_transform_list))
    validset = D.ImageAndMaskDataset(ids_test, img_test, mask_test, depth_test,
                                     augment=A.Compose(valid_transform_list))

    trainloader = DataLoader(trainset, batch_size=args.batch_size, num_workers=args.workers,
                             pin_memory=True, drop_last=True, shuffle=True)
    validloader = DataLoader(validset, batch_size=args.batch_size,
                             pin_memory=True, drop_last=False, shuffle=False)

    # Save train/val split for future use
    train_session_args.update({
        'train_set': list(ids_train),
        'valid_set': list(ids_test)
    })

    # Declare variables we will use during training
    start_epoch = 0
    train_history = pd.DataFrame()

    target_metric = args.target_metric
    target_metric_mode = 'max'
    best_metric_val = 0
    best_lb_checkpoint = os.path.join(exp_dir, f'{prefix}_{target_metric}.pth')

    model = U.get_model(args.model,
                        num_classes=args.num_classes,
                        num_channels=trainset.channels(),
                        abn=args.abn,
                        use_dropout=not args.no_dropout,
                        pretrained=not args.no_pretrain).cuda()

    print('Train set size :', len(ids_train), 'batch size', trainloader.batch_size)
    print('Valid set size :', len(ids_test), 'batch size', validloader.batch_size)
    print('Tile transform :', prepare_fn if prepare_fn is not None else "None")
    print('Model :', args.model, count_parameters(model))
    print('Augmentations :', args.augmentation, args.border_mode)
    print('Input channels :', trainset.channels())
    print('Output classes :', args.num_classes)
    print('Optimizer :', args.optimizer, 'wd', args.weight_decay)
    print('Use of dropout :', not args.no_dropout)
    print('Train session :', train_session)
    print('Freeze encoder :', args.freeze_encoder)
    print('Seed :', args.seed, args.split_seed)
    print('Restart every :', args.restart_every)
    print('Fold :', args.fold, args.stratify)
    print('Fine-tune :', args.fine_tune)
    print('ABN Mode :', args.abn)
    print('Fix masks :', args.fix_masks)

    if args.resume:
        fname = U.auto_file(args.resume)
        # NOTE(review): `best_score` is only printed; `best_metric_val` stays 0 after
        # a resume, so the first improving epoch overwrites the best checkpoint.
        start_epoch, train_history, best_score = U.restore_checkpoint(fname, model)
        print(train_history)
        print('Resuming training from epoch', start_epoch, ' and score', best_score, args.resume)

    if args.fine_tune and args.freeze_encoder > 0:
        raise ValueError('Incompatible options --fine-tune and --freeze-encoder')

    writer = SummaryWriter(log_dir)
    writer.add_text('train/params', '```' + json.dumps(train_session_args, indent=2) + '```', 0)

    config_fname = os.path.join(exp_dir, f'{train_session}.json')
    with open(config_fname, 'w') as f:
        f.write(json.dumps(train_session_args, indent=2))

    weights = {
        'mask': 1.0,
        'class': 0.05,
        'dsv': 0.1,
    }

    bce = U.get_loss('bce')
    bce_lovasz = U.get_loss('bce_lovasz')
    bce_jaccard = U.get_loss('bce_jaccard')

    # Only the mask loss changes between phases.
    losses = {
        'warmup': {
            'mask': bce,
            'class': bce,
            'dsv': bce,
        },
        'main': {
            'mask': bce_jaccard,
            'class': bce,
            'dsv': bce,
        },
        'annealing': {
            'mask': bce_lovasz,
            'class': bce,
            'dsv': bce,
        }
    }

    epochs = {'warmup': 50, 'main': 250, 'annealing': 50}
    if args.fast:
        for key in epochs.keys():
            epochs[key] = 1

    learning_rates = {
        'warmup': args.learning_rate,
        'main': 1e-3,
        'annealing': 1e-2
    }

    # Keyword arguments that are identical for every `train` call below.
    shared_train_kwargs = dict(early_stopping=args.early_stopping,
                               experiment_dir=exp_dir,
                               target_metric=target_metric,
                               target_metric_mode=target_metric_mode)

    # Warmup phase
    if epochs['warmup']:
        print(torch.cuda.max_memory_allocated(), torch.cuda.max_memory_cached())
        trainable_parameters = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = U.get_optimizer(args.optimizer, trainable_parameters, learning_rates['warmup'],
                                    weight_decay=args.weight_decay)
        scheduler = None  # StepLR(optimizer, gamma=0.5, step_size=50)

        train_history, best_metric_val, start_epoch = train(
            model, losses['warmup'], weights, optimizer, scheduler,
            trainloader, validloader, writer, start_epoch,
            epochs=epochs['warmup'],
            train_history=train_history,
            best_metric_val=best_metric_val,
            checkpoint_filename=best_lb_checkpoint,
            **shared_train_kwargs)

        U.save_checkpoint(os.path.join(exp_dir, f'{prefix}_warmup.pth'), model, start_epoch, train_history,
                          metric_name=target_metric, metric_score=best_metric_val)

        # Drop optimizer state before the next phase allocates its own.
        del trainable_parameters, optimizer, scheduler
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
        print('Finished warmup phase. Main train loop.')

    # Main training phase
    print(torch.cuda.max_memory_allocated(), torch.cuda.max_memory_cached())
    trainable_parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = U.get_optimizer(args.optimizer, trainable_parameters, learning_rates['main'],
                                weight_decay=args.weight_decay)
    scheduler = ReduceLROnPlateau(optimizer, mode='max', patience=50, factor=0.5, min_lr=1e-5)

    train_history, best_metric_val, start_epoch = train(
        model, losses['main'], weights, optimizer, scheduler,
        trainloader, validloader, writer, start_epoch,
        epochs=epochs['main'],
        train_history=train_history,
        best_metric_val=best_metric_val,
        checkpoint_filename=best_lb_checkpoint,
        **shared_train_kwargs)

    del trainable_parameters, optimizer, scheduler
    torch.cuda.empty_cache()
    torch.cuda.synchronize()

    snapshots = [best_lb_checkpoint]

    U.save_checkpoint(os.path.join(exp_dir, f'{prefix}_main.pth'), model, start_epoch, train_history,
                      metric_name=target_metric, metric_score=best_metric_val)
    print('Finished train phase.')

    # Cosine annealing
    if epochs['annealing']:
        for snapshot in range(5):
            print(f'Starting annealing phase {snapshot}')
            print(torch.cuda.max_memory_allocated(), torch.cuda.max_memory_cached())
            # model.set_fine_tune(True)
            trainable_parameters = filter(lambda p: p.requires_grad, model.parameters())
            optimizer = U.get_optimizer('sgd', trainable_parameters, learning_rates['annealing'],
                                        weight_decay=args.weight_decay)
            scheduler = CosineAnnealingLR(optimizer, epochs['annealing'], eta_min=1e-7)

            snapshot_name = os.path.join(exp_dir, f'{prefix}_{target_metric}_snapshot_{snapshot}.pth')
            snapshots.append(snapshot_name)

            # best_metric_val=0 makes every round track its own best score.
            train_history, best_metric_val, start_epoch = train(
                model, losses['annealing'], weights, optimizer, scheduler,
                trainloader, validloader, writer, start_epoch,
                epochs=epochs['annealing'],
                train_history=train_history,
                best_metric_val=0,
                checkpoint_filename=snapshot_name,
                **shared_train_kwargs)

            del trainable_parameters, optimizer, scheduler
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

    print('Training finished')
    train_history.to_csv(os.path.join(exp_dir, 'train_history.csv'), index=False)

    for snapshot_file in snapshots:
        generate_model_submission(snapshot_file, config_fname, mine_on_val=True)
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from lib.train_utils import find_optimal_lr, auto_file
import torch_train as TT

# Learning-rate finder script: restores a linknet34 checkpoint, runs
# `find_optimal_lr` on the INRIA training set and plots loss against lr.
if __name__ == '__main__':
    dd = 'e:/datasets/inria/train'

    model = TT.get_model('linknet34', patch_size=512, num_channels=3).cuda()
    loss = TT.get_loss('bce').cuda()
    optimizer = TT.get_optimizer('sgd', model.parameters(), 1e-4)

    trainset, validset, num_classes = TT.get_dataset('inria', dd, grayscale=False, patch_size=512)

    checkpoint_path = auto_file('linknet34_checkpoint.pth')
    TT.restore_snapshot(model, None, checkpoint_path)

    trainloader = DataLoader(trainset,
                             batch_size=8,
                             shuffle=True,
                             num_workers=4,
                             pin_memory=True,
                             drop_last=True)

    # From here on `loss` holds the recorded loss curve, not the criterion.
    lr, loss = find_optimal_lr(model, loss, optimizer, trainloader)

    # Smooth the curve with a 4-sample moving average.
    box_kernel = np.ones((4,)) / 4
    loss = np.convolve(loss, box_kernel, mode='same')

    fig, ax = plt.subplots(figsize=(16, 12))
    ax.plot(lr, loss)
    ax.set(xlabel='lr', ylabel='loss', title='LR')
    # NOTE(review): newer matplotlib releases spell this keyword `nonpositive` - confirm the pinned version.
    ax.set_xscale("log", nonposx='clip')
    ax.grid()

    fig.show()
    plt.savefig('loss_plot.png')
    print(lr, loss)
def main():
    """Train a segmentation model with a single epoch loop and generate a submission.

    The optimizer (and, if configured, the lr scheduler) is re-created on the
    first epoch, every ``args.restart_every`` epochs and at the epoch where the
    encoder gets unfrozen. The best checkpoint according to
    ``args.target_metric`` is saved into the experiment directory together with
    a JSON dump of the session arguments, and ``generate_model_submission`` is
    run on it after training.
    """
    parser = U.get_argparser()
    args = parser.parse_args()
    U.set_manual_seed(args.seed)

    # vars() returns the namespace's own dict, so later updates also land on `args`.
    train_session_args = vars(args)
    train_session = U.get_random_name()
    current_time = datetime.now().strftime('%b%d_%H_%M')
    prefix = f'{current_time}_{args.model}_{args.prepare}_{args.augmentation}_{train_session}'
    if args.fold is not None:
        prefix += f'_fold_{args.stratify}_{args.fold}'

    log_dir = os.path.join('runs', prefix)
    exp_dir = os.path.join('experiments', args.model, args.prepare, args.augmentation, prefix)
    os.makedirs(exp_dir, exist_ok=True)

    train_ids = D.get_train_ids(drop_black=True,
                                drop_vstrips=args.drop_vstrips,
                                drop_empty=args.drop_empty,
                                drop_few=args.drop_few,
                                fast=args.fast)
    depths = D.read_depths(train_ids)
    images = D.read_train_images(train_ids)
    masks = D.read_train_masks(train_ids)

    if args.fix_masks:
        masks, changed_ids = D.fix_masks(masks, train_ids)
        with open(os.path.join(exp_dir, 'fixed_masks.txt'), 'w') as f:
            for sample_id in changed_ids:
                f.write(sample_id)
                f.write('\n')
        print(f'Fixed {len(changed_ids)} masks')

    if args.fold is not None:
        train_indexes, test_indexes = D.get_train_test_split_for_fold(args.stratify, args.fold, train_ids)
    else:
        train_indexes, test_indexes = train_test_split(np.arange(len(train_ids)), shuffle=False,
                                                       random_state=args.split_seed, test_size=0.2)

    ids_train, ids_test = train_ids[train_indexes], train_ids[test_indexes]

    if not is_sorted(ids_train):
        raise RuntimeError("ids_train is not sorted")

    if not is_sorted(ids_test):
        raise RuntimeError("ids_test_sorted is not sorted")

    img_train, img_test = images[train_indexes], images[test_indexes]
    mask_train, mask_test = masks[train_indexes], masks[test_indexes]
    depth_train, depth_test = depths[train_indexes], depths[test_indexes]

    prepare_fn = D.get_prepare_fn(args.prepare, **train_session_args)

    # This line valid if we apply prepare_fn first and then do augmentation
    target_size = prepare_fn.target_size if prepare_fn is not None else D.ORIGINAL_SIZE

    build_augmentation_fn = D.AUGMENTATION_MODES[args.augmentation]
    aug = build_augmentation_fn(target_size, border_mode=args.border_mode)

    train_transform_list = []
    valid_transform_list = []
    if prepare_fn is not None:
        train_transform_list.append(prepare_fn.t_forward)
        valid_transform_list.append(prepare_fn.t_forward)
    # Augmentation is applied to the training set only.
    train_transform_list.append(aug)

    trainset = D.ImageAndMaskDataset(ids_train, img_train, mask_train, depth_train,
                                     augment=A.Compose(train_transform_list))
    validset = D.ImageAndMaskDataset(ids_test, img_test, mask_test, depth_test,
                                     augment=A.Compose(valid_transform_list))

    trainloader = DataLoader(trainset, batch_size=args.batch_size, num_workers=args.workers,
                             pin_memory=True, drop_last=True, shuffle=True)
    validloader = DataLoader(validset, batch_size=args.batch_size,
                             pin_memory=True, drop_last=False, shuffle=False)

    # Save train/val split for future use
    train_session_args.update({
        'train_set': list(ids_train),
        'valid_set': list(ids_test)
    })

    # Declare variables we will use during training
    start_epoch = 0
    train_history = pd.DataFrame()
    scheduler = None
    optimizer = None

    target_metric = args.target_metric
    target_metric_mode = 'max'
    best_metric_val = 0
    best_lb_checkpoint = os.path.join(exp_dir, f'{prefix}_{target_metric}.pth')

    model = U.get_model(args.model,
                        num_classes=args.num_classes,
                        num_channels=trainset.channels(),
                        abn=args.abn,
                        use_dropout=not args.no_dropout,
                        pretrained=not args.no_pretrain).cuda()

    print('Train set size :', len(trainloader), 'batch size', trainloader.batch_size)
    print('Valid set size :', len(validloader), 'batch size', validloader.batch_size)
    print('Tile transform :', prepare_fn if prepare_fn is not None else "None")
    print('Model :', args.model, count_parameters(model))
    print('Augmentations :', args.augmentation, args.border_mode)
    print('Input channels :', trainset.channels())
    print('Output classes :', args.num_classes)
    print('Criterion :', args.loss)
    print('Optimizer :', args.optimizer, args.learning_rate, args.weight_decay)
    print('Use of dropout :', not args.no_dropout)
    print('Train session :', train_session)
    print('Freeze encoder :', args.freeze_encoder)
    print('Seed :', args.seed, args.split_seed)
    print('Restart every :', args.restart_every)
    print('Fold :', args.fold, args.stratify)
    print('Fine-tune :', args.fine_tune)
    print('ABN Mode :', args.abn)
    print('Fix masks :', args.fix_masks)

    if args.resume:
        fname = U.auto_file(args.resume)
        # NOTE(review): `best_score` is only printed; `best_metric_val` stays 0 after
        # a resume, so the first improving epoch overwrites the best checkpoint.
        start_epoch, train_history, best_score = U.restore_checkpoint(fname, model)
        print(train_history)
        print('Resuming training from epoch', start_epoch, ' and score', best_score, args.resume)

    segmentation_loss = U.get_loss(args.loss)

    if args.fine_tune and args.freeze_encoder > 0:
        raise ValueError('Incompatible options --fine-tune and --freeze-encoder')

    writer = SummaryWriter(log_dir)
    writer.add_text('train/params', '```' + json.dumps(train_session_args, indent=2) + '```', 0)

    config_fname = os.path.join(exp_dir, f'{train_session}.json')
    with open(config_fname, 'w') as f:
        f.write(json.dumps(train_session_args, indent=2))

    # `prepare_fn` may be None (guarded everywhere above), so guard it here as well.
    # NOTE(review): assumes process_epoch accepts mask_postprocess=None - confirm.
    mask_postprocess = prepare_fn.backward if prepare_fn is not None else None

    # Start training loop
    no_improvement_epochs = 0

    for epoch in range(start_epoch, start_epoch + args.epochs):
        # On Epoch begin
        if U.should_quit(exp_dir) or (args.early_stopping is not None
                                      and no_improvement_epochs > args.early_stopping):
            break

        epochs_trained = epoch - start_epoch
        should_restart_optimizer = (args.restart_every > 0 and epochs_trained % args.restart_every == 0) \
            or (epochs_trained == args.freeze_encoder) \
            or optimizer is None

        if should_restart_optimizer:
            del optimizer
            if args.fine_tune:
                model.set_fine_tune(args.fine_tune)
            else:
                model.set_encoder_training_enabled(epochs_trained >= args.freeze_encoder)

            # Only parameters that currently require gradients are optimized.
            trainable_parameters = filter(lambda p: p.requires_grad, model.parameters())
            optimizer = U.get_optimizer(args.optimizer, trainable_parameters, args.learning_rate,
                                        weight_decay=args.weight_decay)
            print('Restarting optimizer state', epoch, count_parameters(model))

            if args.lr_scheduler:
                scheduler = U.get_lr_scheduler(args.lr_scheduler, optimizer, args.epochs)

        # ReduceLROnPlateau is stepped after validation, all other schedulers here.
        if scheduler is not None and not isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(epochs_trained)

        U.log_learning_rate(writer, optimizer, epoch)

        # Epoch
        train_metrics = process_epoch(model, segmentation_loss, optimizer, trainloader, epoch, True, writer,
                                      mask_postprocess=mask_postprocess)
        valid_metrics = process_epoch(model, segmentation_loss, None, validloader, epoch, False, writer,
                                      mask_postprocess=mask_postprocess)

        all_metrics = {}
        all_metrics.update(train_metrics)
        all_metrics.update(valid_metrics)

        # On Epoch End
        summary = {
            'epoch': [int(epoch)],
            'lr': [float(optimizer.param_groups[0]['lr'])]
        }
        for k, v in all_metrics.items():
            summary[k] = [v]

        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        train_history = pd.concat([train_history, pd.DataFrame.from_dict(summary)], ignore_index=True)
        print(epoch, summary)

        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(all_metrics[target_metric], epochs_trained)

        if U.is_better(all_metrics[target_metric], best_metric_val, target_metric_mode):
            best_metric_val = all_metrics[target_metric]
            U.save_checkpoint(best_lb_checkpoint, model, epoch, train_history,
                              metric_name=target_metric, metric_score=best_metric_val)
            print('Checkpoint saved', epoch, best_metric_val, best_lb_checkpoint)
            no_improvement_epochs = 0
        else:
            no_improvement_epochs += 1

    print('Training finished')

    generate_model_submission(best_lb_checkpoint, config_fname, mine_on_val=True)
def main():
    """Train an ``Afterburner`` module on top of a frozen, pre-trained head model.

    The head model is restored from ``linknet34_checkpoint.pth`` and all of its
    parameters are frozen; only the afterburner's parameters are given to the
    optimizer. The snapshot with the lowest validation loss is saved to the
    experiment directory and the per-epoch history is written as CSV at the end.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-g', '--grayscale', action='store_true',
                        help='Whether to use grayscale image instead of RGB')
    parser.add_argument('-m', '--model', required=True, type=str, help='Name of the model')
    parser.add_argument('-p', '--patch-size', type=int, default=224)
    parser.add_argument('-b', '--batch-size', type=int, default=1,
                        help='Batch Size during training, e.g. -b 64')
    parser.add_argument('-lr', '--learning-rate', type=float, default=1e-3, help='Initial learning rate')
    parser.add_argument('-l', '--loss', type=str, default='bce', help='Target loss')
    parser.add_argument('-o', '--optimizer', default='SGD', help='Name of the optimizer')
    parser.add_argument('-e', '--epochs', type=int, default=100, help='Epoch to run')
    # The dataset name becomes a path component below, so a missing value
    # (None) would crash os.path.join; make argparse reject it up front.
    parser.add_argument('-d', '--dataset', type=str, required=True,
                        help='Name of the dataset to use for training.')
    parser.add_argument('-dd', '--data-dir', type=str, default='data',
                        help='Root directory where datasets are located.')
    parser.add_argument('-s', '--steps', type=int, default=128, help='Steps per epoch')
    parser.add_argument('-x', '--experiment', type=str, help='Name of the experiment')
    parser.add_argument('-w', '--workers', default=0, type=int, help='Num workers')
    parser.add_argument('-r', '--resume', action='store_true')
    parser.add_argument('-mem', '--memory', action='store_true')

    args = parser.parse_args()
    cudnn.benchmark = True

    if args.experiment is None:
        args.experiment = 'torch_%s_%s_afterburn_%d_%s_%s' % (
            args.dataset, args.model, args.patch_size, 'gray' if args.grayscale else 'rgb', args.loss)

    experiment_dir = os.path.join('experiments', args.dataset, args.loss, args.experiment)
    os.makedirs(experiment_dir, exist_ok=True)

    writer = SummaryWriter(comment=args.experiment)

    # Keep the command line next to the results.
    with open(os.path.join(experiment_dir, 'arguments.txt'), 'w') as f:
        f.write(' '.join(sys.argv[1:]))

    trainset, validset, num_classes = TT.get_dataset(args.dataset, args.data_dir,
                                                     grayscale=args.grayscale,
                                                     patch_size=args.patch_size,
                                                     keep_in_mem=args.memory)
    print('Train set size', len(trainset))
    print('Valid set size', len(validset))

    trainloader = DataLoader(trainset, batch_size=args.batch_size, shuffle=True,
                             num_workers=args.workers, pin_memory=True, drop_last=True)
    validloader = DataLoader(validset, batch_size=args.batch_size, shuffle=False,
                             num_workers=args.workers, pin_memory=True, drop_last=True)

    head_model = TT.get_model(args.model, patch_size=args.patch_size,
                              num_channels=1 if args.grayscale else 3).cuda()
    TT.restore_snapshot(head_model, None, auto_file('linknet34_checkpoint.pth'))

    # Freeze model training
    for param in head_model.parameters():
        param.requires_grad = False

    afterburner = Afterburner()
    model = nn.Sequential(head_model, nn.Sigmoid(), afterburner).cuda()

    optimizer = TT.get_optimizer(args.optimizer, afterburner.parameters(), args.learning_rate)
    loss = TT.get_loss(args.loss).cuda()

    metrics = {
        'iou': JaccardScore().cuda(),
        'accuracy': PixelAccuracy().cuda()
    }

    start_epoch = 0
    best_loss = np.inf
    train_history = pd.DataFrame()

    checkpoint_filename = os.path.join(experiment_dir, f'{args.model}_checkpoint.pth')
    if args.resume:
        start_epoch, train_history, best_loss = restore_snapshot(model, optimizer, checkpoint_filename)
        print('Resuming training from epoch', start_epoch, ' and loss', best_loss)
        print(train_history)

    print('Head :', count_parameters(head_model))
    print('Afterburner:', count_parameters(afterburner))

    for epoch in range(start_epoch, args.epochs):
        train_loss, train_scores = train(model, loss, optimizer, trainloader, epoch, metrics,
                                         summary_writer=writer)
        valid_loss, valid_scores = validate(model, loss, validloader, epoch, metrics,
                                            summary_writer=writer)

        summary = {
            'epoch': [epoch],
            'loss': [train_loss.avg],
            'val_loss': [valid_loss.avg]
        }
        for key, value in train_scores.items():
            summary[key] = [value.avg]
        for key, value in valid_scores.items():
            summary['val_' + key] = [value.avg]

        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        train_history = pd.concat([train_history, pd.DataFrame.from_dict(summary)], ignore_index=True)
        print(epoch, summary)

        if valid_loss.avg < best_loss:
            save_snapshot(model, optimizer, valid_loss.avg, epoch, train_history, checkpoint_filename)
            best_loss = valid_loss.avg
            print('Checkpoint saved', epoch, best_loss)

    print('Training is finished...')
    train_history.to_csv(os.path.join(experiment_dir, args.experiment + '.csv'),
                         index=False,
                         mode='a' if args.resume else 'w',
                         header=not args.resume)
def main():
    """Predict masks for every image under ``<data-dir>/images`` and save them as TIFFs.

    The model is restored from the given checkpoint. Each image is predicted
    either in one pass (``--full``) or tile by tile, thresholded at 0.5 and
    written as a 0/255 ``uint8`` image into ``submits/<experiment>``.
    """
    cudnn.benchmark = True

    parser = argparse.ArgumentParser()
    parser.add_argument('-g', '--grayscale', action='store_true',
                        help='Whether to use grayscale image instead of RGB')
    parser.add_argument('-m', '--model', required=True, type=str, help='Name of the model')
    parser.add_argument('-c', '--checkpoint', required=True, type=str, help='Name of the model checkpoint')
    parser.add_argument('-p', '--patch-size', type=int, default=224)
    parser.add_argument('-b', '--batch-size', type=int, default=1,
                        help='Batch Size during training, e.g. -b 64')
    parser.add_argument('-dd', '--data-dir', type=str, default='data',
                        help='Root directory where datasets are located.')
    parser.add_argument('-x', '--experiment', type=str, help='Name of the experiment')
    parser.add_argument('-f', '--full', action='store_true')
    args = parser.parse_args()

    if args.experiment is None:
        colour_tag = 'gray' if args.grayscale else 'rgb'
        args.experiment = 'inria_{}_{:d}_{}'.format(args.model, args.patch_size, colour_tag)

    experiment_dir = os.path.join('submits', args.experiment)
    os.makedirs(experiment_dir, exist_ok=True)

    in_channels = 1 if args.grayscale else 3
    model = TT.get_model(args.model, patch_size=args.patch_size, num_channels=in_channels).cuda()

    checkpoint_path = auto_file(args.checkpoint)
    start_epoch, train_history, best_loss = TT.restore_snapshot(model, None, checkpoint_path)
    print('Using weights from epoch', start_epoch - 1, best_loss)

    test_transform = aug.Sequential([
        aug.ImageOnly(aug.NormalizeImage(mean=INRIA_MEAN, std=INRIA_STD)),
    ])

    image_files = sorted(find_in_dir(os.path.join(args.data_dir, 'images')))

    model.eval()
    with torch.no_grad():
        for image_file in tqdm(image_files, total=len(image_files)):
            image = read_rgb(image_file)
            stem, _ = os.path.splitext(os.path.basename(image_file))

            if not args.full:
                mask = predict_tiled(image, model, test_transform, args.patch_size, args.batch_size)
            else:
                mask = predict_full(image, model, test_transform)

            # Binarize at 0.5 and scale to 0/255.
            binary = ((mask > 0.5) * 255).astype(np.uint8)
            cv2.imwrite(os.path.join(experiment_dir, stem + '.tif'), binary)
def test_prediction_pipeline_tta_pre():
    """Run the left-right-flip TTA prediction pipeline on the train set and plot threshold scores.

    A stored snapshot is loaded, the train dataset is predicted with
    ``tta.tta_fliplr_collate`` batches (batch size halved, since the collate
    function comes from the flip TTA), predictions are de-augmented, passed
    through ``sigmoid`` and resized to ``D.ORIGINAL_SIZE``. The
    ``threshold_mining`` curve is plotted.

    Returns:
        ``(pred_masks, dataset)`` - the stacked predictions and the dataset
        they were computed on.
    """
    from lib import tta

    device = 'cuda'

    config_path = auto_file('infallible_lamport.json')
    snapshot_path = auto_file('Oct09_23_17_wider_unet_224pad_medium_infallible_lamport_val_lb.pth')

    # Context manager closes the config file handle deterministically.
    with open(config_path) as config_stream:
        config = json.load(config_stream)
    snapshot = torch.load(snapshot_path)

    prepare_fn = D.get_prepare_fn(config['prepare'], **config)
    dataset = get_test_dataset(dataset=config['dataset'], prepare=prepare_fn, test_or_train='train')

    model = get_model(config['model'],
                      num_classes=config['num_classes'],
                      num_channels=dataset.channels(),
                      pretrained=False).to(device)

    if device == 'cpu':
        warnings.warn('Using CPU for prediction. It will be SLOW.')

    model.load_state_dict(snapshot['model'])
    model.eval()

    collate_fn = tta.tta_fliplr_collate
    batch_size = max(1, config['batch_size'] // 2)

    pred_masks = []
    with torch.no_grad():
        loader = DataLoader(dataset, batch_size=batch_size, pin_memory=True, collate_fn=collate_fn)

        for images, image_ids in tqdm(loader, total=len(loader), desc='Predicting'):
            images = images.to(device, non_blocking=True)
            output = model(images)

            # The model returns either a mask tensor or a (mask, class) pair.
            is_raw_mask = isinstance(output, torch.Tensor)
            is_mask_and_class = isinstance(output, tuple) and len(output) == 2

            if is_raw_mask:
                masks = output
            elif is_mask_and_class:
                masks, presence = output
            else:
                raise RuntimeError('Unknown output type')

            masks = dataset.resize_fn.backward(masks)
            masks = np.array([np.squeeze(x) for x in masks.cpu().numpy()])
            masks = tta.tta_fliplr_deaug(masks)
            masks = sigmoid(masks)

            if is_mask_and_class:
                presence = presence.softmax(dim=1).cpu().numpy()
                presence = tta.average_classes(presence, 2)
                presence = np.argmax(presence, axis=1)
                # Zero out masks of images whose predicted class index is 0.
                masks = zero_masks_inplace(masks, presence == 0)

            for mask, image_id in zip(masks, image_ids):
                mask = cv2.resize(mask, (D.ORIGINAL_SIZE, D.ORIGINAL_SIZE), interpolation=cv2.INTER_LANCZOS4)
                pred_masks.append(mask)

    del model, loader

    pred_masks = np.array(pred_masks)
    true_masks = D.read_train_masks(dataset.ids)

    plt.figure()
    binarization_thresholds, scores = threshold_mining(pred_masks, true_masks, min_threshold=0, max_threshold=1)
    plt.plot(binarization_thresholds, scores)
    plt.title("test_prediction_pipeline_tta_pre")
    plt.show()

    return pred_masks, dataset
def test_inspect_train_predictions():
    """Visually inspect stored train predictions of one training session.

    Exports the stored train/test predictions as images, plots the
    ``threshold_mining`` curve on all train ids and on the validation ids from
    the session config, and finally scatter-plots per-image precision (at the
    best validation threshold) against the number of non-zero pixels in the
    true mask.
    """
    train_ids = D.all_train_ids()
    train_images = D.read_train_images(train_ids)
    train_masks = D.read_train_masks(train_ids)
    print(train_ids.shape, train_images.shape, train_masks.shape)

    config_path = auto_file('wonderful_goldberg.json')
    train_archive_path = auto_file(
        'Oct10_20_28_dpn_128_medium_wonderful_goldberg_val_lb.pth_train_predictions.npz'
    )
    test_archive_path = auto_file(
        'Oct10_20_28_dpn_128_medium_wonderful_goldberg_val_lb.pth_test_predictions.npz'
    )

    convert_predictions_to_images(test_archive_path, os.path.join('test', 'test_predictions'))
    convert_predictions_to_images(train_archive_path, os.path.join('test', 'train_predictions'))

    # The archive is closed as soon as all arrays have been stacked.
    with np.load(auto_file(train_archive_path)) as archive:
        train_predictions = np.array([archive[sample_id] for sample_id in train_ids])
    print(train_predictions.shape)

    # Threshold curve on the full train set
    threshold, lb_score = threshold_mining(train_predictions, train_masks,
                                           min_threshold=0.15, max_threshold=0.85, step=0.005)

    plt.figure()
    plt.plot(threshold, lb_score)
    plt.tight_layout()

    i = np.argmax(lb_score)
    best_threshold, best_lb_score = float(threshold[i]), float(lb_score[i])
    print(best_threshold, best_lb_score)

    # Threshold curve on the validation ids only
    config_file = auto_file(config_path)
    with open(config_file) as config_stream:
        config = json.load(config_stream)

    valid_ids = np.array(config['valid_set'])
    valid_mask = D.get_selection_mask(train_ids, valid_ids)
    val_threshold, val_lb_score = threshold_mining(train_predictions[valid_mask], train_masks[valid_mask],
                                                   min_threshold=0.15, max_threshold=0.85, step=0.005)

    plt.figure()
    plt.plot(val_threshold, val_lb_score)
    plt.tight_layout()
    plt.show()

    val_i = np.argmax(val_lb_score)
    val_th = val_threshold[val_i]
    print(val_threshold[val_i], val_lb_score[val_i])

    # Only the first returned value is used below.
    precision, _, _ = do_kaggle_metric(train_predictions, train_masks, val_th)

    x = []
    y = []
    for prec, true_mask in zip(precision, train_masks):
        x.append(prec)
        y.append(cv2.countNonZero(true_mask))

    plt.figure()
    plt.scatter(x, y)
    plt.tight_layout()
    plt.show()