def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('root', help='checkpoint root')
    arg('--batch-size', type=int, default=32)
    arg('--patch-size', type=int, default=256)
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=2)
    arg('--fold', type=int, default=1)
    arg('--n-folds', type=int, default=5)
    arg('--stratified', action='store_true')
    arg('--mode', choices=[
        'train', 'validation', 'predict_valid', 'predict_test',
        'predict_all_valid'], default='train')
    arg('--clean', action='store_true')
    arg('--epoch-size', type=int)
    arg('--limit', type=int, help='Use only N images for train/valid')
    arg('--min-scale', type=float, default=1)
    arg('--max-scale', type=float, default=1)
    arg('--test-scale', type=float, default=0.5)
    args = parser.parse_args()

    coords = utils.load_coords()
    train_paths, valid_paths = utils.train_valid_split(args, coords)
    root = Path(args.root)
    model = SSPD()
    model = utils.cuda(model)
    criterion = SSPDLoss()

    if args.mode == 'train':
        kwargs = dict(min_scale=args.min_scale, max_scale=args.max_scale)
        train_loader, valid_loader = (
            utils.make_loader(PointDataset, args, train_paths, coords, **kwargs),
            utils.make_loader(PointDataset, args, valid_paths, coords,
                              deterministic=True, **kwargs))
        if root.exists() and args.clean:
            shutil.rmtree(str(root))
        root.mkdir(exist_ok=True)
        root.joinpath('params.json').write_text(
            json.dumps(vars(args), indent=True, sort_keys=True))
        utils.train(args, model, criterion,
                    train_loader=train_loader, valid_loader=valid_loader,
                    save_predictions=save_predictions)
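# Entry-point guard plus an illustrative invocation for this training script.
# The file name `sspd.py`, the checkpoint directory and the flag values below
# are assumptions for the example, not values prescribed by the project:
#
#   python sspd.py runs/sspd-fold1 --mode train --batch-size 32 --lr 1e-4
if __name__ == '__main__':
    main()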
def load_all_features(root: Path, only_valid: bool, args) -> Dict[str, np.ndarray]:
    features_path = root.joinpath('features.npz')  # type: Path
    coords = utils.load_coords()
    pred_paths = list(root.glob('*-pred.npy'))
    get_id = lambda p: int(p.name.split('-')[0])
    if only_valid:
        valid_ids = {int(p.stem) for p in utils.labeled_paths()}
        pred_paths = [p for p in pred_paths if get_id(p) in valid_ids]
    if args.limit:
        pred_paths = pred_paths[:args.limit]
    if not args.new_features and features_path.exists():
        # reuse cached features unless --new-features was requested
        print('Loading features...')
        data = dict(np.load(str(features_path)))
        clf_features_path = root.joinpath('clf_features.npz')
        if clf_features_path.exists():
            # append extra 'clf-*' features if they were saved alongside
            clf_features = np.load(str(clf_features_path))['xs']
            data['xs'] = np.concatenate([data['xs'], clf_features], axis=2)
            for i in range(clf_features.shape[2]):
                feature_name = 'clf-{}'.format(i)
                ALL_FEATURE_NAMES.append(feature_name)
                FEATURE_NAMES.append(feature_name)
        print('done.')
        ids = [get_id(p) for p in pred_paths]
        assert set(ids) == set(data['ids'][0])
        return data
    print('{} total'.format(len(pred_paths)))
    # per-class accumulators: image ids, scales, patch features and targets
    data = {k: [[] for _ in range(utils.N_CLASSES)]
            for k in ['ids', 'scales', 'xs', 'ys']}
    blob_data = {k: [[] for _ in range(utils.N_CLASSES)]
                 for k in ['blobs', 'blob_ids']}
    with multiprocessing.pool.Pool(processes=24) as pool:
        for id, scale, xs, ys, blobs, blob_ids in tqdm.tqdm(
                pool.imap(partial(load_xs_ys, coords=coords),
                          pred_paths, chunksize=2),
                total=len(pred_paths)):
            for cls in range(utils.N_CLASSES):
                data['ids'][cls].extend([id] * len(ys[cls]))
                data['scales'][cls].extend([scale] * len(ys[cls]))
                data['xs'][cls].extend(xs[cls])
                data['ys'][cls].extend(ys[cls])
                blob_data['blobs'][cls].append((id, scale, blobs[cls]))
                blob_data['blob_ids'][cls].extend(blob_ids[cls])
    data = {k: np.array(v, dtype=np.int32 if k in {'ids', 'ys'} else np.float32)
            for k, v in data.items()}
    # cache features and blob data for subsequent runs
    with features_path.open('wb') as f:
        np.savez(f, **data)
    with root.joinpath('blobs.pkl').open('wb') as f:
        pickle.dump(blob_data, f)
    return data
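# Minimal usage sketch for load_all_features. The checkpoint directory and the
# hand-built argparse.Namespace below are illustrative assumptions; in the real
# pipeline the namespace comes from the script's own argument parser.
def _demo_load_features(root_dir='runs/unet-fold1'):
    import argparse
    args = argparse.Namespace(limit=None, new_features=False)
    data = load_all_features(Path(root_dir), only_valid=True, args=args)
    # 'xs'/'ys' hold parallel per-patch features and target counts per class
    for cls in range(utils.N_CLASSES):
        print('class {}: {} patches'.format(cls, len(data['ys'][cls])))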
def __init__(self, fundus_dir, vessel_dir, od_label_path, fovea_label_path,
             batch_size):
    self.fundus, self.vessel, self.coords = utils.load_coords(
        fundus_dir, vessel_dir, od_label_path, fovea_label_path)
    self.is_train = False
    super(ValidationBatchFetcher, self).__init__(batch_size)
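# Hypothetical construction example for ValidationBatchFetcher; the directories
# and label-file paths below are placeholders, not paths shipped with the project:
#
#   fetcher = ValidationBatchFetcher(
#       'data/fundus', 'data/vessel',
#       'data/od_labels.csv', 'data/fovea_labels.csv', batch_size=8)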
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('root', help='checkpoint root')
    arg('--batch-size', type=int, default=32)
    arg('--patch-size', type=int, default=256)
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=2)
    arg('--fold', type=int, default=1)
    arg('--bg-weight', type=float, default=1.0, help='background weight')
    arg('--dice-weight', type=float, default=0.0)
    arg('--n-folds', type=int, default=5)
    arg('--stratified', action='store_true')
    arg('--mode', choices=[
        'train', 'valid', 'predict_valid', 'predict_test',
        'predict_all_valid'], default='train')
    arg('--model-path',
        help='path to model file to use for validation/prediction')
    arg('--clean', action='store_true')
    arg('--epoch-size', type=int)
    arg('--limit', type=int, help='Use only N images for train/valid')
    arg('--min-scale', type=float, default=1)
    arg('--max-scale', type=float, default=1)
    arg('--test-scale', type=float, default=0.5)
    arg('--oversample', type=float, default=0.0,
        help='sample near lion with given probability')
    arg('--with-head', action='store_true')
    arg('--pred-oddity', type=int, help='set to 0/1 to predict even/odd images')
    args = parser.parse_args()

    coords = utils.load_coords()
    train_paths, valid_paths = utils.train_valid_split(args)
    root = Path(args.root)
    model = UNetWithHead() if args.with_head else UNet()
    model = utils.cuda(model)
    criterion = Loss(dice_weight=args.dice_weight, bg_weight=args.bg_weight)
    loader_kwargs = dict(
        min_scale=args.min_scale,
        max_scale=args.max_scale,
        downscale=args.with_head,
    )
    if args.mode == 'train':
        train_loader, valid_loader = (
            utils.make_loader(SegmentationDataset, args, train_paths, coords,
                              oversample=args.oversample, **loader_kwargs),
            utils.make_loader(SegmentationDataset, args, valid_paths, coords,
                              deterministic=True, **loader_kwargs))
        if root.exists() and args.clean:
            shutil.rmtree(str(root))  # remove dir tree
        root.mkdir(exist_ok=True)
        root.joinpath('params.json').write_text(
            json.dumps(vars(args), indent=True, sort_keys=True))
        utils.train(args, model, criterion,
                    train_loader=train_loader, valid_loader=valid_loader,
                    save_predictions=save_predictions)
    elif args.mode == 'valid':
        utils.load_best_model(model, root, args.model_path)
        valid_loader = utils.make_loader(
            SegmentationDataset, args, valid_paths, coords,
            deterministic=True, **loader_kwargs)
        utils.validation(model, criterion,
                         tqdm.tqdm(valid_loader, desc='Validation'))
    else:
        utils.load_best_model(model, root, args.model_path)
        if args.mode in {'predict_valid', 'predict_all_valid'}:
            if args.mode == 'predict_all_valid':
                # include all paths we did not train on
                # (makes sense only with --limit)
                valid_paths = list(
                    set(valid_paths) |
                    (set(utils.labeled_paths()) - set(train_paths)))
            predict(model, valid_paths, out_path=root,
                    patch_size=args.patch_size,
                    batch_size=args.batch_size,
                    min_scale=args.min_scale, max_scale=args.max_scale,
                    downsampled=args.with_head)
        elif args.mode == 'predict_test':
            out_path = root.joinpath('test')
            out_path.mkdir(exist_ok=True)
            predicted = {p.stem.split('-')[0] for p in out_path.glob('*.npy')}
            test_paths = [p for p in utils.DATA_ROOT.joinpath('Test').glob('*.png')
                          if p.stem not in predicted]
            if args.pred_oddity is not None:
                assert args.pred_oddity in {0, 1}
                test_paths = [p for p in test_paths
                              if int(p.stem) % 2 == args.pred_oddity]
            predict(model, test_paths, out_path,
                    patch_size=args.patch_size,
                    batch_size=args.batch_size,
                    test_scale=args.test_scale,
                    is_test=True, downsampled=args.with_head)
        else:
            parser.error('Unexpected mode {}'.format(args.mode))
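# Illustrative pipeline for this segmentation script. The file name `unet.py`,
# the checkpoint directory and the flag values are assumptions for the example:
#
#   python unet.py runs/unet --mode train --dice-weight 0.1 --bg-weight 1.0
#   python unet.py runs/unet --mode valid
#   python unet.py runs/unet --mode predict_valid
#   # split test-set prediction across two processes via --pred-oddity:
#   python unet.py runs/unet --mode predict_test --pred-oddity 0
#   python unet.py runs/unet --mode predict_test --pred-oddity 1
if __name__ == '__main__':
    main()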
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('root', help='checkpoint root')
    arg('out_path', help='path to UNet features', type=Path)
    arg('--batch-size', type=int, default=32)
    arg('--patch-size', type=int, default=160)
    arg('--offset', type=int, default=6)
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=2)
    arg('--fold', type=int, default=1)
    arg('--n-folds', type=int, default=5)
    arg('--stratified', action='store_true')
    arg('--mode', choices=[
        'train', 'valid', 'predict_valid', 'predict_test',
        'predict_all_valid'], default='train')
    arg('--model-path',
        help='path to model file to use for validation/prediction')
    arg('--clean', action='store_true')
    arg('--epoch-size', type=int)
    arg('--limit', type=int, help='Use only N images for train/valid')
    arg('--min-scale', type=float, default=1)
    arg('--max-scale', type=float, default=1)
    arg('--test-scale', type=float, default=0.5)
    arg('--pred-oddity', type=int, help='set to 0/1 to predict even/odd images')
    args = parser.parse_args()

    coords = utils.load_coords()
    train_paths, valid_paths = utils.train_valid_split(args, coords)
    root = Path(args.root)
    model = VGGModel(args.patch_size)
    model = utils.cuda(model)
    criterion = nn.CrossEntropyLoss()
    loader_kwargs = dict(min_scale=args.min_scale, max_scale=args.max_scale,
                         offset=args.offset)
    if args.mode == 'train':
        train_loader, valid_loader = (
            utils.make_loader(ClassificationDataset, args, train_paths, coords,
                              **loader_kwargs),
            utils.make_loader(ClassificationDataset, args, valid_paths, coords,
                              deterministic=True, **loader_kwargs))
        if root.exists() and args.clean:
            shutil.rmtree(str(root))
        root.mkdir(exist_ok=True)
        root.joinpath('params.json').write_text(
            json.dumps(vars(args), indent=True, sort_keys=True))
        utils.train(
            args, model, criterion,
            train_loader=train_loader, valid_loader=valid_loader,
            save_predictions=save_predictions,
            is_classification=True,
            make_optimizer=lambda lr: SGD([
                {'params': model.features.parameters(), 'lr': lr},
                {'params': model.classifier.parameters(), 'lr': lr},
            ], nesterov=True, momentum=0.9),
        )
    elif args.mode == 'valid':
        utils.load_best_model(model, root, args.model_path)
        valid_loader = utils.make_loader(
            ClassificationDataset, args, valid_paths, coords,
            deterministic=True, **loader_kwargs)
        utils.validation(model, criterion,
                         tqdm.tqdm(valid_loader, desc='Validation'),
                         is_classification=True)
    else:
        utils.load_best_model(model, root, args.model_path)
        if args.mode in {'predict_valid', 'predict_all_valid'}:
            if args.mode == 'predict_all_valid':
                # include all paths we did not train on
                # (makes sense only with --limit)
                valid_paths = list(
                    set(valid_paths) |
                    (set(utils.labeled_paths()) - set(train_paths)))
            predict(model, valid_paths, out_path=args.out_path,
                    patch_size=args.patch_size,
                    batch_size=args.batch_size,
                    min_scale=args.min_scale, max_scale=args.max_scale)
        elif args.mode == 'predict_test':
            assert False  # FIXME - use out_path too
            out_path = root.joinpath('test')
            out_path.mkdir(exist_ok=True)
            predicted = {p.stem.split('-')[0] for p in out_path.glob('*.npy')}
            test_paths = [p for p in utils.DATA_ROOT.joinpath('Test').glob('*.jpg')
                          if p.stem not in predicted]
            if args.pred_oddity is not None:
                assert args.pred_oddity in {0, 1}
                test_paths = [p for p in test_paths
                              if int(p.stem) % 2 == args.pred_oddity]
            predict(model, test_paths, out_path,
                    patch_size=args.patch_size,
                    batch_size=args.batch_size,
                    test_scale=args.test_scale,
                    is_test=True)
        else:
            parser.error('Unexpected mode {}'.format(args.mode))
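# Illustrative invocation of the patch-classification script. The file name
# `classifier.py` and both paths are assumptions; note the extra positional
# `out_path` argument where predictions are written during predict_valid:
#
#   python classifier.py runs/clf runs/unet-fold1 --mode train
#   python classifier.py runs/clf runs/unet-fold1 --mode predict_valid
if __name__ == '__main__':
    main()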
def train(data, *regs, save_to=None, concat_features=False, explain=False):
    coords = utils.load_coords()
    concated_xs = np.concatenate(data['xs'], axis=1)
    all_rmse, all_patch_rmse, all_baselines = [], [], []
    regs_name = ', '.join(type(reg).__name__ for reg in regs)
    fitted_regs = []
    expl_by_cls = defaultdict(list)
    for cls in range(utils.N_CLASSES):
        ids = data['ids'][cls]
        scales = data['scales'][cls]
        ys = data['ys'][cls]
        xs = input_features(
            concated_xs if concat_features else data['xs'][cls])
        # indices = np.array(sorted(range(len(ids)),
        #                           key=lambda i: (scales[i], ids[i])))
        # ids, xs, ys = ids[indices], xs[indices], ys[indices]
        pred, fitted = train_predict(regs, xs, ys, ids)
        unique_ids = sorted(set(ids))
        # aggregate patch-level predictions into per-image counts
        pred_by_id = get_pred_by_id(ids, pred, unique_ids)
        # ground-truth per-image counts for this class
        ys_by_id = []
        for img_id in unique_ids:
            try:
                ys_by_id.append((coords.loc[[img_id]].cls == cls).sum())
            except KeyError:
                ys_by_id.append(0)
        pred_by_id = round_prediction(pred_by_id)
        patch_rmse = np.sqrt(metrics.mean_squared_error(ys, pred))
        rmse = np.sqrt(metrics.mean_squared_error(ys_by_id, pred_by_id))
        # baseline: cross-validated constant (mean) prediction per image
        baseline_rmse = np.sqrt(metrics.mean_squared_error(
            cross_val_predict(DummyRegressor(), [[0]] * len(ys_by_id),
                              ys_by_id, cv=5),
            ys_by_id))
        print('cls {}, patch mean {:.3f}, patch RMSE {:.3f}, '
              'image mean {:.2f}, image RMSE {:.2f}, baseline RMSE {:.2f}'
              .format(cls, np.mean(ys), patch_rmse,
                      np.mean(ys_by_id), rmse, baseline_rmse))
        all_rmse.append(rmse)
        all_patch_rmse.append(patch_rmse)
        all_baselines.append(baseline_rmse)
        if save_to:
            fitted_regs.append(fitted)
        if explain:
            for reg in fitted:
                expl = eli5.explain_weights(reg, feature_names=FEATURE_NAMES)
                expl_by_cls[cls].append(expl)
                print(type(reg).__name__,
                      format_as_text(expl, show=('method', 'targets',
                                                 'feature_importances')))
    print('{} with {} features: mean patch RMSE {:.3f}, mean image RMSE {:.2f}, '
          'mean baseline RMSE {:.2f}'
          .format(regs_name, ', '.join(FEATURE_NAMES),
                  np.mean(all_patch_rmse), np.mean(all_rmse),
                  np.mean(all_baselines)))
    if save_to:
        joblib.dump(fitted_regs, save_to)
        print('Saved to', save_to)
    if explain:
        # collect per-class, per-estimator feature importances into one table
        dfs = []
        for cls, expls in expl_by_cls.items():
            for expl in expls:
                df = eli5.format_as_dataframe(expl)
                df['cls'] = cls
                df['estimator'] = expl.estimator.split('(')[0]
                dfs.append(df)
        df = pd.concat(dfs)
        df.reset_index(inplace=True)
        df['feature'] = df['index']
        del df['index']
        df = df[['feature', 'cls', 'estimator', 'std', 'weight']]
        df.to_csv('feature_importances.csv', index=None)
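# Minimal sketch of running the regression stage on cached features, assuming
# load_all_features (above) lives in the same module. The checkpoint directory,
# the Namespace fields and the choice of ExtraTreesRegressor are illustrative
# assumptions, not the project's tuned configuration.
def _demo_train_regressors(root_dir='runs/unet-fold1'):
    import argparse
    from sklearn.ensemble import ExtraTreesRegressor
    args = argparse.Namespace(limit=None, new_features=False)
    data = load_all_features(Path(root_dir), only_valid=True, args=args)
    # fit one regressor per class and save the fitted models
    train(data, ExtraTreesRegressor(n_estimators=100),
          save_to='regs.joblib', explain=False)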