def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--dice-weight', type=float)
    arg('--nll-weights', action='store_true')
    arg('--device-ids', type=str, help='For example 0,1 to run on two GPUs')
    arg('--fold', type=int, help='fold', default=0)
    arg('--size', type=str, default='1280x1920',
        help='Input size, for example 288x384. Must be multiples of 32')
    utils.add_args(parser)
    args = parser.parse_args()

    model_name = 'unet_11'
    args.root = str(utils.MODEL_PATH / model_name)
    root = Path(args.root)
    root.mkdir(exist_ok=True, parents=True)

    model = UNet11()
    # Fall back to all available GPUs when --device-ids is not given.
    if args.device_ids:
        device_ids = list(map(int, args.device_ids.split(',')))
    else:
        device_ids = None
    model = nn.DataParallel(model, device_ids=device_ids).cuda()
    loss = Loss()

    def make_loader(ds_root: Path, to_augment=False, shuffle=False):
        return DataLoader(
            dataset=CarvanaDataset(ds_root, to_augment=to_augment),
            shuffle=shuffle,
            num_workers=args.workers,
            batch_size=args.batch_size,
            pin_memory=True)

    train_root = utils.DATA_ROOT / str(args.fold) / 'train'
    valid_root = utils.DATA_ROOT / str(args.fold) / 'val'
    valid_loader = make_loader(valid_root)
    train_loader = make_loader(train_root, to_augment=True, shuffle=True)

    root.joinpath('params.json').write_text(
        json.dumps(vars(args), indent=True, sort_keys=True))

    utils.train(
        init_optimizer=lambda lr: Adam(model.parameters(), lr=lr),
        args=args,
        model=model,
        criterion=loss,
        train_loader=train_loader,
        valid_loader=valid_loader,
        validation=validation,
        fold=args.fold)


if __name__ == '__main__':
    main()
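# A hedged sketch of the Loss criterion instantiated above: the real class is
# defined elsewhere in the project, so names and the exact formulation here are
# assumptions. It combines BCE-with-logits with a soft-Dice term, which is what
# the --dice-weight option in the other training scripts suggests.
import torch
from torch import nn


class Loss(nn.Module):
    def __init__(self, dice_weight=1.0):
        super(Loss, self).__init__()
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.dice_weight = dice_weight

    def forward(self, outputs, targets):
        # outputs: raw logits; targets: {0, 1} float mask of the same shape.
        loss = self.bce_loss(outputs, targets)
        if self.dice_weight:
            eps = 1e-7
            probs = torch.sigmoid(outputs)
            intersection = (probs * targets).sum()
            union = probs.sum() + targets.sum() + eps
            loss += self.dice_weight * (1 - 2 * intersection / union)
        return loss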
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--mode', choices=['train', 'valid', 'predict_valid', 'predict_test'],
        default='train')
    arg('--limit', type=int, help='use only N images for valid/train')
    arg('--dice-weight', type=float, default=0.0)
    arg('--nll-weights', action='store_true')
    arg('--device-ids', type=str, help='For example 0,1 to run on two GPUs')
    arg('--size', type=str, default='768x512',
        help='Input size, for example 768x512. Must be multiples of 32')
    arg('--model')
    utils.add_args(parser)
    args = parser.parse_args()

    root = Path(args.root)
    if args.model:
        model = getattr(unet_models, args.model)()
    else:
        model = UNet()

    w, h = map(int, args.size.split('x'))
    if not (w % 32 == 0 and h % 32 == 0):
        parser.error('Wrong --size: both dimensions should be multiples of 32')
    size = (w, h)
    out_size = (w // model.output_downscaled, h // model.output_downscaled)

    if utils.cuda_is_available:
        if args.device_ids:
            device_ids = list(map(int, args.device_ids.split(',')))
        else:
            device_ids = None
        model = nn.DataParallel(model, device_ids=device_ids).cuda()

    if args.nll_weights:
        # Weight each class by the square root of its inverse pixel ratio.
        class_weights = np.sqrt(np.array(
            [1 / ratio for cls, ratio in dataset.CLS_RATIOS.items()]))
        class_weights /= class_weights.sum()
    else:
        class_weights = None
    loss = Loss(dice_weight=args.dice_weight, class_weights=class_weights)

    if args.limit:
        limit = args.limit
        valid_limit = limit // 5
    else:
        limit = valid_limit = None

    def make_loader(ds_root: Path, limit_: int):
        return DataLoader(
            dataset=StreetDataset(ds_root, size, out_size=out_size, limit=limit_),
            shuffle=True,
            num_workers=args.workers,
            batch_size=args.batch_size,
        )

    valid_root = utils.DATA_ROOT / 'validation'

    if args.mode == 'train':
        train_loader = make_loader(utils.DATA_ROOT / 'training', limit)
        valid_loader = make_loader(valid_root, valid_limit)
        if root.exists() and args.clean:
            shutil.rmtree(str(root))
        root.mkdir(exist_ok=True)
        root.joinpath('params.json').write_text(
            json.dumps(vars(args), indent=True, sort_keys=True))
        utils.train(
            init_optimizer=lambda lr: Adam(model.parameters(), lr=lr),
            args=args,
            model=model,
            criterion=loss,
            train_loader=train_loader,
            valid_loader=valid_loader,
            validation=validation,
            save_predictions=save_predictions,
            patience=2,
        )
    elif args.mode == 'valid':
        valid_loader = make_loader(valid_root, valid_limit)
        state = torch.load(str(Path(args.root) / 'model.pt'))
        model.load_state_dict(state['model'])
        validation(model, loss, tqdm.tqdm(valid_loader, desc='Validation'))
    elif args.mode == 'predict_valid':
        utils.load_best_model(model, root)
        predict(model, valid_root, out_path=root / 'validation',
                size=size, batch_size=args.batch_size)
    elif args.mode == 'predict_test':
        utils.load_best_model(model, root)
        test_root = utils.DATA_ROOT / 'testing'
        predict(model, test_root, out_path=root / 'testing',
                size=size, batch_size=args.batch_size)
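# A hedged sketch of the validation callback passed to utils.train and called in
# the 'valid' branch above; the real function is defined elsewhere in this module.
# It follows the same pre-0.4 PyTorch idioms (Variable, volatile, loss.data[0]) as
# the surrounding code and assumes a GPU is available.
import numpy as np
from torch.autograd import Variable


def validation(model, criterion, valid_loader):
    model.eval()
    losses = []
    for inputs, targets in valid_loader:
        inputs = Variable(inputs.cuda(), volatile=True)
        targets = Variable(targets.cuda(), volatile=True)
        outputs = model(inputs)
        losses.append(criterion(outputs, targets).data[0])
    valid_loss = np.mean(losses)
    print('Valid loss: {:.5f}'.format(valid_loss))
    return {'valid_loss': valid_loss}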
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('root', help='checkpoint root')
    arg('--batch-size', type=int, default=32)
    arg('--patch-size', type=int, default=256)
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=2)
    arg('--fold', type=int, default=1)
    arg('--bg-weight', type=float, default=1.0, help='background weight')
    arg('--dice-weight', type=float, default=0.0)
    arg('--n-folds', type=int, default=5)
    arg('--stratified', action='store_true')
    arg('--mode', choices=[
        'train', 'valid', 'predict_valid', 'predict_test', 'predict_all_valid'],
        default='train')
    arg('--model-path', help='path to model file to use for validation/prediction')
    arg('--clean', action='store_true')
    arg('--epoch-size', type=int)
    arg('--limit', type=int, help='Use only N images for train/valid')
    arg('--min-scale', type=float, default=1)
    arg('--max-scale', type=float, default=1)
    arg('--test-scale', type=float, default=0.5)
    arg('--oversample', type=float, default=0.0,
        help='sample near lion with given probability')
    arg('--with-head', action='store_true')
    arg('--pred-oddity', type=int, help='set to 0/1 to predict even/odd images')
    args = parser.parse_args()

    coords = utils.load_coords()
    train_paths, valid_paths = utils.train_valid_split(args)
    root = Path(args.root)
    model = UNetWithHead() if args.with_head else UNet()
    model = utils.cuda(model)
    criterion = Loss(dice_weight=args.dice_weight, bg_weight=args.bg_weight)
    loader_kwargs = dict(
        min_scale=args.min_scale,
        max_scale=args.max_scale,
        downscale=args.with_head,
    )

    if args.mode == 'train':
        train_loader, valid_loader = (
            utils.make_loader(SegmentationDataset, args, train_paths, coords,
                              oversample=args.oversample, **loader_kwargs),
            utils.make_loader(SegmentationDataset, args, valid_paths, coords,
                              deterministic=True, **loader_kwargs))
        if root.exists() and args.clean:
            shutil.rmtree(str(root))  # remove dir tree
        root.mkdir(exist_ok=True)
        root.joinpath('params.json').write_text(
            json.dumps(vars(args), indent=True, sort_keys=True))
        utils.train(args, model, criterion,
                    train_loader=train_loader,
                    valid_loader=valid_loader,
                    save_predictions=save_predictions)
    elif args.mode == 'valid':
        utils.load_best_model(model, root, args.model_path)
        valid_loader = utils.make_loader(
            SegmentationDataset, args, valid_paths, coords,
            deterministic=True, **loader_kwargs)
        utils.validation(model, criterion,
                         tqdm.tqdm(valid_loader, desc='Validation'))
    else:
        utils.load_best_model(model, root, args.model_path)
        if args.mode in {'predict_valid', 'predict_all_valid'}:
            if args.mode == 'predict_all_valid':
                # include all paths we did not train on (makes sense only with --limit)
                valid_paths = list(
                    set(valid_paths) |
                    (set(utils.labeled_paths()) - set(train_paths)))
            predict(model, valid_paths, out_path=root,
                    patch_size=args.patch_size,
                    batch_size=args.batch_size,
                    min_scale=args.min_scale,
                    max_scale=args.max_scale,
                    downsampled=args.with_head)
        elif args.mode == 'predict_test':
            out_path = root.joinpath('test')
            out_path.mkdir(exist_ok=True)
            predicted = {p.stem.split('-')[0] for p in out_path.glob('*.npy')}
            test_paths = [p for p in utils.DATA_ROOT.joinpath('Test').glob('*.png')
                          if p.stem not in predicted]
            if args.pred_oddity is not None:
                assert args.pred_oddity in {0, 1}
                test_paths = [p for p in test_paths
                              if int(p.stem) % 2 == args.pred_oddity]
            predict(model, test_paths, out_path,
                    patch_size=args.patch_size,
                    batch_size=args.batch_size,
                    test_scale=args.test_scale,
                    is_test=True,
                    downsampled=args.with_head)
        else:
            parser.error('Unexpected mode {}'.format(args.mode))
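# A hedged sketch of utils.load_best_model as used in the prediction branches
# above. The checkpoint filename ('best-model.pt') and the 'model' state key are
# assumptions, mirroring how the 'valid' branch of the previous script restores
# model.pt.
import torch
from pathlib import Path


def load_best_model(model, root, model_path=None):
    # An explicit --model-path overrides the default best checkpoint in root.
    path = Path(model_path) if model_path else Path(root) / 'best-model.pt'
    state = torch.load(str(path))
    model.load_state_dict(state['model'])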
def validate(self):
    training = self.model.training
    self.model.eval()

    n_class = len(self.val_loader.dataset.class_names)
    val_loss = 0
    visualizations = []
    label_trues, label_preds = [], []
    for batch_idx, (data, target) in tqdm.tqdm(
            enumerate(self.val_loader), total=len(self.val_loader),
            desc='Valid iteration=%d' % self.iteration, ncols=80, leave=False):
        if self.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        score = self.model(data)
        score = torch.squeeze(score, 1)

        criterion = Loss()
        loss = criterion(score, target)
        # if np.isnan(float(loss.data[0])):
        #     raise ValueError('loss is nan while validating')
        # val_loss += float(loss.data[0]) / len(data)
        val_loss += float(loss.data[0])

        imgs = data.data.cpu()
        # Threshold the sigmoid output at 0.5 to get a binary mask prediction.
        mask1 = F.sigmoid(score.data) > 0.5
        mask2 = F.sigmoid(score.data) <= 0.5
        score.data[mask1] = 1
        score.data[mask2] = 0
        lbl_pred = score.data.cpu().numpy()[:, :, :]
        lbl_pred = lbl_pred.astype(int)
        lbl_true = target.data.cpu()
        for img, lt, lp in zip(imgs, lbl_true, lbl_pred):
            img, lt = self.val_loader.dataset.untransform(img, lt)
            label_trues.append(lt)
            label_preds.append(lp)
            # if len(visualizations) < 9:
            #     viz = fcn.utils.visualize_segmentation(
            #         lbl_pred=lp, lbl_true=lt, img=img, n_class=n_class)
            #     visualizations.append(viz)

    metrics = utils.label_accuracy_score(label_trues, label_preds, n_class)
    # out = osp.join(self.out, 'visualization_viz')
    # if not osp.exists(out):
    #     os.makedirs(out)
    # out_file = osp.join(out, 'iter%012d.jpg' % self.iteration)
    # scipy.misc.imsave(out_file, fcn.utils.get_tile_image(visualizations))
    val_loss /= len(self.val_loader)

    with open(osp.join(self.out, 'log.csv'), 'a') as f:
        elapsed_time = (
            datetime.datetime.now(pytz.timezone('Asia/Tokyo')) -
            self.timestamp_start).total_seconds()
        log = [self.epoch, self.iteration] + [''] * 5 + \
            [val_loss] + list(metrics) + [elapsed_time]
        log = map(str, log)
        f.write(','.join(log) + '\n')

    mean_iu = metrics[2]
    is_best = mean_iu > self.best_mean_iu
    if is_best:
        self.best_mean_iu = mean_iu
    torch.save({
        'epoch': self.epoch,
        'iteration': self.iteration,
        'arch': self.model.__class__.__name__,
        # 'optim_state_dict': self.optim.state_dict(),
        'model_state_dict': self.model.state_dict(),
        'best_mean_iu': self.best_mean_iu,
    }, osp.join(self.out, 'checkpoint.pth.tar'))
    if is_best:
        shutil.copy(osp.join(self.out, 'checkpoint.pth.tar'),
                    osp.join(self.out, 'model_best.pth.tar'))

    if training:
        self.model.train()
def train_epoch(self):
    self.model.train()
    n_class = len(self.train_loader.dataset.class_names)
    for batch_idx, (data, target) in tqdm.tqdm(
            enumerate(self.train_loader), total=len(self.train_loader),
            desc='Train epoch=%d' % self.epoch, ncols=80, leave=False):
        iteration = batch_idx + self.epoch * len(self.train_loader)
        if self.iteration != 0 and (iteration - 1) != self.iteration:
            continue  # for resuming
        self.iteration = iteration

        if self.iteration % self.interval_validate == 0:
            self.validate()
        assert self.model.training

        if self.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        # lr = cyclic_lr(self.epoch)
        # self.optim = self.opt(lr)
        for optimizer in self.optim:
            optimizer.zero_grad()
        score = self.model(data)
        score = torch.squeeze(score, 1)

        criterion = Loss()
        loss = criterion(score, target)
        # loss /= len(data)
        # if np.isnan(float(loss.data[0])):
        #     raise ValueError('loss is nan while training')
        loss.backward()
        for optimizer in self.optim:
            optimizer.step()
        # self.optim.step()

        metrics = []
        # Threshold the sigmoid output at 0.5 to get binary predictions.
        mask1 = F.sigmoid(score.data) > 0.5
        mask2 = F.sigmoid(score.data) <= 0.5
        score.data[mask1] = 1
        score.data[mask2] = 0
        lbl_pred = score.data.cpu().numpy()[:, :, :]
        lbl_pred = lbl_pred.astype(int)
        lbl_true = target.data.cpu().numpy()
        acc, acc_cls, mean_iu, fwavacc = utils.label_accuracy_score(
            lbl_true, lbl_pred, n_class=n_class)
        metrics.append((acc, acc_cls, mean_iu, fwavacc))
        metrics = np.mean(metrics, axis=0)

        with open(osp.join(self.out, 'log.csv'), 'a') as f:
            elapsed_time = (
                datetime.datetime.now(pytz.timezone('Asia/Tokyo')) -
                self.timestamp_start).total_seconds()
            log = [self.epoch, self.iteration] + [loss.data[0]] + \
                metrics.tolist() + [''] * 5 + [elapsed_time]
            log = map(str, log)
            f.write(','.join(log) + '\n')

        if self.iteration >= self.max_iter:
            break
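# A hedged sketch of utils.label_accuracy_score used in validate() and
# train_epoch() above: the standard FCN evaluation metrics (overall accuracy,
# mean class accuracy, mean IU, frequency-weighted accuracy) computed from a
# confusion matrix. The implementation actually shipped in utils may differ.
import numpy as np


def _fast_hist(label_true, label_pred, n_class):
    # Per-image confusion matrix, ignoring labels outside [0, n_class).
    mask = (label_true >= 0) & (label_true < n_class)
    return np.bincount(
        n_class * label_true[mask].astype(int) + label_pred[mask],
        minlength=n_class ** 2).reshape(n_class, n_class)


def label_accuracy_score(label_trues, label_preds, n_class):
    hist = np.zeros((n_class, n_class))
    for lt, lp in zip(label_trues, label_preds):
        hist += _fast_hist(np.asarray(lt).flatten(),
                           np.asarray(lp).flatten(), n_class)
    acc = np.diag(hist).sum() / hist.sum()
    acc_cls = np.nanmean(np.diag(hist) / hist.sum(axis=1))
    iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
    mean_iu = np.nanmean(iu)
    freq = hist.sum(axis=1) / hist.sum()
    fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
    return acc, acc_cls, mean_iu, fwavacc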