def parse_args():
    """Parse command-line arguments and load the experiment config.

    Returns:
        tuple: (args, cfg) where ``args`` is the parsed argparse namespace
        and ``cfg`` is the loaded config with ``EXPS_PATH`` and
        ``RESULTS_PATH`` normalized to ``Path`` objects (``RESULTS_PATH``
        is created if missing).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('model_type', choices=ALL_MCONFIGS.keys())
    parser.add_argument('checkpoint', type=str,
                        help='The path to the checkpoint. '
                             'This can be a relative path (relative to cfg.MODELS_PATH) '
                             'or an absolute path. The file extension can be omitted.')
    parser.add_argument(
        '--images', type=str,
        help='Path to directory with .jpg images to get predictions for.'
    )
    parser.add_argument(
        '--masks', type=str,
        help='Path to directory with .png binary masks for images, named exactly like images without last _postfix.'
    )
    parser.add_argument(
        '--resize', type=int, default=256,
        help='Resize image to a given size before feeding it into the network. If -1 the network input is not resized.'
    )
    parser.add_argument(
        '--original-size', action='store_true', default=False,
        help='Resize predicted image back to the original size.'
    )
    # BUG FIX: default was the int 0 while type=str. argparse applies `type`
    # only to strings coming from the command line, so the default stayed an
    # int and broke string comparisons such as `args.gpu == "-1"`.
    parser.add_argument('--gpu', type=str, default='0', help='ID of used GPU.')
    parser.add_argument('--config-path', type=str, default='./config.yml',
                        help='The path to the config file.')
    parser.add_argument(
        '--results-path', type=str, default='',
        help='The path to the harmonized images. Default path: cfg.EXPS_PATH/predictions.'
    )
    parser.add_argument(
        '--test-path', type=str,
        help='Path to text file identifying tests '
    )
    parser.add_argument(
        '--color-transfer', action='store_true', default=False,
        help='Use color transfer to get a full-resolution image'
    )
    parser.add_argument(
        '--num-inputs', type=int, default=-1,
        help='Number of inputs to run'
    )
    args = parser.parse_args()

    cfg = load_config_file(args.config_path, return_edict=True)
    cfg.EXPS_PATH = Path(cfg.EXPS_PATH)
    # Fall back to <EXPS_PATH>/predictions when no explicit results dir given.
    cfg.RESULTS_PATH = Path(args.results_path) if args.results_path else cfg.EXPS_PATH / 'predictions'
    cfg.RESULTS_PATH.mkdir(parents=True, exist_ok=True)
    logger.info(cfg)
    return args, cfg
def main():
    """Run harmonization on every image in ``args.images`` and save results.

    For each .jpg image, loads the matching mask (image name without its
    last ``_postfix``, with .png extension), optionally resizes both to
    ``args.resize``, predicts the harmonized image and writes it as JPEG
    (quality 85) into ``cfg.RESULTS_PATH``.
    """
    args, cfg = parse_args()
    device = torch.device(f'cuda:{args.gpu}')
    checkpoint_path = find_checkpoint(cfg.MODELS_PATH, args.checkpoint)
    net = load_model(args.model_type, checkpoint_path, verbose=True)
    predictor = Predictor(net, device)

    image_names = os.listdir(args.images)

    def _save_image(image_name, rgb_image):
        # BUG FIX (naming): the predictor output is RGB; convert to BGR for
        # cv2.imwrite. The original had the two variable names swapped.
        bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
        cv2.imwrite(
            str(cfg.RESULTS_PATH / f'{image_name}'),
            bgr_image,
            [cv2.IMWRITE_JPEG_QUALITY, 85]
        )

    logger.info(f'Save images to {cfg.RESULTS_PATH}')
    resize_shape = (args.resize, ) * 2
    for image_name in tqdm(image_names):
        image_path = osp.join(args.images, image_name)
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image_size = image.shape
        if resize_shape[0] > 0:
            # BUG FIX: cv2.resize's third positional parameter is `dst`,
            # not `interpolation` — the flag must be passed by keyword.
            image = cv2.resize(image, resize_shape, interpolation=cv2.INTER_LINEAR)

        # Mask shares the image name minus its trailing "_postfix".
        mask_path = osp.join(args.masks, '_'.join(image_name.split('_')[:-1]) + '.png')
        mask_image = cv2.imread(mask_path)
        if resize_shape[0] > 0:
            mask_image = cv2.resize(mask_image, resize_shape, interpolation=cv2.INTER_LINEAR)

        # Binarize the first channel at threshold 100.
        mask = mask_image[:, :, 0]
        mask[mask <= 100] = 0
        mask[mask > 100] = 1
        mask = mask.astype(np.float32)

        pred = predictor.predict(image, mask)
        if args.original_size:
            # cv2 expects (width, height); image_size is (h, w, c).
            pred = cv2.resize(pred, image_size[:-1][::-1])
        _save_image(image_name, pred)
def load_pretrained_weights(self, pretrained_path=''):
    """Load HRNet backbone weights from ``pretrained_path`` into this model.

    Checkpoint keys are remapped (``last_layer`` -> ``aux_head``, the
    ``model.`` prefix is dropped) and entries that do not exist in this
    model's state dict are skipped. Exits the process when the file is
    missing.
    """
    if not os.path.exists(pretrained_path):
        print(f'\nFile "{pretrained_path}" does not exist.')
        print('You need to specify the correct path to the pre-trained weights.\n'
              'You can download the weights for HRNet from the repository:\n'
              'https://github.com/HRNet/HRNet-Image-Classification')
        exit(1)

    current_state = self.state_dict()
    checkpoint = torch.load(pretrained_path, map_location={'cuda:0': 'cpu'})

    # Rename checkpoint keys to match this model's naming scheme.
    renamed = {}
    for key, tensor in checkpoint.items():
        renamed[key.replace('last_layer', 'aux_head').replace('model.', '')] = tensor

    total = len(renamed)
    # Keep only the parameters this model actually defines.
    matched = {key: tensor for key, tensor in renamed.items() if key in current_state}
    logger.info(f'Loaded {len(matched)} of {total} pretrained parameters for HRNet')

    current_state.update(matched)
    self.load_state_dict(current_state)
def get_optimizer(model, opt_name, opt_kwargs):
    """Build an optimizer with one parameter group per model parameter.

    Giving every parameter its own group lets an optional ``lr_mult``
    attribute on a trainable parameter scale the base learning rate
    (``opt_kwargs['lr']``) for that parameter alone.

    Args:
        model: module whose ``named_parameters()`` are optimized.
        opt_name: 'sgd', 'adam' or 'adamw' (case-insensitive).
        opt_kwargs: keyword arguments forwarded to the optimizer; must
            contain 'lr'.

    Returns:
        The constructed torch optimizer.
    """
    optimizer_classes = {
        'sgd': torch.optim.SGD,
        'adam': torch.optim.Adam,
        'adamw': torch.optim.AdamW,
    }
    base_lr = opt_kwargs['lr']

    param_groups = []
    for name, param in model.named_parameters():
        group = {'params': [param]}
        if param.requires_grad:
            # lr_mult defaults to 1.0; only non-trivial multipliers are applied.
            if not math.isclose(getattr(param, 'lr_mult', 1.0), 1.0):
                logger.info(
                    f'Applied lr_mult={param.lr_mult} to "{name}" parameter.')
                group['lr'] = group.get('lr', base_lr) * param.lr_mult
        param_groups.append(group)

    return optimizer_classes[opt_name.lower()](param_groups, **opt_kwargs)
def main():
    """Evaluate a harmonization checkpoint on one or more test datasets.

    Results are logged per dataset; when several datasets are evaluated an
    aggregated 'Overall' row is appended. A per-run log file is written to
    cfg.EXPS_PATH/evaluation_logs.
    """
    args, cfg = parse_args()
    checkpoint_path = find_checkpoint(cfg.MODELS_PATH, args.checkpoint)
    add_new_file_output_to_logger(
        logs_path=Path(cfg.EXPS_PATH) / 'evaluation_logs',
        prefix=f'{Path(checkpoint_path).stem}_',
        only_message=True)
    logger.info(vars(args))

    device = torch.device(f'cuda:{args.gpu}')
    net = load_model(args.model_type, checkpoint_path, verbose=True)
    predictor = Predictor(net, device, with_flip=args.use_flip)

    all_hubs = []
    for idx, name in enumerate(args.datasets.split(',')):
        test_set = HDataset(
            cfg.get(f'{name.upper()}_PATH'),
            split='test',
            augmentator=HCompose([RESIZE_STRATEGIES[args.resize_strategy]]),
            keep_background_prob=-1)
        hub = MetricsHub([N(), MSE(), fMSE(), PSNR(), AvgPredictTime()], name=name)
        evaluate_dataset(test_set, predictor, hub)
        all_hubs.append(hub)
        # Print the column header once, before the first dataset's row.
        if idx == 0:
            logger.info(hub.get_table_header())
        logger.info(hub)

    if len(all_hubs) > 1:
        combined = sum(all_hubs, MetricsHub([], 'Overall'))
        logger.info('-' * len(str(combined)))
        logger.info(combined)
def train(model, cfg, model_cfg, start_epoch=0):
    """Train a harmonization model for 180 epochs with crop-based augmentation.

    Trains on the HFlickr, Hday2night, HCOCO and HAdobe5k 'train' splits
    with a masked-MSE pixel loss and validates on the first three 'test'
    splits.

    Args:
        model: network to train (forwarded to SimpleHTrainer).
        cfg: experiment config; provides dataset paths, batch_size, etc.
        model_cfg: model config; provides crop_size, input_normalization
            and input_transform.
        start_epoch: epoch to resume from (default 0).
    """
    # Default to batch size 16 when the config does not set a positive value.
    cfg.batch_size = 16 if cfg.batch_size < 1 else cfg.batch_size
    cfg.val_batch_size = cfg.batch_size
    cfg.input_normalization = model_cfg.input_normalization
    crop_size = model_cfg.crop_size

    loss_cfg = edict()
    loss_cfg.pixel_loss = MaskWeightedMSE()
    loss_cfg.pixel_loss_weight = 1.0

    num_epochs = 180

    # Cap the longest side at 1024, pad small images up to the crop size,
    # then take a random crop; training additionally flips horizontally.
    train_augmentator = HCompose([
        LongestMaxSizeIfLarger(1024),
        HorizontalFlip(),
        PadIfNeeded(min_height=crop_size[0], min_width=crop_size[1], border_mode=0),
        RandomCrop(*crop_size)
    ])
    val_augmentator = HCompose([
        LongestMaxSizeIfLarger(1024),
        PadIfNeeded(min_height=crop_size[0], min_width=crop_size[1], border_mode=0),
        RandomCrop(*crop_size)
    ])

    trainset = ComposeDataset([
        HDataset(cfg.HFLICKR_PATH, split='train'),
        HDataset(cfg.HDAY2NIGHT_PATH, split='train'),
        HDataset(cfg.HCOCO_PATH, split='train'),
        HDataset(cfg.HADOBE5K_PATH, split='train'),
    ], augmentator=train_augmentator, input_transform=model_cfg.input_transform)

    # NOTE(review): the validation set omits HADOBE5K — confirm intentional.
    valset = ComposeDataset([
        HDataset(cfg.HFLICKR_PATH, split='test'),
        HDataset(cfg.HDAY2NIGHT_PATH, split='test'),
        HDataset(cfg.HCOCO_PATH, split='test'),
    ], augmentator=val_augmentator, input_transform=model_cfg.input_transform)

    optimizer_params = {'lr': 1e-3, 'betas': (0.9, 0.999), 'eps': 1e-8}
    # Decay the learning rate by 10x at epochs 160 and 175.
    lr_scheduler = partial(torch.optim.lr_scheduler.MultiStepLR,
                           milestones=[160, 175], gamma=0.1)

    trainer = SimpleHTrainer(
        model, cfg, model_cfg, loss_cfg, trainset, valset,
        optimizer='adam',
        optimizer_params=optimizer_params,
        lr_scheduler=lr_scheduler,
        metrics=[
            PSNRMetric('images', 'target_images'),
            # Denormalized metrics undo input normalization so values are
            # comparable in original pixel space.
            DenormalizedPSNRMetric(
                'images', 'target_images',
                mean=torch.tensor(cfg.input_normalization['mean'],
                                  dtype=torch.float32).view(1, 3, 1, 1),
                std=torch.tensor(cfg.input_normalization['std'],
                                 dtype=torch.float32).view(1, 3, 1, 1),
            ),
            DenormalizedMSEMetric(
                'images', 'target_images',
                mean=torch.tensor(cfg.input_normalization['mean'],
                                  dtype=torch.float32).view(1, 3, 1, 1),
                std=torch.tensor(cfg.input_normalization['std'],
                                 dtype=torch.float32).view(1, 3, 1, 1),
            )
        ],
        checkpoint_interval=5,
        image_dump_interval=500)

    logger.info(f'Starting Epoch: {start_epoch}')
    logger.info(f'Total Epochs: {num_epochs}')
    for epoch in range(start_epoch, num_epochs):
        trainer.training(epoch)
        trainer.validation(epoch)
def main():
    """Run harmonization on full-resolution images listed in a test file.

    Skips non-.jpg files, files not in ``args.test_path`` and files whose
    output already exists in ``cfg.RESULTS_PATH``. Images are resized to a
    network-friendly shape (sides divisible by 128), harmonized, optionally
    resized back and color-transferred, and written as JPEGs.
    """
    args, cfg = parse_args()
    if args.gpu == "-1":
        device = torch.device('cpu')
    else:
        device = torch.device(f'cuda:{args.gpu}')
    checkpoint_path = find_checkpoint(cfg.MODELS_PATH, args.checkpoint)
    net = load_model(args.model_type, checkpoint_path, verbose=True)
    predictor = Predictor(net, device)
    image_names = os.listdir(args.images)

    def _save_image(image_name, bgr_image, flag=""):
        # bgr_image must already be in BGR channel order for cv2.imwrite.
        file_name = image_name.split(".")[0]
        path = os.path.join(cfg.RESULTS_PATH, file_name + flag + ".jpg")
        cv2.imwrite(path, bgr_image, [cv2.IMWRITE_JPEG_QUALITY, 85])

    def _target_shape(h, w, r, d=128):
        # Scale the longest side to roughly r while keeping aspect ratio,
        # snap both sides down to a multiple of d (network constraint),
        # but never below 256.
        # BUG FIX (naming): the helper was called `f` and also reassigned
        # `f` internally, shadowing the file handle name used above.
        if h > w:
            scale = round(h / r)
            new_h = d * (r // d)
            new_w = d * ((w // scale) // d)
        else:
            scale = round(w / r)
            new_h = d * ((h // scale) // d)
            new_w = d * (r // d)
        return max(new_h, 256), max(new_w, 256)

    logger.info(f'Save images to {cfg.RESULTS_PATH}')
    already_processed = os.listdir(cfg.RESULTS_PATH)
    with open(args.test_path, "r") as f:
        test_files = [l.strip() for l in f.readlines()]

    divisor = 128
    num_processed = 0
    for image_name in tqdm(image_names):
        # Stop early when a positive --num-inputs budget is exhausted.
        if args.num_inputs > 0 and num_processed == args.num_inputs:
            break
        if image_name.split(".")[-1] != "jpg":
            continue
        if image_name not in test_files:
            continue
        file_name = image_name.split(".")[0] + "_model_output.jpg"
        if file_name in already_processed:
            print(image_name + " already done")
            continue
        print(image_name + " running")

        image_path = osp.join(args.images, image_name)
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        og_image = image.copy()
        image_size = image.shape

        if args.resize > 0:
            h, w = _target_shape(image_size[0], image_size[1], args.resize)
            print("Image shape:", image_size)
            print("New shape:", (h, w))
            resize_shape = (w, h)
            # BUG FIX: cv2.resize's third positional parameter is `dst`, not
            # `interpolation` — the flag must be passed by keyword (here and
            # at the three resize calls below).
            image = cv2.resize(image, resize_shape, interpolation=cv2.INTER_LINEAR)
        else:
            # No target size: just snap both sides down to the divisor.
            h, w, _ = image.shape
            shape = (divisor * (w // divisor), divisor * (h // divisor))
            image = cv2.resize(image, shape, interpolation=cv2.INTER_LINEAR)

        mask_path = osp.join(args.masks, '_'.join(image_name.split('_')[:-1]) + '.png')
        mask_image = cv2.imread(mask_path)
        og_mask = mask_image.copy()
        if args.resize > 0:
            mask_image = cv2.resize(mask_image, resize_shape, interpolation=cv2.INTER_LINEAR)
        else:
            h, w, _ = mask_image.shape
            shape = (divisor * (w // divisor), divisor * (h // divisor))
            mask_image = cv2.resize(mask_image, shape, interpolation=cv2.INTER_LINEAR)

        # Binarize the first mask channel at threshold 100.
        mask = mask_image[:, :, 0]
        mask[mask <= 100] = 0
        mask[mask > 100] = 1
        mask = mask.astype(np.float32)

        # Expects RGB!
        pred = predictor.predict(image, mask)
        if args.original_size:
            # cv2 expects (width, height); image_size is (h, w, c).
            pred = cv2.resize(pred, image_size[:-1][::-1])

        bgr_pred = cv2.cvtColor(pred, cv2.COLOR_RGB2BGR).astype(np.uint8)
        _save_image(image_name, bgr_pred, flag="_model_output")

        if args.color_transfer:
            # Color transfer only makes sense on the full-resolution output.
            assert args.original_size
            # Expects RGB!
            output = transfer_Lab_statistics(og_image, pred, og_mask)
            output = cv2.cvtColor(output.astype(np.uint8), cv2.COLOR_RGB2BGR)
            _save_image(image_name, output, flag="_transfered_Lab")

        num_processed += 1
def main():
    """Evaluate a checkpoint per dataset, broken down by foreground ratio.

    For each dataset, metrics are accumulated into one MetricsHub per
    foreground-ratio interval (0-5%, 5-15%, 15-100%, and 0-100% overall);
    when several datasets are evaluated, per-interval 'Overall' rows
    aggregated across datasets are logged at the end.
    """
    args, cfg = parse_args()
    checkpoint_path = find_checkpoint(cfg.MODELS_PATH, args.checkpoint)
    add_new_file_output_to_logger(
        logs_path=Path(cfg.EXPS_PATH) / 'evaluation_results',
        prefix=f'{Path(checkpoint_path).stem}_',
        only_message=True)
    logger.info(vars(args))
    device = torch.device(f'cuda:{args.gpu}')
    net = load_model(args.model_type, checkpoint_path, verbose=True)
    predictor = Predictor(net, device, with_flip=args.use_flip)

    # Last interval (0-100%) acts as the per-dataset overall bucket.
    fg_ratio_intervals = [(0.0, 0.05), (0.05, 0.15), (0.15, 1.0), (0.0, 1.00)]
    datasets_names = args.datasets.split(',')
    # Indexed by interval; each entry collects one hub per dataset.
    datasets_metrics = [[] for _ in fg_ratio_intervals]

    for dataset_indx, dataset_name in enumerate(datasets_names):
        dataset = HDataset(cfg.get(f'{dataset_name.upper()}_PATH'),
                           split='test',
                           augmentator=HCompose(
                               [RESIZE_STRATEGIES[args.resize_strategy]]),
                           keep_background_prob=-1)
        dataset_metrics = []
        for fg_ratio_min, fg_ratio_max in fg_ratio_intervals:
            dataset_metrics.append(
                MetricsHub(
                    [N(), MSE(), fMSE(), PSNR()],
                    name=f'{dataset_name} ({fg_ratio_min:.0%}-{fg_ratio_max:.0%})',
                    name_width=28))

        for sample_i in trange(len(dataset), desc=f'Testing on {dataset_name}'):
            sample = dataset.get_sample(sample_i)
            sample = dataset.augment_sample(sample)
            sample_mask = sample['object_mask']
            # Fraction of pixels belonging to the composited foreground.
            sample_fg_ratio = (sample_mask > 0.5).sum() / (
                sample_mask.shape[0] * sample_mask.shape[1])
            pred = predictor.predict(sample['image'], sample_mask, return_numpy=False)
            target_image = torch.as_tensor(sample['target_image'],
                                           dtype=torch.float32).to(predictor.device)
            sample_mask = torch.as_tensor(sample_mask,
                                          dtype=torch.float32).to(predictor.device)
            with torch.no_grad():
                # A sample can fall into several (inclusive) intervals,
                # e.g. it always lands in the 0-100% bucket.
                for metrics_hub, (fg_ratio_min, fg_ratio_max) in zip(
                        dataset_metrics, fg_ratio_intervals):
                    if fg_ratio_min <= sample_fg_ratio <= fg_ratio_max:
                        metrics_hub.compute_and_add(pred, target_image, sample_mask)

        for indx, metrics_hub in enumerate(dataset_metrics):
            datasets_metrics[indx].append(metrics_hub)
        if dataset_indx == 0:
            logger.info(dataset_metrics[-1].get_table_header())
        for metrics_hub in dataset_metrics:
            logger.info(metrics_hub)

    # BUG FIX: the original tested len(datasets_metrics) > 1, which is the
    # number of fg-ratio intervals (always 4) — so the cross-dataset summary
    # printed even for a single dataset. The intent (cf. the sibling
    # evaluation script) is to summarize only across multiple datasets.
    if len(datasets_names) > 1:
        overall_metrics = [
            sum(x, MetricsHub([],
                              f'Overall ({fg_ratio_min:.0%}-{fg_ratio_max:.0%})',
                              name_width=28))
            for x, (fg_ratio_min, fg_ratio_max) in zip(datasets_metrics,
                                                       fg_ratio_intervals)
        ]
        logger.info('-' * len(str(overall_metrics[-1])))
        for x in overall_metrics:
            logger.info(x)
def __init__(self, model, cfg, model_cfg, loss_cfg, trainset, valset,
             optimizer='adam', optimizer_params=None,
             image_dump_interval=200, checkpoint_interval=10,
             tb_dump_period=25, max_interactive_points=0,
             lr_scheduler=None, metrics=None, additional_val_metrics=None,
             net_inputs=('images', 'points')):
    """Set up the trainer: metrics, data loaders, optimizer, device
    placement, optional DataParallel wrapping, LR scheduler and input
    denormalization.

    Args:
        model: network to train.
        cfg: experiment config (batch sizes, workers, device, multi_gpu,
            start_epoch, input_normalization, ...).
        model_cfg: model-specific config, kept for later use.
        loss_cfg: loss configuration; deep-copied for validation.
        trainset, valset: datasets wrapped into DataLoaders here.
        optimizer: optimizer name passed to get_optimizer.
        optimizer_params: kwargs for the optimizer; must contain 'lr'.
        image_dump_interval / checkpoint_interval / tb_dump_period:
            periodic side-output intervals (iterations / epochs).
        max_interactive_points: stored for interactive-input models.
        lr_scheduler: factory called with optimizer=...; may be None.
        metrics: training metrics; deep-copied for validation.
        additional_val_metrics: extra metrics used only in validation.
        net_inputs: batch keys fed to the network.
    """
    self.cfg = cfg
    self.model_cfg = model_cfg
    self.max_interactive_points = max_interactive_points
    self.loss_cfg = loss_cfg
    # Validation gets an independent copy so train/val loss state never mixes.
    self.val_loss_cfg = deepcopy(loss_cfg)
    self.tb_dump_period = tb_dump_period
    self.net_inputs = net_inputs

    if metrics is None:
        metrics = []
    self.train_metrics = metrics
    # Validation metrics mirror training ones, plus any val-only extras.
    self.val_metrics = deepcopy(metrics)
    if additional_val_metrics is not None:
        self.val_metrics.extend(additional_val_metrics)

    self.checkpoint_interval = checkpoint_interval
    self.image_dump_interval = image_dump_interval
    self.task_prefix = ''
    # Summary writer placeholder; presumably created lazily elsewhere — confirm.
    self.sw = None

    self.trainset = trainset
    self.valset = valset
    self.train_data = DataLoader(trainset, cfg.batch_size, shuffle=True,
                                 drop_last=True, pin_memory=True,
                                 num_workers=cfg.workers)
    # NOTE(review): drop_last=True on the validation loader silently skips
    # the trailing partial batch — confirm this is intentional.
    self.val_data = DataLoader(valset, cfg.val_batch_size, shuffle=False,
                               drop_last=True, pin_memory=True,
                               num_workers=cfg.workers)

    self.optim = get_optimizer(model, optimizer, optimizer_params)
    logger.info(model)

    self.device = cfg.device
    self.net = model
    # Weights are loaded before any DataParallel wrapping.
    self._load_weights()
    if cfg.multi_gpu:
        self.net = _CustomDP(self.net, device_ids=cfg.gpu_ids,
                             output_device=cfg.gpu_ids[0])
    self.net = self.net.to(self.device)

    self.lr = optimizer_params['lr']
    if lr_scheduler is not None:
        self.lr_scheduler = lr_scheduler(optimizer=self.optim)
        # Fast-forward the scheduler when resuming from a later epoch.
        if cfg.start_epoch > 0:
            for _ in range(cfg.start_epoch):
                self.lr_scheduler.step()
    else:
        self.lr_scheduler = None

    self.tqdm_out = TqdmToLogger(logger, level=logging.INFO)

    if cfg.input_normalization:
        mean = torch.tensor(cfg.input_normalization['mean'], dtype=torch.float32)
        std = torch.tensor(cfg.input_normalization['std'], dtype=torch.float32)
        # Inverse of Normalize(mean, std): x -> x * std + mean.
        self.denormalizator = Normalize((-mean / std), (1.0 / std))
    else:
        # No normalization configured: denormalization is the identity.
        self.denormalizator = lambda x: x
def train(model, cfg, model_cfg, start_epoch=0):
    """Train a harmonization model for 120 epochs at fixed 256x256 input.

    Trains on the HFlickr, Hday2night, HCOCO and HAdobe5k 'train' splits
    with a masked-MSE pixel loss (min_area=100) and random-resized-crop
    augmentation; validates on the first three 'test' splits at Resize(256).

    Args:
        model: network to train (forwarded to SimpleHTrainer).
        cfg: experiment config; provides dataset paths, batch_size, etc.
        model_cfg: model config; provides input_normalization and
            input_transform.
        start_epoch: epoch to resume from (default 0).
    """
    # Default to batch size 16 when the config does not set a positive value.
    cfg.batch_size = 16 if cfg.batch_size < 1 else cfg.batch_size
    cfg.val_batch_size = cfg.batch_size
    cfg.input_normalization = model_cfg.input_normalization

    loss_cfg = edict()
    loss_cfg.pixel_loss = MaskWeightedMSE(min_area=100)
    loss_cfg.pixel_loss_weight = 1.0

    num_epochs = 120

    train_augmentator = HCompose(
        [RandomResizedCrop(256, 256, scale=(0.5, 1.0)),
         HorizontalFlip()])
    val_augmentator = HCompose([Resize(256, 256)])

    # keep_background_prob=0.05: occasionally train on background-only crops.
    trainset = ComposeDataset([
        HDataset(cfg.HFLICKR_PATH, split='train'),
        HDataset(cfg.HDAY2NIGHT_PATH, split='train'),
        HDataset(cfg.HCOCO_PATH, split='train'),
        HDataset(cfg.HADOBE5K_PATH, split='train'),
    ], augmentator=train_augmentator,
       input_transform=model_cfg.input_transform,
       keep_background_prob=0.05)

    # NOTE(review): the validation set omits HADOBE5K — confirm intentional.
    valset = ComposeDataset([
        HDataset(cfg.HFLICKR_PATH, split='test'),
        HDataset(cfg.HDAY2NIGHT_PATH, split='test'),
        HDataset(cfg.HCOCO_PATH, split='test'),
    ], augmentator=val_augmentator,
       input_transform=model_cfg.input_transform,
       keep_background_prob=-1)

    optimizer_params = {'lr': 1e-3, 'betas': (0.9, 0.999), 'eps': 1e-8}
    # Decay the learning rate by 10x at epochs 105 and 115.
    lr_scheduler = partial(torch.optim.lr_scheduler.MultiStepLR,
                           milestones=[105, 115], gamma=0.1)

    trainer = SimpleHTrainer(
        model, cfg, model_cfg, loss_cfg, trainset, valset,
        optimizer='adam',
        optimizer_params=optimizer_params,
        lr_scheduler=lr_scheduler,
        metrics=[
            # Denormalized metrics undo input normalization so values are
            # comparable in original pixel space.
            DenormalizedPSNRMetric(
                'images', 'target_images',
                mean=torch.tensor(cfg.input_normalization['mean'],
                                  dtype=torch.float32).view(1, 3, 1, 1),
                std=torch.tensor(cfg.input_normalization['std'],
                                 dtype=torch.float32).view(1, 3, 1, 1),
            ),
            DenormalizedMSEMetric(
                'images', 'target_images',
                mean=torch.tensor(cfg.input_normalization['mean'],
                                  dtype=torch.float32).view(1, 3, 1, 1),
                std=torch.tensor(cfg.input_normalization['std'],
                                 dtype=torch.float32).view(1, 3, 1, 1),
            )
        ],
        checkpoint_interval=10,
        image_dump_interval=1000)

    logger.info(f'Starting Epoch: {start_epoch}')
    logger.info(f'Total Epochs: {num_epochs}')
    for epoch in range(start_epoch, num_epochs):
        trainer.training(epoch)
        trainer.validation(epoch)