def _crf_refine(img, cams, keys, score_threshold):
    """Refine CAM scores into a hard label map with a dense CRF.

    A constant plane of ``score_threshold`` is stacked in front of ``cams``
    as the background score, the per-pixel argmax of the stack seeds the CRF,
    and the CRF prediction is mapped back to dataset class ids via ``keys``.
    """
    padded = np.pad(
        cams,
        ((1, 0), (0, 0), (0, 0)),
        mode="constant",
        constant_values=score_threshold,
    )
    seed = np.argmax(padded, axis=0)
    pred = crf_inference(img, seed, n_labels=keys.shape[0])
    return keys[pred]


def _work(process_id, config):
    """Worker: turn saved high-res CAMs into confidence masks via CRF.

    For every image in this worker's shard the CRF refinement runs twice —
    once with the foreground threshold and once with the background
    threshold — and the two refined maps are combined into a mask where 0 is
    confident background, 255 is the "not confident" ignore label, and class
    ids mark confident foreground.  The mask is written to
    ``output_path/<name>.png``; images whose mask already exists are skipped.
    """
    transform = get_transforms(config["data"]["transform"])
    dataset = ImageNetMLC(config["data"]["path"], transform)
    subsets = split_dataset(dataset, config["num_workers"])
    dataloader = DataLoader(subsets[process_id], shuffle=False, pin_memory=False)

    for iteration, (X, y, name) in enumerate(dataloader):
        # CHW tensor -> HWC numpy image, as expected by crf_inference/OpenCV.
        X, name = X[0].numpy().transpose((1, 2, 0)), name[0]
        out_path = os.path.join(config["data"]["output_path"], name + ".png")
        if os.path.exists(out_path):
            continue

        cam_dict = np.load(
            os.path.join(config["data"]["cams_dir"], name + ".npy"),
            allow_pickle=True,
        ).item()
        cams = cam_dict["high_res"]
        # Shift class ids by one and prepend 0 for the background label.
        keys = np.pad(cam_dict["keys"] + 1, (1, 0), mode="constant")

        # Same refinement with two thresholds: a high one (confident
        # foreground) and a low one (confident background).
        fg_conf = _crf_refine(X, cams, keys, config["conf_fg_threshold"])
        bg_conf = _crf_refine(X, cams, keys, config["conf_bg_threshold"])

        conf = fg_conf.copy()
        conf[fg_conf == 0] = 255  # not confident area
        conf[(bg_conf + fg_conf) == 0] = 0  # confident background

        if config["rescale_output"]:
            # rescale to original size (read the source JPEG for its shape)
            p_orig = "/".join(config["data"]["path"].split("/")[:-1])
            orig = cv2.imread(p_orig + "/" + name + ".JPEG")
            conf = cv2.resize(
                conf.astype("float32"),
                (orig.shape[1], orig.shape[0]),
                interpolation=cv2.INTER_NEAREST,
            )

        cv2.imwrite(out_path, conf.astype(np.uint8))

        if iteration % 500 == 0:
            print(
                f"Process: {process_id}, Iteration: {iteration}/{len(subsets[process_id])}"
            )
def _work(process_id, config):
    """Worker: extract multi-scale, flip-TTA CAMs for this worker's shard.

    For each image the extractor is run at every scale in ``config["scales"]``
    with horizontal-flip TTA; the per-scale maps are summed at full resolution
    and at 1/4 resolution, normalized to [0, 1], and stored as
    ``{keys, cam, high_res}`` in ``output_path/<name>.npy``.  Images without
    any ground-truth label first get pseudo-labels from a plain forward pass.
    Existing outputs are skipped.
    """

    def __horizontal_flip_tta(x, y, tta=True):
        # Sum the maps of the image and of its horizontal mirror (flipped back).
        maps, y_predicted = extractor.forward(x, y)
        if tta:
            maps_flipped, _ = extractor.forward(x.flip(-1), y)
            return maps + maps_flipped.flip(-1), y_predicted
        else:
            return maps, y_predicted

    def __fuse(outputs, size):
        # Resize every per-scale map to `size` and sum them over scales.
        resized = [
            F.interpolate(
                output[None],
                size=size,
                mode="bilinear",
                align_corners=False,
            )
            for output in outputs
        ]
        return torch.sum(torch.stack(resized, dim=0), dim=0)

    device = config["devices"][process_id]
    scales = config["scales"]
    full_size = config["data"]["transform"]["size"]
    small_size = full_size // 4
    extractor = get_cam_grad_extractor(config, device)
    transform = get_transforms(config["data"]["transform"])
    dataset = ImageNetMLC(config["data"]["path"], transform)
    subsets = split_dataset(dataset, len(config["devices"]))
    dataloader = DataLoader(subsets[process_id], shuffle=False, pin_memory=False)

    for iteration, (X, y, name) in enumerate(dataloader):
        X, y, name = (
            X.to(device, non_blocking=True),
            y.to(device, non_blocking=True),
            name[0],
        )
        if os.path.exists(os.path.join(config["data"]["output_path"], name + ".npy")):
            continue

        if y.sum() == 0:
            # No ground-truth labels: predict them once (no TTA) and one-hot
            # encode the prediction as the label vector.
            y = None
            _, y_pred = __horizontal_flip_tta(
                x=F.interpolate(X, scale_factor=1.0, mode="bilinear", align_corners=False),
                y=y,
                tta=False,
            )
            label_encoded = torch.zeros(config["model"]["classes"], dtype=torch.float32)
            label_encoded[y_pred.data.cpu().numpy()] = 1
            y = label_encoded.unsqueeze_(0)

        outputs = [
            __horizontal_flip_tta(
                x=F.interpolate(X, scale_factor=scale, mode="bilinear", align_corners=False),
                y=y,
            )[0]
            for scale in scales
        ]

        highres_cam = __fuse(outputs, (full_size, full_size))
        lowres_cam = __fuse(outputs, (small_size, small_size))
        # NOTE(review): an all-zero map would divide by 0 here — presumably the
        # extractor always produces some positive activation; confirm.
        highres_cam = highres_cam.relu_() / highres_cam.max()
        lowres_cam = lowres_cam.relu_() / lowres_cam.max()

        np.save(
            os.path.join(config["data"]["output_path"], name + ".npy"),
            {
                "keys": torch.where(y)[1].cpu().numpy(),
                "cam": lowres_cam[0].cpu().numpy(),
                "high_res": highres_cam[0].cpu().numpy(),
            },
        )
        if iteration % 100 == 0:
            print(
                f"Device: {process_id}, Iteration: {iteration}/{len(subsets[process_id])}"
            )
import os

from model_training.common.trainer import Trainer
from model_training.common.datasets import PascalCRFSegmentationDataset
from model_training.common.augmentations import get_transforms

# Fix all seeds and force deterministic cuDNN for reproducible training runs.
# NOTE(review): `np`, `torch` and `yaml` are not imported in this view —
# presumably imported elsewhere in the file; confirm.
np.random.seed(0)
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Load the CRF-training configuration that sits next to this script.
with open(os.path.join(os.path.dirname(__file__), "config", "pascal_crf.yaml")) as config_file:
    config = yaml.full_load(config_file)

train_transform = get_transforms(config["train"]["transform"])
val_transform = get_transforms(config["val"]["transform"])

# Train/val datasets; masks are taken from the configured folders and
# downscaled by the CRF `scale_factor`.
train_ds = PascalCRFSegmentationDataset(
    config["train"]["path"],
    transform=train_transform,
    image_set="train",
    masks_folder=config["train"]["masks"],
    scale_factor=config["crf"]["scale_factor"],
)
val_ds = PascalCRFSegmentationDataset(
    config["val"]["path"],
    transform=val_transform,
    image_set="validation",
    masks_folder=config["val"]["masks"],
    # NOTE(review): this chunk is truncated here — the call continues beyond
    # this view.
    scale_factor=config["crf"]["scale_factor"],
import torch
import os

from model_training.common.datasets import PascalSegmentationDataset
from model_training.common.augmentations import get_transforms
from inference.segmentation.evaluator import Evaluator

# Evaluation setup: load the eval config, then the training config of the
# experiment under evaluation, and build train/val datasets that also return
# the original (un-resized) masks for scoring.
# NOTE(review): `yaml` is not imported in this view — presumably imported
# elsewhere in the file; confirm.
with open(os.path.join(os.path.dirname(__file__), "config", "eval.yaml")) as config_file:
    config = yaml.full_load(config_file)

train_config_path = os.path.join(config["experiment_path"], "config.yaml")
# Fix: `os.path.join` with a single argument is a no-op — open the path directly.
with open(train_config_path) as train_config_file:
    train_config = yaml.full_load(train_config_file)

transform = get_transforms(train_config["val"]["transform"])

# Train split is read with the generated CAM pseudo-masks; the validation
# split uses the ground-truth VOC segmentation masks.
train_ds = PascalSegmentationDataset(
    train_config["train"]["path"],
    transform=transform,
    image_set="train",
    masks_folder="out_masks/cam",
    return_original_mask=True,
)
val_ds = PascalSegmentationDataset(
    train_config["val"]["path"],
    transform=transform,
    image_set="validation",
    masks_folder="../VOCdevkit/VOC2012/SegmentationClass",
    return_original_mask=True,
)
def _work(process_id, config):
    """Worker: propagate saved CAMs along predicted edges (IRN random walk).

    Loads the edge model onto this worker's device, runs it with
    horizontal-flip TTA, propagates each image's CAMs to the predicted edges,
    upsamples the result to the original image size, normalizes it to [0, 1]
    and saves ``{keys, map}`` to ``output_path/<name>.npy``.  Images whose
    output already exists are skipped.
    """
    device = config["devices"][process_id]
    model = get_network(config["model"])
    model.load_state_dict(
        torch.load(config["model"]["weights_path"], map_location=device)["model"])
    model.to(device)
    model.eval()

    transform = get_transforms(config["data"]["transform"])
    dataset = ImageNetMLC(config["data"]["path"], transform, return_size=True)
    subsets = split_dataset(dataset, len(config["devices"]))
    dataloader = DataLoader(subsets[process_id], shuffle=False, pin_memory=False)

    with torch.no_grad():
        for iteration, (X, y, name, orig_size) in enumerate(dataloader):
            X, y, name, orig_size = (
                X.to(device, non_blocking=True),
                y.to(device, non_blocking=True),
                name[0],
                orig_size[0],
            )
            if os.path.exists(
                    os.path.join(config["data"]["output_path"], name + ".npy")):
                continue

            # Horizontal-flip TTA: average the edge maps of the image and its
            # mirror, then squash to an edge probability.
            X_tta = torch.cat([X, X.flip(-1)], dim=0)
            edge, _ = model(X_tta)
            edge = torch.sigmoid(edge[0] / 2 + edge[1].flip(-1) / 2)

            # Consistency: build the CAM path with os.path.join like the rest
            # of the block (was string concatenation).
            cam_dict = np.load(
                os.path.join(config["data"]["cam_path"], name + ".npy"),
                allow_pickle=True,
            ).item()
            cams = torch.from_numpy(cam_dict["cam"]).to(device)
            # Shift class ids by one and prepend 0 for the background label.
            keys = np.pad(cam_dict["keys"] + 1, (1, 0), mode="constant")
            cams = F.interpolate(
                cams.unsqueeze(1),
                size=edge.shape[1:],
                mode="bilinear",
                align_corners=False,
            ).squeeze(1)

            rw = indexing.propagate_to_edge(
                cams,
                edge,
                beta=config["beta"],
                exp_times=config["exp_times"],
                radius=5,
                device=device,
            )
            rw_up = F.interpolate(
                rw,
                size=(orig_size[0], orig_size[1]),
                mode="bilinear",
                align_corners=False,
            )
            rw_up = rw_up.relu_() / torch.max(rw_up)

            np.save(
                os.path.join(config["data"]["output_path"], name + ".npy"),
                {
                    "keys": keys,
                    "map": rw_up.squeeze(0).cpu().numpy()
                },
            )
            # (Removed dead commented-out thresholding code that argmaxed the
            # padded map — thresholding happens downstream.)
            if iteration % 100 == 0:
                print(
                    f"Device: {process_id}, Iteration: {iteration}/{len(subsets[process_id])}"
                )
def run(args):
    """Train the IRN ``AffinityDisplacementLoss`` model on pneumothorax data.

    Trains the affinity/displacement heads with a poly-decay optimizer, then
    runs one pass over the training images to estimate the mean displacement
    (stored into ``mean_shift.running_mean``) and saves the trained weights
    to ``args.irn_weights_name``.
    """
    path_index = indexing.PathIndex(
        radius=10,
        default_size=(args.irn_crop_size // 4, args.irn_crop_size // 4))
    model = getattr(importlib.import_module(args.irn_network),
                    'AffinityDisplacementLoss')(path_index)

    transform_config = {
        'augmentation_scope': 'horizontal_flip',
        'images_normalization': 'default',
        'images_output_format_type': 'float',
        'masks_normalization': 'none',
        'masks_output_format_type': 'byte',
        'size': 512,
        'size_transform': 'resize'
    }
    transform = get_transforms(transform_config)
    train_dataset = voc12.dataloader.PneumothoraxAffinityDataset(
        '/datasets/LID/Pneumothorax/train/train_all_positive.csv',
        transform=transform,
        indices_from=path_index.src_indices,
        indices_to=path_index.dst_indices,
    )
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=args.irn_batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   pin_memory=True,
                                   drop_last=True)
    max_step = (len(train_dataset) // args.irn_batch_size) * args.irn_num_epoches

    # Backbone params at 1x LR, new heads at 10x LR.
    param_groups = model.trainable_parameters()
    optimizer = torchutils.PolyOptimizer([{
        'params': param_groups[0],
        'lr': 1 * args.irn_learning_rate,
        'weight_decay': args.irn_weight_decay
    }, {
        'params': param_groups[1],
        'lr': 10 * args.irn_learning_rate,
        'weight_decay': args.irn_weight_decay
    }],
                                         lr=args.irn_learning_rate,
                                         weight_decay=args.irn_weight_decay,
                                         max_step=max_step)

    # NOTE(review): devices are hard-coded to cuda:1/cuda:2 — consider making
    # them configurable.
    model = torch.nn.DataParallel(model.cuda(1), device_ids=['cuda:1', 'cuda:2'])
    model.train()

    avg_meter = pyutils.AverageMeter()
    timer = pyutils.Timer()

    for ep in range(args.irn_num_epoches):
        print('Epoch %d/%d' % (ep + 1, args.irn_num_epoches))
        # Fix: loop variable renamed from `iter`, which shadowed the builtin.
        for step, pack in enumerate(train_data_loader):
            img = pack['img']
            bg_pos_label = pack['aff_bg_pos_label'].cuda(1, non_blocking=True)
            fg_pos_label = pack['aff_fg_pos_label'].cuda(1, non_blocking=True)
            neg_label = pack['aff_neg_label'].cuda(1, non_blocking=True)

            pos_aff_loss, neg_aff_loss, dp_fg_loss, dp_bg_loss = model(img, True)

            # Masked means over labeled pairs; 1e-5 guards empty masks.
            bg_pos_aff_loss = torch.sum(
                bg_pos_label * pos_aff_loss) / (torch.sum(bg_pos_label) + 1e-5)
            fg_pos_aff_loss = torch.sum(
                fg_pos_label * pos_aff_loss) / (torch.sum(fg_pos_label) + 1e-5)
            pos_aff_loss = bg_pos_aff_loss / 2 + fg_pos_aff_loss / 2
            neg_aff_loss = torch.sum(
                neg_label * neg_aff_loss) / (torch.sum(neg_label) + 1e-5)
            dp_fg_loss = torch.sum(dp_fg_loss * torch.unsqueeze(
                fg_pos_label, 1)) / (2 * torch.sum(fg_pos_label) + 1e-5)
            dp_bg_loss = torch.sum(dp_bg_loss * torch.unsqueeze(
                bg_pos_label, 1)) / (2 * torch.sum(bg_pos_label) + 1e-5)

            avg_meter.add({
                'loss1': pos_aff_loss.item(),
                'loss2': neg_aff_loss.item(),
                'loss3': dp_fg_loss.item(),
                'loss4': dp_bg_loss.item()
            })

            total_loss = (pos_aff_loss + neg_aff_loss) / 2 + (dp_fg_loss +
                                                              dp_bg_loss) / 2

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            if (optimizer.global_step - 1) % 50 == 0:
                timer.update_progress(optimizer.global_step / max_step)
                print('step:%5d/%5d' % (optimizer.global_step - 1, max_step),
                      'loss:%.4f %.4f %.4f %.4f' %
                      (avg_meter.pop('loss1'), avg_meter.pop('loss2'),
                       avg_meter.pop('loss3'), avg_meter.pop('loss4')),
                      'imps:%.1f' % ((step + 1) * args.irn_batch_size /
                                     timer.get_stage_elapsed()),
                      'lr: %.4f' % (optimizer.param_groups[0]['lr']),
                      'etc:%s' % (timer.str_estimated_complete()),
                      flush=True)
        else:
            # for/else: the loop has no `break`, so this runs after every epoch.
            timer.reset_stage()

    # Estimate the dataset-wide mean displacement for inference-time mean shift.
    transform_config = {
        'augmentation_scope': 'none',
        'images_normalization': 'default',
        'images_output_format_type': 'float',
        'size': 512,
        'size_transform': 'resize'
    }
    transform = get_transforms(transform_config)
    infer_dataset = voc12.dataloader.PneumothoraxImageDataset(
        '/datasets/LID/Pneumothorax/train/train_all_positive.csv',
        transform=transform)
    # NOTE(review): drop_last=True excludes the tail batch from the mean
    # estimate — confirm this is intended.
    infer_data_loader = DataLoader(infer_dataset,
                                   batch_size=args.irn_batch_size,
                                   shuffle=False,
                                   num_workers=args.num_workers,
                                   pin_memory=True,
                                   drop_last=True)

    model.eval()
    print('Analyzing displacements mean ... ', end='')
    dp_mean_list = []
    with torch.no_grad():
        for step, pack in enumerate(infer_data_loader):
            img = pack['img']
            aff, dp = model(img, False)
            dp_mean_list.append(torch.mean(dp, dim=(0, 2, 3)).cpu())

        model.module.mean_shift.running_mean = torch.mean(
            torch.stack(dp_mean_list), dim=0)
    print('done.')

    torch.save(model.module.state_dict(), args.irn_weights_name)
    torch.cuda.empty_cache()
        # Return the pairwise affinity supervision as torch tensors:
        # background-positive, foreground-positive and negative pair labels.
        # NOTE(review): this `return` belongs to a method whose definition is
        # outside this view — indentation reconstructed, confirm against the
        # enclosing class.
        return torch.from_numpy(bg_pos_affinity_label), torch.from_numpy(fg_pos_affinity_label), \
            torch.from_numpy(neg_affinity_label)


if __name__ == '__main__':
    from model_training.common.augmentations import get_transforms

    # Smoke test: build one sample from the affinity dataset and inspect it.
    transform_config = {
        'size': 512,
        'augmentation_scope': 'strong',
        'images_normalization': 'default',
        'images_output_format_type': 'float',
        'size_transform': 'resize',
        'masks_normalization': 'none',
        'masks_output_format_type': 'numpy'
    }
    transform = get_transforms(transform_config)
    ds = ImageNetAffinityDataset(
        images_list_path='/datasets/LID/ILSVRC/Data/DET/train/train_balanced.json',
        transform=transform,
        mask_dir='/datasets/LID/ILSVRC/Data/DET/train/outputs/irn_label',
        image_output_size=512)
    pack = ds[0]
    print(pack['image'].shape)
    print(pack['name'])
    print(len(pack['aff_bg_pos_label']))
def run(args):
    """Generate random-walk semantic-seg score maps for the validation split.

    Loads the trained ``EdgeDisplacement`` model, runs it with
    horizontal-flip TTA on the 0.5-scale image of the multi-scale pyramid,
    propagates the saved CAMs to the predicted edges, and stores the
    normalized, 4x-upsampled result as ``<sem_seg_out_dir>/<name>.npy``.
    Already-processed images are skipped.
    """
    network_module = importlib.import_module(args.irn_network)
    model = getattr(network_module, 'EdgeDisplacement')()
    model.load_state_dict(torch.load(args.irn_weights_name), strict=False)
    model.eval()

    n_gpus = 2
    transform = get_transforms({
        'size': 1024,
        'augmentation_scope': 'none',
        'images_normalization': 'default',
        'images_output_format_type': 'float',
        'size_transform': 'resize'
    })
    dataset = voc12.dataloader.PneumothoraxMSDataset(
        '/datasets/LID/Pneumothorax/train/val.csv',
        transform=transform,
        scales=(1.0, 0.75, 0.5, 0.25))
    data_loader = DataLoader(dataset,
                             shuffle=False,
                             num_workers=args.num_workers // n_gpus,
                             pin_memory=False)

    with torch.no_grad(), cuda.device(1):
        model.cuda()
        for _, batch in tqdm.tqdm(enumerate(data_loader)):
            sample_name = batch['name'][0]
            out_file = os.path.join(args.sem_seg_out_dir, sample_name + '.npy')
            if os.path.exists(out_file):
                continue

            target_size = np.array([512, 512])
            # Flip TTA on the 0.5-scale image (index 2 of the pyramid).
            flipped_pair = torch.cat(
                [batch['img'][2], batch['img'][2].flip(-1)], dim=0)
            edge, _ = model(flipped_pair.cuda(non_blocking=True))

            stored = np.load(args.cam_out_dir + '/' + sample_name + '.npy',
                             allow_pickle=True).item()
            cam_values = stored['cam'].cuda()

            walked = indexing.propagate_to_edge(cam_values,
                                                edge,
                                                beta=args.beta,
                                                exp_times=args.exp_times,
                                                radius=5)
            upsampled = F.interpolate(
                walked, scale_factor=4, mode='bilinear',
                align_corners=False)[..., 0, :target_size[0], :target_size[1]]
            upsampled = upsampled / torch.max(upsampled)

            np.save(out_file, upsampled.cpu().numpy())
    torch.cuda.empty_cache()