def get_pretrained_saliency_fn(cuda=True, return_classification_logits=False):
    """Build a saliency function backed by the pretrained saliency model.

    A ``SaliencyModel`` on top of ``resnet50encoder(pretrained=True)`` is
    created, its weights are restored from the ``minsaliency`` path located
    next to this module, and it is put into evaluation mode.

    Args:
        cuda: If True the model is moved to the GPU; the same flag is
            forwarded to ``to_batch_variable`` when converting the inputs.
        return_classification_logits: If True the returned function yields
            ``(saliency_map, cls_logits)`` instead of only ``saliency_map``.

    Returns:
        A function ``fn(images, selectors, model_confidence=6)`` where

        * ``images`` - input images of shape (C, H, W), or (N, C, H, W) for
          a batch. May be a numpy array, a Tensor or a Variable.
        * ``selectors`` - class ids to be masked: an int or an array of N
          integers. May be a numpy array, a Tensor or a Variable.
        * ``model_confidence`` - a float, 6 by default. Smaller values (~0)
          show any object that even slightly resembles the selected class,
          higher values (~5) show only the most salient parts; decrease it
          to obtain more complete saliency maps.

        ``fn`` returns a Variable of shape (N, 1, H, W) holding one saliency
        map per input image (plus the classification logits when
        ``return_classification_logits`` is True).
    """
    saliency = SaliencyModel(
        resnet50encoder(pretrained=True),
        5,
        64,
        3,
        64,
        fix_encoder=True,
        use_simple_activation=False,
        allow_selector=True,
    )
    saliency.minimialistic_restore(
        os.path.join(os.path.dirname(__file__), "minsaliency")
    )
    # Inference only: switch to evaluation mode (same as train(False)).
    saliency.eval()
    if cuda:
        saliency = saliency.cuda()

    def fn(images, selectors, model_confidence=6):
        _images = to_batch_variable(images, 4, cuda).float()
        _selectors = to_batch_variable(selectors, 1, cuda).long()
        # NOTE(review): the input is scaled by 2 before the forward pass; the
        # reason is not visible here - presumably it matches the
        # preprocessing used at training time. Confirm before changing.
        masks, _, cls_logits = saliency(
            _images * 2, _selectors, model_confidence=model_confidence
        )
        # F.upsample is deprecated in favour of F.interpolate. Passing
        # align_corners=False spells out the default that F.upsample already
        # applied for bilinear mode, so the numerical result is unchanged.
        sal_map = F.interpolate(
            masks,
            size=(_images.size(2), _images.size(3)),
            mode="bilinear",
            align_corners=False,
        )
        if return_classification_logits:
            return sal_map, cls_logits
        return sal_map

    return fn
# Explicit imports for the `os` and `torch` names used below; harmless if an
# earlier (not visible) import already provides them.
import os
import torch
import pycat
import torch.nn as nn
import matplotlib.pyplot as plt
from PIL import Image
import copy
import time

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

dts = imagenet_dataset
black_box_fn = get_black_box_fn(model_zoo_model=resnet50)
val_dts = dts.get_val_dataset()
allow_selector = True

# --------------------------------------------------- Testing with Mask Estimator -------------------------------------------
batch_size = 16
# NOTE(review): `Shuffle` is capitalised here; torch's DataLoader spells the
# option `shuffle`. Confirm that `dts.get_loader` really accepts `Shuffle`.
val_datas = dts.get_loader(val_dts, batch_size=batch_size, Shuffle=True)

saliency = SaliencyModel(
    resnet50encoder(pretrained=True, require_out_grad=False),
    5,
    64,
    3,
    64,
    fix_encoder=True,
    use_simple_activation=False,
    allow_selector=allow_selector,
    num_classes=1000,
)
load_path = './yoursaliencymodel'
# The saliency-channel head is swapped for a Distribution_Controller before
# the weights are restored, so the restore sees the replaced module.
saliency.to_saliency_chans = Distribution_Controller()
saliency.minimialistic_restore(os.path.join(os.path.dirname(__file__), (load_path)))
saliency.train(False)
saliency_p = saliency.to(device)

for it_step, batch in enumerate(val_datas):
    images, _, paths = batch
    images = images.to(device)
    # Pure evaluation: keep both forward passes under no_grad so that no
    # autograd graph is recorded for the encoder pass either.
    with torch.no_grad():
        outputs = saliency.encoder(images)
        feature_conv5 = outputs[5]
        logits = outputs[-1]
        # The class with the highest encoder logit is taken as the target.
        _, targets = torch.max(logits, dim=1)
        outputs = saliency_p(images)
from torchvision.models.resnet import resnet50
import pycat

# ---- config ----
# Any dataset / black box classifier pair may be plugged in here as long as
# it is compatible with the defaults below. The training code itself does not
# need to change, and the default values should work well for high resolution
# (~300x300) real-world images. The default setup trains on 224x224 ImageNet
# images against a resnet50 black box classifier.
dts = imagenet_dataset
black_box_fn = get_black_box_fn(model_zoo_model=resnet50)
# ----------------

train_dts = dts.get_train_dataset()
val_dts = dts.get_val_dataset()

# Default saliency model built on a pretrained resnet50 feature extractor; the
# saliency maps it produces have a resolution 4 times lower than the input.
saliency = SaliencyModel(
    resnet50encoder(pretrained=True),
    5,
    64,
    3,
    64,
    fix_encoder=True,
    use_simple_activation=False,
    allow_selector=True,
)
saliency_p = nn.DataParallel(saliency).cuda()

# Model based saliency requires a very small smoothness loss and can therefore
# produce very sharp masks.
saliency_loss_calc = SaliencyLoss(
    black_box_fn,
    smoothness_loss_coef=0.005,
)

# The phase 1 optimizer is given only the selector module's parameters.
optim_phase1 = torch_optim.Adam(
    saliency.selector_module.parameters(),
    0.001,
    weight_decay=0.0001,
)
import pycat # ---- config ---- # You can choose your own dataset and a black box classifier as long as they are compatible with the ones below. # The training code does not need to be changed and the default values should work well for high resolution ~300x300 real-world images. # By default we train on 224x224 resolution ImageNet images with a resnet50 black box classifier. dts = imagenet_dataset black_box_fn = get_black_box_fn(model_zoo_model=resnet50) # ---------------- train_dts = dts.get_train_dataset() val_dts = dts.get_val_dataset() # Default saliency model with pretrained resnet50 feature extractor, produces saliency maps which have resolution 4 times lower than the input image. saliency = SaliencyModel( resnet50encoder(pretrained=True), 5, 64, 3, 64, fix_encoder=True, use_simple_activation=False, allow_selector=True, ) saliency_p = nn.DataParallel(saliency).cuda() saliency_loss_calc = SaliencyLoss( black_box_fn, smoothness_loss_coef=0.005 ) # model based saliency requires very small smoothness loss and therefore can produce very sharp masks optim_phase1 = torch_optim.Adam(saliency.selector_module.parameters(), 0.001,