Beispiel #1
0
def get_pretrained_saliency_fn(cuda=True, return_classification_logits=False):
    """Build a ready-to-use saliency function backed by the pretrained model.

    A ``SaliencyModel`` with a pretrained resnet50 encoder is created, restored
    through ``minimialistic_restore`` from the ``minsaliency`` path located next
    to this file, switched out of training mode and, when ``cuda`` is true,
    moved to the GPU.

    Args:
        cuda: if true, the model is moved to a GPU and the same flag is
            forwarded to ``to_batch_variable`` when inputs are converted.
        return_classification_logits: if true, the returned function yields
            ``(saliency_map, classification_logits)`` instead of only the
            saliency map.

    Returns:
        A function ``fn(images, selectors, model_confidence=6)``:

        * ``images`` - input images of shape (C, H, W), or (N, C, H, W) for a
          batch. May be a numpy array, a Tensor or a Variable.
        * ``selectors`` - class ids to be masked: an int or an array of N
          integers. May also be a numpy array, a Tensor or a Variable.
        * ``model_confidence`` - a float, 6 by default. Small values (~0) show
          any object that even slightly resembles the selected class, larger
          values keep only the most salient parts; decrease it to obtain more
          complete saliency maps.

        ``fn`` returns a Variable of shape (N, 1, H, W) with one saliency map
        per input image (plus the classification logits when requested).
    """
    model = SaliencyModel(
        resnet50encoder(pretrained=True),
        5,
        64,
        3,
        64,
        fix_encoder=True,
        use_simple_activation=False,
        allow_selector=True,
    )
    weights_path = os.path.join(os.path.dirname(__file__), "minsaliency")
    model.minimialistic_restore(weights_path)
    model.train(False)
    if cuda:
        model = model.cuda()

    def fn(images, selectors, model_confidence=6):
        # Normalise the inputs to a 4-d float image batch and a 1-d long
        # vector of class ids.
        image_batch = to_batch_variable(images, 4, cuda).float()
        class_ids = to_batch_variable(selectors, 1, cuda).long()

        # The image batch is doubled before it is fed to the model.
        masks, _, cls_logits = model(
            image_batch * 2,
            class_ids,
            model_confidence=model_confidence,
        )

        # Bring the masks back to the spatial size of the input images.
        height, width = image_batch.size(2), image_batch.size(3)
        sal_map = F.upsample(masks, (height, width), mode="bilinear")

        if return_classification_logits:
            return sal_map, cls_logits
        return sal_map

    return fn
Beispiel #2
0
import pycat
import torch.nn as nn
import matplotlib.pyplot as plt
from PIL import Image
import copy
import time
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dts = imagenet_dataset
black_box_fn = get_black_box_fn(model_zoo_model=resnet50)
val_dts = dts.get_val_dataset()
allow_selector = True

# ------------------------- Testing with Mask Estimator -------------------------
batch_size = 16
# NOTE(review): the keyword is spelled `Shuffle` (capital S) - confirm that this
# matches the signature of dts.get_loader.
val_datas = dts.get_loader(val_dts, batch_size=batch_size, Shuffle=True)
saliency = SaliencyModel(
    resnet50encoder(pretrained=True, require_out_grad=False),
    5,
    64,
    3,
    64,
    fix_encoder=True,
    use_simple_activation=False,
    allow_selector=allow_selector,
    num_classes=1000,
)
load_path = './yoursaliencymodel'
# Swap in the Distribution_Controller head before the saved state is restored.
saliency.to_saliency_chans = Distribution_Controller()
saliency.minimialistic_restore(
    os.path.join(os.path.dirname(__file__), load_path))
saliency.train(False)
saliency_p = saliency.to(device)

for it_step, batch in enumerate(val_datas):
    images, _, paths = batch
    images = images.to(device)
    # Pure inference: keep the encoder pass inside no_grad as well, so that no
    # autograd graph is built for tensors that are never back-propagated.
    with torch.no_grad():
        outputs = saliency.encoder(images)
        feature_conv5 = outputs[5]
        logits = outputs[-1]
        # Index of the highest logit per image.
        _, targets = torch.max(logits, dim=1)
        outputs = saliency_p(images)
from torchvision.models.resnet import resnet50
import pycat

# ---- config ----
# You can choose your own dataset and black box classifier, as long as they are compatible with the ones below.
# The training code does not need to be changed, and the default values should work well for high resolution (~300x300) real-world images.
# By default we train on 224x224 resolution ImageNet images with a resnet50 black box classifier.
dts = imagenet_dataset
black_box_fn = get_black_box_fn(model_zoo_model=resnet50)
# ----------------

# Training and validation splits of the selected dataset.
train_dts = dts.get_train_dataset()
val_dts = dts.get_val_dataset()

# Default saliency model with a pretrained resnet50 feature extractor; it produces saliency maps whose resolution is 4 times lower than the input image.
# The four positional numbers are SaliencyModel hyper-parameters; see its signature for their meaning.
saliency = SaliencyModel(resnet50encoder(pretrained=True),
                         5,
                         64,
                         3,
                         64,
                         fix_encoder=True,
                         use_simple_activation=False,
                         allow_selector=True)

# Data-parallel wrapper around the same model, moved to CUDA.
saliency_p = nn.DataParallel(saliency).cuda()
saliency_loss_calc = SaliencyLoss(
    black_box_fn, smoothness_loss_coef=0.005
)  # model-based saliency requires a very small smoothness loss and can therefore produce very sharp masks
# Phase-1 optimiser: Adam over the parameters of saliency.selector_module only.
# 0.001 is passed positionally - presumably the learning rate, assuming torch_optim is torch.optim (confirm against the imports).
optim_phase1 = torch_optim.Adam(saliency.selector_module.parameters(),
                                0.001,
                                weight_decay=0.0001)
import pycat

# ---- config ----
# You can choose your own dataset and black box classifier, as long as they are compatible with the ones below.
# The training code does not need to be changed, and the default values should work well for high resolution (~300x300) real-world images.
# By default we train on 224x224 resolution ImageNet images with a resnet50 black box classifier.
dts = imagenet_dataset
black_box_fn = get_black_box_fn(model_zoo_model=resnet50)
# ----------------

# Training and validation splits of the selected dataset.
train_dts = dts.get_train_dataset()
val_dts = dts.get_val_dataset()

# Default saliency model with a pretrained resnet50 feature extractor; it produces saliency maps whose resolution is 4 times lower than the input image.
# The four positional numbers are SaliencyModel hyper-parameters; see its signature for their meaning.
saliency = SaliencyModel(
    resnet50encoder(pretrained=True),
    5,
    64,
    3,
    64,
    fix_encoder=True,
    use_simple_activation=False,
    allow_selector=True,
)

# Data-parallel wrapper around the same model, moved to CUDA.
saliency_p = nn.DataParallel(saliency).cuda()
saliency_loss_calc = SaliencyLoss(
    black_box_fn, smoothness_loss_coef=0.005
)  # model-based saliency requires a very small smoothness loss and can therefore produce very sharp masks
optim_phase1 = torch_optim.Adam(saliency.selector_module.parameters(),
                                0.001,