Example #1
def main():
    # hyper-parameters
    val_dir = '/path/to/imagenet/val/'
    batch_size = 1
    num_workers = 4
    batch = 0

    model_type = "vgg"
    saliency_type = 'group_cam'

    # sample_range = range(5 * batch, 5 * (batch + 1))
    sample_range = range(1 * batch, 1 * (batch + 1))

    vgg = models.vgg19(pretrained=True).eval()
    vgg = vgg.cuda()
    cam = GroupCAM(vgg, target_layer='features.35', groups=32)

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    val_loader = DataLoader(
        datasets.ImageFolder(val_dir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
        sampler=RangeSampler(sample_range)
    )

    images, exp = explain_all(val_loader, explainer=cam)
    # Function that blurs input image
    blur = lambda x: gaussian_blur2d(x, kernel_size=(51, 51), sigma=(50., 50.))

    # Evaluate a batch of explanations
    insertion = CausalMetric(vgg, 'ins', 224 * 2, substrate_fn=blur)
    deletion = CausalMetric(vgg, 'del', 224 * 2, substrate_fn=torch.zeros_like)

    scores = {'del': [], 'ins': []}
    del_tmps = []
    ins_tmps = []
    # Load saved batch of explanations
    for i in tqdm(range(len(images)), total=len(images), desc='Evaluating Saliency'):
        # Evaluate deletion
        del_score = deletion.evaluate(img=images[i], mask=exp[i], cls_idx=None, verbose=0)
        ins_score = insertion.evaluate(img=images[i], mask=exp[i], cls_idx=None, verbose=0)
        del_tmps.append(del_score)
        ins_tmps.append(ins_score)
        scores['del'].append(auc(del_score))
        scores['ins'].append(auc(ins_score))

    print('----------------------------------------------------------------')
    print('Final:\nDeletion - {:.5f}\nInsertion - {:.5f}'.format(np.mean(scores['del']), np.mean(scores['ins'])))
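Note: `RangeSampler`, `explain_all`, `CausalMetric`, and `auc` above are project-local helpers, not part of torchvision or kornia. A minimal sketch of the two smallest ones, assuming `RangeSampler` simply yields a fixed index range and `auc` is the normalized trapezoidal area under the score curve:

import numpy as np
from torch.utils.data.sampler import Sampler

class RangeSampler(Sampler):
    # hypothetical reconstruction: iterate over exactly the given index range
    def __init__(self, r):
        self.r = r

    def __iter__(self):
        return iter(self.r)

    def __len__(self):
        return len(self.r)

def auc(arr):
    # normalized area under the curve via the trapezoidal rule
    return (arr.sum() - arr[0] / 2 - arr[-1] / 2) / (arr.shape[0] - 1)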
Example #2
    def forward(self,
                x: torch.Tensor) -> Tuple[List, List, List]:  # type: ignore
        bs, ch, h, w = x.size()
        cur_level, cur_sigma, pixel_distance = self.get_first_level(x)

        sigmas = [
            cur_sigma * torch.ones(bs, self.n_levels + self.extra_levels).to(
                x.device).to(x.dtype)
        ]
        pixel_dists = [
            pixel_distance * torch.ones(
                bs, self.n_levels + self.extra_levels).to(x.device).to(x.dtype)
        ]
        pyr = [[cur_level]]
        oct_idx = 0
        while True:
            cur_level = pyr[-1][0]
            for level_idx in range(1, self.n_levels + self.extra_levels):
                sigma = cur_sigma * math.sqrt(self.sigma_step**2 - 1.0)
                ksize = self.get_kernel_size(sigma)

                # Hack: PyTorch cannot pad by more than the input size, but
                # huge sigmas need equally huge kernels and padding, so clamp
                # the kernel to the current level's spatial size.

                ksize = min(ksize, min(cur_level.size(2), cur_level.size(3)))
                if ksize % 2 == 0:
                    ksize += 1

                cur_level = gaussian_blur2d(cur_level, (ksize, ksize),
                                            (sigma, sigma))
                cur_sigma *= self.sigma_step
                pyr[-1].append(cur_level)
                sigmas[-1][:, level_idx] = cur_sigma
                pixel_dists[-1][:, level_idx] = pixel_distance
            _pyr = pyr[-1][-self.extra_levels]
            nextOctaveFirstLevel = F.interpolate(
                _pyr,
                size=(_pyr.size(-2) // 2, _pyr.size(-1) // 2),
                mode='nearest')  # Nearest matches OpenCV SIFT
            pixel_distance *= 2.0
            cur_sigma = self.init_sigma
            if min(nextOctaveFirstLevel.size(2),
                   nextOctaveFirstLevel.size(3)) <= self.min_size:
                break
            pyr.append([nextOctaveFirstLevel])
            sigmas.append(
                cur_sigma *
                torch.ones(bs, self.n_levels + self.extra_levels).to(x.device))
            pixel_dists.append(
                pixel_distance *
                torch.ones(bs, self.n_levels + self.extra_levels).to(x.device))
            oct_idx += 1
        for i in range(len(pyr)):
            pyr[i] = torch.stack(pyr[i], dim=2)  # type: ignore
        return pyr, sigmas, pixel_dists
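This `forward` is the core of kornia's `ScalePyramid` module. A quick usage sketch (import path assumed from kornia 0.5-era releases):

import torch
from kornia.geometry import ScalePyramid  # assumed import path

sp = ScalePyramid(n_levels=3, init_sigma=1.6, min_size=15, double_image=False)
img = torch.rand(1, 1, 128, 128)
pyr, sigmas, pixel_dists = sp(img)
# one entry per octave; levels are stacked along dim=2, so each pyr[i]
# has shape (B, C, n_levels + extra_levels, H_i, W_i)
print(len(pyr), pyr[0].shape, sigmas[0].shape)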
Example #3
def main():
    args = parse_args()
    raw_img = cv2.imread(args.input, 1)
    raw_img = cv2.resize(raw_img, (224, 224), interpolation=cv2.INTER_LINEAR)

    raw_img = np.float32(raw_img) / 255
    image, norm_image = preprocess_img(raw_img)
    model = models.__dict__[args.arch](pretrained=True).eval()
    model = model.cuda()

    gc = GradCAM(model, target_layer=args.target_layer)

    heatmap = gc(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    cam = show_cam(image, heatmap, args.output)

    if args.ins_del:
        blur = lambda x: gaussian_blur2d(x, kernel_size=(51, 51), sigma=(50., 50.))
        insertion = CausalMetric(model, 'ins', 224 * 2, substrate_fn=blur)
        deletion = CausalMetric(model, 'del', 224 * 2, substrate_fn=torch.zeros_like)
        out_video_path = './VIDEO'
        check_path_exist(out_video_path)

        ins_path = os.path.join(out_video_path, "ins")
        del_path = os.path.join(out_video_path, "del")
        check_path_exist(ins_path)
        check_path_exist(del_path)

        norm_image = norm_image.cpu()
        heatmap = heatmap.cpu().numpy()

        ins_score = insertion.evaluate(norm_image, mask=heatmap, cls_idx=None, save_to=ins_path)
        del_score = deletion.evaluate(norm_image, mask=heatmap, cls_idx=None, save_to=del_path)
        print("\nDeletion - {:.5f}\nInsertion - {:.5f}".format(auc(del_score), auc(ins_score)))

        # generate video
        video_name = os.path.basename(args.input).split('.')[0] + '.avi'
        video_ins = os.path.join(ins_path, video_name)
        video_del = os.path.join(del_path, video_name)
        cmd_str_ins = 'ffmpeg -f image2 -i {}/%06d.jpg -b 5000k -r 30 -c:v mpeg4 {} -y'.format(ins_path, video_ins)
        cmd_str_del = 'ffmpeg -f image2 -i {}/%06d.jpg -b 5000k -r 30 -c:v mpeg4 {} -y'.format(del_path, video_del)
        os.system(cmd_str_ins)
        os.system(cmd_str_del)
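`check_path_exist` is a project-local helper that is not shown; a plausible one-liner, assuming it only has to create missing output directories:

import os

def check_path_exist(path):
    # hypothetical helper: create the directory tree if it does not exist yet
    os.makedirs(path, exist_ok=True)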
Example #4
    def get_first_level(self, input):
        pixel_distance = 1.0
        cur_sigma = 0.5
        # Same as in OpenCV up to interpolation difference
        if self.double_image:
            x = F.interpolate(input,
                              scale_factor=2.0,
                              mode='bilinear',
                              align_corners=False)
            pixel_distance = 0.5
            cur_sigma *= 2.0
        else:
            x = input
        if self.init_sigma > cur_sigma:
            sigma = max(math.sqrt(self.init_sigma**2 - cur_sigma**2), 0.01)
            ksize = self.get_kernel_size(sigma)
            cur_level = gaussian_blur2d(x, (ksize, ksize), (sigma, sigma))
            cur_sigma = self.init_sigma
        else:
            cur_level = x
        return cur_level, cur_sigma, pixel_distance
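`get_kernel_size` is referenced but not shown. A sketch of what it might look like as a method of the same class, assuming the usual OpenCV-style rule of thumb (kernel radius of roughly 4 sigma, forced to an odd size), which matches how the result is used in Example #2:

def get_kernel_size(self, sigma: float) -> int:
    # cover roughly +/- 4 sigma, then force an odd kernel size
    ksize = int(2.0 * 4.0 * sigma + 1.0)
    if ksize % 2 == 0:
        ksize += 1
    return ksize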
Example #5
import argparse

import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from kornia.filters.gaussian import gaussian_blur2d
from cam import GroupCAM

# import torchvision.models as models
import backbones as models

# Function that blurs input image
blur = lambda x: gaussian_blur2d(x, kernel_size=(51, 51), sigma=(50., 50.))

model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))

parser = argparse.ArgumentParser(
    description=
    'An example of adopting group-cam to fine-tune classification models')
parser.add_argument('data', metavar='DIR', help='path to dataset')
parser.add_argument('-a',
                    '--arch',
                    metavar='ARCH',
                    default='resnet18',
                    choices=model_names,
                    help='model architecture: ' + ' | '.join(model_names) +
                    ' (default: resnet18)')
Example #6
    def forward(self, x, class_idx=None, retain_graph=False):
        input = x.clone()
        input = input.cuda()
        b, c, h, w = input.size()
        logit = self.model(input)

        if class_idx is None:
            predicted_class = logit.max(1)[-1]
            score = logit[:, logit.max(1)[-1]].squeeze()
        else:
            predicted_class = torch.LongTensor([class_idx])
            score = logit[:, class_idx].squeeze()

        predicted_class = predicted_class.cuda()
        self.model.zero_grad()
        score.backward(retain_graph=retain_graph)
        gradients = self.gradients['value'].data
        activations = self.activations['value'].data
        b, k, u, v = activations.size()

        alpha = gradients.view(b, k, -1).mean(2)
        weights = alpha.view(b, k, 1, 1)
        activations = weights * activations

        masks = activations.chunk(self.groups, 1)
        # parallel implement
        masks = torch.cat(masks, dim=0)
        saliency_map = masks.sum(1, keepdim=True)
        saliency_map = F.relu(saliency_map)
        threshold = np.percentile(saliency_map.cpu().numpy(), 70)
        saliency_map = torch.where(saliency_map > threshold, saliency_map,
                                   torch.full_like(saliency_map, 0))

        saliency_map = F.interpolate(saliency_map,
                                     size=(h, w),
                                     mode='bilinear',
                                     align_corners=False)
        saliency_map = saliency_map.reshape(self.groups, -1)
        inter_min, inter_max = saliency_map.min(
            dim=-1, keepdim=True)[0], saliency_map.max(dim=-1, keepdim=True)[0]
        saliency_map = (saliency_map - inter_min) / (inter_max - inter_min)
        saliency_map = saliency_map.reshape(self.groups, 1, h, w)

        with torch.no_grad():
            # gaussian_blur2d requires explicit kernel_size and sigma; these
            # match the blur substrate used in the other examples
            blur_input = input * saliency_map + gaussian_blur2d(
                input, (51, 51), (50., 50.)) * (1 - saliency_map)
            output = self.model(blur_input)
        output = F.softmax(output, dim=-1)
        score = output[:, predicted_class].unsqueeze(-1).unsqueeze(-1)
        score_saliency_map = torch.sum(saliency_map * score,
                                       dim=0,
                                       keepdim=True)

        score_saliency_map = F.relu(score_saliency_map)
        score_saliency_map_min, score_saliency_map_max = score_saliency_map.min(
        ), score_saliency_map.max()

        if score_saliency_map_min == score_saliency_map_max:
            return None

        score_saliency_map = (score_saliency_map - score_saliency_map_min) / (
            score_saliency_map_max - score_saliency_map_min).data
        return score_saliency_map
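A minimal usage sketch for this `GroupCAM.forward`, mirroring Examples #1 and #3 (the ResNet-50 backbone and `target_layer='layer4'` are assumptions, and `norm_image` is an ImageNet-normalized `(1, 3, 224, 224)` tensor as in Example #3). Note the method returns `None` when the saliency map is constant, so callers should check:

import torchvision.models as models

model = models.resnet50(pretrained=True).eval().cuda()
cam = GroupCAM(model, target_layer='layer4', groups=32)
saliency = cam(norm_image.cuda(), class_idx=None)  # (1, 1, H, W) in [0, 1], or None
if saliency is not None:
    print(saliency.shape, saliency.min().item(), saliency.max().item())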
Example #7
def canny(
    input: torch.Tensor,
    low_threshold: float = 0.1,
    high_threshold: float = 0.2,
    kernel_size: Tuple[int, int] = (5, 5),
    sigma: Tuple[float, float] = (1, 1),
    hysteresis: bool = True,
    eps: float = 1e-6,
) -> Tuple[torch.Tensor, torch.Tensor]:
    r"""Finds edges of the input image and filters them using the Canny algorithm.

    .. image:: _static/img/canny.png

    Args:
        input: input image tensor with shape :math:`(B,C,H,W)`.
        low_threshold: lower threshold for the hysteresis procedure.
        high_threshold: upper threshold for the hysteresis procedure.
        kernel_size: the size of the kernel for the gaussian blur.
        sigma: the standard deviation of the kernel for the gaussian blur.
        hysteresis: if True, applies the hysteresis edge tracking.
            Otherwise, the edges are divided between weak (0.5) and strong (1) edges.
        eps: regularization number to avoid NaN during backprop.

    Returns:
        - the canny edge magnitudes map, shape of :math:`(B,1,H,W)`.
        - the canny edge detection filtered by thresholds and hysteresis, shape of :math:`(B,1,H,W)`.

    Example:
        >>> input = torch.rand(5, 3, 4, 4)
        >>> magnitude, edges = canny(input)  # 5x3x4x4
        >>> magnitude.shape
        torch.Size([5, 1, 4, 4])
        >>> edges.shape
        torch.Size([5, 1, 4, 4])
    """
    if not isinstance(input, torch.Tensor):
        raise TypeError("Input type is not a torch.Tensor. Got {}".format(type(input)))

    if not len(input.shape) == 4:
        raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}".format(input.shape))

    if low_threshold > high_threshold:
        raise ValueError(
            "Invalid input thresholds. low_threshold should be smaller than the high_threshold. Got: {}>{}".format(
                low_threshold, high_threshold
            )
        )

    if low_threshold < 0 or low_threshold > 1:
        raise ValueError(
            "Invalid input threshold. low_threshold should be in range (0,1). Got: {}".format(low_threshold)
        )

    if high_threshold < 0 or high_threshold > 1:
        raise ValueError(
            "Invalid input threshold. high_threshold should be in range (0,1). Got: {}".format(high_threshold)
        )

    device: torch.device = input.device
    dtype: torch.dtype = input.dtype

    # To Grayscale
    if input.shape[1] == 3:
        input = rgb_to_grayscale(input)

    # Gaussian filter
    blurred: torch.Tensor = gaussian_blur2d(input, kernel_size, sigma)

    # Compute the gradients
    gradients: torch.Tensor = spatial_gradient(blurred, normalized=False)

    # Unpack the edges
    gx: torch.Tensor = gradients[:, :, 0]
    gy: torch.Tensor = gradients[:, :, 1]

    # Compute gradient magnitude and angle
    magnitude: torch.Tensor = torch.sqrt(gx * gx + gy * gy + eps)
    angle: torch.Tensor = torch.atan2(gy, gx)

    # Radians to Degrees
    angle = rad2deg(angle)

    # Round angle to the nearest 45 degree
    angle = torch.round(angle / 45) * 45

    # Non-maximal suppression
    nms_kernels: torch.Tensor = get_canny_nms_kernel(device, dtype)
    nms_magnitude: torch.Tensor = F.conv2d(magnitude, nms_kernels, padding=nms_kernels.shape[-1] // 2)

    # Get the indices for both directions
    positive_idx: torch.Tensor = (angle / 45) % 8
    positive_idx = positive_idx.long()

    negative_idx: torch.Tensor = ((angle / 45) + 4) % 8
    negative_idx = negative_idx.long()

    # Apply the non-maximum suppression to the different directions
    channel_select_filtered_positive: torch.Tensor = torch.gather(nms_magnitude, 1, positive_idx)
    channel_select_filtered_negative: torch.Tensor = torch.gather(nms_magnitude, 1, negative_idx)

    channel_select_filtered: torch.Tensor = torch.stack(
        [channel_select_filtered_positive, channel_select_filtered_negative], 1
    )

    is_max: torch.Tensor = channel_select_filtered.min(dim=1)[0] > 0.0

    magnitude = magnitude * is_max

    # Threshold: mark weak (0.5) and strong (1.0) edges

    low: torch.Tensor = magnitude > low_threshold
    high: torch.Tensor = magnitude > high_threshold

    edges = low * 0.5 + high * 0.5
    edges = edges.to(dtype)

    # Hysteresis
    if hysteresis:
        edges_old: torch.Tensor = -torch.ones(edges.shape, device=edges.device, dtype=dtype)
        hysteresis_kernels: torch.Tensor = get_hysteresis_kernel(device, dtype)

        while ((edges_old - edges).abs() != 0).any():
            weak: torch.Tensor = (edges == 0.5).float()
            strong: torch.Tensor = (edges == 1).float()

            hysteresis_magnitude: torch.Tensor = F.conv2d(
                edges, hysteresis_kernels, padding=hysteresis_kernels.shape[-1] // 2
            )
            hysteresis_magnitude = (hysteresis_magnitude == 1).any(1, keepdim=True).to(dtype)
            hysteresis_magnitude = hysteresis_magnitude * weak + strong

            edges_old = edges.clone()
            edges = hysteresis_magnitude + (hysteresis_magnitude == 0) * weak * 0.5

        edges = hysteresis_magnitude

    return magnitude, edges
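A quick smoke test of the two return values; kornia ships this as `kornia.filters.canny` in recent releases:

import torch
from kornia.filters import canny

img = torch.rand(1, 3, 64, 64)
magnitude, edges = canny(img, low_threshold=0.1, high_threshold=0.2,
                         hysteresis=False)
# without hysteresis, edges holds 0.0 (no edge), 0.5 (weak), 1.0 (strong)
print(magnitude.shape, edges.shape)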
Example #8
def main():
    args = parse_args()
    raw_img = cv2.imread(args.input, 1)
    raw_img = cv2.resize(raw_img, (224, 224), interpolation=cv2.INTER_LINEAR)

    raw_img = np.float32(raw_img) / 255
    image, norm_image = preprocess_img(raw_img)
    model = models.__dict__[args.arch](pretrained=True).eval()
    model = model.cuda()

    rise = RISE(model, input_size=(224, 224), batch_size=40)
    rise.generate_masks()
    gd = GradCAM(model, target_layer=args.target_layer)
    gc = GroupCAM(model, target_layer=args.target_layer)

    rise_heatmap = rise(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    gd_heatmap = gd(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    gc_heatmap = gc(norm_image.cuda(), class_idx=args.cls_idx).cpu().data

    if args.output is not None:
        rise_cam = show_cam(image, rise_heatmap, "rise_base.png")
        gd_cam = show_cam(image, gd_heatmap, "gd_base.png")
        gc_cam = show_cam(image, gc_heatmap, "gc_base.png")

    if args.ins_del:
        blur = lambda x: gaussian_blur2d(
            x, kernel_size=(51, 51), sigma=(50., 50.))
        insertion = CausalMetric(model, 'ins', 224 * 2, substrate_fn=blur)
        deletion = CausalMetric(model,
                                'del',
                                224 * 2,
                                substrate_fn=torch.zeros_like)

        norm_image = norm_image.cpu()
        gd_heatmap = gd_heatmap.cpu().numpy()
        gc_heatmap = gc_heatmap.cpu().numpy()
        rise_heatmap = rise_heatmap.cpu().numpy()

        gc_ins_score = insertion.evaluate(norm_image,
                                          mask=gc_heatmap,
                                          cls_idx=None)
        gd_ins_score = insertion.evaluate(norm_image,
                                          mask=gd_heatmap,
                                          cls_idx=None)
        rise_ins_score = insertion.evaluate(norm_image,
                                            mask=rise_heatmap,
                                            cls_idx=None)

        gc_del_score = deletion.evaluate(norm_image,
                                         mask=gc_heatmap,
                                         cls_idx=None)
        gd_del_score = deletion.evaluate(norm_image,
                                         mask=gd_heatmap,
                                         cls_idx=None)
        rise_del_score = deletion.evaluate(norm_image,
                                           mask=rise_heatmap,
                                           cls_idx=None)

        legend = ["RISE", "Grad-CAM", "Group-CAM"]
        ins_scores = [
            auc(rise_ins_score),
            auc(gd_ins_score),
            auc(gc_ins_score)
        ]
        del_scores = [
            auc(rise_del_score),
            auc(gd_del_score),
            auc(gc_del_score)
        ]
        ins_scores = [round(i * 100, 2) for i in ins_scores]
        del_scores = [round(i * 100, 2) for i in del_scores]
        ins_legend = [i + ": " + str(j) for i, j in zip(legend, ins_scores)]
        del_legend = [i + ": " + str(j) for i, j in zip(legend, del_scores)]

        n_steps = len(gd_ins_score)

        x = np.arange(n_steps) / n_steps
        plt.figure(figsize=(12, 5))

        plt.subplot(121)
        plt.plot(x, rise_ins_score)
        plt.plot(x, gd_ins_score)
        plt.plot(x, gc_ins_score)
        # set limits on the subplot itself, otherwise they are lost
        plt.xlim(-0.1, 1.1)
        plt.ylim(0, 1.05)
        plt.xticks(fontsize=15)
        plt.yticks(fontsize=15)
        plt.legend(ins_legend, loc='best', fontsize=15)
        plt.title("Insertion Curve", fontsize=15)

        plt.subplot(122)
        plt.plot(x, rise_del_score)
        plt.plot(x, gd_del_score)
        plt.plot(x, gc_del_score)
        plt.xlim(-0.1, 1.1)
        plt.ylim(0, 1.05)
        plt.xticks(fontsize=15)
        plt.yticks(fontsize=15)
        plt.legend(del_legend, loc='best', fontsize=15)
        plt.title("Deletion Curve", fontsize=15)
        plt.show()
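`preprocess_img` is another project-local helper used in Examples #3 and #8. A plausible implementation, assuming it converts the BGR float image from OpenCV into a plain RGB tensor plus an ImageNet-normalized copy:

import torch
import torchvision.transforms as transforms

def preprocess_img(raw_img):
    # hypothetical helper: HxWx3 BGR float image in [0, 1]
    # -> (plain tensor, ImageNet-normalized tensor), both (1, 3, H, W)
    rgb = raw_img[:, :, ::-1].copy()
    image = torch.from_numpy(rgb).permute(2, 0, 1).unsqueeze(0)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    return image, normalize(image.squeeze(0)).unsqueeze(0)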