Beispiel #1
0
def test_get_img_shape_on_2d_image():
    n = 5
    channels = 4
    dim1 = 1
    dim2 = 2

    K.set_image_data_format('channels_first')
    assert (n, channels, dim1, dim2) == utils.get_img_shape(K.ones(shape=(n, channels, dim1, dim2)))

    K.set_image_data_format('channels_last')
    assert (n, channels, dim1, dim2) == utils.get_img_shape(K.ones(shape=(n, dim1, dim2, channels)))
Beispiel #2
0
def test_get_img_shape_on_2d_image():
    n = 5
    channels = 4
    dim1 = 1
    dim2 = 2

    K.set_image_data_format('channels_first')
    assert (n, channels, dim1, dim2) == utils.get_img_shape(
        K.ones(shape=(n, channels, dim1, dim2)))

    K.set_image_data_format('channels_last')
    assert (n, channels, dim1, dim2) == utils.get_img_shape(
        K.ones(shape=(n, dim1, dim2, channels)))
def visualize_cam(model,
                  layer_idx,
                  filter_indices,
                  seed_img,
                  penultimate_layer_idx=None,
                  alpha=0):
    """Generates a gradient based class activation map (CAM) as described in paper
    [Grad-CAM: Why did you say that? Visual Explanations from Deep Networks via Gradient-based Localization](https://arxiv.org/pdf/1610.02391v1.pdf).
    Unlike [class activation mapping](https://arxiv.org/pdf/1512.04150v1.pdf), which requires minor changes to
    network architecture in some instances, grad-CAM has a more general applicability.

    Compared to saliency maps, grad-CAM is class discriminative; i.e., the 'cat' explanation exclusively highlights
    cat regions and not the 'dog' region and vice-versa.

    Args:
        model: The `keras.models.Model` instance. Model input is expected to be a 4D image input of shape:
            `(samples, channels, rows, cols)` if data_format='channels_first' or `(samples, rows, cols, channels)` if data_format='channels_last'.
        layer_idx: The layer index within `model.layers` whose filters needs to be visualized.
        filter_indices: filter indices within the layer to be maximized.
            For `keras.layers.Dense` layer, `filter_idx` is interpreted as the output index.

            If you are visualizing final `keras.layers.Dense` layer, you tend to get
            better results with 'linear' activation as opposed to 'softmax'. This is because 'softmax'
            output can be maximized by minimizing scores for other classes.

        seed_img: The input image for which activation map needs to be visualized.
        penultimate_layer_idx: The pre-layer to `layer_idx` whose feature maps should be used to compute gradients
            wrt filter output. If not provided, it is set to the nearest penultimate `Convolutional` or `Pooling` layer.
        alpha: The alpha value of image as overlayed onto the heatmap. 
            This value needs to be between [0, 1], with 0 being heatmap only to 1 being image only (Default value = 0.5)

     Example:
        If you wanted to visualize attention over 'bird' category, say output index 22 on the
        final `keras.layers.Dense` layer, then, `filter_indices = [22]`, `layer = dense_layer`.

        One could also set filter indices to more than one value. For example, `filter_indices = [22, 23]` should
        (hopefully) show attention map that corresponds to both 22, 23 output categories.

    Notes:
        This technique deprecates occlusion maps as it gives similar results, but with one-pass gradient computation
        as opposed inefficient sliding window approach.

    Returns:
        The heatmap image, overlayed with `seed_img` using `alpha`, indicating image regions that, when changed, 
        would contribute the most towards maximizing the output of `filter_indices`.
    """
    if alpha < 0. or alpha > 1.:
        raise ValueError("`alpha` needs to be between [0, 1]")

    filter_indices = utils.listify(filter_indices)
    print("Working on filters: {}".format(pprint.pformat(filter_indices)))

    # Search for the nearest penultimate `Convolutional` or `Pooling` layer.
    if penultimate_layer_idx is None:
        for idx, layer in utils.reverse_enumerate(model.layers[:layer_idx -
                                                               1]):
            if isinstance(layer, (Convolution2D, _Pooling2D)):
                penultimate_layer_idx = idx
                break

    if penultimate_layer_idx is None:
        raise ValueError(
            'Unable to determine penultimate `Convolution2D` or `Pooling2D` '
            'layer for layer_idx: {}'.format(layer_idx))
    assert penultimate_layer_idx < layer_idx

    losses = [(ActivationMaximization(model.layers[layer_idx],
                                      filter_indices), 1)]

    penultimate_output = model.layers[penultimate_layer_idx].output
    opt = Optimizer(model.input, losses, wrt=penultimate_output)
    _, grads, penultimate_output_value = opt.minimize(seed_img,
                                                      max_iter=1,
                                                      verbose=False)

    # We are minimizing loss as opposed to maximizing output as with the paper.
    # So, negative gradients here mean that they reduce loss, maximizing class probability.
    grads *= -1

    # Average pooling across all feature maps.
    # This captures the importance of feature map (channel) idx to the output
    s_idx, c_idx, row_idx, col_idx = utils.get_img_indices()
    weights = np.mean(grads, axis=(s_idx, row_idx, col_idx))

    # Generate heatmap by computing weight * output over feature maps
    s, ch, rows, cols = utils.get_img_shape(penultimate_output)
    heatmap = np.ones(shape=(rows, cols), dtype=np.float32)
    for i, w in enumerate(weights):
        heatmap += w * penultimate_output_value[utils.slicer[0, i, :, :]]

    # The penultimate feature map size is definitely smaller than input image.
    s, ch, rows, cols = utils.get_img_shape(model.input)

    # TODO: Figure out a way to get rid of open cv dependency.
    # skimage doesn't deal with arbitrary floating point ranges.
    heatmap = cv2.resize(heatmap, (cols, rows), interpolation=cv2.INTER_CUBIC)

    # ReLU thresholding, normalize between (0, 1)
    heatmap = np.maximum(heatmap, 0)
    heatmap /= np.max(heatmap)
    heatmap *= 10
    heatmap_colored = heatmap
    # Convert to heatmap and zero out low probabilities for a cleaner output.
    #heatmap_colored = np.uint8(cm.jet(heatmap)[..., :3] * 10)
    #heatmap_colored[np.where(heatmap < 0.2)] = 0

    #heatmap_colored = np.uint8(seed_img * alpha + heatmap_colored * (1. - alpha))
    return heatmap_colored
def visualize_cam_with_losses(input_tensor,
                              losses,
                              seed_input,
                              penultimate_layer,
                              grad_modifier=None):
    """Generates a gradient based class activation map (CAM) by using positive gradients of `input_tensor`
    with respect to weighted `losses`.

    For details on grad-CAM, see the paper:
    [Grad-CAM: Why did you say that? Visual Explanations from Deep Networks via Gradient-based Localization]
    (https://arxiv.org/pdf/1610.02391v1.pdf).

    Unlike [class activation mapping](https://arxiv.org/pdf/1512.04150v1.pdf), which requires minor changes to
    network architecture in some instances, grad-CAM has a more general applicability.

    Compared to saliency maps, grad-CAM is class discriminative; i.e., the 'cat' explanation exclusively highlights
    cat regions and not the 'dog' region and vice-versa.

    Args:
        input_tensor: An input tensor of shape: `(samples, channels, image_dims...)` if `image_data_format=
            channels_first` or `(samples, image_dims..., channels)` if `image_data_format=channels_last`.
        losses: List of ([Loss](vis.losses#Loss), weight) tuples.
        seed_input: The model input for which activation map needs to be visualized.
        penultimate_layer: The pre-layer to `layer_idx` whose feature maps should be used to compute gradients
            with respect to filter output.
        grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md). If you don't
            specify anything, gradients are unchanged (Default value = None)

    Returns:
        The normalized gradients of `seed_input` with respect to weighted `losses`.
    """
    penultimate_output = penultimate_layer.output
    opt = Optimizer(input_tensor,
                    losses,
                    wrt_tensor=penultimate_output,
                    norm_grads=False)
    _, grads, penultimate_output_value = opt.minimize(
        seed_input, max_iter=1, grad_modifier=grad_modifier, verbose=False)

    # For numerical stability. Very small grad values along with small penultimate_output_value can cause
    # w * penultimate_output_value to zero out, even for reasonable fp precision of float32.
    grads = grads / (np.max(grads) + K.epsilon())

    # Average pooling across all feature maps.
    # This captures the importance of feature map (channel) idx to the output.
    channel_idx = 1 if K.image_data_format() == 'channels_first' else -1
    other_axis = np.delete(np.arange(len(grads.shape)), channel_idx)
    weights = np.mean(grads, axis=tuple(other_axis))

    # Generate heatmap by computing weight * output over feature maps
    output_dims = utils.get_img_shape(penultimate_output)[2:]
    heatmap = np.zeros(shape=output_dims, dtype=K.floatx())
    for i, w in enumerate(weights):
        if channel_idx == -1:
            heatmap += w * penultimate_output_value[0, ..., i]
        else:
            heatmap += w * penultimate_output_value[0, i, ...]

    # ReLU thresholding to exclude pattern mismatch information (negative gradients).
    heatmap = np.maximum(heatmap, 0)

    # The penultimate feature map size is definitely smaller than input image.
    input_dims = utils.get_img_shape(input_tensor)[2:]

    # Figure out the zoom factor.
    zoom_factor = [
        i / (j * 1.0) for i, j in iter(zip(input_dims, output_dims))
    ]
    heatmap = zoom(heatmap, zoom_factor)
    return utils.normalize(heatmap)