import numpy as np
import torch
from torch.autograd import grad

# The snippets below also rely on project-local helpers that are not shown
# here: logit2prob, normalize_score, and perturb_embedding.


def integrated_gradient(model, x, pred_label, step_size=0.02, n_iters=4):
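    # Integrated gradients for a recurrent classifier: average the vanilla
    # gradient at inputs scaled from 1/n_iters * x up to x, weight it by the
    # input embedding, then perturb x along that direction and record the
    # change in the predicted probability.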
    avg_grad = None
    for n in range(1, n_iters + 1):
        x_ = float(n) / n_iters * x
        x_ = x_.detach()
        gradient, _, _, _ = vanilla_gradient(model, x_, pred_label, step_size)
        if n == 1:
            avg_grad = gradient
        else:
            avg_grad += gradient
    avg_grad /= n_iters
    inte_grad = np.multiply(avg_grad, x.detach().cpu().numpy())
    scale = np.sum(inte_grad, axis=-1, keepdims=True)
    intp = np.multiply(avg_grad, scale)
    grad_l2 = np.sum(intp[:, 0, :]**2, axis=1)
    importance_score = normalize_score(grad_l2) * step_size

    model.hidden = model.init_hidden()
    pred, _ = model(x.cpu())
    p_prior = logit2prob(pred[0].data.numpy())
    intp /= np.sqrt(np.sum(intp[:, 0, :]**2))  # normalize to unit length
    x_after = np.copy(x.cpu().data.numpy())
    x_after = perturb_embedding(x_after, intp * step_size)
    x_after = torch.from_numpy(x_after)
    model.hidden = model.init_hidden()
    pred, _ = model(x_after.cpu())
    p_after = logit2prob(pred[0].data.numpy())
    changes_pred = p_after - p_prior

    return inte_grad, importance_score, x_after, changes_pred, avg_grad
def smooth_gradient(model,
                    x0,
                    pred_label,
                    DEVICE,
                    step_size,
                    noise_range=0.02,
                    n_iters=20):
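    # SmoothGrad: average the vanilla gradient over n_iters Gaussian-noised
    # copies of x0, then use the averaged gradient both to score tokens and
    # to perturb the embedding by step_size.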
    smooth_grad = None
    for n in range(n_iters):
        x0_ = x0 + torch.randn(x0.shape).to(DEVICE) * noise_range
        gradient, _, _, _ = vanilla_gradient(model, x0_, pred_label)
        if n == 0:
            smooth_grad = gradient
        else:
            smooth_grad += gradient
    smooth_grad /= n_iters

    grad_l2 = np.sum(smooth_grad[:, 0, :]**2, axis=1)
    importance_score = normalize_score(grad_l2) * step_size

    model.hidden = model.init_hidden()
    pred, _ = model(x0.cpu())
    p_prior = logit2prob(pred[0].data.numpy())
    smooth_grad /= np.sqrt(np.sum(
        smooth_grad[:, 0, :]**2))  # normalize to unit length
    x_after = np.copy(x0.cpu().data.numpy())
    x_after = perturb_embedding(x_after, smooth_grad * step_size)
    x_after = torch.from_numpy(x_after)
    model.hidden = model.init_hidden()
    pred, _ = model(x_after)
    p_after = logit2prob(pred[0].data.numpy())
    changes_pred = p_after - p_prior

    return smooth_grad, importance_score, x_after, changes_pred
def vanilla_gradient(model, x, pred_label, step_size=0.02):
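    # Vanilla gradient (saliency): backpropagate the predicted-class score to
    # the input embedding, score tokens by the squared gradient norm, and
    # nudge the embedding one step of size step_size along the unit gradient.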
    model.batch_size = 1
    model.hidden = model.init_hidden()
    x = x.cpu()
    x.requires_grad = True
    pred, _ = model(x)
    x_prior = x.data.numpy()
    p_prior = logit2prob(pred[0].data.numpy())

    one_hot = np.zeros((1, 2), dtype=np.float32)
    one_hot[0][pred_label[0]] = 1
    one_hot = torch.from_numpy(one_hot)
    one_hot.requires_grad = True
    one_hot = torch.sum(one_hot * pred[0])

    gradient = grad(one_hot, x)[0].numpy()
    grad_l2 = np.sum(gradient[:, 0, :]**2, axis=1)
    importance_score = normalize_score(grad_l2) * step_size
    # normalize to unit length
    gradient /= np.sqrt(np.sum(gradient[:, 0, :]**2))
    x_after = np.copy(x_prior)
    x_after = perturb_embedding(x_after, gradient * step_size)

    x_after = torch.from_numpy(x_after)
    model.hidden = model.init_hidden()
    pred, _ = model(x_after)
    p_after = logit2prob(pred[0].data.numpy())
    changes_pred = p_after - p_prior

    return gradient, importance_score, x_after, changes_pred
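
For orientation, a minimal, hypothetical driver for the three recurrent-model explainers above; the function name explain_example, the CPU device, and the (seq_len, 1, embed_dim) embedding shape are assumptions rather than part of the original code.

import torch

def explain_example(model, x, pred_label, step_size=0.02):
    # x: embedded input of shape (seq_len, 1, embed_dim); model as above.
    _, v_scores, _, v_change = vanilla_gradient(model, x, pred_label, step_size)
    _, s_scores, _, s_change = smooth_gradient(model, x, pred_label,
                                               torch.device("cpu"), step_size)
    _, i_scores, _, i_change, _ = integrated_gradient(model, x, pred_label,
                                                      step_size)
    return {"vanilla": (v_scores, v_change),
            "smoothgrad": (s_scores, s_change),
            "integrated": (i_scores, i_change)}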
Example #4
def gradient_times_input(model, row, pred_label, DEVICE, step_size=0.02):
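    # Gradient x input for a BERT-style model: weight the vanilla gradient of
    # the predicted class w.r.t. the input embeddings by the embeddings
    # themselves, score tokens, and measure how a small perturbation along
    # that direction shifts the prediction.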
    gradient, importance_score, x_after, changes_pred = vanilla_gradient(
        model, row, pred_label, DEVICE, step_size=step_size)
    x0, segments_ids, input_masks = row
    grad_times_input = np.multiply(gradient, x0.detach().cpu().numpy())
    scale = np.sum(grad_times_input, axis=-1, keepdims=True)
    intp = np.multiply(gradient, scale)
    grad_l2 = np.sum(intp[0, :, :]**2, axis=1)
    importance_score = normalize_score(grad_l2) * step_size

    pred = model(inputs_embeds=x0,
                 token_type_ids=segments_ids,
                 attention_mask=input_masks,
                 labels=None)[0]
    p_prior = logit2prob(pred[0].cpu().data.numpy())
    intp /= np.sqrt(np.sum(intp[0, :, :]**2))  # normalize to unit length
    x_after = np.copy(x0.cpu().data.numpy())
    x_after = perturb_embedding(x_after, intp * step_size)
    x_after = torch.from_numpy(x_after).to(DEVICE)
    pred = model(inputs_embeds=x_after,
                 token_type_ids=segments_ids,
                 attention_mask=input_masks,
                 labels=None)[0]
    p_after = logit2prob(pred[0].cpu().data.numpy())
    changes_pred = p_after - p_prior

    return grad_times_input, importance_score, x_after, changes_pred
Example #5
def vanilla_gradient(model, row, pred_label, DEVICE, step_size=0.02):
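    # Vanilla gradient for a BERT-style model: differentiate the predicted-
    # class logit w.r.t. inputs_embeds, score tokens by the squared gradient
    # norm, and apply a unit-length perturbation scaled by step_size.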
    x, segments_ids, input_masks = row
    x.requires_grad = True
    pred = model(inputs_embeds=x,
                 token_type_ids=segments_ids,
                 attention_mask=input_masks,
                 labels=None)[0]
    x_prior = x.cpu().data.numpy()
    p_prior = logit2prob(pred[0].cpu().data.numpy())

    one_hot = np.zeros((1, 2), dtype=np.float32)
    one_hot[0][pred_label[0]] = 1
    one_hot = torch.from_numpy(one_hot).to(DEVICE)
    one_hot.requires_grad = True
    one_hot = torch.sum(one_hot * pred[0])

    gradient = grad(one_hot, x)[0].cpu().numpy()
    grad_l2 = np.sum(gradient[0, :, :]**2, axis=1)
    importance_score = normalize_score(grad_l2) * step_size
    # normalize to unit length
    gradient_unit = gradient / np.sqrt(np.sum(gradient[0, :, :]**2))
    x_after = np.copy(x_prior)
    x_after = perturb_embedding(x_after, gradient_unit * step_size)

    x_after = torch.from_numpy(x_after).to(DEVICE)
    pred = model(inputs_embeds=x_after,
                 token_type_ids=segments_ids,
                 attention_mask=input_masks,
                 labels=None)[0]
    p_after = logit2prob(pred[0].cpu().data.numpy())
    changes_pred = p_after - p_prior

    return gradient, importance_score, x_after, changes_pred
Example #6
def integrated_gradient(model,
                        row,
                        pred_label,
                        DEVICE,
                        step_size=0.02,
                        n_iters=7):
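    # Integrated gradients for a BERT-style model: average gradients along the
    # straight-line path from the zero embedding to x, weight by the input
    # embeddings, then perturb the embeddings and re-score the prediction.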
    x, segments_ids, input_masks = row
    avg_grad = None
    for n in range(1, n_iters + 1):
        x_ = float(n) / n_iters * x
        x_ = x_.detach()
        gradient, _, _, _ = vanilla_gradient(model,
                                             [x_, segments_ids, input_masks],
                                             pred_label, DEVICE)
        if n == 1:
            avg_grad = gradient
        else:
            avg_grad += gradient
    avg_grad /= n_iters
    inte_grad = np.multiply(avg_grad, x.detach().cpu().numpy())
    scale = np.sum(inte_grad, axis=-1, keepdims=True)
    intp = np.multiply(avg_grad, scale)
    grad_l2 = np.sum(intp[0, :, :]**2, axis=1)
    importance_score = normalize_score(grad_l2) * step_size

    pred = model(inputs_embeds=x,
                 token_type_ids=segments_ids,
                 attention_mask=input_masks,
                 labels=None)[0]
    p_prior = logit2prob(pred[0].cpu().data.numpy())
    intp /= np.sqrt(np.sum(intp[0, :, :]**2))  # normalize to unit length
    x_after = np.copy(x.cpu().data.numpy())
    x_after = perturb_embedding(x_after, intp * step_size)
    x_after = torch.from_numpy(x_after).to(DEVICE)
    pred = model(inputs_embeds=x_after,
                 token_type_ids=segments_ids,
                 attention_mask=input_masks,
                 labels=None)[0]
    p_after = logit2prob(pred[0].cpu().data.numpy())
    changes_pred = p_after - p_prior

    return inte_grad, importance_score, x_after, changes_pred
def gradient_times_input(model, x, pred_label, step_size=0.02):
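    # Gradient x input for the recurrent classifier: weight the vanilla
    # gradient by the input embedding, score tokens, and perturb the
    # embedding along the resulting direction.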
    gradient, importance_score, x_after, changes_pred = vanilla_gradient(
        model, x.detach(), pred_label, step_size=step_size)
    grad_times_input = np.multiply(gradient, x.detach().cpu().numpy())
    scale = np.sum(grad_times_input, axis=-1, keepdims=True)
    intp = np.multiply(gradient, scale)
    grad_l2 = np.sum(intp[:, 0, :]**2, axis=1)
    importance_score = normalize_score(grad_l2) * step_size

    model.hidden = model.init_hidden()
    pred, _ = model(x.cpu())
    p_prior = logit2prob(pred[0].data.numpy())
    intp /= np.sqrt(np.sum(intp[:, 0, :]**2))  # normalize to unit length
    x_after = np.copy(x.cpu().data.numpy())
    x_after = perturb_embedding(x_after, intp * step_size)
    x_after = torch.from_numpy(x_after)
    model.hidden = model.init_hidden()
    pred, _ = model(x_after.cpu())
    p_after = logit2prob(pred[0].data.numpy())
    changes_pred = p_after - p_prior

    return intp, importance_score, x_after, changes_pred
Example #8
def smooth_gradient(model, row, pred_label, DEVICE, step_size, n_iters=20):
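    # SmoothGrad for a BERT-style model: average gradients over n_iters noisy
    # copies of the embeddings (noise normalized to length 0.4 * step_size),
    # then score tokens and perturb along the averaged gradient.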
    x0, segments_ids, input_masks = row
    noise_range = 0.4 * step_size
    smooth_grad = None
    for n in range(n_iters):
        noise = torch.randn(x0.shape)
        noise = noise / torch.sqrt(torch.sum(
            noise[0, :, :]**2)) * noise_range  # normalize noise to unit length
        x0_ = x0 + noise.to(DEVICE)
        gradient, _, _, _ = vanilla_gradient(model,
                                             [x0_, segments_ids, input_masks],
                                             pred_label, DEVICE)
        if n == 0:
            smooth_grad = gradient
        else:
            smooth_grad += gradient
    smooth_grad /= n_iters

    grad_l2 = np.sum(smooth_grad[0, :, :]**2, axis=1)
    importance_score = normalize_score(grad_l2) * step_size
    pred = model(inputs_embeds=x0,
                 token_type_ids=segments_ids,
                 attention_mask=input_masks,
                 labels=None)[0]
    p_prior = logit2prob(pred[0].cpu().data.numpy())
    smooth_grad /= np.sqrt(np.sum(
        smooth_grad[0, :, :]**2))  # normalize to unit length
    x_after = np.copy(x0.cpu().data.numpy())
    x_after = perturb_embedding(x_after, smooth_grad * step_size)
    x_after = torch.from_numpy(x_after).to(DEVICE)
    pred = model(inputs_embeds=x_after,
                 token_type_ids=segments_ids,
                 attention_mask=input_masks,
                 labels=None)[0]
    p_after = logit2prob(pred[0].cpu().data.numpy())
    changes_pred = p_after - p_prior

    return smooth_grad, importance_score, x_after, changes_pred
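
As with the recurrent variants, here is a hypothetical call pattern for the BERT-style explainers; explain_bert_example is an assumed name, and row is expected to pack (inputs_embeds, token_type_ids, attention_mask) already on DEVICE.

def explain_bert_example(model, row, pred_label, DEVICE, step_size=0.02):
    _, v_scores, _, v_change = vanilla_gradient(model, row, pred_label, DEVICE,
                                                step_size)
    _, g_scores, _, g_change = gradient_times_input(model, row, pred_label,
                                                    DEVICE, step_size)
    _, s_scores, _, s_change = smooth_gradient(model, row, pred_label, DEVICE,
                                               step_size)
    _, i_scores, _, i_change = integrated_gradient(model, row, pred_label,
                                                   DEVICE, step_size)
    return {"vanilla": v_scores, "grad_x_input": g_scores,
            "smoothgrad": s_scores, "integrated": i_scores}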