def get_shap_scores_layer(model, X, layer_name, output_index=-1, method_name='deepexplainer'):
    """Compute SHAP values of a model output with respect to an intermediate layer.

    The activations of ``layer_name`` for ``X`` are used both as the
    explainer background and as the samples being explained.

    :param model: Keras model exposing ``get_layer``, ``layers`` and ``outputs``.
    :param X: input samples fed to the first layer of ``model``.
    :param layer_name: name of the layer whose output is attributed.
    :param output_index: either the name of a layer whose output is the
        explained target, or an integer index into ``model.outputs``.
    :param method_name: ``'deepexplainer'`` or ``'gradientexplainer'``.
    :return: SHAP values for the top-ranked output (first entry of the
        ranked list returned by the explainer).
    :raises ValueError: if ``method_name`` is not a supported explainer.
    """
    # Fail fast on a bad method name, before importing shap or touching the
    # session. The original ``raise ('...')`` raised a TypeError instead of a
    # meaningful error because a str is not an exception.
    supported_methods = ('deepexplainer', 'gradientexplainer')
    if method_name not in supported_methods:
        raise ValueError('unsupported method {!r}; expected one of {}'.format(
            method_name, supported_methods))

    import shap

    def map2layer(x):
        # Run the graph up to ``layer_name`` for a copy of the given inputs.
        fetch = model.get_layer(layer_name).output
        feed_dict = {model.layers[0].input: x.copy()}
        return K.get_session().run(fetch, feed_dict)

    if isinstance(output_index, str):
        y = model.get_layer(output_index).output
    else:
        y = model.outputs[output_index]

    x = model.get_layer(layer_name).output
    if method_name == 'deepexplainer':
        explainer = shap.DeepExplainer((x, y), map2layer(X.copy()))
    else:
        explainer = shap.GradientExplainer((x, y), map2layer(X.copy()), local_smoothing=2)

    # ranked_outputs=2 makes shap return (values, indexes); only the values
    # of the top-ranked output are used.
    shap_values, _ = explainer.shap_values(map2layer(X), ranked_outputs=2)

    print (shap_values[0].shape)
    return shap_values[0]
Beispiel #2
0
def test_pytorch_multiple_inputs():
    """Check GradientExplainer additivity for a PyTorch model with two inputs.

    The per-sample sum of SHAP values over both inputs must be within 5%
    (relative L1 error) of the difference between the model output on the
    explained inputs and its output on the background.
    """
    # pylint: disable=no-member
    torch = pytest.importorskip('torch')
    from torch import nn

    class Net(nn.Module):
        """One linear layer applied to the concatenation of both inputs."""

        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(7, 1)

        def forward(self, x1, x2):
            joined = torch.cat((x1, x2), dim=-1)
            return self.linear(joined)

    torch.manual_seed(1)
    batch_size = 10
    x1 = torch.ones(batch_size, 3)
    x2 = torch.ones(batch_size, 4)
    background = [torch.zeros(batch_size, 3), torch.zeros(batch_size, 4)]

    model = Net()

    e = shap.GradientExplainer(model, background)
    shap_x1, shap_x2 = e.shap_values([x1, x2])

    model.eval()
    model.zero_grad()
    with torch.no_grad():
        output_gap = model(x1, x2) - model(*background)
        diff = output_gap.detach().numpy().mean(0)

    sums = np.array([first.sum() + second.sum()
                     for first, second in zip(shap_x1, shap_x2)])
    d = np.abs(sums - diff).sum()
    relative_error = d / np.abs(diff).sum()
    assert relative_error < 0.05, \
        "Sum of SHAP values does not match difference! %f" % relative_error
Beispiel #3
0
    def __init__(self, *argv, **kwargs):
        """
        Initialize the wrapper around ``shap.GradientExplainer``.

        All positional and keyword arguments are forwarded unchanged, first
        to the parent class initializer and then to
        ``shap.GradientExplainer``; the resulting SHAP explainer is stored on
        ``self.explainer``.
        """
        super(GradientExplainer, self).__init__(*argv, **kwargs)

        self.explainer = shap.GradientExplainer(*argv, **kwargs)
Beispiel #4
0
def shap_explain(shap_dict):
    """Explain model predictions with ``shap.GradientExplainer``.

    Draws 100 images from ``shap_dict['TEST_GENERATOR']`` as background and
    the following 100 as the samples to explain, then returns the SHAP
    values and indexes of the two top-ranked outputs.

    :param shap_dict: dict providing ``'MODEL'`` and ``'TEST_GENERATOR'``.
    :return: tuple ``(shap_values, indexes)`` as returned by the explainer.
    """
    model = shap_dict['MODEL']
    generator = shap_dict['TEST_GENERATOR']

    def map2layer(x, layer, model):
        feed_dict = {model.layers[0].input.experimental_ref(): x.copy()}

        print(tf.compat.v1.get_default_graph().get_operations())
        with tf.compat.v1.Session() as sess:
            return sess.run(model.layers[layer], feed_dict)

    def draw_images(count):
        # Pull ``count`` batches from the generator and drop the batch axis.
        images = []
        for _ in range(count):
            batch, _label = generator.next()
            images.append(np.squeeze(batch, axis=0))
        return images

    background_images = draw_images(100)
    explained_images = draw_images(100)
    preprocess_input = np.array(background_images)
    inference = np.array(explained_images)

    e = shap.GradientExplainer(
        (model.layers[0].input, model.layers[-1].output),
        map2layer(preprocess_input.copy(), 0, model))
    return e.shap_values(map2layer(inference, 0, model), ranked_outputs=2)
Beispiel #5
0
    def eval_predictor_importance(self, features, features_names):
        """Return a ``{feature name: importance}`` mapping based on SHAP values.

        A ``shap.GradientExplainer`` is built with ``self`` as the model and
        ``features`` as both background and explained data. Importance is the
        accumulated mean absolute SHAP value per feature, with features
        ordered by ascending total importance.

        :param features: data passed to the explainer.
        :param features_names: names indexed by the feature ordering.
        :return: dict mapping each feature name to its accumulated value.
        """
        explainer = shap.GradientExplainer(model=self, data=features)
        # NOTE(review): with ranked_outputs set, shap presumably returns a
        # (values, indexes) pair, so [0] selects the values - confirm.
        ranked = explainer.shap_values(X=features, ranked_outputs=True)
        per_output = ranked[0]

        mean_abs = np.mean(np.abs(per_output), axis=0)
        feature_order = np.argsort(np.sum(mean_abs, axis=0))

        accumulated = np.zeros(len(feature_order))
        output_order = np.argsort(
            [-np.abs(per_output[k]).mean() for k in range(len(per_output))])
        for output_idx in output_order:
            accumulated += np.abs(per_output[output_idx]).mean(0)[feature_order]

        table = pd.DataFrame.from_dict({
            'features': np.asarray(features_names)[feature_order],
            'values': accumulated,
        })
        return {row['features']: row['values'] for _, row in table.iterrows()}
Beispiel #6
0
def explore_shap(org, model, cols, frac=0.11):
    """Plot SHAP-based feature importances for the last two time steps.

    A ``shap.GradientExplainer`` is built on ``org`` and evaluated on the
    first ``frac`` share of its samples. Two horizontal bar charts show the
    summed absolute SHAP values at time steps -1 and -2, followed by a SHAP
    bar summary plot.

    :param org: data indexed as ``org[:n, :, :]`` for the explainer.
    :param model: model handed to ``shap.GradientExplainer``.
    :param cols: feature labels used for the tick labels and summary plot.
    :param frac: share of ``org`` (from the start) to explain.
    """
    explainer_org = shap.GradientExplainer(model, org)
    n_explained = int(len(org) * frac)
    shap_values_org = explainer_org.shap_values(org[:n_explained, :, :])

    # Absolute SHAP values of the first output, summed over axis 0.
    sum_0_org = np.sum(np.absolute(shap_values_org[0]), axis=0)

    tick_positions = [position for position, _ in enumerate(cols)]

    def draw_panel(axes, step_values, title):
        axes.barh(tick_positions, step_values)
        axes.set_yticks(tick_positions)
        axes.set_yticklabels(cols)
        axes.set_title(title)

    upper = plt.subplot(311)
    draw_panel(upper, sum_0_org[-1], "Yesterday’s features (time-step -1)")
    lower = plt.subplot(312, sharex=upper)
    draw_panel(lower, sum_0_org[-2],
               "The day before yesterday’s features(time-step -2)")
    plt.tight_layout()
    plt.show()

    shap.summary_plot(np.sum(shap_values_org[0], axis=0),
                      cols,
                      plot_type='bar',
                      color='royalblue')
Beispiel #7
0
def explain_xray_shap(shap_dict, idx, layerToExplain, save_exp=True):
    '''
    Compute and plot a SHAP GradientExplainer explanation for one test image.

    The first 50 images of the test generator serve as the explainer
    background; image ``idx`` of those 50 is the one explained.

    :param shap_dict: dict containing important information and objects for
        explanation experiments; this function reads 'TEST_GENERATOR',
        'RAW_DATA_PATH', 'TEST_SET', 'IMG_DIM', 'MODEL' and 'CLASSES'
    :param idx: index of the image to explain; it indexes both the 50
        generated images and shap_dict['TEST_SET']['filename']
    :param layerToExplain: index into model.layers; SHAP values are computed
        with respect to the input of that layer
    :param save_exp: Boolean indicating whether to save the explanation
        visualization. NOTE(review): currently unused in this function.
    '''

    # Get 50 preprocessed images from the start of the test set
    shap_dict['TEST_GENERATOR'].reset()
    X = []
    y = []
    for i in range(50):
        x, o = shap_dict['TEST_GENERATOR'].next()
        # drop axis 0 (assumes the generator yields batches of size 1 - TODO confirm)
        x = np.squeeze(x, axis=0)
        X.append(x)
        y.append(o)
    X = np.array(X)
    y = np.array(y)

    # Get the corresponding original image (no preprocessing)
    orig_img = cv2.imread(shap_dict['RAW_DATA_PATH'] +
                          shap_dict['TEST_SET']['filename'][idx])
    new_dim = tuple(shap_dict['IMG_DIM'])
    orig_img = cv2.resize(orig_img, new_dim,
                          interpolation=cv2.INTER_NEAREST)  # Resize image

    # list-index keeps the leading sample axis on the selected image
    to_explain = X[[idx]]

    model = shap_dict['MODEL']

    # Map raw model inputs to the input tensor of the given layer, so the
    # explainer can attribute the final output to that layer's input
    def map2layer(x, layer):
        feed_dict = dict(zip([model.layers[0].input], [x.copy()]))
        return K.get_session().run(model.layers[layer].input, feed_dict)

    e = shap.GradientExplainer(
        (model.layers[layerToExplain].input, model.layers[-1].output),
        map2layer(X, layerToExplain),
        local_smoothing=0  # std dev of smoothing noise
    )

    # SHAP values and class indexes for the two top-ranked outputs
    shap_values, indexes = e.shap_values(map2layer(to_explain, layerToExplain),
                                         ranked_outputs=2)

    # get the names for the classes
    # Rearrange index vector to reflect original ordering of classes in project config
    indexes = [[
        indexes[0][shap_dict['CLASSES'].index(c)]
        for c in shap_dict['TEST_GENERATOR'].class_indices
    ]]
    index_names = np.vectorize(lambda x: shap_dict['CLASSES'][x])(indexes)

    # plot the explanations
    # NOTE(review): orig_img is passed as a keyword to shap.image_plot -
    # confirm the installed shap version accepts it.
    shap.image_plot(shap_values, to_explain, index_names, orig_img=orig_img)

    return
Beispiel #8
0
def shap_gradient_report(model, dataset, folder_name):
    """Rank filters by SHAP gradient values and optionally build a PDF report.

    For every child layer of ``model`` (from the configured
    ``layer_idx_cutoff`` on) whose preceding child is an ``nn.Conv2d``, a
    ``shap.GradientExplainer`` is built on that layer with the training data
    as background. Summed absolute SHAP values are stored per filter for a
    random sample of the 'train' and 'test' segments and written to
    ``shap_rankings_train.txt`` / ``shap_rankings_test.txt`` in
    ``folder_name``. When the global 'plot' flag is set, image plots are
    collected into a PDF that is registered as a sacred artifact.

    :param model: PyTorch module; it is moved to CPU first.
    :param dataset: mapping with 'train' and 'test' entries exposing ``X``
        and ``y``.
    :param folder_name: output folder for the rankings and the report.
    """
    model = model.cpu()
    report_file_name = f'{folder_name}/{global_vars.get("report")}.pdf'
    # Background data for the explainers: training inputs with a trailing axis added
    train_data = np_to_var(dataset['train'].X[:, :, :, None])
    story = []
    shap_imgs = []
    all_paths = []
    segment_examples = {}
    segment_labels = {}
    # Draw a random subset (without replacement) of each segment to explain
    for segment in ['train', 'test']:
        segment_data = np_to_var(dataset[segment].X[:, :, :, None])
        selected_examples = np.random.choice(segment_data.shape[0], int(segment_data.shape[0] * global_vars.get('explainer_sampling_rate')), replace=False)
        segment_examples[segment] = segment_data[selected_examples]
        segment_labels[segment] = dataset[segment].y[selected_examples]

    shap_rankings = {'train': OrderedDict(), 'test': OrderedDict()}
    # NOTE: prev_layer starts as None, so the first layer after the cutoff is
    # never explained even if the layer before the cutoff is a Conv2d.
    prev_layer = None
    for layer_idx, layer in list(enumerate(list(model.children())))[global_vars.get('layer_idx_cutoff'):]:
        if layer_idx > 0 and type(prev_layer) == nn.Conv2d: # we only take layers whose INPUT is a conv
            e = shap.GradientExplainer((model, list(model.children())[layer_idx]), train_data)
            for segment in ['train', 'test']:
                plt.clf()
                print(f'Getting shap values for {len(segment_examples[segment])} {segment} samples')
                shap_values, indexes = e.shap_values(segment_examples[segment], ranked_outputs=2, nsamples=200)

                # Only the top-ranked output is used for the ranking
                shap_val = np.array(shap_values[0]).squeeze()
                shap_abs = np.absolute(shap_val)
                shap_sum = np.sum(shap_abs, axis=0) # sum on sample axis
                if shap_sum.ndim > 1:
                    shap_sum = np.sum(shap_sum, axis=1) # sum on time axis
                shap_sum_idx = np.argsort(shap_sum) # sort
                # Insert in ascending order of summed |SHAP| so the OrderedDict keeps the ranking
                for filter_idx in shap_sum_idx:
                    shap_rankings[segment][f'layer_{layer_idx-1}_filter_{filter_idx}'] = shap_sum[filter_idx] # we use layer_idx-1 because GradientExplainer looks at an INPUT of the layer

                if global_vars.get('plot'):
                    index_names = np.vectorize(lambda x: label_by_idx(x))(indexes)
                    shap.image_plot(shap_values, -segment_examples[segment].numpy(), labels=index_names)
                    plt.suptitle(f'SHAP gradient values for dataset: {global_vars.get("dataset")}, segment: {segment}, layer {layer_idx}\n'
                                 f'segment labels:{[label_by_idx(segment_labels[segment][i]) for i in range(len(segment_labels[segment]))]}', fontsize=10)
                    shap_img_file = f'temp/{get_next_temp_image_name("temp")}.png'
                    shap_imgs.append(shap_img_file)
                    plt.savefig(shap_img_file, dpi=200)
                    story.append(get_image(shap_img_file))
                    all_paths.append(shap_img_file)
        prev_layer = layer

    write_dict(shap_rankings['train'], f'{folder_name}/shap_rankings_train.txt')
    write_dict(shap_rankings['test'], f'{folder_name}/shap_rankings_test.txt')
    if global_vars.get('plot'):
        create_pdf_from_story(report_file_name, story)
        global_vars.get('sacred_ex').add_artifact(report_file_name)
    # Temporary plot images are removed once the report has been written
    for im in all_paths:
        os.remove(im)
Beispiel #9
0
    def __init__(self,
                 background,
                 model,
                 output_size,
                 matrix_path="",
                 img_path=""):
        """Build a ``shap.GradientExplainer`` and store the output settings.

        :param background: background data passed to the explainer.
        :param model: model passed to the explainer.
        :param output_size: stored as ``self.output_size``.
        :param matrix_path: stored as ``self.matrix_path`` (default empty).
        :param img_path: stored as ``self.img_path`` (default empty).
        """

        self.e = shap.GradientExplainer(model,
                                        background)  #np.array(background))
        self.img_path = img_path
        self.matrix_path = matrix_path
        # Counter starts at zero; how it is advanced is not visible here
        self.image_counter = 0
        self.output_size = output_size
Beispiel #10
0
def test_shap(org, gen, rand, frac):
    """Compute SHAP values for models fitted on original and generated data.

    Both models are explained with ``shap.GradientExplainer`` against the
    same validation split of the original data so the results are comparable.

    :param org: original data set.
    :param gen: generated data set.
    :param rand: base random state for the splits and model fits.
    :param frac: fraction forwarded to ``model_output``.
    :return: tuple ``(shap_values_org, shap_values_gen)``.
    """
    # Only the validation features of the original split are used; the
    # generated split is still computed, as before.
    _, org_x_vl, _, _, _ = prepare_df_rand(org, random_state=rand)
    _, _, _, _, _ = prepare_df_rand(gen, random_state=rand + 1)

    epochs = 10

    def explain(fitted_model):
        # The background must be the same for both models, otherwise the
        # two sets of SHAP values are not comparable.
        explainer = shap.GradientExplainer(fitted_model, org_x_vl)
        return explainer.shap_values(org_x_vl[:len(org_x_vl)])

    org_model, _ = model_output(org, epochs, rand + 1, frac=frac)
    shap_values_org = explain(org_model)

    # there is little improvement by adjusting this fraction
    gen_model, _ = model_output(gen, epochs, rand, frac=frac)
    shap_values_gen = explain(gen_model)

    return shap_values_org, shap_values_gen
Beispiel #11
0
 def build_explainer(model: Model, X_reference: np.ndarray) -> typing.Any:
     """
     Build a SHAP gradient explainer for the given model.

     When ``X_reference`` holds more than 100 samples, 100 of them are drawn
     at random without replacement as background; otherwise all of
     ``X_reference`` is used.

     :param model: tensorflow functional API model
     :param X_reference: array of reference data used as explainer background
     :return: shap gradient explainer
     """
     background_size = 100
     if len(X_reference) <= background_size:
         return shap.GradientExplainer(model, X_reference)

     picked_rows = np.random.choice(X_reference.shape[0],
                                    background_size,
                                    replace=False)
     return shap.GradientExplainer(model, X_reference[picked_rows])
Beispiel #12
0
    def __init__(self, model: models.PredictiveModel):
        """Set up a SHAP explainer matching the model's estimator type.

        Tree-based estimators get a ``TreeExplainer``; any other estimator
        gets a ``GradientExplainer`` with the holdout segment of the model's
        feature matrix as background data.

        :param model: predictive model providing ``estimator`` and ``abt``.
        """
        self.model = model
        self.abt = model.abt
        self.conf = model.abt.conf
        self.cn_predictors = self.abt.cn_predictors

        if utils.is_tree_model(self.model.estimator):
            self.explainer = sp.TreeExplainer(self.model.estimator)
        else:
            # NOTE(review): the message says "kernel-explainer" but a
            # GradientExplainer is constructed below - confirm which one is
            # intended.
            print('not tree-based estimator: using kernel-explainer')
            data = model.abt.get_fm(seg=SegTypes.holdout)
            self.explainer = sp.GradientExplainer(self.model.estimator,
                                                  data=data)

        self.shap = ShapValues()
        # Starts unset; where it is assigned is not visible in this method
        self.freqsev: FreqSev = None
def shap(model,X,ys=None):
    """Compute SHAP attributions for ``X`` with ``shap.GradientExplainer``.

    ``X`` is used both as background and as the data being explained.

    :param model: model handed to the explainer.
    :param X: input samples; a TensorFlow tensor is converted to numpy first.
    :param ys: optional per-sample class scores. When given, each sample's
        attribution is taken from the class with the highest score.
    :return: mean absolute SHAP values over axis 0 when ``ys`` is None,
        otherwise the SHAP values of each sample's arg-max class.
    """
    import shap

    if tf.is_tensor(X):
        X = X.numpy()

    explainer = shap.GradientExplainer(model, X)
    shap_cls = np.array(explainer.shap_values(X))

    if ys is None:
        return np.mean(np.abs(shap_cls), axis=0)

    idx_y = np.argmax(ys, axis=1)
    vals = np.zeros(shap_cls.shape[1:])
    n_samples = shap_cls.shape[1]
    for sample in range(n_samples):
        vals[sample, :] = shap_cls[idx_y[sample], sample, :]
    return vals
Beispiel #14
0
    def SHAP_img(self):
        """Explain ``self.input`` at layer ``self.img_layers`` with SHAP.

        A ``shap.GradientExplainer`` is built between the input of layer
        ``self.img_layers`` and the output of the last layer of
        ``self.model``, with ``self.all_input`` as background.

        :return: tuple ``(shap_values, predict)`` for the top-ranked output,
            as returned by the explainer with ``ranked_outputs=1``.
        """
        # Use self.model throughout; the original mixed it with a bare
        # ``model`` name.
        model = self.model
        layer = self.img_layers

        def map2layer(x):
            # Preprocess a copy of the raw inputs and run the graph up to the
            # input of the explained layer.
            feed_dict = {model.layers[0].input: preprocess_input(x.copy())}
            return K.get_session().run(model.layers[layer].input, feed_dict)

        e = shap.GradientExplainer(
            (model.layers[layer].input, model.layers[-1].output),
            map2layer(self.all_input),
            local_smoothing=0)

        shap_values, predict = e.shap_values(map2layer(self.input),
                                             ranked_outputs=1)
        # The original also computed an unused ``index_names`` through the
        # broken ``np, vectorize`` call on an undefined ``indexes``; it is
        # dropped. Callers can map ``predict`` through ``self.class_names``.
        return shap_values, predict
def test__build_explainer():
    """
    Check that ShapGradientExplainer.build_explainer returns an explainer of
    the same type as a directly constructed shap.GradientExplainer
    """
    # ARRANGE
    layers = [
        tf.keras.layers.Flatten(input_shape=(2, 2)),
        tf.keras.layers.Dense(2, activation='relu'),
    ]
    model = tf.keras.models.Sequential(layers)
    X_reference = np.ones(shape=(200, 2, 2))
    explainer = gradient_shap.ShapGradientExplainer()

    background_size = 100
    sampled_rows = np.random.choice(X_reference.shape[0], background_size, replace=False)
    expected_explainer = shap.GradientExplainer(model, X_reference[sampled_rows])

    # ACT
    built_explainer = explainer.build_explainer(model, X_reference)

    # ASSERT
    assert type(built_explainer) is type(expected_explainer)
Beispiel #16
0
    def run_test(train_loader, test_loader, interim):
        """Train a small CNN briefly and check GradientExplainer on it.

        :param train_loader: loader yielding ``(data, target)`` training
            batches; one batch also provides the explainer background.
        :param test_loader: loader whose first sample is explained.
        :param interim: when true, explain with respect to the ``conv1``
            layer instead of the model input; the additivity check is
            skipped in that case.
        """
        class Net(nn.Module):
            """Two conv layers followed by two fully connected layers."""

            def __init__(self):
                super(Net, self).__init__()
                self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
                self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
                self.conv2_drop = nn.Dropout2d()
                self.fc1 = nn.Linear(320, 50)
                self.fc2 = nn.Linear(50, 10)

            def forward(self, x):
                x = F.relu(F.max_pool2d(self.conv1(x), 2))
                x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
                # flatten to 320 features per sample for the linear layers
                x = x.view(-1, 320)
                x = F.relu(self.fc1(x))
                x = F.dropout(x, training=self.training)
                x = self.fc2(x)
                return F.log_softmax(x, dim=1)

        model = Net()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

        def train(model, device, train_loader, optimizer, epoch, cutoff=2000):
            """Run one partial epoch, stopping once more than ``cutoff`` examples were seen."""
            model.train()
            num_examples = 0
            for batch_idx, (data, target) in enumerate(train_loader):
                num_examples += target.shape[0]
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(data)
                loss = F.nll_loss(output, target)
                loss.backward()
                optimizer.step()
                # progress report every 10 batches
                if batch_idx % 10 == 0:
                    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.
                          format(epoch, batch_idx * len(data),
                                 len(train_loader.dataset),
                                 100. * batch_idx / len(train_loader),
                                 loss.item()))
                if num_examples > cutoff:
                    break

        device = torch.device('cpu')
        train(model, device, train_loader, optimizer, 1)

        # Pick 20 background samples from one training batch, with a fixed
        # numpy seed so the selection is reproducible
        next_x, next_y = next(iter(train_loader))
        np.random.seed(0)
        inds = np.random.choice(next_x.shape[0], 20, replace=False)
        if interim:
            e = shap.GradientExplainer((model, model.conv1),
                                       next_x[inds, :, :, :])
        else:
            e = shap.GradientExplainer(model, next_x[inds, :, :, :])
        test_x, test_y = next(iter(test_loader))
        shap_values = e.shap_values(test_x[:1], nsamples=1000)

        if not interim:
            # unlike deepLIFT, Integrated Gradients aren't necessarily consistent for interim layers
            model.eval()
            model.zero_grad()
            with torch.no_grad():
                # mean difference between the explained output and the background outputs
                diff = (model(test_x[:1]) -
                        model(next_x[inds, :, :, :])).detach().numpy().mean(0)
            sums = np.array(
                [shap_values[i].sum() for i in range(len(shap_values))])
            d = np.abs(sums - diff).sum()
            # SHAP values must sum to the output difference within 5% relative error
            assert d / np.abs(diff).sum() < 0.05, "Sum of SHAP values " \
                                                  "does not match difference! %f" % (d / np.abs(diff).sum())
Beispiel #17
0
#shap_raw_store = {}
#for j in out_index:
#    shap_raw_store[j]=[]

# For every entry of ``index``: restrict the VAE weights to that single
# column/row, compute SHAP values for the restricted model, and append the
# mean absolute SHAP value per input feature to ``shap_store``.
for i in index:
    #print(i)
    # Work on a deep copy so the full state dict stays intact across iterations
    vae_state_temp = copy.deepcopy(vae_state)
    # Keep only column i of the three 'weight.*' tensors and row i of 'P.fc3.weight'
    vae_state_temp['weight.0'] = vae_state_temp['weight.0'][:, i:(i + 1)]
    vae_state_temp['weight.1'] = vae_state_temp['weight.1'][:, i:(i + 1)]
    vae_state_temp['weight.2'] = vae_state_temp['weight.2'][:, i:(i + 1)]
    vae_state_temp['P.fc3.weight'] = vae_state_temp['P.fc3.weight'][i:(i +
                                                                       1), :]
    vae.load_state_dict(vae_state_temp)

    # The same inputs (X plus column i of every matrix in X_mat) serve as
    # background and as explained data
    e = shap.GradientExplainer(vae, [X, *[x[:, i:(i + 1)] for x in X_mat]])
    shap_values = e.shap_values([X, *[x[:, i:(i + 1)] for x in X_mat]],
                                nsamples=200)
    # Blank out positions flagged by the NaN masks so nanmean ignores them
    shap_values[0][X_isnan] = float("NaN")
    for i_mat in range(n_mat):
        shap_values[i_mat + 1][X_mat_isnan[i_mat][:, i:(i + 1)]] = float("NaN")
    #for j in out_index:
    #    shap_raw_store[j].append(shap_values[0][:,j])
    # NaN-aware mean of |SHAP| over axis 0, concatenated across all inputs
    shap_store.append(
        np.concatenate([np.nanmean(np.abs(x), axis=0) for x in shap_values]))

# export
shap_store = pd.DataFrame(
    np.concatenate([
        index[:, np.newaxis],
        np.concatenate([x[:, np.newaxis] for x in shap_store], axis=1).T
Beispiel #18
0
    # Explain the first 2 * background patients; the first ``background``
    # rows of df provide the explainer background.
    patient_ids = df['patient_id'].to_numpy()
    to_explain = patient_ids[:background * 2]

    background_patient_ids = df.head(background)['patient_id'].to_numpy()

    # Resolve each background patient id to a path under data_dir, then load
    # and stack the prepared inputs into one float tensor on the device
    background_inputs = [
        os.path.join(data_dir, patient_id)
        for patient_id in background_patient_ids
    ]
    background_inputs = torch.stack([
        torch.from_numpy(prepare_input(input)).float()
        for input in background_inputs
    ]).to(device)
    # Keep only the selected leads along axis 1
    background_inputs = background_inputs[:, use_leads, :]

    e = shap.GradientExplainer(model, background_inputs)

    # Only compute SHAP values when no result exists at result_path yet
    if not os.path.exists(result_path):
        svs = []
        y_scores = []
        for patient_id in tqdm(to_explain):
            # NOTE(review): ``input`` shadows the builtin of the same name
            input = os.path.join(data_dir, patient_id)
            inputs = torch.stack(
                [torch.from_numpy(prepare_input(input)).float()]).to(device)
            inputs = inputs[:, use_leads, :]
            # Sigmoid of the model output is stored as the score
            y_scores.append(
                torch.sigmoid(model(inputs)).detach().cpu().numpy())
            sv = np.array(e.shap_values(
                inputs))  # (n_classes, n_samples, n_leads, n_points)
            svs.append(sv)
        # Join the per-patient results along the sample axis
        svs = np.concatenate(svs, axis=1)
Beispiel #19
0
# Build the ResNet50V2-based binary classifier from the project config
input_shape = cfg['DATA']['IMG_DIM'] + [3]
model_def = resnet50v2
model = model_def(cfg['NN']['DCNN_BINARY'], input_shape, metrics, 2, output_bias=output_bias, gpus=num_gpus)

# Load the pre-trained weights
model.load_weights('model20201115-050425.h5')

# Load the image to explain
img_width, img_height = 224, 224
img = image.load_img(r'C:\Temp\shap2\000001-10.jpg', target_size=(img_width, img_height))

to_explain  = image.img_to_array(img).reshape(1, img_width, img_height, 3)
# The same single image also serves as the explainer background
X = to_explain.copy().reshape(1, img_width, img_height, 3)

# Class names keyed by the stringified output index
class_names = { '0': ['negative', 'no-covid'], '1': ['positive', 'covid']  }

# explain how the input to the 7th layer of the model explains the top two classes
def map2layer(x, layer):
    """Preprocess a copy of raw images and return the input of ``model.layers[layer]``."""
    feed_dict = dict(zip([model.layers[0].input], [preprocess_input(x.copy())]))
    return tf.compat.v1.keras.backend.get_session().run(model.layers[layer].input, feed_dict)

# map2layer already applies preprocess_input, so the raw background is passed
# in directly. Preprocessing it here as well would preprocess the background
# twice while the explained image is preprocessed only once.
e = shap.GradientExplainer((model.layers[7].input, model.layers[-1].output), map2layer(X, 7))
shap_values,indexes = e.shap_values(map2layer(to_explain, 7), ranked_outputs=2)

# get the names for the classes
index_names = np.vectorize(lambda x: class_names[str(x)][1])(indexes)

# plot the explanations
shap.image_plot(shap_values, to_explain, index_names)
Beispiel #20
0
# load the ImageNet class names
url = "https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json"
fname = shap.datasets.cache(url)
with open(fname) as f:
    class_names = json.load(f)


# explain how the input to the 7th layer of the model explains the top two classes
def map2layer(x, layer):
    """Preprocess a copy of raw images and return the input of ``model.layers[layer]``."""
    feed_dict = dict(zip([model.layers[0].input],
                         [preprocess_input(x.copy())]))
    return K.get_session().run(model.layers[layer].input, feed_dict)


# map2layer already applies preprocess_input, so the raw background is passed
# in directly. Preprocessing it here as well would preprocess the background
# twice while the explained images are preprocessed only once.
e = shap.GradientExplainer((model.layers[7].input, model.layers[-1].output),
                           map2layer(X, 7))
shap_values, indexes = e.shap_values(map2layer(to_explain, 7),
                                     ranked_outputs=2)

# get the names for the classes
index_names = np.vectorize(lambda x: class_names[str(x)][1])(indexes)

# plot the explanations
shap.image_plot(shap_values, to_explain, index_names)

# Explain with local smoothing

# explain how the input to the 7th layer of the model explains the top two classes
explainer = shap.GradientExplainer(
    (model.layers[7].input, model.layers[-1].output),
    map2layer(preprocess_input(X.copy()), 7),
        shap.image_plot(shap_numpy_stackd[args.focused_ind], test_numpy_stackd)
    else:
        labels_for_plot = np.matmul(
            np.ones([len(shap_numpy_stackd[0]), 1], dtype=int),
            np.arange(num_classes).reshape([1, num_classes]))

        shap.image_plot(shap_numpy_stackd, test_numpy_stackd, labels_for_plot)

        # white_temp = np.ones(test_numpy_stackd.shape)
        # black_temp = np.zeros(test_numpy_stackd.shape)

        # shap.image_plot(shap_numpy_stackd, black_temp, labels_for_plot)
        # shap.image_plot(shap_numpy_stackd, white_temp, labels_for_plot)

    if args.grad_exp:
        e = shap.GradientExplainer(model, model.fc4)
        shap_values, indexes = e.shap_values(test_images_samples)

        # plot the explanations
        shap_values = [
            np.swapaxes(np.swapaxes(s, 2, 3), 1, -1) for s in shap_values
        ]

        shap.image_plot(shap_values, test_images_samples, labels_for_plot)

    if args.plot_colored:
        unioned_test_numpy = np.concatenate(
            (test_numpy[0], test_numpy[1], test_numpy[2]), axis=3)

        for i in range(len(unioned_test_numpy)):
            plt.imshow(unioned_test_numpy[i], cmap=plt.get_cmap('gray'))
Beispiel #22
0
def _summary_plot_with_tex_fallback(values, feats, tex_names, plain_names):
    """Draw a SHAP dot summary plot, retrying without TeX if the first try fails.

    :return: True when the plain-label fallback was used, False otherwise.
    """
    try:
        shap.summary_plot(values,
                          feats,
                          show=False,
                          plot_type="dot",
                          sort=False,
                          feature_names=tex_names)
    except Exception:
        # Start from a clean figure and switch TeX rendering off so the
        # retry (and the later savefig) does not hit the same failure.
        plt.clf()
        rc('text', usetex=False)
        shap.summary_plot(values,
                          feats,
                          show=False,
                          plot_type="dot",
                          sort=False,
                          feature_names=plain_names)
        return True
    return False


def plot_shap_vals(shapvals_filepath, data_filepath, indirect_output_filepath,
                   direct_output_filepath, predictor_filepath, n_instances):
    """Plot indirect (precomputed) and direct (GradientExplainer) SHAP values.

    :param shapvals_filepath: CSV file with precomputed SHAP values.
    :param data_filepath: CSV file with the matching feature values.
    :param indirect_output_filepath: where the plot of the precomputed SHAP
        values is saved.
    :param direct_output_filepath: where the plot of the SHAP values computed
        here with ``shap.GradientExplainer`` is saved.
    :param predictor_filepath: path of the model loaded with ``load_model``.
    :param n_instances: number of leading rows to plot and explain.
    """
    tex_warning = "WARNING: Producing figure without latex since it is not installed."
    tex_labels = [
        r"$x$", r"$2x$", r"$x^2$", r"$y$", r"$2y$", r"$y^2$", r"$c$",
        r"$2c$", r"$c^2$"
    ]
    plain_labels = ["x", "2x", "x^2", "y", "2y", "y^2", "c", "2c", "c^2"]

    rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})
    ## for Palatino and other serif fonts use:
    rc('font', **{'family': 'serif', 'serif': ['Palatino']})

    # use TeX if the user has it installed...
    # The warning is printed at most once per call.
    sent_tex_warning = False
    try:
        rc('text', usetex=True)
    except Exception:
        rc('text', usetex=False)
        print(tex_warning)
        sent_tex_warning = True

    shap.initjs()

    shap_vals_df = pd.read_csv(shapvals_filepath)
    feats_df = pd.read_csv(data_filepath)
    feat_names = [
        "x", "x2", "xSquared", "y", "y2", "ySquared", "z", "z2", "zSquared"
    ]

    feats_df = feats_df[feat_names]
    shap_vals_df = shap_vals_df[feat_names]

    feats = feats_df[0:n_instances].values
    shap_vals = shap_vals_df[0:n_instances].values

    # plot indirect influence from the precomputed SHAP values
    used_fallback = _summary_plot_with_tex_fallback(shap_vals, feats,
                                                    tex_labels, plain_labels)
    if used_fallback and not sent_tex_warning:
        print(tex_warning)
        sent_tex_warning = True

    plt.savefig(indirect_output_filepath)
    plt.clf()

    # plot direct influence for baseline
    predictor = load_model(predictor_filepath)
    e = shap.GradientExplainer(predictor, feats, local_smoothing=0)
    shap_values, classes = e.shap_values(feats, ranked_outputs=1)

    used_fallback = _summary_plot_with_tex_fallback(shap_values[0], feats,
                                                    tex_labels, plain_labels)
    if used_fallback and not sent_tex_warning:
        print(tex_warning)

    plt.savefig(direct_output_filepath)
# %%
# load the model
# Pretrained VGG16 switched to evaluation mode
model = models.vgg16(pretrained=True).eval()

X, y = shap.datasets.imagenet50()

# Rescale the images in place by 1/255 before normalization
X /= 255

# The two images (rows 39 and 41) that will be explained
to_explain = X[[39, 41]]

# load the ImageNet class names
url = "https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json"
fname = shap.datasets.cache(url)
with open(fname) as f:
    class_names = json.load(f)

# Attribute the model output to the input of model.features[7], using all of
# X (normalized) as background
e = shap.GradientExplainer((model, model.features[7]), normalize(X))
shap_values, indexes = e.shap_values(normalize(to_explain),
                                     ranked_outputs=2,
                                     nsamples=200)

# get the names for the classes
index_names = np.vectorize(lambda x: class_names[str(x)][1])(indexes)

# plot the explanations
# Reorder the axes of each SHAP array: swap axes 2 and 3, then axes 1 and -1
# (presumably channels-first to channels-last for plotting - TODO confirm)
shap_values = [np.swapaxes(np.swapaxes(s, 2, 3), 1, -1) for s in shap_values]

shap.image_plot(shap_values, to_explain, index_names)

# %%
# Load the latest production model and its components
# Load the registered production model and pull the transforms, the
# underlying model and the disease names out of its python_model
pyfunc_model = mlflow.pyfunc.load_model("models:/nih_xray/production")
transforms = pyfunc_model._model_impl.python_model.transforms
model = pyfunc_model._model_impl.python_model.model
disease_names = pyfunc_model._model_impl.python_model.disease_names

# Let's pick an example that definitely exhibits some affliction
df = spark.read.table("nih_xray.images")
first_row = df.filter("Infiltration").select("image").limit(1).toPandas()
# Raw image bytes are decoded as a 224x224 uint8 array
image = np.frombuffer(first_row["image"].item(), dtype=np.uint8).reshape((224,224))

# Only need a small sample for explanations
sample = df.sample(0.02).select("image").toPandas()
# Decode, transform and stack every sampled image into one background tensor
sample_tensor = torch.cat([transforms(np.frombuffer(sample["image"].iloc[idx], dtype=np.uint8).reshape((224,224))).unsqueeze(dim=0) for idx in range(len(sample))])

# Attribute the model output to the input of model.densenet.features[6]
e = shap.GradientExplainer((model, model.densenet.features[6]), sample_tensor, local_smoothing=0.1)
shap_values, indexes = e.shap_values(transforms(image).unsqueeze(dim=0), ranked_outputs=3, nsamples=300)

# Plot the top-ranked output only, with both SHAP values and image averaged over axis 0
shap.image_plot(shap_values[0][0].mean(axis=0, keepdims=True),
                transforms(image).numpy().mean(axis=0, keepdims=True))

# COMMAND ----------

import pandas as pd

# Sigmoid scores of the model for the explained image, restricted to the
# columns of the three top-ranked outputs
pd.DataFrame(torch.sigmoid(model(transforms(image).unsqueeze(dim=0))).detach().numpy(), columns=disease_names).iloc[:,indexes.numpy()[0]]

# COMMAND ----------

# MAGIC %md 
# MAGIC 
Beispiel #25
0
          verbose=1,
          validation_data=(x_test, y_test))

# Performance analysis
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Maps explainer name -> {test example index -> summed SHAP values}.
shap_values_dic = {}

# Random subset of the training data the model was fitted on, used as the
# background distribution for both explainers.
background = x_train[numpy.random.choice(x_train.shape[0], 20, replace=False)]

# Build one explainer of each kind over the same model and background.
e1 = shap.DeepExplainer(model, background)
e2 = shap.GradientExplainer(model, background)

shap_values_dic['DeepExplainer'] = {}
shap_values_dic['GradientExplainer'] = {}

# Explain every test example with both explainers and plot the
# DeepExplainer attributions.
for i in range(len(x_test)):
    print('Exemplo n°: ', str(i))
    # NOTE(review): the explainers receive the *negated* example while the
    # plot shows the original one. Upstream SHAP examples negate only the
    # image passed to image_plot; confirm this sign is intentional.
    shap_values1 = e1.shap_values(-x_test[i:i+1])
    shap.image_plot(shap_values1, x_test[i:i+1], labels=y_test[i:i+1],
                    width=10, hspace=0.2, aspect=0.5)
    shap_values2 = e2.shap_values(-x_test[i:i+1])
    # Four nested sums collapse the leading axes one at a time; kept as-is so
    # the stored value keeps exactly the type/shape the original produced.
    shap_values_dic['DeepExplainer'][i] = sum(sum(sum(sum(shap_values1))))
    shap_values_dic['GradientExplainer'][i] = sum(sum(sum(sum(shap_values2))))
Beispiel #26
0
		if model_name == "inception":
			trans.insert(0, torchvision.transforms.Pad((21,22,22,21)))
		elif model_name == 'vgg':
			trans.insert(0, torchvision.transforms.Resize((244,244)))
	if data_augment:
		trans.insert(0, fancy_pca())
		trans.insert(0, torchvision.transforms.RandomRotation(180))
		trans.insert(0, torchvision.transforms.RandomHorizontalFlip(p=0.5))

	train_dataset = torchvision.datasets.ImageFolder(
			root=data_path,
			transform=torchvision.transforms.Compose(trans)
		)

	train_loader = torch.utils.data.DataLoader(
		train_dataset,
		batch_size=batch_size,
		num_workers=0,
		shuffle=True
	)
	return train_loader

def map2layer(x):
    """Return the result of calling the module-level ``net`` on ``x``."""
    # NOTE(review): the call site further down passes two arguments
    # (``map2layer(X, 7)``) while this function accepts only one -- confirm
    # which signature is intended.
    layer_output = net(x)
    return layer_output

e = shap.GradientExplainer(
    (model.layers[7].input, model.layers[-1].output),
    map2layer(X, 7),
    local_smoothing=0 # std dev of smoothing noise
Beispiel #27
0
def test_tf_keras_mnist_cnn():
    """ This is the basic mnist cnn example from kerasself.
    """

    try:
        import tensorflow as tf
        from tensorflow.python import keras
        from tensorflow.python.keras.models import Sequential
        from tensorflow.python.keras.layers import Dense, Dropout, Flatten, Activation
        from tensorflow.python.keras.layers import Conv2D, MaxPooling2D
        from tensorflow.python.keras import backend as K
    except Exception as e:
        print("Skipping test_tf_keras_mnist_cnn!")
        return
    import shap

    batch_size = 128
    num_classes = 10
    epochs = 1

    # input image dimensions
    img_rows, img_cols = 28, 28

    # the data, split between train and test sets
    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
        input_shape = (1, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)

    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    model = Sequential()
    model.add(
        Conv2D(32,
               kernel_size=(3, 3),
               activation='relu',
               input_shape=input_shape))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))  # 128
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])

    model.fit(x_train[:1000, :],
              y_train[:1000, :],
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test[:1000, :], y_test[:1000, :]))

    # explain by passing the tensorflow inputs and outputs
    inds = np.random.choice(x_train.shape[0], 200, replace=False)
    e = shap.GradientExplainer((model.layers[0].input, model.layers[-1].input),
                               x_train[inds, :, :])
    shap_values = e.shap_values(x_test[:1], nsamples=100)

    sess = tf.keras.backend.get_session()
    diff = sess.run(model.layers[-1].input, feed_dict={model.layers[0].input: x_test[:1]}) - \
    sess.run(model.layers[-1].input, feed_dict={model.layers[0].input: x_train[inds,:,:]}).mean(0)

    sums = np.array([shap_values[i].sum() for i in range(len(shap_values))])
    assert np.abs(sums - diff).sum(
    ) < 1e-4, "Sum of SHAP values does not match difference!"
Beispiel #28
0
 def get_shap_attributions(self):
     """Explain the training inputs with GradientExplainer.

     The training inputs (``self.x_train`` converted via ``.numpy()``) serve
     both as the background set and as the samples being explained. The
     first entry of the resulting SHAP values is converted with
     ``self.shap_values_to_attr`` and returned.
     """
     train_inputs = self.x_train.numpy()
     gradient_explainer = shap.GradientExplainer(self.model, train_inputs)
     per_output_values = gradient_explainer.shap_values(train_inputs)
     first_output_values = per_output_values[0]
     return self.shap_values_to_attr(first_output_values)
Beispiel #29
0
# Append to `exp` (initialised earlier in the file) a comma-separated source
# string of the form "int(IMG_LIST[0]),int(IMG_LIST[1]),..." -- the last entry
# gets no trailing comma.
for i in range(len(IMG_LIST)):
    if i == len(IMG_LIST) - 1:
        img_exp = 'int(IMG_LIST[{}])'.format(i)
    else:
        img_exp = 'int(IMG_LIST[{}]),'.format(i)
    exp += img_exp

# choose images
# NOTE(review): eval() executes the string built above. It only contains
# generated `int(IMG_LIST[k])` terms plus whatever `exp` held beforehand, but
# a direct index list such as [int(v) for v in IMG_LIST] would avoid eval
# entirely -- confirm the initial value of `exp` before changing this.
to_explain = X[[eval(exp)]]

# features_layer=model.features[7]
# NOTE(review): exec() runs FEATURE_LAYER as Python code to resolve the layer
# (e.g. "features[7]" per the comment above). This is only safe while
# FEATURE_LAYER comes from a trusted source.
exec("features_layer=model." + FEATURE_LAYER)

#explainer = shap.GradientExplainer((model, features_layer), normalize(X), local_smoothing=0.5)
# Explain with respect to the input of `features_layer`, using the normalized
# dataset X as background samples.
explainer = shap.GradientExplainer((model, features_layer),
                                   normalize(X),
                                   local_smoothing=0.5)
# With ranked_outputs set, shap_values() also returns the indexes of the
# outputs that were explained for each sample.
shap_values, indexes = explainer.shap_values(normalize(to_explain),
                                             ranked_outputs=RANKED_OUTPUTS,
                                             nsamples=IMG_SAMPLES)

# get the names for the classes
# Positional index -> class name lookup built from `class_names`.
dic_class_names = {i: class_names[i] for i in range(0, len(class_names))}

index_names = np.vectorize(lambda x: dic_class_names[x])(indexes)

# plot the explanations
# Axis shuffle: swap axes 2/3, then axes 1/-1 (presumably NCHW -> NHWC for
# plotting; confirm against the explained layer's output shape).
shap_values = [np.swapaxes(np.swapaxes(s, 2, 3), 1, -1) for s in shap_values]


# image plot
Beispiel #30
0
def plot_shap(model,
              dataset_opts,
              transform_opts,
              batch_size,
              outputfilename,
              n_outputs=1,
              method='deep',
              local_smoothing=0.0,
              n_samples=20,
              pred_out=False):
    """Plot shapley attributions overlaid on images for classification tasks.

    Parameters
    ----------
    model:nn.Module
        Pytorch model.
    dataset_opts:dict
        Options used to configure dataset. Must contain ``binary_threshold``,
        ``num_targets`` and ``classify_annotations``; the remaining keys are
        forwarded to ``DynamicImageDataset``. The caller's dict is not
        modified.
    transform_opts:dict
        Options used to configure transformers. ``mean`` and ``std`` are used
        to undo the input normalization before plotting.
    batch_size:int
        Batch size of the data loader; also forwarded to the gradient
        explainer.
    outputfilename:str
        Output filename.
    n_outputs:int
        Number of top outputs.
    method:str
        Explainer to use: ``'deep'`` or ``'gradient'``.
    local_smoothing:float
        How much to smooth shapley map (gradient explainer only).
    n_samples:int
        Number shapley samples to draw (gradient explainer only).
    pred_out:str or bool
        ``'sigmoid'``, ``'softmax'`` or ``'none'``. When not ``'none'``, the
        images are labelled with the model's transformed predictions instead
        of the explained output indexes. Any falsy value (including the
        default ``False``) is treated as ``'none'``.

    Raises
    ------
    ValueError
        If ``method`` is neither ``'deep'`` nor ``'gradient'``.
    KeyError
        If ``pred_out`` is truthy but not one of the supported names.

    """
    import torch
    from torch.nn import functional as F
    import numpy as np
    from torch.utils.data import DataLoader
    import shap
    from pathflowai.datasets import DynamicImageDataset
    from matplotlib import pyplot as plt
    from pathflowai.sampler import ImbalancedDatasetSampler

    # Fail early instead of hitting a NameError on the unbound result later.
    if method not in ('deep', 'gradient'):
        raise ValueError(
            "Unsupported method {!r}; expected 'deep' or 'gradient'.".format(
                method))

    # The default (False) used to fall through to a KeyError lookup.
    if not pred_out:
        pred_out = 'none'
    out_transform = dict(sigmoid=torch.sigmoid,
                         softmax=F.softmax,
                         none=lambda x: x)
    prediction_transform = out_transform[pred_out]

    # Work on a copy so popping keys does not mutate the caller's dict.
    dataset_opts = dict(dataset_opts)
    binary_threshold = dataset_opts.pop('binary_threshold')
    num_targets = dataset_opts.pop('num_targets')

    dataset = DynamicImageDataset(**dataset_opts)

    if dataset_opts['classify_annotations']:
        dataset.binarize_annotations(num_targets=num_targets,
                                     binary_threshold=binary_threshold)
        num_targets = len(dataset.targets)

    # Single-target data is drawn through the imbalanced sampler; otherwise
    # the loader shuffles (only when there is more than one target).
    dataloader_val = DataLoader(dataset,
                                batch_size=batch_size,
                                num_workers=10,
                                shuffle=num_targets > 1,
                                sampler=ImbalancedDatasetSampler(dataset)
                                if num_targets == 1 else None)

    # Each next(iter(...)) starts a fresh pass over the loader.
    background, _ = next(iter(dataloader_val))
    if method == 'gradient':
        # The gradient explainer gets a second batch of background samples.
        background = torch.cat([background, next(iter(dataloader_val))[0]], 0)
    X_test, y_test = next(iter(dataloader_val))

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        background = background.cuda()
        X_test = X_test.cuda()

    if pred_out != 'none':
        # Previously the model handle was only bound when CUDA was available,
        # which raised NameError on CPU-only machines.
        if use_cuda:
            model = model.cuda()
        y_test = prediction_transform(model(X_test)).detach().cpu()

    y_test = y_test.numpy()

    if method == 'deep':
        e = shap.DeepExplainer(model, background)
        s = e.shap_values(X_test, ranked_outputs=n_outputs)
    else:
        e = shap.GradientExplainer(model,
                                   background,
                                   batch_size=batch_size,
                                   local_smoothing=local_smoothing)
        s = e.shap_values(X_test, ranked_outputs=n_outputs, nsamples=n_samples)

    # Collapse multi-column labels/scores to a class index; 1-D labels are
    # left untouched.
    if y_test.ndim > 1 and y_test.shape[1] > 1:
        y_test = y_test.argmax(axis=1)

    # NOTE(review): shap may return a (values, indexes) pair whenever
    # ranked_outputs is given, including n_outputs == 1 -- confirm against the
    # installed shap version before relying on the else-branch.
    if n_outputs > 1:
        shap_values, idx = s
    else:
        shap_values, idx = s, y_test

    # Move the channel axis last for plotting (assumes NCHW arrays).
    if num_targets == 1:
        shap_numpy = [np.swapaxes(np.swapaxes(shap_values, 1, -1), 1, 2)]
    else:
        shap_numpy = [
            np.swapaxes(np.swapaxes(values, 1, -1), 1, 2)
            for values in shap_values
        ]

    # Undo the input normalization in channels-last layout; mean/std
    # broadcast over the trailing channel axis.
    pixels = X_test.detach().cpu().numpy().transpose((0, 2, 3, 1))
    std = np.asarray(transform_opts['std'], dtype=pixels.dtype)
    mean = np.asarray(transform_opts['mean'], dtype=pixels.dtype)
    test_numpy = pixels * std + mean

    if pred_out != 'none':
        labels = y_test.astype(str)
    else:
        labels = np.array([[(dataloader_val.dataset.targets[ranked[j]]
                             if num_targets > 1 else str(ranked))
                            for j in range(n_outputs)]
                           for ranked in idx])

    plt.figure()
    shap.image_plot(shap_numpy, test_numpy, labels)
    plt.savefig(outputfilename, dpi=300)