def get_predictions_after_masking(self, explained_model, X, y, downsample_factors=(1,), batch_size=64,
                                  flatten=False):
    """
    Collects the explained model's outputs on the original samples and on leave-one-out variants of them.

    For every sample, one variant per index along the sample's first axis is built by removing
    the entry at that index; each sample's variants are then passed to the explained model together.

    :param explained_model: The model forwarded to __MaskingUtil.get_prediction__.
    :param X: The data samples. Batches are sliced along the first dimension.
    :param y: The labels. Only used for the dataset check at the top.
    :param downsample_factors: Not referenced by this implementation.
    :param batch_size: The number of samples processed per batch (Optional, default: 64).
    :param flatten: Forwarded to __MaskingUtil.get_prediction__ (Optional, default: False).
    :return: A list of three entries, each concatenated over all batches:
             the input batches, the predictions on the unchanged inputs and
             the predictions on the leave-one-out variants.
    """
    Validation.check_dataset(X, y)

    total_batches = int(np.ceil(len(X) / float(batch_size)))

    per_batch = []
    for batch_no in range(total_batches):
        start = batch_no * batch_size
        samples = X[start:start + batch_size]
        predictions = MaskingUtil.get_prediction(explained_model, samples, flatten=flatten)

        # One inner list per sample: the sample with the entry at each position removed in turn.
        variants_per_sample = [
            [np.concatenate([sample[:pos], sample[pos + 1:]], axis=0) for pos in range(len(sample))]
            for sample in samples
        ]

        variant_predictions = []
        for sample_no, variants in enumerate(variants_per_sample):
            if variants:
                current = MaskingUtil.get_prediction(explained_model, variants, flatten=flatten)
            else:
                # Nothing could be removed from an empty sample - reuse its unmasked prediction.
                current = predictions[sample_no].reshape((1, -1))
            variant_predictions.append(current)

        per_batch.append((samples, predictions, variant_predictions))

    # Merge the batches field by field.
    num_fields = len(per_batch[0])
    return [np.concatenate([entry[field] for entry in per_batch]) for field in range(num_fields)]
Esempio n. 2
0
    def _fit_single(self, model, X, y, masked_data=None):
        """
        Fits a single explanation model on __X__ and __y__.

        Nothing is fitted if __X__ is empty.

        :param model: The explanation model handed to __self.model_builder.fit__.
        :param X: The data samples. The first dimension must be the number of samples.
        :param y: The ground truth labels.
        :param masked_data: Precomputed masked data. If None, it is computed via
                            __self.masking_operation.get_predictions_after_masking__
                            and cleaned with __TensorflowCXPlain._clean_output_dims__
                            (Optional, default: None).
        :return: This instance. __self.last_masked_data__ and __self.last_history__ are updated
                 when __X__ is not empty.
        """
        Validation.check_dataset(X, y)

        if len(X) != 0:
            # Pre-compute target outputs if none are passed.
            if masked_data is None:
                output_dim = Validation.get_output_dimension(y)
                masked_data = self.masking_operation.get_predictions_after_masking(
                    self.explained_model, X, y,
                    batch_size=self.model_builder.batch_size,
                    downsample_factors=self.downsample_factors,
                    flatten=self.flatten_for_explained_model)

                masked_data = TensorflowCXPlain._clean_output_dims(output_dim, masked_data)

            self.last_masked_data = masked_data

            if self.model_filepath is None:
                from tempfile import NamedTemporaryFile
                # Only the path is needed. Close the handle right away so that it is not leaked
                # and so that the path can be re-opened on platforms that forbid opening a
                # temporary file twice. The file itself is kept (delete=False).
                with NamedTemporaryFile(delete=False) as temporary_file:
                    model_filepath = temporary_file.name
            else:
                model_filepath = self.model_filepath

            self.last_history = self.model_builder.fit(model, masked_data, y, model_filepath)
        return self
Esempio n. 3
0
    def _build_model(self, X, y):
        """
        Builds the explanation model(s) for the given dataset unless __self.model__ is already set.

        :param X: The data samples used to determine the input dimension.
        :param y: The labels used to determine the output dimension.
        :exception ValueError Thrown if __X__ contains variable length inputs.
        """
        Validation.check_dataset(X, y)

        variable_length = Validation.is_variable_length(X)
        if variable_length:
            raise ValueError("Variable length inputs to CXPlain are currently not supported.")

        # Only the per-sample input dimension is needed; the sample count is discarded.
        _, input_dim = Validation.get_input_dimension(X)
        output_dim = Validation.get_output_dimension(y)

        if self.model is None:
            # A single model and an ensemble use different builders.
            builder = self._build_single if self.num_models == 1 else self._build_ensemble
            built_model, built_prediction_model = builder(input_dim=input_dim, output_dim=output_dim)
            self.model, self.prediction_model = built_model, built_prediction_model
Esempio n. 4
0
            def predict_with_i_imputed(x, index):
                """
                Returns a copy of __x__ in which the feature (group) at __index__ is zeroed out.

                Despite its name, this function does not call a model; it only returns the masked input.
                Relies on __math_ops__, __downsampling_factor__, __downsample_factors__ and __input_dim__
                from the enclosing scope.
                """
                masked = math_ops.copy(x)
                input_shape = math_ops.shape(masked)
                # (samples, all remaining dimensions collapsed into one).
                flat_shape = (input_shape[0],
                              math_ops.as_int(math_ops.prod(input_shape[1:])))

                if downsampling_factor != 1:
                    # With downsampling, a whole group of features is removed by multiplying with a mask.
                    full_shape = Validation.get_full_input_shape(input_shape[0], input_dim)
                    mask = MaskingUtil.get_ith_mask(index, input_dim, downsample_factors, math_ops=math_ops)
                    masked = math_ops.reshape(masked, full_shape)

                    # Flip the mask so that the selected entries become 0 and all others 1.
                    keep_mask = (mask - 1.) * -1.
                    keep_mask = math_ops.expand_dims(math_ops.cast(keep_mask, float), axis=-1)
                    return math_ops.multiply(masked, keep_mask)

                # Without downsampling, a single flattened feature column is set to zero.
                is_multi_dim = len(input_shape) > 2
                if is_multi_dim:
                    masked = math_ops.reshape(masked, flat_shape)

                masked[:, index] = 0

                if is_multi_dim:
                    masked = math_ops.reshape(masked, input_shape)
                return masked
Esempio n. 5
0
    def predict(self, X, confidence_level=None):
        """
        Estimates how important each input in __X__ is for the decision of __self.explained_model__.

        :param X: The data samples to be evaluated. The first dimension must be the number of samples.
        :param confidence_level: The confidence level of the reported confidence intervals, e.g. 0.95.
                                 Must lie strictly between 0 and 1. It is only forwarded to the
                                 explanation models if __num_models__ is not 1; with a single model
                                 it is validated but otherwise unused (Optional, default: None).
        :return: (i) The importance estimates reshaped to the attribution shape of __X__,
                 if the underlying prediction is a single array,
                 or
                 (ii) a tuple of the importance estimates and the confidence intervals, if the underlying
                 prediction is a tuple. The last dimension of the confidence intervals has size 2.
        :exception AssertionError Thrown if __predict__ was called before the explanation model was fitted.
        :exception ValueError Thrown if __confidence_level__ is not strictly between 0 and 1.
        """
        if self.prediction_model is None:
            raise AssertionError(
                "Model must be initialised when calling __predict__. "
                "Did you forget to __fit__ the explanation model?")

        if confidence_level is not None:
            outside_range = confidence_level <= 0.0 or confidence_level >= 1.0
            # Values numerically indistinguishable from the bounds are rejected as well.
            if outside_range or np.isclose(confidence_level, 0.) or np.isclose(confidence_level, 1.):
                raise ValueError(
                    "The __confidence_level__ must be a value between 0 (exclusive) and 1 (exclusive)."
                )

        if self.num_models != 1:
            raw_result = self._predict_multiple(X, confidence_level=confidence_level)
        else:
            raw_result = self._predict_single(self.prediction_model, X)

        attribution_shape = Validation.get_attribution_shape(X)

        # For shapes of rank 4 and above the last axis is replaced by the two interval bounds,
        # otherwise the bounds are appended as an extra axis.
        interval_prefix = attribution_shape[:-1] if len(attribution_shape) >= 4 else attribution_shape
        interval_shape = interval_prefix + (2, )

        if not isinstance(raw_result, tuple):
            return raw_result.reshape(attribution_shape)

        attributions = raw_result[0].reshape(attribution_shape)
        intervals = raw_result[1].reshape(interval_shape)
        return attributions, intervals
Esempio n. 6
0
 def test_input_shape_tabular_valid(self):
     """Fixed-size two-dimensional inputs must report (num_samples, (num_features,))."""
     sample_counts = [1, 2, 1024]
     feature_counts = [1, 2, 1024]
     cases = [(rows, cols) for rows in sample_counts for cols in feature_counts]
     for expected_samples, expected_features in cases:
         data = np.random.random_sample(size=(expected_samples, expected_features))
         n, input_dim = Validation.get_input_dimension(data)
         self.assertEqual(n, expected_samples)
         self.assertEqual(input_dim, (expected_features,))
Esempio n. 7
0
 def test_input_shape_time_series_variable_valid(self):
     """Lists of series with differing lengths must report (None, num_features) as input dimension."""
     sample_counts = [2, 3, 1024]
     series_lengths = [1, 2, 256]
     feature_counts = [1, 2, 1024]
     for expected_samples in sample_counts:
         for expected_features in feature_counts:
             series = []
             for i in range(expected_samples):
                 # Cycle through the available lengths so that neighbouring samples differ.
                 length = series_lengths[i % len(series_lengths)]
                 series.append(np.random.random_sample(size=(length, expected_features)))

             n, input_dim = Validation.get_input_dimension(series)
             self.assertEqual(n, expected_samples)
             self.assertEqual(input_dim, (None, expected_features))
Esempio n. 8
0
 def test_input_shape_time_series_fixed_valid(self):
     """Fixed-length series must report (num_samples, (length, num_features))."""
     sample_counts = [1, 2, 1024]
     series_lengths = [1, 2, 256]
     feature_counts = [1, 2, 1024]
     shapes = [(samples, length, features)
               for samples in sample_counts
               for length in series_lengths
               for features in feature_counts]
     for shape in shapes:
         data = np.random.random_sample(size=shape)
         n, input_dim = Validation.get_input_dimension(data)
         self.assertEqual(n, shape[0])
         self.assertEqual(input_dim, (shape[1], shape[2]))
Esempio n. 9
0
 def test_input_shape_invalid_1dim(self):
     """Inputs whose samples have no feature dimension must be rejected with a ValueError."""
     invalid_inputs = ([1], [1, 2, 3], [None])
     for invalid_input in invalid_inputs:
         with self.assertRaises(ValueError):
             Validation.get_input_dimension(invalid_input)
Esempio n. 10
0
    def __init__(self,
                 explained_model,
                 model_builder,
                 masking_operation,
                 loss,
                 downsample_factors=(1, ),
                 num_models=1):
        """
        Stores the configuration of the explainer and validates __num_models__ and __downsample_factors__.

        :param explained_model: The model whose decisions are to be explained.
        :param model_builder: The builder stored as __self.model_builder__.
        :param masking_operation: The masking operation stored as __self.masking_operation__.
        :param loss: The loss stored as __self.loss__.
        :param downsample_factors: Checked with
                                   __Validation.check_downsample_factors_at_initialisation__
                                   (Optional, default: (1,)).
        :param num_models: Checked with __Validation.check_is_positive_integer_greaterequals_1__
                           (Optional, default: 1).
        """
        super(CXPlain, self).__init__()

        # State that only becomes available later on.
        self.prediction_model = None
        self.last_masked_data = None

        # Collaborators are stored as passed.
        self.loss = loss
        self.masking_operation = masking_operation
        self.model_builder = model_builder
        self.explained_model = explained_model

        # Validated settings are only stored once their check has passed.
        Validation.check_is_positive_integer_greaterequals_1(num_models, var_name="num_models")
        self.num_models = num_models

        Validation.check_downsample_factors_at_initialisation(downsample_factors)
        self.downsample_factors = downsample_factors
Esempio n. 11
0
 def test_input_shape_image_fixed_valid(self):
     """Fixed-size images must report (num_samples, (rows, cols, num_channels))."""
     sample_counts = [1, 2, 1024]
     row_counts = [1, 2, 256]
     col_counts = [1, 2, 256]
     channel_counts = [1, 2, 3]
     shapes = [(samples, rows, cols, channels)
               for samples in sample_counts
               for rows in row_counts
               for cols in col_counts
               for channels in channel_counts]
     for shape in shapes:
         data = np.random.random_sample(size=shape)
         n, input_dim = Validation.get_input_dimension(data)
         self.assertEqual(n, shape[0])
         self.assertEqual(input_dim, shape[1:])
Esempio n. 12
0
 def test_input_shape_volume_fixed_valid(self):
     """Fixed-size volumes must report (num_samples, (rows, cols, depth, num_channels))."""
     sample_counts = [1, 2, 128]
     voxel_counts = [1, 2, 64]
     channel_counts = [1, 2, 3]
     shapes = [(samples, rows, cols, depth, channels)
               for samples in sample_counts
               for rows in voxel_counts
               for cols in voxel_counts
               for depth in voxel_counts
               for channels in channel_counts]
     for shape in shapes:
         data = np.random.random_sample(size=shape)
         n, input_dim = Validation.get_input_dimension(data)
         self.assertEqual(n, shape[0])
         self.assertEqual(input_dim, shape[1:])
Esempio n. 13
0
    def get_predictions_after_masking(self,
                                      explained_model,
                                      X,
                                      y,
                                      downsample_factors=(1, ),
                                      batch_size=64,
                                      flatten=False):
        """
        Collects the explained model's outputs on the original samples and on their masked variants.

        :param explained_model: The model forwarded to __MaskingUtil.get_prediction__.
        :param X: The data samples. Batches are sliced along the first dimension.
        :param y: The labels. Only used for the dataset check at the top.
        :param downsample_factors: Forwarded to __MaskingUtil.get_x_imputed__ (Optional, default: (1,)).
        :param batch_size: The number of samples processed per batch (Optional, default: 64).
        :param flatten: Forwarded to __MaskingUtil.get_prediction__ (Optional, default: False).
        :return: A list of three entries, each concatenated over all batches:
                 the input batches, the predictions on the unchanged inputs and
                 the predictions on the masked inputs.
        """
        Validation.check_dataset(X, y)

        total_batches = int(np.ceil(len(X) / float(batch_size)))

        per_batch = []
        for batch_no in range(total_batches):
            start = batch_no * batch_size
            samples = X[start:start + batch_size]

            predictions = MaskingUtil.get_prediction(explained_model, samples, flatten=flatten)
            masked_variants = MaskingUtil.get_x_imputed(samples, downsample_factors, math_ops=NumpyInterface)

            masked_predictions = [
                MaskingUtil.get_prediction(explained_model, variant, flatten=flatten)
                for variant in masked_variants
            ]
            # Stack the per-variant predictions and swap the first two axes
            # (presumably so that the sample axis comes first - TODO confirm).
            masked_predictions = np.stack(masked_predictions).swapaxes(0, 1)

            per_batch.append((samples, predictions, masked_predictions))

        # Merge the batches field by field.
        num_fields = len(per_batch[0])
        return [
            np.concatenate([entry[field] for entry in per_batch])
            for field in range(num_fields)
        ]
Esempio n. 14
0
 def __init__(self,
              callbacks=None,
              early_stopping_patience=12,
              batch_size=64,
              num_epochs=100,
              validation_fraction=0.1,
              shuffle=True,
              learning_rate=0.0001,
              optimizer=None,
              verbose=0):
     """
     Stores the training configuration and validates __num_epochs__ and __validation_fraction__.

     :param callbacks: A list of callbacks stored as __self.callbacks__. If None, a new empty
                       list is created for this instance (Optional, default: None).
     :param early_stopping_patience: Stored as __self.early_stopping_patience__ (Optional, default: 12).
     :param batch_size: Stored as __self.batch_size__ (Optional, default: 64).
     :param num_epochs: Checked with __Validation.check_is_positive_integer_greaterequals_1__
                        (Optional, default: 100).
     :param validation_fraction: Checked with __Validation.check_is_fraction__ (Optional, default: 0.1).
     :param shuffle: Stored as __self.shuffle__ (Optional, default: True).
     :param learning_rate: Stored as __self.learning_rate__ (Optional, default: 0.0001).
     :param optimizer: Stored as __self.optimizer__ (Optional, default: None).
     :param verbose: Stored as __self.verbose__ (Optional, default: 0).
     """
     self.batch_size = batch_size
     Validation.check_is_positive_integer_greaterequals_1(
         num_epochs, var_name="num_epochs")
     self.num_epochs = num_epochs
     Validation.check_is_fraction(validation_fraction,
                                  var_name="validation_fraction")
     self.validation_fraction = validation_fraction
     self.shuffle = shuffle
     self.learning_rate = learning_rate
     self.optimizer = optimizer
     self.verbose = verbose
     # A default list in the signature would be created once and shared by every instance
     # that omits __callbacks__; create a fresh list per instance instead.
     self.callbacks = [] if callbacks is None else callbacks
     self.early_stopping_patience = early_stopping_patience
Esempio n. 15
0
 def test_input_shape_volume_variable_valid(self):
     """Lists of volumes with differing extents must report (None, None, None, num_features)."""
     sample_counts = [2, 3, 128]
     extents = [2, 3, 64]
     feature_counts = [1, 2, 3]
     num_extents = len(extents)
     for expected_samples in sample_counts:
         for expected_features in feature_counts:
             volumes = []
             for i in range(expected_samples):
                 # Rotate through the extents so that every axis varies between samples.
                 volume_shape = (extents[i % num_extents],
                                 extents[(i + 1) % num_extents],
                                 extents[(i + 2) % num_extents],
                                 expected_features)
                 volumes.append(np.random.random_sample(size=volume_shape))

             n, input_dim = Validation.get_input_dimension(volumes)
             self.assertEqual(n, expected_samples)
             self.assertEqual(input_dim, (None, None, None, expected_features))
Esempio n. 16
0
    def test_get_attribution_shape_multi_channel(self):
        """
        Checks the attribution shape for inputs with 0 to 3 intermediary dimensions and 0 to 3 channels.

        Inputs of rank 3 and above are expected to have their last axis collapsed to size 1,
        lower-rank inputs keep their shape, and inputs with a sample axis only are rejected.
        """
        sample_counts = [1, 2, 100]
        dimension_counts = [0, 1, 2, 3]
        channel_counts = [0, 1, 2, 3]

        for samples in sample_counts:
            for num_dims in dimension_counts:
                for channels in channel_counts:
                    source_size = (samples,) + (2,) * num_dims
                    if channels != 0:
                        source_size = source_size + (channels,)

                    data = np.random.normal(0, 1, size=source_size)

                    if num_dims == 0 and channels == 0:
                        # Only the sample axis is present - there is nothing to attribute to.
                        with self.assertRaises(ValueError):
                            Validation.get_attribution_shape(data)
                        continue

                    attribution_shape = Validation.get_attribution_shape(data)

                    if len(source_size) >= 3:
                        expected_shape = source_size[:-1] + (1,)
                    else:
                        expected_shape = source_size
                    self.assertEqual(attribution_shape, expected_shape)
Esempio n. 17
0
    def score(self, X, y, sample_weight=None, masked_data=None):
        """
        Evaluates the performance, in terms of causal loss, of the current CXPlain model.

        :param X: The data samples to be evaluated. The first dimension must be the number of samples. (Required)
        :param y: The ground truth labels to be compared to. The first dimension must be the number of
                  samples. (Required)
        :param sample_weight: The sample weight to apply to the samples in X during evaluation. The first dimension
                              must be the number of samples and it must match that of __X__ and __y__.
                              If None, equal weighting is used (Optional, default: None).
        :param masked_data: Precomputed masked data. If None, the masked data is computed via
                            __self.masking_operation.get_predictions_after_masking__. If set, the precomputed
                            masked data is used for scoring and its computation is skipped
                            (Optional, default: None).
        :return: The score results as returned by __self._score_single__, either
                 (i) as a single score result if __num_models__ = 1 or
                 (ii) as a list with one score result per model otherwise.
                 The masked data that was used is kept in __self.last_masked_data__.
        :exception AssertionError Thrown if the explanation model has not been fitted using __fit__ yet.
        """
        if self.model is None:
            # The message names this method so that the failing call is easy to identify.
            raise AssertionError("Model must be initialised when calling __score__. "
                                 "Did you forget to __fit__ the explanation model?")

        output_dim = Validation.get_output_dimension(y)
        if masked_data is None:
            masked_data = self.masking_operation.get_predictions_after_masking(
                self.explained_model, X, y,
                batch_size=self.model_builder.batch_size,
                downsample_factors=self.downsample_factors,
                flatten=self.flatten_for_explained_model)
            masked_data = TensorflowCXPlain._clean_output_dims(output_dim, masked_data)

        self.last_masked_data = masked_data

        if self.num_models == 1:
            return_value = self._score_single(self.model, masked_data, y, sample_weight)
        else:
            # With several models, __self.model__ is iterated and every member is scored separately.
            return_value = [self._score_single(model, masked_data, y, sample_weight) for model in self.model]
        return return_value
Esempio n. 18
0
    def check_plot_input(x, attribution, confidence=None):
        """
        Checks that __attribution__ (and optionally __confidence__) fit a single sample __x__.

        :param x: A single sample without sample dimension.
        :param attribution: The attribution for __x__ without sample dimension.
        :param confidence: The confidence intervals for __attribution__ (Optional, default: None).
        :exception ValueError Thrown if __attribution__ does not have the attribution shape expected for __x__,
                              or if __confidence__ does not hold exactly twice as many elements
                              as __attribution__.
        """
        # __get_attribution_shape__ expects a leading sample dimension, which plot inputs lack -
        # prepend one to both arrays before comparing shapes.
        batched_x = np.expand_dims(x, axis=0)
        batched_attribution = np.expand_dims(attribution, axis=0)

        expected_shape = Validation.get_attribution_shape(batched_x)
        shapes_match = np.array_equal(batched_attribution.shape, expected_shape)
        if not shapes_match:
            message = ("__attribution__ was not of the expected shape. "
                       "__attribution__.shape = {}, "
                       "expected shape = {}.")
            raise ValueError(message.format(attribution.shape, expected_shape))

        if confidence is None:
            return

        # Each attribution value needs a lower and an upper bound.
        numel_a = np.prod(attribution.shape)
        numel_c = np.prod(confidence.shape)
        if 2 * numel_a != numel_c:
            message = ("__confidence__ must have exactly two times as many features as __attribution__. "
                       "Found number of elements (__attribution__) = {},"
                       "Found number of elements (__confidence__) = {}")
            raise ValueError(message.format(numel_a, numel_c))
Esempio n. 19
0
 def test_is_variable_length_ndarray_true(self):
     """A variable length dataset wrapped in an ndarray must still be detected as variable length."""
     dataset = TestUtil.get_random_variable_length_dataset(max_value=1024)
     (samples, _), _ = dataset
     # NOTE(review): if the samples are ragged, recent numpy versions may refuse to build
     # this array without an explicit object dtype - confirm against the supported versions.
     samples_as_array = np.array(samples)
     self.assertEqual(Validation.is_variable_length(samples_as_array), True)
Esempio n. 20
0
 def test_is_variable_length_padded_false(self):
     """A variable length dataset that was padded must no longer be detected as variable length."""
     dataset = TestUtil.get_random_variable_length_dataset(max_value=1024)
     (samples, _), _ = dataset
     padded_samples = pad_sequences(samples, padding="post", truncating="post", dtype=int)
     self.assertEqual(Validation.is_variable_length(padded_samples), False)
Esempio n. 21
0
    def test_check_is_positive_integer_greaterequals_1(self):
        """Only integers of at least 1 may pass the check; everything else must raise a ValueError."""
        rejected_values = (-1, 1.1, -1.1, 0)
        for rejected in rejected_values:
            with self.assertRaises(ValueError):
                Validation.check_is_positive_integer_greaterequals_1(rejected)

        # Accepted values must pass without raising.
        accepted_values = (1, 2)
        for accepted in accepted_values:
            Validation.check_is_positive_integer_greaterequals_1(accepted)
Esempio n. 22
0
    def build_explanation_model(self,
                                input_dim,
                                output_dim,
                                loss,
                                downsample_factors=(1, )):
        """
        Builds the explanation model used for training and the model used for prediction.

        :param input_dim: The input dimension, either a single value or a sequence of values.
        :param output_dim: The output dimension of the explained model. Used as the size of the
                           auxiliary inputs for the causal loss.
        :param loss: The loss function wrapped by the causal loss.
        :param downsample_factors: The factors by which the attribution map is upsampled after it was
                                   predicted at reduced resolution (Optional, default: (1,)).
        :return: A tuple (model, prediction_model). __model__ is compiled with the causal loss and outputs
                 the attributions concatenated with the auxiliary inputs. __prediction_model__ maps the
                 input to the attributions only.
        :exception ValueError Thrown if downsampling is requested for variable length inputs, or if the
                              attribution map has more than three downsampled dimensions.
        """
        # The abstract base classes were removed from the __collections__ namespace in Python 3.10;
        # fall back to the old location for interpreters that predate __collections.abc__.
        try:
            from collections.abc import Sequence
        except ImportError:
            from collections import Sequence

        num_indices, num_channels, steps, downsampling_factor =\
            MaskingUtil.get_input_constants(input_dim, downsample_factors)

        if downsampling_factor != 1 and num_indices is None:
            raise ValueError(
                "Attribution downsampling is not supported for variable length inputs. "
                "Please pad your data samples to the same size to use downsampling."
            )

        # A scalar input dimension is wrapped so that it can be used as a shape.
        input_shape = (input_dim, ) if not isinstance(
            input_dim, Sequence) else input_dim
        input_layer = Input(shape=input_shape)
        last_layer = self.build(input_layer)

        if num_indices is None:
            # The number of attributions is unknown: score each step and normalise over all of them.
            last_layer = Dense(1, activation="linear")(last_layer)
            last_layer = Flatten()(last_layer)  # None * None outputs
            last_layer = Lambda(
                K.softmax, output_shape=K.int_shape(last_layer))(last_layer)
        else:
            last_layer = Flatten()(last_layer)
            last_layer = Dense(num_indices, activation="softmax")(last_layer)

        # Prepare extra inputs for causal loss.
        all_auxiliary_outputs = Input(shape=(output_dim, ), name="all")
        all_but_one_auxiliary_outputs_input = Input(shape=(num_indices,
                                                           output_dim),
                                                    name="all_but_one")

        if num_indices is not None:
            all_but_one_auxiliary_outputs = Lambda(lambda x: tf.unstack(
                x, axis=1))(all_but_one_auxiliary_outputs_input)
            # With a single index the result is wrapped in a list for the concatenation below.
            if K.int_shape(all_but_one_auxiliary_outputs_input)[1] == 1:
                all_but_one_auxiliary_outputs = [all_but_one_auxiliary_outputs]
        else:
            all_but_one_auxiliary_outputs = all_but_one_auxiliary_outputs_input

        all_but_one_auxiliary_outputs = Concatenate()(
            all_but_one_auxiliary_outputs)

        causal_loss_fun = CausalLoss(num_indices=num_indices,
                                     loss_function=loss)

        if downsampling_factor != 1:
            # Upsample the low resolution attribution map back to the resolution of the input.
            last_layer = Reshape(tuple(steps) + (1, ))(last_layer)

            if len(steps) == 1:
                # Add a dummy dimension to enable usage of __resize_images__.
                last_layer = Reshape(tuple(steps) + (1, 1))(last_layer)
                last_layer = Lambda(lambda x: resize_images(
                    x,
                    height_factor=downsample_factors[0],
                    width_factor=1,
                    data_format="channels_last"))(last_layer)
            elif len(steps) == 2:
                last_layer = Lambda(lambda x: resize_images(
                    x,
                    height_factor=downsample_factors[0],
                    width_factor=downsample_factors[1],
                    data_format="channels_last"))(last_layer)
            elif len(steps) == 3:
                last_layer = Lambda(lambda x: resize_volumes(
                    x,
                    depth_factor=downsample_factors[0],
                    height_factor=downsample_factors[1],
                    width_factor=downsample_factors[2],
                    data_format="channels_last"))(last_layer)
            else:
                raise ValueError(
                    "Attribution maps of larger dimensionality than 3D data are not currently supported. "
                    "Requested output dim was: {}.".format(len(steps)))

            # Collapse the upsampled map into a single attribution axis per sample.
            attribution_shape = Validation.get_attribution_shape_from_input_shape(
                num_samples=1, input_dim=input_dim)[1:]
            collapsed_attribution_shape = (int(np.prod(attribution_shape)), )
            last_layer = Reshape(collapsed_attribution_shape)(last_layer)

            # Re-normalise to sum = 1 after resizing (sum = __downsampling_factor__ after resizing).
            last_layer = Lambda(lambda x: x / float(downsampling_factor))(
                last_layer)

        # The training model carries the auxiliary inputs through to its output for the causal loss.
        final_layer = Concatenate()(
            [last_layer, all_but_one_auxiliary_outputs, all_auxiliary_outputs])

        model = Model(inputs=[
            input_layer, all_auxiliary_outputs,
            all_but_one_auxiliary_outputs_input
        ],
                      outputs=final_layer)

        model = self.compile_model(model,
                                   main_losses=causal_loss_fun,
                                   learning_rate=self.learning_rate,
                                   optimizer=self.optimizer)

        prediction_model = Model(input_layer, last_layer)
        return model, prediction_model
Esempio n. 23
0
 def test_input_shape_invalid_none(self):
     """Passing no data at all must be rejected with a ValueError."""
     self.assertRaises(ValueError, Validation.get_input_dimension, None)
Esempio n. 24
0
    def test_check_downsample_factors_at_initialisation(self):
        """Negative and non-integer downsample factors must be rejected with a ValueError."""
        rejected_factors = ((-1,), -1, 1.1, -1.1, (3.3, 2.2))
        for rejected in rejected_factors:
            with self.assertRaises(ValueError):
                Validation.check_downsample_factors_at_initialisation(rejected)

        # A tuple of positive integers must pass without raising.
        accepted_factors = (3, 2, 1)
        Validation.check_downsample_factors_at_initialisation(accepted_factors)
Esempio n. 25
0
    def test_check_is_fraction(self):
        """Values outside of [0, 1] must raise a ValueError; both bounds themselves are accepted."""
        rejected_values = (-1.0, 1.01, -0.01)
        for rejected in rejected_values:
            with self.assertRaises(ValueError):
                Validation.check_is_fraction(rejected)

        # The bounds and values just inside of them must pass without raising.
        accepted_values = (1.0, 0.0, 0.00000001, 1.0 - 0.00000001)
        for accepted in accepted_values:
            Validation.check_is_fraction(accepted)