def get_predictions_after_masking(self, explained_model, X, y, downsample_factors=(1,),
                                  batch_size=64, flatten=False):
    Validation.check_dataset(X, y)

    num_batches = int(np.ceil(len(X) / float(batch_size)))
    all_outputs = []
    for batch_idx in range(num_batches):
        x_batch = X[batch_idx*batch_size:(batch_idx+1)*batch_size]
        y_pred = MaskingUtil.get_prediction(explained_model, x_batch, flatten=flatten)

        x_imputed = []
        for x_i in x_batch:
            x_curr = []
            for j in range(len(x_i)):
                # Drop the entry at each index j to measure its marginal contribution.
                x_i_imputed_j = np.concatenate([x_i[:j], x_i[j+1:]], axis=0)
                x_curr.append(x_i_imputed_j)
            x_imputed.append(x_curr)

        all_y_pred_imputed = []
        for j, x_imputed_curr in enumerate(x_imputed):
            if len(x_imputed_curr) == 0:
                # Samples with no entries yield no imputed variants;
                # fall back to the unmasked prediction.
                y_pred_imputed = y_pred[j].reshape((1, -1))
            else:
                y_pred_imputed = MaskingUtil.get_prediction(explained_model, x_imputed_curr,
                                                            flatten=flatten)
            all_y_pred_imputed.append(y_pred_imputed)
        all_outputs.append((x_batch, y_pred, all_y_pred_imputed))

    # Concatenate the per-batch tuples into one array per output dimension.
    all_outputs = [np.concatenate(list(map(partial(lambda x, dim: x[dim], dim=dim), all_outputs)))
                   for dim in range(len(all_outputs[0]))]
    return all_outputs
def _fit_single(self, model, X, y, masked_data=None):
    Validation.check_dataset(X, y)

    if len(X) != 0:
        # Pre-compute target outputs if none are passed.
        if masked_data is None:
            output_dim = Validation.get_output_dimension(y)
            masked_data = self.masking_operation.get_predictions_after_masking(
                self.explained_model, X, y,
                batch_size=self.model_builder.batch_size,
                downsample_factors=self.downsample_factors,
                flatten=self.flatten_for_explained_model)
            masked_data = TensorflowCXPlain._clean_output_dims(output_dim, masked_data)

        self.last_masked_data = masked_data

        if self.model_filepath is None:
            from tempfile import NamedTemporaryFile
            model_filepath = NamedTemporaryFile(delete=False).name
        else:
            model_filepath = self.model_filepath

        self.last_history = self.model_builder.fit(model, masked_data, y, model_filepath)
    return self
def _build_model(self, X, y):
    Validation.check_dataset(X, y)

    if Validation.is_variable_length(X):
        raise ValueError("Variable length inputs to CXPlain are currently not supported.")

    n, p = Validation.get_input_dimension(X)
    output_dim = Validation.get_output_dimension(y)

    if self.model is None:
        if self.num_models == 1:
            build_fun = self._build_single
        else:
            build_fun = self._build_ensemble
        self.model, self.prediction_model = build_fun(input_dim=p, output_dim=output_dim)
def predict_with_i_imputed(x, index):
    x_imputed = math_ops.copy(x)
    original_shape = math_ops.shape(x_imputed)
    target_shape = (original_shape[0],
                    math_ops.as_int(math_ops.prod(original_shape[1:])))

    if downsampling_factor == 1:
        # Flatten all feature dimensions, zero the selected feature, then restore the shape.
        needs_reshape = len(original_shape) > 2
        if needs_reshape:
            x_imputed = math_ops.reshape(x_imputed, target_shape)
        x_imputed[:, index] = 0
        if needs_reshape:
            x_imputed = math_ops.reshape(x_imputed, original_shape)
    else:
        # With downsampling, zero out an entire feature group via an inverted mask.
        full_shape = Validation.get_full_input_shape(original_shape[0], input_dim)
        mask = MaskingUtil.get_ith_mask(index, input_dim, downsample_factors, math_ops=math_ops)
        x_imputed = math_ops.reshape(x_imputed, full_shape)
        inverted_mask = (mask - 1.) * -1.
        x_imputed = math_ops.multiply(
            x_imputed,
            math_ops.expand_dims(math_ops.cast(inverted_mask, float), axis=-1))
    return x_imputed
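# A minimal NumPy sketch of the zero-masking path above (the downsampling_factor == 1
# branch): flatten the feature dimensions, zero the selected index, and restore the
# original shape. __mask_feature__ and __toy_batch__ are illustrative names, not part
# of the library API.
import numpy as np

def mask_feature(x, index):
    original_shape = x.shape
    x_flat = x.reshape(original_shape[0], -1).copy()  # (num_samples, num_features)
    x_flat[:, index] = 0                              # zero out feature __index__
    return x_flat.reshape(original_shape)

toy_batch = np.ones((2, 3, 3, 1))          # two 3x3 single-channel "images"
masked = mask_feature(toy_batch, index=4)  # zeroes the centre pixel of each sample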
def predict(self, X, confidence_level=None):
    """
    Estimates the importance of the inputs in __X__ towards the __self.explained_model__'s decision.
    Provides confidence intervals if __confidence_level__ is not None.

    :param X: The data samples to be evaluated. The first dimension must be the number of samples.
    :param confidence_level: The confidence level used to report the confidence intervals, i.e. a
                             confidence level of 0.95 would indicate that you wish to obtain the
                             0.025 and 0.975 quantiles of the output distribution. If None, no
                             confidence intervals are returned. The CXPlain instance must have been
                             initialised with __num_models__ > 1 in order to be able to compute
                             confidence intervals. (Optional, default: None).
    :return: (i) An array of predictions that estimate the importance of each input feature in
             __X__ based on the sample data __X__. The first dimension of the returned array will
             be the sample dimension and it will match that of __X__, if confidence_level is None,
             or (ii) a tuple of two entries with the first entry being the predictions and the
             second entry being the confidence interval (CI) for each provided feature importance
             estimate reported in the first entry. The last dimension of the confidence interval
             reported is (2,) and the entries are (CI lower bound, CI upper bound), if
             confidence_level is not None.
    :exception AssertionError Thrown if __predict__ was called without first fitting the
                              explanation model using __fit__.
    :exception ValueError Thrown if the value of __confidence_level__ was not in the range (0, 1).
    """
    if self.prediction_model is None:
        raise AssertionError("Model must be initialised when calling __predict__. "
                             "Did you forget to __fit__ the explanation model?")

    if confidence_level is not None and \
            (confidence_level <= 0.0 or confidence_level >= 1.0 or
             np.isclose(confidence_level, 0.) or
             np.isclose(confidence_level, 1.)):
        raise ValueError("The __confidence_level__ must be a value between 0 (exclusive) "
                         "and 1 (exclusive).")

    if self.num_models == 1:
        ret_val = self._predict_single(self.prediction_model, X)
    else:
        ret_val = self._predict_multiple(X, confidence_level=confidence_level)

    target_shape = Validation.get_attribution_shape(X)

    if len(target_shape) >= 4:
        confidence_shape = target_shape[:-1] + (2,)
    else:
        confidence_shape = target_shape + (2,)

    if isinstance(ret_val, tuple):
        ret_val = ret_val[0].reshape(target_shape), ret_val[1].reshape(confidence_shape)
    else:
        ret_val = ret_val.reshape(target_shape)
    return ret_val
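# Sketch of the quantile semantics described in the docstring above: at
# confidence_level = 0.95, the bounds are the 0.025 and 0.975 quantiles of the
# per-model attribution distribution. Whether the point estimate is the mean or the
# median of the ensemble is an implementation detail of __predict_multiple__; this
# NumPy-only illustration just shows the quantile mapping and is not library code.
import numpy as np

per_model_attr = np.random.dirichlet(np.ones(4), size=(5, 10))  # 5 models, 10 samples, 4 features
confidence_level = 0.95
alpha = (1. - confidence_level) / 2.
ci_lower = np.quantile(per_model_attr, alpha, axis=0)       # (10, 4): 0.025 quantile per feature
ci_upper = np.quantile(per_model_attr, 1. - alpha, axis=0)  # (10, 4): 0.975 quantile per feature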
def test_input_shape_tabular_valid(self):
    test_num_samples = [1, 2, 1024]
    test_num_features = [1, 2, 1024]
    for num_samples in test_num_samples:
        for num_features in test_num_features:
            x = np.random.random_sample(size=(num_samples, num_features))
            n, input_dim = Validation.get_input_dimension(x)
            self.assertEqual(n, num_samples)
            self.assertEqual(input_dim, (num_features,))
def test_input_shape_time_series_variable_valid(self):
    test_num_samples = [2, 3, 1024]
    test_num_lens = [1, 2, 256]
    test_num_features = [1, 2, 1024]
    for num_samples in test_num_samples:
        for num_features in test_num_features:
            x = [np.random.random_sample(size=(test_num_lens[i % len(test_num_lens)], num_features))
                 for i in range(num_samples)]
            n, input_dim = Validation.get_input_dimension(x)
            self.assertEqual(n, num_samples)
            self.assertEqual(input_dim, (None, num_features))
def test_input_shape_time_series_fixed_valid(self):
    test_num_samples = [1, 2, 1024]
    test_num_lens = [1, 2, 256]
    test_num_features = [1, 2, 1024]
    for num_samples in test_num_samples:
        for ts_length in test_num_lens:
            for num_features in test_num_features:
                x = np.random.random_sample(size=(num_samples, ts_length, num_features))
                n, input_dim = Validation.get_input_dimension(x)
                self.assertEqual(n, num_samples)
                self.assertEqual(input_dim, (ts_length, num_features))
def test_input_shape_invalid_1dim(self):
    with self.assertRaises(ValueError):
        Validation.get_input_dimension([1])
    with self.assertRaises(ValueError):
        Validation.get_input_dimension([1, 2, 3])
    with self.assertRaises(ValueError):
        Validation.get_input_dimension([None])
def __init__(self, explained_model, model_builder, masking_operation, loss,
             downsample_factors=(1,), num_models=1):
    super(CXPlain, self).__init__()
    self.explained_model = explained_model
    self.model_builder = model_builder
    self.masking_operation = masking_operation
    self.loss = loss
    self.last_masked_data = None
    self.prediction_model = None

    Validation.check_is_positive_integer_greaterequals_1(num_models, var_name="num_models")
    self.num_models = num_models

    Validation.check_downsample_factors_at_initialisation(downsample_factors)
    self.downsample_factors = downsample_factors
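# Hedged end-to-end construction sketch. MLPModelBuilder and ZeroMasking mirror the
# names used in the CXPlain examples, but the exact builder arguments here are
# assumptions; the explained model can be any fitted estimator exposing a prediction
# method (here a scikit-learn classifier on toy data), and whether its raw output is
# consumed directly depends on the library's prediction wrapper.
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from tensorflow.python.keras.losses import categorical_crossentropy
from cxplain import CXPlain, MLPModelBuilder, ZeroMasking

x_train = np.random.random_sample((100, 4))
labels = (x_train[:, 0] > 0.5).astype(int)
y_train = np.eye(2)[labels]  # one-hot targets for categorical_crossentropy

explained_model = RandomForestClassifier(n_estimators=16).fit(x_train, labels)

model_builder = MLPModelBuilder(num_layers=2, num_units=24, batch_size=32, num_epochs=10)
masking_operation = ZeroMasking()
explainer = CXPlain(explained_model, model_builder, masking_operation,
                    loss=categorical_crossentropy, num_models=5)
explainer.fit(x_train, y_train)
attributions, confidence = explainer.predict(x_train, confidence_level=0.95)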
def test_input_shape_image_fixed_valid(self):
    test_num_samples = [1, 2, 1024]
    test_num_rows = [1, 2, 256]
    test_num_cols = [1, 2, 256]
    test_num_channels = [1, 2, 3]
    for num_samples in test_num_samples:
        for rows in test_num_rows:
            for cols in test_num_cols:
                for num_channels in test_num_channels:
                    x = np.random.random_sample(size=(num_samples, rows, cols, num_channels))
                    n, input_dim = Validation.get_input_dimension(x)
                    self.assertEqual(n, num_samples)
                    self.assertEqual(input_dim, (rows, cols, num_channels))
def test_input_shape_volume_fixed_valid(self):
    test_num_samples = [1, 2, 128]
    test_num_voxels = [1, 2, 64]
    test_num_channels = [1, 2, 3]
    for num_samples in test_num_samples:
        for rows in test_num_voxels:
            for cols in test_num_voxels:
                for depth in test_num_voxels:
                    for num_channels in test_num_channels:
                        x = np.random.random_sample(size=(num_samples, rows, cols, depth,
                                                          num_channels))
                        n, input_dim = Validation.get_input_dimension(x)
                        self.assertEqual(n, num_samples)
                        self.assertEqual(input_dim, (rows, cols, depth, num_channels))
def get_predictions_after_masking(self, explained_model, X, y, downsample_factors=(1,),
                                  batch_size=64, flatten=False):
    Validation.check_dataset(X, y)

    num_batches = int(np.ceil(len(X) / float(batch_size)))
    all_outputs = []
    for batch_idx in range(num_batches):
        x = X[batch_idx * batch_size:(batch_idx + 1) * batch_size]
        y_pred = MaskingUtil.get_prediction(explained_model, x, flatten=flatten)

        # One imputed copy of the batch per (potentially downsampled) input feature.
        x_imputed = MaskingUtil.get_x_imputed(x, downsample_factors, math_ops=NumpyInterface)

        all_y_pred_imputed = []
        for x_imputed_curr in x_imputed:
            y_pred_imputed = MaskingUtil.get_prediction(explained_model, x_imputed_curr,
                                                        flatten=flatten)
            all_y_pred_imputed.append(y_pred_imputed)

        # Stack to (num_features, batch, output_dim), then swap to (batch, num_features, output_dim).
        all_y_pred_imputed = np.stack(all_y_pred_imputed).swapaxes(0, 1)
        all_outputs.append((x, y_pred, all_y_pred_imputed))

    # Concatenate the per-batch tuples into one array per output dimension.
    all_outputs = [np.concatenate(list(map(partial(lambda x, dim: x[dim], dim=dim), all_outputs)))
                   for dim in range(len(all_outputs[0]))]
    return all_outputs
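# Minimal sketch of what the masking loop measures, with a toy linear "model" standing
# in for explained_model (illustrative only; __get_prediction__ normally delegates to
# the explained model's own prediction function):
import numpy as np

def toy_predict(x_batch):
    return np.asarray(x_batch).sum(axis=1, keepdims=True)  # shape (batch, 1)

x = np.array([[1., 2., 3.]])
y_pred = toy_predict(x)  # prediction on the unmasked input
y_pred_imputed = [toy_predict(np.where(np.arange(3) == j, 0., x)) for j in range(3)]
# The change in the model's error when feature j is zeroed out is what the downstream
# causal loss turns into an importance estimate for feature j.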
def __init__(self, callbacks=list([]), early_stopping_patience=12, batch_size=64,
             num_epochs=100, validation_fraction=0.1, shuffle=True, learning_rate=0.0001,
             optimizer=None, verbose=0):
    self.batch_size = batch_size

    Validation.check_is_positive_integer_greaterequals_1(num_epochs, var_name="num_epochs")
    self.num_epochs = num_epochs

    Validation.check_is_fraction(validation_fraction, var_name="validation_fraction")
    self.validation_fraction = validation_fraction

    self.shuffle = shuffle
    self.learning_rate = learning_rate
    self.optimizer = optimizer
    self.verbose = verbose
    self.callbacks = callbacks
    self.early_stopping_patience = early_stopping_patience
def test_input_shape_volume_variable_valid(self):
    test_num_samples = [2, 3, 128]
    test_num_lens = [2, 3, 64]
    test_num_features = [1, 2, 3]
    for num_samples in test_num_samples:
        for num_features in test_num_features:
            x = [np.random.random_sample(size=(test_num_lens[i % len(test_num_lens)],
                                               test_num_lens[(i + 1) % len(test_num_lens)],
                                               test_num_lens[(i + 2) % len(test_num_lens)],
                                               num_features))
                 for i in range(num_samples)]
            n, input_dim = Validation.get_input_dimension(x)
            self.assertEqual(n, num_samples)
            self.assertEqual(input_dim, (None, None, None, num_features))
def test_get_attribution_shape_multi_channel(self):
    num_samples, intermediary_dimensions, num_channels = [1, 2, 100], [0, 1, 2, 3], [0, 1, 2, 3]
    for samples in num_samples:
        for num_dims in intermediary_dimensions:
            for channels in num_channels:
                source_size = (samples,) + (2,) * num_dims
                if channels != 0:
                    source_size += (channels,)
                data = np.random.normal(0, 1, size=source_size)

                if num_dims == 0 and channels == 0:
                    with self.assertRaises(ValueError):
                        Validation.get_attribution_shape(data)
                    continue
                else:
                    attribution_shape = Validation.get_attribution_shape(data)
                    if len(source_size) >= 3:
                        # Multi-channel inputs collapse to a single attribution channel.
                        adjusted_source_size = source_size[:-1] + (1,)
                        self.assertEqual(attribution_shape, adjusted_source_size)
                    else:
                        self.assertEqual(attribution_shape, source_size)
def score(self, X, y, sample_weight=None, masked_data=None):
    """
    Evaluates the performance, in terms of causal loss, of the current CXPlain model.

    :param X: The data samples to be evaluated. The first dimension must be the number of samples.
              (Required)
    :param y: The ground truth labels to be compared to. The first dimension must be the number of
              samples. (Required)
    :param sample_weight: The sample weights to apply to the samples in X during evaluation.
                          The first dimension must be the number of samples and it must match that
                          of __X__ and __y__. If None, equal weighting is used.
                          (Optional, default: None).
    :param masked_data: An array of precomputed masked data as can be obtained from
                        __get_masked_data__. If None, the masked data is computed. If set, the
                        precomputed masked data is used for scoring and computation of the masked
                        data is skipped. (Optional, default: None).
    :return: Score results as returned by self.model_builder.evaluate(model, X, y, sample_weight),
             either (i) as a single score result if __num_models__ = 1, or (ii) as a list of score
             results if __num_models__ is greater than 1.
    :exception AssertionError Thrown if the explanation model has not been fitted using __fit__ yet.
    """
    if self.model is None:
        raise AssertionError("Model must be initialised when calling __score__. "
                             "Did you forget to __fit__ the explanation model?")

    output_dim = Validation.get_output_dimension(y)

    if masked_data is None:
        masked_data = self.masking_operation.get_predictions_after_masking(
            self.explained_model, X, y,
            batch_size=self.model_builder.batch_size,
            downsample_factors=self.downsample_factors,
            flatten=self.flatten_for_explained_model)
        masked_data = TensorflowCXPlain._clean_output_dims(output_dim, masked_data)

    self.last_masked_data = masked_data

    if self.num_models == 1:
        return_value = self._score_single(self.model, masked_data, y, sample_weight)
    else:
        return_value = [self._score_single(model, masked_data, y, sample_weight)
                        for model in self.model]
    return return_value
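# Hedged usage sketch for __score__, reusing the fitted __explainer__ from the
# construction example above (x_test and y_test are assumed held-out data with the
# same shapes as the training arrays):
x_test = np.random.random_sample((20, 4))
y_test = np.eye(2)[(x_test[:, 0] > 0.5).astype(int)]
test_score = explainer.score(x_test, y_test)  # causal loss of the explanation model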
def check_plot_input(x, attribution, confidence=None):
    # Add a sample dimension - inputs to __check_plot_input__ are passed without one,
    # but __get_attribution_shape__ expects it.
    x_with_sample_dim = np.expand_dims(x, axis=0)
    attribution_with_sample_dim = np.expand_dims(attribution, axis=0)

    expected_attribution_shape = Validation.get_attribution_shape(x_with_sample_dim)
    if not np.array_equal(attribution_with_sample_dim.shape, expected_attribution_shape):
        raise ValueError("__attribution__ was not of the expected shape. "
                         "__attribution__.shape = {}, "
                         "expected shape = {}.".format(attribution.shape,
                                                       expected_attribution_shape))

    if confidence is not None:
        numel_a, numel_c = np.prod(attribution.shape), np.prod(confidence.shape)
        if 2 * numel_a != numel_c:
            raise ValueError("__confidence__ must have exactly two times as many elements "
                             "as __attribution__. "
                             "Found number of elements (__attribution__) = {}, "
                             "found number of elements (__confidence__) = {}.".format(numel_a,
                                                                                      numel_c))
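# Shape sketch consistent with the checks above (assumed example, calling the function
# as defined here): a 28x28 RGB input pairs with a single-channel attribution map, and
# the confidence array carries one (lower, upper) pair per attribution entry.
import numpy as np

x = np.zeros((28, 28, 3))              # one sample, passed without a sample dimension
attribution = np.zeros((28, 28, 1))    # channels collapse to 1 in the attribution map
confidence = np.zeros((28, 28, 1, 2))  # exactly 2x as many elements as __attribution__
check_plot_input(x, attribution, confidence)  # passes without raising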
def test_is_variable_length_ndarray_true(self):
    (x, _), _ = TestUtil.get_random_variable_length_dataset(max_value=1024)
    x = np.array(x)
    return_value = Validation.is_variable_length(x)
    self.assertEqual(return_value, True)
def test_is_variable_length_padded_false(self):
    (x, _), _ = TestUtil.get_random_variable_length_dataset(max_value=1024)
    x = pad_sequences(x, padding="post", truncating="post", dtype=int)
    return_value = Validation.is_variable_length(x)
    self.assertEqual(return_value, False)
def test_check_is_positive_integer_greaterequals_1(self):
    with self.assertRaises(ValueError):
        Validation.check_is_positive_integer_greaterequals_1(-1)
    with self.assertRaises(ValueError):
        Validation.check_is_positive_integer_greaterequals_1(1.1)
    with self.assertRaises(ValueError):
        Validation.check_is_positive_integer_greaterequals_1(-1.1)
    with self.assertRaises(ValueError):
        Validation.check_is_positive_integer_greaterequals_1(0)

    Validation.check_is_positive_integer_greaterequals_1(1)
    Validation.check_is_positive_integer_greaterequals_1(2)
def build_explanation_model(self, input_dim, output_dim, loss, downsample_factors=(1,)):
    num_indices, num_channels, steps, downsampling_factor = \
        MaskingUtil.get_input_constants(input_dim, downsample_factors)

    if downsampling_factor != 1 and num_indices is None:
        raise ValueError("Attribution downsampling is not supported for variable length inputs. "
                         "Please pad your data samples to the same size to use downsampling.")

    input_shape = (input_dim,) if not isinstance(input_dim, collections.Sequence) else input_dim
    input_layer = Input(shape=input_shape)
    last_layer = self.build(input_layer)

    if num_indices is None:
        # Variable-length inputs: predict one attribution weight per step.
        last_layer = Dense(1, activation="linear")(last_layer)
        last_layer = Flatten()(last_layer)  # None * None outputs
        last_layer = Lambda(K.softmax, output_shape=K.int_shape(last_layer))(last_layer)
    else:
        last_layer = Flatten()(last_layer)
        last_layer = Dense(num_indices, activation="softmax")(last_layer)

    # Prepare extra inputs for the causal loss.
    all_auxiliary_outputs = Input(shape=(output_dim,), name="all")
    all_but_one_auxiliary_outputs_input = Input(shape=(num_indices, output_dim),
                                                name="all_but_one")

    if num_indices is not None:
        all_but_one_auxiliary_outputs = Lambda(
            lambda x: tf.unstack(x, axis=1))(all_but_one_auxiliary_outputs_input)
        if K.int_shape(all_but_one_auxiliary_outputs_input)[1] == 1:
            all_but_one_auxiliary_outputs = [all_but_one_auxiliary_outputs]
    else:
        all_but_one_auxiliary_outputs = all_but_one_auxiliary_outputs_input
    all_but_one_auxiliary_outputs = Concatenate()(all_but_one_auxiliary_outputs)

    causal_loss_fun = CausalLoss(num_indices=num_indices, loss_function=loss)

    if downsampling_factor != 1:
        # Upsample the coarse attribution map back to the full input resolution.
        last_layer = Reshape(tuple(steps) + (1,))(last_layer)

        if len(steps) == 1:
            # Add a dummy dimension to enable usage of __resize_images__.
            last_layer = Reshape(tuple(steps) + (1, 1))(last_layer)
            last_layer = Lambda(lambda x: resize_images(x,
                                                        height_factor=downsample_factors[0],
                                                        width_factor=1,
                                                        data_format="channels_last"))(last_layer)
        elif len(steps) == 2:
            last_layer = Lambda(lambda x: resize_images(x,
                                                        height_factor=downsample_factors[0],
                                                        width_factor=downsample_factors[1],
                                                        data_format="channels_last"))(last_layer)
        elif len(steps) == 3:
            last_layer = Lambda(lambda x: resize_volumes(x,
                                                         depth_factor=downsample_factors[0],
                                                         height_factor=downsample_factors[1],
                                                         width_factor=downsample_factors[2],
                                                         data_format="channels_last"))(last_layer)
        else:
            raise ValueError("Attribution maps of larger dimensionality than 3D data are not "
                             "currently supported. "
                             "Requested output dim was: {}.".format(len(steps)))

        attribution_shape = Validation.get_attribution_shape_from_input_shape(
            num_samples=1, input_dim=input_dim)[1:]
        collapsed_attribution_shape = (int(np.prod(attribution_shape)),)
        last_layer = Reshape(collapsed_attribution_shape)(last_layer)

        # Re-normalise to sum = 1 after resizing (sum = __downsampling_factor__ after resizing).
        last_layer = Lambda(lambda x: x / float(downsampling_factor))(last_layer)

    final_layer = Concatenate()([last_layer, all_but_one_auxiliary_outputs,
                                 all_auxiliary_outputs])

    model = Model(inputs=[input_layer, all_auxiliary_outputs,
                          all_but_one_auxiliary_outputs_input],
                  outputs=final_layer)
    model = self.compile_model(model, main_losses=causal_loss_fun,
                               learning_rate=self.learning_rate, optimizer=self.optimizer)

    prediction_model = Model(input_layer, last_layer)
    return model, prediction_model
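# Sketch of the causal objective this model is compiled with, following the CXPlain
# paper: the target distribution omega is the normalised increase in the explained
# model's error when each feature is masked, and the explanation model is trained to
# match it under a KL divergence. NumPy-only illustration; the actual implementation
# lives in __CausalLoss__, and clipping negatives is a simplification of this sketch
# to keep omega a valid distribution.
import numpy as np

def causal_target(err_full, err_masked):
    delta = np.maximum(err_masked - err_full, 0.)  # error increase per masked feature
    return delta / max(delta.sum(), 1e-12)         # normalise into a distribution omega

def kl_causal_loss(omega, attribution, eps=1e-12):
    return np.sum(omega * (np.log(omega + eps) - np.log(attribution + eps)))

omega = causal_target(err_full=0.1, err_masked=np.array([0.4, 0.15, 0.1]))
loss_value = kl_causal_loss(omega, attribution=np.array([0.6, 0.25, 0.15]))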
def test_input_shape_invalid_none(self):
    with self.assertRaises(ValueError):
        Validation.get_input_dimension(None)
def test_check_downsample_factors_at_initialisation(self):
    with self.assertRaises(ValueError):
        Validation.check_downsample_factors_at_initialisation((-1,))
    with self.assertRaises(ValueError):
        Validation.check_downsample_factors_at_initialisation(-1)
    with self.assertRaises(ValueError):
        Validation.check_downsample_factors_at_initialisation(1.1)
    with self.assertRaises(ValueError):
        Validation.check_downsample_factors_at_initialisation(-1.1)
    with self.assertRaises(ValueError):
        Validation.check_downsample_factors_at_initialisation((3.3, 2.2))

    Validation.check_downsample_factors_at_initialisation((3, 2, 1))
def test_check_is_fraction(self):
    with self.assertRaises(ValueError):
        Validation.check_is_fraction(-1.0)
    with self.assertRaises(ValueError):
        Validation.check_is_fraction(1.01)
    with self.assertRaises(ValueError):
        Validation.check_is_fraction(-0.01)

    Validation.check_is_fraction(1.0)
    Validation.check_is_fraction(0.0)
    Validation.check_is_fraction(0.00000001)
    Validation.check_is_fraction(1.0 - 0.00000001)