def test_causal_loss_broken_loss_function(self):
    explained_model = TestUtil.get_classification_models()[0]
    batch_size = 32
    num_samples = 1024
    x, y = TestUtil.get_test_dataset_with_one_oracle_feature(num_samples=num_samples)

    TestUtil.fit_proxy(explained_model, x, y)
    masking = ZeroMasking()
    _, y_pred, all_y_pred_imputed = masking.get_predictions_after_masking(
        explained_model, x, y,
        batch_size=batch_size,
        downsample_factors=(1,),
        flatten=True)
    auxiliary_outputs = y_pred
    all_but_one_auxiliary_outputs = all_y_pred_imputed
    all_but_one_auxiliary_outputs = TestUtil.split_auxiliary_outputs_on_feature_dim(
        all_but_one_auxiliary_outputs)

    def broken_loss(y_true, y_pred):
        # Deliberately broken: reducing over axis 0 collapses the per-sample
        # dimension that calculate_delta_errors needs to compute delta errors.
        return np.mean(NumpyInterface.binary_crossentropy(y_true, y_pred), axis=0)

    with self.assertRaises(ValueError):
        _ = calculate_delta_errors(y,
                                   auxiliary_outputs,
                                   all_but_one_auxiliary_outputs,
                                   broken_loss,
                                   math_ops=NumpyInterface)
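# For contrast with broken_loss above, a loss accepted by calculate_delta_errors
# must keep one value per sample, i.e. it must not reduce over the batch axis.
# A minimal sketch (the exact reduction contract is an assumption here, inferred
# from the ValueError asserted above):
def per_sample_loss(y_true, y_pred):
    # One binary cross-entropy value per sample; no np.mean over axis 0.
    return NumpyInterface.binary_crossentropy(y_true, y_pred)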
def test_causal_loss_padded_input(self):
    models = TestUtil.get_classification_models()
    batch_size = 32
    num_samples = 1024
    num_words = 1024
    (x_train, y_train), (x_test, y_test) = \
        TestUtil.get_random_variable_length_dataset(num_samples=num_samples,
                                                    max_value=num_words)
    x, y = np.concatenate([x_train, x_test], axis=0), \
           np.concatenate([y_train, y_test], axis=0)

    self.assertEqual(x.shape[0], num_samples)

    for explained_model in models:
        counter = CountVectoriser(num_words)
        tfidf_transformer = TfidfTransformer()

        explained_model = Pipeline([('counts', counter),
                                    ('tfidf', tfidf_transformer),
                                    ('model', explained_model)])

        TestUtil.fit_proxy(explained_model, x, y)
        masking = WordDropMasking()

        x = pad_sequences(x, padding="post", truncating="post", dtype=int)
        _, y_pred, all_y_pred_imputed = masking.get_predictions_after_masking(
            explained_model, x, y,
            batch_size=batch_size,
            downsample_factors=(1,),
            flatten=False)
        auxiliary_outputs = y_pred
        all_but_one_auxiliary_outputs = all_y_pred_imputed
        all_but_one_auxiliary_outputs = TestUtil.split_auxiliary_outputs_on_feature_dim(
            all_but_one_auxiliary_outputs)

        delta_errors = calculate_delta_errors(
            y,
            auxiliary_outputs,
            all_but_one_auxiliary_outputs,
            NumpyInterface.binary_crossentropy,
            math_ops=NumpyInterface)

        # Ensure correct delta error dimensionality.
        self.assertEqual(delta_errors.shape, (num_samples, x.shape[1]))
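# Why delta_errors has x.shape[1] columns above: pad_sequences turns the
# variable-length sequences into a dense (num_samples, max_len) int matrix by
# padding/truncating at the end, so every (padded) word position gets exactly
# one delta error per sample. A minimal standalone illustration:
#
#   >>> pad_sequences([[1, 2], [3]], padding="post")
#   array([[1, 2],
#          [3, 0]], dtype=int32)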
def test_causal_loss_duplicate_feature(self):
    models = TestUtil.get_classification_models()
    batch_size = 32
    num_samples = 1024
    x, y = TestUtil.get_test_dataset_with_two_oracle_features(num_samples=num_samples)

    for explained_model in models:
        TestUtil.fit_proxy(explained_model, x, y)
        masking = ZeroMasking()
        _, y_pred, all_y_pred_imputed = masking.get_predictions_after_masking(
            explained_model, x, y,
            batch_size=batch_size,
            downsample_factors=(1,),
            flatten=True)
        auxiliary_outputs = y_pred
        all_but_one_auxiliary_outputs = all_y_pred_imputed
        all_but_one_auxiliary_outputs = TestUtil.split_auxiliary_outputs_on_feature_dim(
            all_but_one_auxiliary_outputs)

        delta_errors = calculate_delta_errors(
            y,
            auxiliary_outputs,
            all_but_one_auxiliary_outputs,
            NumpyInterface.binary_crossentropy,
            math_ops=NumpyInterface)

        # Ensure correct delta error dimensionality.
        self.assertEqual(delta_errors.shape, (num_samples, x.shape[-1]))

        # Ensure both input oracles receive the same importance.
        self.assertTrue(np.allclose(delta_errors[:, 0], delta_errors[:, 1],
                                    atol=0.1, rtol=0.1))
def test_causal_loss_confounded_input(self):
    models = TestUtil.get_classification_models()
    batch_size = 32
    num_samples = 1024
    x, y = TestUtil.get_test_dataset_with_confounded_input(num_samples=num_samples)

    for explained_model in models:
        TestUtil.fit_proxy(explained_model, x, y)
        masking = ZeroMasking()
        _, y_pred, all_y_pred_imputed = masking.get_predictions_after_masking(
            explained_model, x, y,
            batch_size=batch_size,
            downsample_factors=(1,),
            flatten=True)
        auxiliary_outputs = y_pred
        all_but_one_auxiliary_outputs = all_y_pred_imputed
        all_but_one_auxiliary_outputs = TestUtil.split_auxiliary_outputs_on_feature_dim(
            all_but_one_auxiliary_outputs)

        delta_errors = calculate_delta_errors(
            y,
            auxiliary_outputs,
            all_but_one_auxiliary_outputs,
            NumpyInterface.binary_crossentropy,
            math_ops=NumpyInterface)

        # Ensure correct delta error dimensionality.
        self.assertEqual(delta_errors.shape, (num_samples, x.shape[-1]))

        # Ensure both input oracles receive (roughly) the same importance upon convergence.
        self.assertTrue(
            np.abs(np.diff(np.sum(delta_errors, axis=0) / float(num_samples))) < 0.1)
def test_causal_loss_simple(self):
    models = TestUtil.get_classification_models()
    batch_size = 32
    num_samples = 1024
    x, y = TestUtil.get_test_dataset_with_one_oracle_feature(num_samples=num_samples)

    for explained_model in models:
        TestUtil.fit_proxy(explained_model, x, y)
        masking = ZeroMasking()
        _, y_pred, all_y_pred_imputed = masking.get_predictions_after_masking(
            explained_model, x, y,
            batch_size=batch_size,
            downsample_factors=(1,),
            flatten=True)
        auxiliary_outputs = y_pred
        all_but_one_auxiliary_outputs = all_y_pred_imputed
        all_but_one_auxiliary_outputs = TestUtil.split_auxiliary_outputs_on_feature_dim(
            all_but_one_auxiliary_outputs)

        delta_errors = calculate_delta_errors(
            y,
            auxiliary_outputs,
            all_but_one_auxiliary_outputs,
            NumpyInterface.binary_crossentropy,
            math_ops=NumpyInterface)

        # Ensure correct delta error dimensionality.
        self.assertEqual(delta_errors.shape, (num_samples, x.shape[-1]))

        # Feature at index 0 should be the most important one for explained_model's
        # predictions - if the model converged correctly.
        self.assertEqual(np.argmax(np.sum(delta_errors, axis=0)), 0)
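# What the assertions above probe, informally: a feature's delta error is the
# increase in prediction error when that feature is masked out. A minimal sketch
# of the idea (the library's exact shapes and normalisation may differ):
def delta_error_sketch(loss, y_true, y_pred_full, y_pred_without_i):
    # Error with feature i removed minus error with all features present;
    # larger values mean feature i mattered more for the prediction.
    return loss(y_true, y_pred_without_i) - loss(y_true, y_pred_full)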
def get_feature_importances(self, model):
    from cxplain import ZeroMasking
    from cxplain.util.test_util import TestUtil
    from cxplain.backend.masking.masking_util import MaskingUtil
    from cxplain.backend.causal_loss import calculate_delta_errors
    from cxplain.backend.numpy_math_interface import NumpyInterface

    x, y = self.test_set[0], self.test_set[1]
    masking = ZeroMasking()

    if isinstance(model, Pipeline):
        transform = model.steps[0][1]
        x = transform.transform(np.array(x))
        model = model.steps[1][1]

    num_features = np.array(x).shape[-1]
    max_num_feature_groups = int(np.rint(self.args["max_num_feature_groups"]))
    if max_num_feature_groups >= num_features:
        # Enough budget for one group per feature - score every feature directly.
        _, y_pred, all_y_pred_imputed = masking.get_predictions_after_masking(
            model, x, y,
            batch_size=len(x),
            downsample_factors=(1,),
            flatten=True)
        auxiliary_outputs = y_pred
        all_but_one_auxiliary_outputs = all_y_pred_imputed
        all_but_one_auxiliary_outputs = TestUtil.split_auxiliary_outputs_on_feature_dim(
            all_but_one_auxiliary_outputs)

        delta_errors = calculate_delta_errors(
            np.expand_dims(y, axis=-1),
            auxiliary_outputs,
            all_but_one_auxiliary_outputs,
            NumpyInterface.binary_crossentropy,
            math_ops=NumpyInterface)

        group_importances = np.mean(delta_errors, axis=0)
        feature_groups = np.expand_dims(np.arange(x[0].shape[-1]), axis=-1).tolist()
    else:
        class ModelWrapper(object):
            def __init__(self, wrapped_model, real_data, dummy_to_real_mapping):
                self.wrapped_model = wrapped_model
                self.real_data = real_data
                self.dummy_to_real_mapping = dummy_to_real_mapping

            def map_from_dummy(self, dummy):
                # Expand a (num_samples, num_groups) 0/1 dummy matrix into a
                # feature-level mask over the real data: zeroing dummy column j
                # zeroes every real feature in group j.
                mask = np.ones(np.array(self.real_data).shape)
                for i, row in enumerate(dummy):
                    for j, group in enumerate(self.dummy_to_real_mapping):
                        if row[j] == 0.:
                            mask[i, group] = 0
                return self.real_data * mask

            def predict(self, x):
                x = self.map_from_dummy(x)
                y = MaskingUtil.predict_proxy(model, x)
                if len(y.shape) == 1:
                    y = np.expand_dims(y, axis=-1)
                return y

        num_groups = 1
        feature_groups = [np.random.permutation(np.arange(x[0].shape[-1]))]
        group_importances = [1.0]
        while num_groups < max_num_feature_groups:
            num_groups += 1

            # Recurse into the group with the largest relative importance
            # that can still be split.
            did_find_splittable = False
            highest_importances = np.argsort(group_importances)[::-1]
            for highest_importance in highest_importances:
                if len(feature_groups[highest_importance]) != 1:
                    did_find_splittable = True
                    break

            if not did_find_splittable:
                # max_num_groups > len(features) - abort.
                break

            # Odd-sized groups: carry the leftover feature into the first half.
            rest = len(feature_groups[highest_importance]) % 2
            if rest != 0:
                carry = feature_groups[highest_importance][-rest:].tolist()
                feature_groups[highest_importance] = \
                    feature_groups[highest_importance][:-rest]
            else:
                carry = []

            feature_groups[highest_importance] = np.split(
                feature_groups[highest_importance], 2)
            feature_groups[highest_importance][0] = np.array(
                feature_groups[highest_importance][0].tolist() + carry)

            # Splice the two new halves back in place of the split group.
            recombined = feature_groups[:highest_importance] + \
                feature_groups[highest_importance] + \
                feature_groups[highest_importance + 1:]
            assert 0 not in map(len, recombined)  # No group may be empty.
            feature_groups = recombined

            # Re-score the groups: each dummy column stands in for one group.
            wrapped_model = ModelWrapper(model, x, feature_groups)
            dummy_data = np.ones((len(x), num_groups))
            _, y_pred, all_y_pred_imputed = masking.get_predictions_after_masking(
                wrapped_model, dummy_data, y,
                batch_size=len(x),
                downsample_factors=(1,),
                flatten=True)
            auxiliary_outputs = y_pred
            all_but_one_auxiliary_outputs = all_y_pred_imputed
            all_but_one_auxiliary_outputs = TestUtil.split_auxiliary_outputs_on_feature_dim(
                all_but_one_auxiliary_outputs)

            delta_errors = calculate_delta_errors(
                np.expand_dims(y, axis=-1),
                auxiliary_outputs,
                all_but_one_auxiliary_outputs,
                NumpyInterface.binary_crossentropy,
                math_ops=NumpyInterface)

            group_importances = np.mean(delta_errors, axis=0)
    return feature_groups, group_importances
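# A standalone sketch of the recursive splitting scheme used above (illustrative
# only, not part of the cxplain API): repeatedly halve the most important group
# that still has more than one feature, until the group budget is exhausted.
def split_most_important_group(feature_groups, group_importances):
    import numpy as np
    for idx in np.argsort(group_importances)[::-1]:
        if len(feature_groups[idx]) > 1:
            # np.array_split tolerates odd group sizes, unlike np.split above.
            halves = np.array_split(np.asarray(feature_groups[idx]), 2)
            return feature_groups[:idx] + list(halves) + feature_groups[idx + 1:]
    return feature_groups  # Every group is a single feature - nothing to split.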