def test_multiple_tensors_compare_with_exp_wo_mult_by_inputs(self) -> None: net = BasicModel_MultiLayer(multi_input_module=True) inp = torch.tensor([[0.0, 100.0, 0.0]]) base = torch.tensor([[0.0, 0.0, 0.0]]) target_layer = net.multi_relu layer_ig = LayerIntegratedGradients(net, target_layer) layer_ig_wo_mult_by_inputs = LayerIntegratedGradients( net, target_layer, multiply_by_inputs=False ) layer_act = LayerActivation(net, target_layer) attributions = layer_ig.attribute(inp, target=0) attributions_wo_mult_by_inputs = layer_ig_wo_mult_by_inputs.attribute( inp, target=0 ) inp_minus_baseline_activ = tuple( inp_act - base_act for inp_act, base_act in zip( layer_act.attribute(inp), layer_act.attribute(base) ) ) assertTensorTuplesAlmostEqual( self, tuple( attr_wo_mult * inp_min_base for attr_wo_mult, inp_min_base in zip( attributions_wo_mult_by_inputs, inp_minus_baseline_activ ) ), attributions, )
def _assert_compare_with_emb_patching(self, input, baseline, additional_args): model = BasicEmbeddingModel(nested_second_embedding=True) lig = LayerIntegratedGradients(model, model.embedding1) attributions, delta = lig.attribute( input, baselines=baseline, additional_forward_args=additional_args, return_convergence_delta=True, ) # now let's interpret with standard integrated gradients and # the embeddings for monkey patching interpretable_embedding = configure_interpretable_embedding_layer( model, "embedding1" ) input_emb = interpretable_embedding.indices_to_embeddings(input) baseline_emb = interpretable_embedding.indices_to_embeddings(baseline) ig = IntegratedGradients(model) attributions_with_ig, delta_with_ig = ig.attribute( input_emb, baselines=baseline_emb, additional_forward_args=additional_args, target=0, return_convergence_delta=True, ) remove_interpretable_embedding_layer(model, interpretable_embedding) assertArraysAlmostEqual(attributions, attributions_with_ig) assertArraysAlmostEqual(delta, delta_with_ig)
def _assert_compare_with_layer_conductance( self, model: Module, input: Tensor, attribute_to_layer_input: bool = False): lc = LayerConductance(model, cast(Module, model.linear2)) # For large number of steps layer conductance and layer integrated gradients # become very close attribution, delta = lc.attribute( input, target=0, n_steps=1500, return_convergence_delta=True, attribute_to_layer_input=attribute_to_layer_input, ) lig = LayerIntegratedGradients(model, cast(Module, model.linear2)) attributions2, delta2 = lig.attribute( input, target=0, n_steps=1500, return_convergence_delta=True, attribute_to_layer_input=attribute_to_layer_input, ) assertArraysAlmostEqual(attribution, attributions2, 0.01) assertArraysAlmostEqual(delta, delta2, 0.05)
def test_multiple_layers_multiple_inputs_shared_input(self) -> None: input1 = torch.randn(5, 3) input2 = torch.randn(5, 3) input3 = torch.randn(5, 3) inputs = (input1, input2, input3) baseline = tuple(torch.zeros_like(inp) for inp in inputs) net = BasicModel_MultiLayer_TrueMultiInput() lig = LayerIntegratedGradients(net, layer=[net.m1, net.m234]) ig = IntegratedGradients(net) # test layer inputs attribs_inputs = lig.attribute(inputs, baseline, target=0, attribute_to_layer_input=True) attribs_inputs_regular_ig = ig.attribute(inputs, baseline, target=0) self.assertIsInstance(attribs_inputs, list) self.assertEqual(len(attribs_inputs), 2) self.assertIsInstance(attribs_inputs[0], Tensor) self.assertIsInstance(attribs_inputs[1], tuple) self.assertEqual(len(attribs_inputs[1]), 3) assertTensorTuplesAlmostEqual( self, # last input for second layer is first input => # add the attributions ( attribs_inputs[0] + attribs_inputs[1][-1], ) + attribs_inputs[1][0:-1], attribs_inputs_regular_ig, delta=1e-5, ) # test layer outputs attribs = lig.attribute(inputs, baseline, target=0) ig = IntegratedGradients(lambda x, y: x + y) attribs_ig = ig.attribute( (net.m1(input1), net.m234(input2, input3, input1, 1)), (net.m1(baseline[0]), net.m234(baseline[1], baseline[2], baseline[1], 1)), target=0, ) assertTensorTuplesAlmostEqual(self, attribs, attribs_ig, delta=1e-5)
def test_multiple_layers_multiple_input_outputs(self) -> None: # test with multiple layers, where one layer accepts multiple inputs input1 = torch.randn(5, 3) input2 = torch.randn(5, 3) input3 = torch.randn(5, 3) input4 = torch.randn(5, 3) inputs = (input1, input2, input3, input4) baseline = tuple(torch.zeros_like(inp) for inp in inputs) net = BasicModel_MultiLayer_TrueMultiInput() lig = LayerIntegratedGradients(net, layer=[net.m1, net.m234]) ig = IntegratedGradients(net) # test layer inputs attribs_inputs = lig.attribute(inputs, baseline, target=0, attribute_to_layer_input=True) attribs_inputs_regular_ig = ig.attribute(inputs, baseline, target=0) self.assertIsInstance(attribs_inputs, list) self.assertEqual(len(attribs_inputs), 2) self.assertIsInstance(attribs_inputs[0], Tensor) self.assertIsInstance(attribs_inputs[1], tuple) self.assertEqual(len(attribs_inputs[1]), 3) assertTensorTuplesAlmostEqual( self, (attribs_inputs[0], ) + attribs_inputs[1], attribs_inputs_regular_ig, delta=1e-7, ) # test layer outputs attribs = lig.attribute(inputs, baseline, target=0) ig = IntegratedGradients(lambda x, y: x + y) attribs_ig = ig.attribute( (net.m1(input1), net.m234(input2, input3, input4, 1)), (net.m1(baseline[0]), net.m234(baseline[1], baseline[2], baseline[3], 1)), target=0, ) assertTensorTuplesAlmostEqual(self, attribs, attribs_ig, delta=1e-7)
def _assert_compare_with_expected( self, model: Module, target_layer: Module, test_input: Union[Tensor, Tuple[Tensor, ...]], expected_ig: Tuple[List[List[float]], ...], additional_input: Any = None, ): layer_ig = LayerIntegratedGradients(model, target_layer) attributions = layer_ig.attribute( test_input, target=0, additional_forward_args=additional_input ) assertTensorTuplesAlmostEqual(self, attributions, expected_ig, delta=0.01)
def _assert_compare_with_expected( self, model, target_layer, test_input, expected_ig, additional_input=None, ): layer_ig = LayerIntegratedGradients(model, target_layer) attributions = layer_ig.attribute( test_input, target=0, additional_forward_args=additional_input) assertTensorTuplesAlmostEqual(self, attributions, expected_ig, delta=0.01)
def _assert_compare_with_emb_patching( self, input: Union[Tensor, Tuple[Tensor, ...]], baseline: Union[Tensor, Tuple[Tensor, ...]], additional_args: Union[None, Tuple[Tensor, ...]], multiply_by_inputs: bool = True, multiple_emb: bool = False, ): model = BasicEmbeddingModel(nested_second_embedding=True) if multiple_emb: module_list: List[Module] = [model.embedding1, model.embedding2] lig = LayerIntegratedGradients( model, module_list, multiply_by_inputs=multiply_by_inputs, ) else: lig = LayerIntegratedGradients( model, model.embedding1, multiply_by_inputs=multiply_by_inputs) attributions, delta = lig.attribute( input, baselines=baseline, additional_forward_args=additional_args, return_convergence_delta=True, ) # now let's interpret with standard integrated gradients and # the embeddings for monkey patching e1 = configure_interpretable_embedding_layer(model, "embedding1") e1_input_emb = e1.indices_to_embeddings( input[0] if multiple_emb else input) e1_baseline_emb = e1.indices_to_embeddings( baseline[0] if multiple_emb else baseline) input_emb = e1_input_emb baseline_emb = e1_baseline_emb e2 = None if multiple_emb: e2 = configure_interpretable_embedding_layer(model, "embedding2") e2_input_emb = e2.indices_to_embeddings(*input[1:]) e2_baseline_emb = e2.indices_to_embeddings(*baseline[1:]) input_emb = (e1_input_emb, e2_input_emb) baseline_emb = (e1_baseline_emb, e2_baseline_emb) ig = IntegratedGradients(model, multiply_by_inputs=multiply_by_inputs) attributions_with_ig, delta_with_ig = ig.attribute( input_emb, baselines=baseline_emb, additional_forward_args=additional_args, target=0, return_convergence_delta=True, ) remove_interpretable_embedding_layer(model, e1) if e2 is not None: remove_interpretable_embedding_layer(model, e2) self.assertEqual(isinstance(attributions_with_ig, tuple), isinstance(attributions, list)) self.assertTrue( isinstance(attributions_with_ig, tuple) if multiple_emb else not isinstance(attributions_with_ig, tuple)) # convert to tuple for comparison if not isinstance(attributions_with_ig, tuple): attributions = (attributions, ) attributions_with_ig = (attributions_with_ig, ) else: # convert list to tuple self.assertIsInstance(attributions, list) attributions = tuple(attributions) for attr_lig, attr_ig in zip(attributions, attributions_with_ig): self.assertEqual(attr_lig.shape, attr_ig.shape) assertArraysAlmostEqual(attributions, attributions_with_ig) if multiply_by_inputs: assertArraysAlmostEqual(delta, delta_with_ig)