def test_with_token_characters_indexer(self):
    """Hotflip still works on the entailment model when a character indexer is added."""
    nli_input = {
        "premise": "I always write unit tests for my code.",
        "hypothesis": "One time I didn't write any unit tests for my code.",
    }
    archive = load_archive(
        self.FIXTURES_ROOT / "decomposable_attention" / "serialization" / "model.tar.gz"
    )
    predictor = Predictor.from_archive(archive, "textual-entailment")
    # Pair the character indexer with an embedder that contributes nothing, so the
    # model's forward pass is unaffected while the indexer code path is exercised.
    predictor._dataset_reader._token_indexers["chars"] = TokenCharactersIndexer(
        min_padding_length=1
    )
    predictor._model._text_field_embedder._token_embedders["chars"] = EmptyEmbedder()
    attacker = Hotflip(predictor)
    attacker.initialize()
    result = attacker.attack_from_json(nli_input, "hypothesis", "grad_input_1")
    assert result is not None
    for key in ("final", "original", "outputs"):
        assert key in result
    # Hotflip replaces words without removing any, so lengths must match.
    assert len(result["final"][0]) == len(result["original"])
def test_with_token_characters_indexer(self):
    """Hotflip works with a character indexer, including vocab items outside the fake matrix."""
    payload = {"sentence": "I always write unit tests for my code."}
    archive = load_archive(
        self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
    )
    predictor = Predictor.from_archive(archive)
    # Add a character indexer whose embedder contributes nothing, so only the
    # token-level embeddings carry gradient signal.
    predictor._dataset_reader._token_indexers["chars"] = TokenCharactersIndexer(
        min_padding_length=1
    )
    predictor._model._text_field_embedder._token_embedders["chars"] = EmptyEmbedder()
    attacker = Hotflip(predictor)
    attacker.initialize()
    result = attacker.attack_from_json(payload, "tokens", "grad_input_1")
    assert result is not None
    for key in ("final", "original", "outputs"):
        assert key in result
    # Hotflip replaces words without removing any, so lengths must match.
    assert len(result["final"][0]) == len(result["original"])

    # This checks for a bug that arose with a change in the pytorch API. We want to be sure we
    # can handle the case where we have to re-encode a vocab item because we didn't save it in
    # our fake embedding matrix (see Hotflip docstring for more info).
    attacker = Hotflip(predictor, max_tokens=50)
    attacker.initialize()
    attacker._first_order_taylor(
        grad=torch.rand((10,)).numpy(), token_idx=torch.tensor(60), sign=1
    )
def test_using_squad_model(self):
    """Hotflip on a reading-comprehension model (exercises a different equals method)."""
    qa_input = {
        "question": "OMG, I heard you coded a test that succeeded on its first attempt, is that true?",
        "passage": "Bro, never doubt a coding wizard! I am the king of software, MWAHAHAHA",
    }
    archive = load_archive(FIXTURES_ROOT / "rc" / "bidaf" / "serialization" / "model.tar.gz")
    predictor = Predictor.from_archive(archive, "reading-comprehension")
    attacker = Hotflip(predictor)
    attacker.initialize()
    result = attacker.attack_from_json(qa_input, "question", "grad_input_2")
    print(result)
    assert result is not None
    for key in ("final", "original", "outputs"):
        assert key in result
    # Hotflip replaces words without removing any, so lengths must match.
    assert len(result["final"][0]) == len(result["original"])
    instance = predictor._json_to_instance(qa_input)
    # The attack must have actually changed the question.
    assert instance["question"] != result["final"][0]
    outputs = predictor._model.forward_on_instance(instance)
    labeled = predictor.predictions_to_labeled_instances(instance, outputs)[0]
    original_span_start = labeled["span_start"].sequence_index
    original_span_end = labeled["span_end"].sequence_index
    flipped_span_start = result["outputs"][0]["best_span"][0]
    flipped_span_end = result["outputs"][0]["best_span"][1]
    for position, token in enumerate(instance["question"]):
        token = str(token)
        if token in DEFAULT_IGNORE_TOKENS:
            # Ignore tokens should never be changed by the attack.
            assert token in result["final"][0]
        elif token == result["final"][0][position]:
            # HotFlip keeps changing tokens until either the prediction changes or all
            # tokens have been changed. If there are tokens in the HotFlip final result
            # that were in the original (i.e., not all tokens were flipped), then the
            # prediction should be different.
            assert (
                original_span_start != flipped_span_start
                or original_span_end != flipped_span_end
            )
def test_targeted_attack_from_json(self):
    """A targeted Hotflip attack on a masked-LM should steer the [MASK] toward the target."""
    payload = {"sentence": "The doctor ran to the emergency room to see [MASK] patient."}
    archive = load_archive(
        self.FIXTURES_ROOT / 'masked_language_model' / 'serialization' / 'model.tar.gz'
    )
    predictor = Predictor.from_archive(archive, 'masked_language_model')
    attacker = Hotflip(predictor, vocab_namespace='tokens')
    attacker.initialize()
    result = attacker.attack_from_json(payload, target={'words': ['hi']})
    assert result is not None
    for key in ('final', 'original', 'outputs'):
        assert key in result
    # Hotflip replaces words without removing any, so lengths must match...
    assert len(result['final'][0]) == len(result['original'])
    # ...but the attack must have changed at least one token.
    assert result['final'][0] != result['original']
def test_hotflip(self):
    """Smoke test: Hotflip attack on the basic classifier fixture completes."""
    payload = {"sentence": "I always write unit tests for my code."}
    archive = load_archive(
        self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
    )
    predictor = Predictor.from_archive(archive)
    attacker = Hotflip(predictor)
    attacker.initialize()
    result = attacker.attack_from_json(payload, "tokens", "grad_input_1")
    assert result is not None
    for key in ("final", "original", "outputs"):
        assert key in result
    # Hotflip replaces words without removing any, so lengths must match.
    assert len(result["final"][0]) == len(result["original"])
def test_targeted_attack_from_json(self):
    """A targeted Hotflip attack on a masked-LM should steer the [MASK] toward the target."""
    payload = {"sentence": "The doctor ran to the emergency room to see [MASK] patient."}
    archive = load_archive(
        self.FIXTURES_ROOT / "masked_language_model" / "serialization" / "model.tar.gz"
    )
    predictor = Predictor.from_archive(archive, "masked_language_model")
    attacker = Hotflip(predictor, vocab_namespace="tokens")
    attacker.initialize()
    result = attacker.attack_from_json(payload, target={"words": ["hi"]})
    assert result is not None
    for key in ("final", "original", "outputs"):
        assert key in result
    # Hotflip replaces words without removing any, so lengths must match...
    assert len(result["final"][0]) == len(result["original"])
    # ...but the attack must have changed at least one token.
    assert result["final"][0] != result["original"]
def test_interpret_works_with_custom_embedding_layer(self):
    """Hotflip should run against a fake model whose embedding layer is non-standard."""
    payload = {"sentence": "I always write unit tests for my code"}
    words = payload["sentence"].split(" ")
    vocab = Vocabulary()
    vocab.add_tokens_to_namespace(words)
    model = FakeModelForTestingInterpret(vocab, max_tokens=len(words))
    predictor = FakePredictorForTestingInterpret(model, TextClassificationJsonReader())
    attacker = Hotflip(predictor)
    attacker.initialize()
    result = attacker.attack_from_json(payload, "tokens", "grad_input_1")
    assert result is not None
    for key in ("final", "original", "outputs"):
        assert key in result
    # Hotflip replaces words without removing any, so lengths must match.
    assert len(result["final"][0]) == len(result["original"])
def test_hotflip(self):
    """Smoke test: Hotflip attack on the entailment fixture completes."""
    nli_input = {
        "premise": "I always write unit tests for my code.",
        "hypothesis": "One time I didn't write any unit tests for my code."
    }
    archive = load_archive(
        self.FIXTURES_ROOT / 'decomposable_attention' / 'serialization' / 'model.tar.gz'
    )
    predictor = Predictor.from_archive(archive, 'textual-entailment')
    attacker = Hotflip(predictor)
    attacker.initialize()
    result = attacker.attack_from_json(nli_input, 'hypothesis', 'grad_input_1')
    assert result is not None
    for key in ('final', 'original', 'outputs'):
        assert key in result
    # Hotflip replaces words without removing any, so lengths must match.
    assert len(result['final'][0]) == len(result['original'])
def test_hotflip(self):
    """Smoke test: Hotflip attack on the entailment fixture completes."""
    nli_input = {
        "premise": "I always write unit tests for my code.",
        "hypothesis": "One time I didn't write any unit tests for my code.",
    }
    archive = load_archive(
        self.FIXTURES_ROOT / "decomposable_attention" / "serialization" / "model.tar.gz"
    )
    predictor = Predictor.from_archive(archive, "textual-entailment")
    attacker = Hotflip(predictor)
    attacker.initialize()
    result = attacker.attack_from_json(nli_input, "hypothesis", "grad_input_1")
    assert result is not None
    for key in ("final", "original", "outputs"):
        assert key in result
    # Hotflip replaces words without removing any, so lengths must match.
    assert len(result["final"][0]) == len(result["original"])
def test_with_token_characters_indexer(self):
    """Hotflip still works on the classifier when a character indexer is added."""
    payload = {"sentence": "I always write unit tests for my code."}
    archive = load_archive(
        self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
    )
    predictor = Predictor.from_archive(archive)
    # Pair the character indexer with an embedder that contributes nothing, so the
    # model's forward pass is unaffected while the indexer code path is exercised.
    predictor._dataset_reader._token_indexers["chars"] = TokenCharactersIndexer(
        min_padding_length=1
    )
    predictor._model._text_field_embedder._token_embedders["chars"] = EmptyEmbedder()
    attacker = Hotflip(predictor)
    attacker.initialize()
    result = attacker.attack_from_json(payload, "tokens", "grad_input_1")
    assert result is not None
    for key in ("final", "original", "outputs"):
        assert key in result
    # Hotflip replaces words without removing any, so lengths must match.
    assert len(result["final"][0]) == len(result["original"])
def test_hotflip(self):
    """End-to-end Hotflip attack: first on entailment, then on a SQuAD model."""
    nli_input = {
        "premise": "I always write unit tests for my code.",
        "hypothesis": "One time I didn't write any unit tests for my code."
    }
    archive = load_archive(
        self.FIXTURES_ROOT / 'decomposable_attention' / 'serialization' / 'model.tar.gz'
    )
    predictor = Predictor.from_archive(archive, 'textual-entailment')
    attacker = Hotflip(predictor)
    attacker.initialize()
    result = attacker.attack_from_json(nli_input, 'hypothesis', 'grad_input_1')
    assert result is not None
    for key in ('final', 'original', 'outputs'):
        assert key in result
    # Hotflip replaces words without removing any, so lengths must match.
    assert len(result['final'][0]) == len(result['original'])

    # test using SQuAD model (tests different equals method)
    qa_input = {
        "question": "OMG, I heard you coded a test that succeeded on its first attempt, is that true?",
        "passage": "Bro, never doubt a coding wizard! I am the king of software, MWAHAHAHA"
    }
    archive = load_archive(self.FIXTURES_ROOT / 'bidaf' / 'serialization' / 'model.tar.gz')
    predictor = Predictor.from_archive(archive, 'machine-comprehension')
    attacker = Hotflip(predictor)
    attacker.initialize()
    ignore_tokens = ["@@NULL@@", '.', ',', ';', '!', '?']
    result = attacker.attack_from_json(qa_input, 'question', 'grad_input_2')
    assert result is not None
    for key in ('final', 'original', 'outputs'):
        assert key in result
    # Hotflip replaces words without removing any, so lengths must match.
    assert len(result['final'][0]) == len(result['original'])
    instance = predictor._json_to_instance(qa_input)
    # The attack must have actually changed the question.
    assert instance['question'] != result['final'][0]
    outputs = predictor._model.forward_on_instance(instance)
    labeled = predictor.predictions_to_labeled_instances(instance, outputs)[0]
    original_span_start = labeled['span_start'].sequence_index
    original_span_end = labeled['span_end'].sequence_index
    flipped_span_start = result['outputs']['best_span'][0]
    flipped_span_end = result['outputs']['best_span'][1]
    for token in instance['question']:
        token = str(token)
        if token in ignore_tokens:
            # Ignore tokens should never be changed by the attack.
            assert token in result['final'][0]
        elif token in result['final'][0]:
            # HotFlip keeps changing tokens until either the predictions changes or all
            # tokens have been changed. If there are tokens in the HotFlip final result
            # that were in the original (i.e., not all tokens were flipped), then the
            # prediction should be different.
            assert (
                original_span_start != flipped_span_start
                or original_span_end != flipped_span_end
            )