Example #1 (score: 0)
 def setUp(self):
     """Build ViLBERT twice from one dummy config: once with a pretraining
     head, then (reusing the mutated config) with a 2-label classification
     head. Both built models are kept for the tests."""
     test_utils.setup_proxy()
     setup_imports()
     name = "vilbert"
     dummy = test_utils.dummy_args(model=name)
     cfg = Configuration(dummy).get_config()
     vilbert_cls = registry.get_model_class(name)
     self.vision_feature_size = 1024
     self.vision_target_size = 1279
     # Alias the model's config node so the repeated lookups go away;
     # mutations below still hit the shared config object.
     model_cfg = cfg.model_config[name]
     model_cfg["training_head_type"] = "pretraining"
     model_cfg["visual_embedding_dim"] = self.vision_feature_size
     model_cfg["v_feature_size"] = self.vision_feature_size
     model_cfg["v_target_size"] = self.vision_target_size
     model_cfg["dynamic_attention"] = False
     self.pretrain_model = vilbert_cls(model_cfg)
     self.pretrain_model.build()
     # Flip the same config over to the classification head.
     model_cfg["training_head_type"] = "classification"
     model_cfg["num_labels"] = 2
     self.finetune_model = vilbert_cls(model_cfg)
     self.finetune_model.build()
Example #2 (score: 0)
    def setUp(self):
        """Prepare two VinVL configs — classification (MLP head, CE loss)
        and pretraining (MLM head) — plus a shared sample list."""
        test_utils.setup_proxy()
        setup_imports()
        name = "vinvl"
        dummy = test_utils.dummy_args(model=name, dataset="test")
        base = Configuration(dummy).get_config().model_config[name]
        base.model = name
        base.do_pretraining = False

        # Classification: MLP head over 3129 answer classes, CE ignores -1.
        cls_overrides = {
            "do_pretraining": False,
            "heads": {"mlp": {"num_labels": 3129}},
            "ce_loss": {"ignore_index": -1},
        }
        self.classification_config = OmegaConf.create({**base, **cls_overrides})

        # Pretraining: masked-language-model head.
        mlm_overrides = {
            "do_pretraining": True,
            "heads": {"mlm": {"hidden_size": 768}},
        }
        self.pretraining_config = OmegaConf.create({**base, **mlm_overrides})

        self.sample_list = self._get_sample_list()
 def setUp(self):
     """Load the mmf_transformer dummy config and stamp the model name on it."""
     test_utils.setup_proxy()
     setup_imports()
     self.model_name = "mmf_transformer"
     dummy = test_utils.dummy_args(model=self.model_name)
     self.config = Configuration(dummy).get_config()
     self.config.model_config[self.model_name].model = self.model_name
Example #4 (score: 0)
 def setUp(self):
     """Build the multimodelity_transformer fine-tuning model from dummy args."""
     test_utils.setup_proxy()
     setup_imports()
     self.model_name = "multimodelity_transformer"
     dummy = test_utils.dummy_args(model=self.model_name)
     self.config = Configuration(dummy).get_config()
     # Same config node that build_model receives below.
     model_cfg = self.config.model_config[self.model_name]
     model_cfg.model = self.model_name
     self.finetune_model = build_model(model_cfg)
Example #5 (score: 0)
 def setUp(self):
     """Build the ViLT pretraining model from a dummy test config."""
     test_utils.setup_proxy()
     setup_imports()
     name = "vilt"
     dummy = test_utils.dummy_args(model=name, dataset="test")
     cfg = Configuration(dummy).get_config().model_config[name]
     cfg.model = name
     self.pretrain_model = build_model(cfg)
 def setUp(self):
     """Build a VisualBERT pretraining model with JIT-compatible modules
     swapped in (replace_with_jit) before construction."""
     test_utils.setup_proxy()
     setup_imports()
     replace_with_jit()
     name = "visual_bert"
     dummy = test_utils.dummy_args(model=name)
     cfg = Configuration(dummy).get_config().model_config[name]
     cfg.model = name
     self.pretrain_model = build_model(cfg)
 def setUp(self):
     """Instantiate mmf_transformer through the registry and build it."""
     test_utils.setup_proxy()
     setup_imports()
     self.model_name = "mmf_transformer"
     dummy = test_utils.dummy_args(model=self.model_name)
     self.config = Configuration(dummy).get_config()
     self.model_class = registry.get_model_class(self.model_name)
     model_cfg = self.config.model_config[self.model_name]
     self.finetune_model = self.model_class(model_cfg)
     self.finetune_model.build()
 def setUp(self):
     """Build an MMBT model configured for binary classification."""
     test_utils.setup_proxy()
     setup_imports()
     name = "mmbt"
     dummy = test_utils.dummy_args(model=name)
     cfg = Configuration(dummy).get_config().model_config[name]
     cfg["training_head_type"] = "classification"
     cfg["num_labels"] = 2
     cfg.model = name
     self.finetune_model = build_model(cfg)
Example #9 (score: 0)
    def setUp(self):
        """Create a small (2-layer) ViT so the tests run quickly; patch
        embeddings and the pooling layer are disabled."""
        import transformers.models.vit.modeling_vit as vit

        setup_proxy()
        vit_params = dict(
            layer_norm_eps=1e-4,
            hidden_size=768,
            num_hidden_layers=2,
            do_patch_embeddings=False,
            add_pooling_layer=False,
            return_dict=True,
        )
        self.model = ViTModel(vit.ViTConfig(**vit_params))
Example #10 (score: 0)
    def setUp(self):
        """Build two UNITER models from one base config: a VQA2
        classification model (MLP head, logit BCE loss) and a pretraining
        model with a word-region-alignment (WRA) head."""
        test_utils.setup_proxy()
        setup_imports()
        name = "uniter"
        dummy = test_utils.dummy_args(model=name, dataset="vqa2")
        base = Configuration(dummy).get_config().model_config[name]
        base.model = name
        base.losses = {"vqa2": "logit_bce"}
        base.do_pretraining = False
        base.tasks = "vqa2"

        # Fine-tuning: MLP head over the 3129 VQA2 answer classes.
        cls_overrides = {
            "do_pretraining": False,
            "tasks": "vqa2",
            "heads": {"vqa2": {"type": "mlp", "num_labels": 3129}},
            "losses": {"vqa2": "logit_bce"},
        }

        # Pretraining: word-region alignment head on the "wra" task.
        wra_overrides = {
            "do_pretraining": True,
            "tasks": "wra",
            "heads": {"wra": {"type": "wra"}},
        }

        self.model_for_classification = build_model(
            OmegaConf.create({**base, **cls_overrides})
        )
        self.model_for_pretraining = build_model(
            OmegaConf.create({**base, **wra_overrides})
        )
 def setUp(self):
     """Define one image and one text modality config for MMFTransformer,
     both using identity encoders (features pass through untouched)."""
     test_utils.setup_proxy()
     setup_imports()
     image_kwargs = dict(
         type="image", key="image", embedding_dim=256, position_dim=1, segment_id=0
     )
     self._image_modality_config = MMFTransformerModalityConfig(
         encoder=ImageEncoderFactory.Config(type=ImageEncoderTypes.identity),
         **image_kwargs,
     )
     # NOTE(review): embedding_dim 756 (not 768) matches the original test;
     # confirm it is intentional.
     text_kwargs = dict(
         type="text", key="text", embedding_dim=756, position_dim=128, segment_id=1
     )
     self._text_modality_config = MMFTransformerModalityConfig(
         encoder=TextEncoderFactory.Config(type=TextEncoderTypes.identity),
         **text_kwargs,
     )
Example #12 (score: 0)
    def setUp(self):
        """Assemble the ViLBERT classification config used by the tests.

        Only the config is prepared here (stored on ``self.model_config``);
        no model is built. The original code first set
        ``training_head_type`` to ``"pretraining"`` and immediately
        overwrote it with ``"classification"`` — that dead assignment has
        been removed.
        """
        test_utils.setup_proxy()
        setup_imports()
        model_name = "vilbert"
        args = test_utils.dummy_args(model=model_name)
        configuration = Configuration(args)
        config = configuration.get_config()
        self.vision_feature_size = 1024
        self.vision_target_size = 1279
        model_config = config.model_config[model_name]
        model_config["visual_embedding_dim"] = self.vision_feature_size
        model_config["v_feature_size"] = self.vision_feature_size
        model_config["v_target_size"] = self.vision_target_size
        model_config["dynamic_attention"] = False
        model_config.model = model_name

        model_config["training_head_type"] = "classification"
        model_config["num_labels"] = 2
        self.model_config = model_config
    def test_bert_tokenizer(self):
        """Check BertTokenizer output for normal, empty, long, paired, and
        masked captions.

        All sequences are zero-padded to length 128 (see the
        ``torch.zeros(128, ...)`` fixtures). The expected token ids use the
        BERT uncased special tokens: 101 = [CLS], 102 = [SEP], 103 = [MASK].
        """
        from mmf.datasets.processors.bert_processors import BertTokenizer

        test_utils.setup_proxy()
        # assumes self.config holds a BertTokenizer config — set in setUp
        processor = BertTokenizer(self.config)

        # Test normal caption
        arg = {"text": "This will be a test of tokens?"}
        results = processor(arg)
        expected_input_ids = torch.zeros(128, dtype=torch.long)
        expected_input_ids[:11] = torch.tensor(
            [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102],
            dtype=torch.long,
        )
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        expected_masks = torch.zeros(128, dtype=torch.long)
        expected_masks[:11] = 1  # attention mask covers only the real tokens
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(
            torch.equal(results["segment_ids"], expected_segment_ids))
        self.assertTrue(torch.equal(results["input_mask"], expected_masks))

        # Test empty caption: only [CLS] and [SEP] remain
        arg = {"text": ""}
        results = processor(arg)
        expected_input_ids = torch.zeros(128, dtype=torch.long)
        expected_input_ids[:2] = torch.tensor([101, 102], dtype=torch.long)
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        expected_masks = torch.zeros(128, dtype=torch.long)
        expected_masks[:2] = 1
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(
            torch.equal(results["segment_ids"], expected_segment_ids))
        self.assertTrue(torch.equal(results["input_mask"], expected_masks))

        # Test long caption: truncated to 128 with [SEP] forced at the end
        arg = {
            "text": "I am working for facebook " * 100
        }  # make a long sentence
        results = processor(arg)
        expected_input_ids = [1045, 2572, 2551, 2005, 9130] * 100
        expected_input_ids.insert(0, 101)  # [CLS]
        expected_input_ids = expected_input_ids[:128]
        expected_input_ids[-1] = 102  # [SEP]
        expected_input_ids = torch.tensor(expected_input_ids, dtype=torch.long)
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        expected_masks = torch.ones(128, dtype=torch.long)
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(
            torch.equal(results["segment_ids"], expected_segment_ids))
        self.assertTrue(torch.equal(results["input_mask"], expected_masks))

        # Test two captions: text_b gets segment id 1 (positions 11..16)
        arg = {
            "text_a": "This will be a test of tokens?",
            "text_b": "I am working for facebook",
        }
        results = processor(arg)
        expected_input_ids = torch.zeros(128, dtype=torch.long)
        expected_input_ids[:17] = torch.tensor(
            [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102] +
            [1045, 2572, 2551, 2005, 9130, 102],
            dtype=torch.long,
        )
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        expected_segment_ids[11:17] = 1
        expected_masks = torch.zeros(128, dtype=torch.long)
        expected_masks[:17] = 1
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(
            torch.equal(results["segment_ids"], expected_segment_ids))
        self.assertTrue(torch.equal(results["input_mask"], expected_masks))

        # Test masked caption
        # Probability 1.0 forces masking, so input_ids must now DIFFER from
        # the unmasked expectation (assertFalse below).
        processor._probability = 1.0
        arg = {"text": "This will be a test of tokens?"}
        results = processor(arg)
        expected_input_ids = torch.zeros(128, dtype=torch.long)
        expected_input_ids[:11] = torch.tensor(
            [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102],
            dtype=torch.long,
        )
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        self.assertFalse(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(
            torch.equal(results["segment_ids"], expected_segment_ids))

        # Test [MASK] token is present
        self.assertTrue(103 in results["input_ids"])
 def test_mmbt_pretrained(self):
     """MMBT.from_params should return a usable (non-None) model."""
     test_utils.setup_proxy()
     model = MMBT.from_params()
     self.assertIsNotNone(model)
Example #15 (score: 0)
 def setUp(self):
     """Configure the HTTP proxy before each test."""
     setup_proxy()
    def test_uniter_tokenizer(self):
        """Check UNITERTextTokenizer output for normal, empty, long, paired,
        and masked captions.

        Same fixtures as the plain BERT tokenizer test (length-128 padding;
        101 = [CLS], 102 = [SEP], 103 = [MASK]), plus UNITER's extra
        ``input_ids_masked`` output, which must exist and match the shape of
        ``input_ids`` in every case.
        """
        from mmf.datasets.processors.bert_processors import UNITERTextTokenizer

        test_utils.setup_proxy()
        config = OmegaConf.create(
            {
                "tokenizer_config": {
                    "type": "bert-base-uncased",
                    "params": {"do_lower_case": True},
                },
                "mask_probability": 0.5,
                "max_seq_length": 128,
            }
        )

        processor = UNITERTextTokenizer(config)

        # Test normal caption
        arg = {"text": "This will be a test of tokens?"}
        results = processor(arg)
        expected_input_ids = torch.zeros(128, dtype=torch.long)
        expected_input_ids[:11] = torch.tensor(
            [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102],
            dtype=torch.long,
        )
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        expected_masks = torch.zeros(128, dtype=torch.long)
        expected_masks[:11] = 1
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
        self.assertTrue(torch.equal(results["input_mask"], expected_masks))
        self.assertTrue("input_ids_masked" in results)
        self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)

        # Test empty caption: only [CLS] and [SEP] remain
        arg = {"text": ""}
        results = processor(arg)
        expected_input_ids = torch.zeros(128, dtype=torch.long)
        expected_input_ids[:2] = torch.tensor([101, 102], dtype=torch.long)
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        expected_masks = torch.zeros(128, dtype=torch.long)
        expected_masks[:2] = 1
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
        self.assertTrue(torch.equal(results["input_mask"], expected_masks))
        self.assertTrue("input_ids_masked" in results)
        self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)

        # Test long caption: truncated to 128 with [SEP] forced at the end
        arg = {"text": "I am working for facebook " * 100}  # make a long sentence
        results = processor(arg)
        expected_input_ids = [1045, 2572, 2551, 2005, 9130] * 100
        expected_input_ids.insert(0, 101)  # [CLS]
        expected_input_ids = expected_input_ids[:128]
        expected_input_ids[-1] = 102  # [SEP]
        expected_input_ids = torch.tensor(expected_input_ids, dtype=torch.long)
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        expected_masks = torch.ones(128, dtype=torch.long)
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
        self.assertTrue(torch.equal(results["input_mask"], expected_masks))
        self.assertTrue("input_ids_masked" in results)
        self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)

        # Test two captions: text_b gets segment id 1 (positions 11..16)
        arg = {
            "text_a": "This will be a test of tokens?",
            "text_b": "I am working for facebook",
        }
        results = processor(arg)
        expected_input_ids = torch.zeros(128, dtype=torch.long)
        expected_input_ids[:17] = torch.tensor(
            [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102]
            + [1045, 2572, 2551, 2005, 9130, 102],
            dtype=torch.long,
        )
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        expected_segment_ids[11:17] = 1
        expected_masks = torch.zeros(128, dtype=torch.long)
        expected_masks[:17] = 1
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
        self.assertTrue(torch.equal(results["input_mask"], expected_masks))
        self.assertTrue("input_ids_masked" in results)
        self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)

        # Test masked caption
        # Unlike BertTokenizer, UNITER keeps input_ids unmasked and puts the
        # masked sequence in input_ids_masked, so input_ids still matches.
        processor._probability = 1.0
        arg = {"text": "This will be a test of tokens?"}
        results = processor(arg)
        expected_input_ids = torch.zeros(128, dtype=torch.long)
        expected_input_ids[:11] = torch.tensor(
            [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102],
            dtype=torch.long,
        )
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
        self.assertTrue("input_ids_masked" in results)
        self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)

        # Test [MASK] token is present
        self.assertTrue(103 in results["input_ids_masked"])
    def test_vinvl_tokenizer(self):
        """Check VinVLTextTokenizer output for normal, empty, long, paired,
        masked, and corrupted captions.

        Same fixtures as the UNITER tokenizer test (length-128 padding;
        101 = [CLS], 102 = [SEP], 103 = [MASK]); additionally verifies that
        ``input_ids_corrupt`` is absent while ``corrupt_probability`` is 0
        and present once ``_corrupt_prob`` is forced to 1.0.
        """
        from mmf.datasets.processors.bert_processors import VinVLTextTokenizer

        test_utils.setup_proxy()
        config = OmegaConf.create(
            {
                "tokenizer_config": {
                    "type": "bert-base-uncased",
                    "params": {"do_lower_case": True},
                },
                "mask_probability": 0.5,
                "max_seq_length": 128,
                "corrupt_probability": 0,
            }
        )

        processor = VinVLTextTokenizer(config)

        # Test normal caption
        arg = {"text": "This will be a test of tokens?"}
        results = processor(arg)
        expected_input_ids = torch.zeros(128, dtype=torch.long)
        expected_input_ids[:11] = torch.tensor(
            [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102],
            dtype=torch.long,
        )
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        expected_masks = torch.zeros(128, dtype=torch.long)
        expected_masks[:11] = 1
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
        self.assertTrue(torch.equal(results["input_mask"], expected_masks))
        self.assertTrue("input_ids_masked" in results)
        self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
        self.assertTrue("input_ids_corrupt" not in results)

        # Test empty caption: only [CLS] and [SEP] remain
        arg = {"text": ""}
        results = processor(arg)
        expected_input_ids = torch.zeros(128, dtype=torch.long)
        expected_input_ids[:2] = torch.tensor([101, 102], dtype=torch.long)
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        expected_masks = torch.zeros(128, dtype=torch.long)
        expected_masks[:2] = 1
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
        self.assertTrue(torch.equal(results["input_mask"], expected_masks))
        self.assertTrue("input_ids_masked" in results)
        self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
        self.assertTrue("input_ids_corrupt" not in results)

        # Test long caption: truncated to 128 with [SEP] forced at the end
        arg = {"text": "I am working for facebook " * 100}  # make a long sentence
        results = processor(arg)
        expected_input_ids = [1045, 2572, 2551, 2005, 9130] * 100
        expected_input_ids.insert(0, 101)  # [CLS]
        expected_input_ids = expected_input_ids[:128]
        expected_input_ids[-1] = 102  # [SEP]
        expected_input_ids = torch.tensor(expected_input_ids, dtype=torch.long)
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        expected_masks = torch.ones(128, dtype=torch.long)
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
        self.assertTrue(torch.equal(results["input_mask"], expected_masks))
        self.assertTrue("input_ids_masked" in results)
        self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
        self.assertTrue("input_ids_corrupt" not in results)

        # Test two captions: text_b gets segment id 1 (positions 11..16)
        arg = {
            "text_a": "This will be a test of tokens?",
            "text_b": "I am working for facebook",
        }
        results = processor(arg)
        expected_input_ids = torch.zeros(128, dtype=torch.long)
        expected_input_ids[:17] = torch.tensor(
            [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102]
            + [1045, 2572, 2551, 2005, 9130, 102],
            dtype=torch.long,
        )
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        expected_segment_ids[11:17] = 1
        expected_masks = torch.zeros(128, dtype=torch.long)
        expected_masks[:17] = 1
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
        self.assertTrue(torch.equal(results["input_mask"], expected_masks))
        self.assertTrue("input_ids_masked" in results)
        self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
        self.assertTrue("input_ids_corrupt" not in results)

        # Test masked caption
        # input_ids stay unmasked; the masked sequence is input_ids_masked.
        processor._probability = 1.0
        arg = {"text": "This will be a test of tokens?"}
        results = processor(arg)
        expected_input_ids = torch.zeros(128, dtype=torch.long)
        expected_input_ids[:11] = torch.tensor(
            [101, 2023, 2097, 2022, 1037, 3231, 1997, 19204, 2015, 1029, 102],
            dtype=torch.long,
        )
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue(torch.equal(results["segment_ids"], expected_segment_ids))
        self.assertTrue("input_ids_masked" in results)
        self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)

        # Test [MASK] token is present
        self.assertTrue(103 in results["input_ids_masked"])
        self.assertTrue("input_ids_corrupt" not in results)

        # Test corrupt tokens
        # Force corruption: input_ids_corrupt must swap in either a random
        # caption or random labels (both expectations built below).
        processor._probability = 0.5
        processor._corrupt_prob = 1.0
        arg = {
            "text": "This will be a test of tokens?",
            "text_b": "test tokens",
            "random_captions": ["Something unexpected"],
            "random_labels": ["cat dog icecream"],
        }
        results = processor(arg)
        expected_input_ids = torch.zeros(128, dtype=torch.long)
        expected_input_ids[:15] = torch.tensor(
            [
                101,
                2023,
                2097,
                2022,
                1037,
                3231,
                1997,
                19204,
                2015,
                1029,
                102,
                3231,
                19204,
                2015,
                102,
            ],
            dtype=torch.long,
        )
        expected_segment_ids = torch.zeros(128, dtype=torch.long)
        self.assertTrue(torch.equal(results["input_ids"], expected_input_ids))
        self.assertTrue("input_ids_masked" in results)
        self.assertEqual(results["input_ids"].shape, results["input_ids_masked"].shape)
        self.assertTrue("input_ids_corrupt" in results)
        # Corruption replaced the caption ("Something unexpected" + original
        # labels) ...
        expected_swapped_caption = torch.zeros(128, dtype=torch.long)
        expected_swapped_caption[:8] = torch.tensor(
            [101, 2242, 9223, 102, 3231, 19204, 2015, 102],
            dtype=torch.long,
        )
        # ... or replaced the labels (original caption + "cat dog icecream").
        expected_swapped_labels = torch.zeros(128, dtype=torch.long)
        expected_swapped_labels[:17] = torch.tensor(
            [
                101,
                2023,
                2097,
                2022,
                1037,
                3231,
                1997,
                19204,
                2015,
                1029,
                102,
                4937,
                3899,
                3256,
                16748,
                3286,
                102,
            ],
            dtype=torch.long,
        )
        self.assertTrue(
            torch.equal(results["input_ids_corrupt"], expected_swapped_caption)
            or torch.equal(results["input_ids_corrupt"], expected_swapped_labels)
        )