Beispiel #1
0
    def test_forward(self):
        """Run one forward pass of CNNLSTM on a synthetic CLEVR training sample.

        Verifies that the built model is a ``torch.nn.Module``, that the
        ``train/clevr/logit_bce`` loss matches the recorded value and that the
        scores have shape ``(1, 32)``.
        """
        network = CNNLSTM(self.config.model_config.cnn_lstm)
        network.build()
        network.init_losses()
        self.assertIsInstance(network, torch.nn.Module)

        # NOTE: the random draws stay in text -> image -> targets order; the
        # hard-coded loss presumably relies on a seed fixed outside this
        # method (not visible here).
        sample = Sample()
        sample.text = torch.randint(1, 79, (10, ), dtype=torch.long)
        sample.image = torch.randn(3, 320, 480)
        sample.targets = torch.randn(32)

        batch = SampleList([sample])
        batch.dataset_type = "train"
        batch.dataset_name = "clevr"

        batch = batch.to(get_current_device())
        network = network.to(get_current_device())

        result = network(batch)
        predicted_scores = result["scores"]
        bce_loss = result["losses"]["train/clevr/logit_bce"]

        np.testing.assert_almost_equal(bce_loss.item(), 19.2635, decimal=4)
        self.assertEqual(predicted_scores.size(), torch.Size((1, 32)))
    def test_pretrained_model(self):
        """Forward a random single-item batch through ``self.pretrain_model``.

        Every entry of ``lm_label_ids`` is -1, and the test asserts that the
        reported ``random/test/masked_lm_loss`` equals 0.
        """
        seq_shape = (1, 128)
        batch = SampleList()

        token_ids = torch.randint(low=0, high=BERT_VOCAB_SIZE, size=seq_shape).long()
        batch.add_field("input_ids", token_ids)
        batch.add_field("input_mask", torch.ones(seq_shape).long())
        batch.add_field("segment_ids", torch.zeros(1, 128).long())
        batch.add_field("image_feature_0", torch.rand((1, 100, 2048)).float())
        batch.add_field(
            "lm_label_ids", torch.full(seq_shape, -1, dtype=torch.long)
        )

        device = get_current_device()
        self.pretrain_model.eval()
        self.pretrain_model = self.pretrain_model.to(device)
        batch = batch.to(device)

        batch.dataset_name = "random"
        batch.dataset_type = "test"
        with torch.no_grad():
            result = self.pretrain_model(batch)

        loss_key = "random/test/masked_lm_loss"
        self.assertIn("losses", result)
        self.assertIn(loss_key, result["losses"])
        self.assertTrue(result["losses"][loss_key] == 0)
Beispiel #3
0
    def __call__(self, sample_list, *args, **kwargs):
        """Run the parent ``__call__`` and make sure the output carries ``"losses"``.

        Args:
            sample_list: Batch forwarded to the parent ``__call__``. Unless
                ``self._is_pl_enabled`` is set, it is first passed through
                ``to_device`` with the current device.
            *args: Forwarded unchanged to the parent ``__call__``.
            **kwargs: Forwarded unchanged to the parent ``__call__``.

        Returns:
            The model output. When ``self.is_pretrained`` is truthy it is
            returned untouched; otherwise it is a mapping with a ``"losses"``
            entry (kept if present, computed by ``self.losses`` if that
            attribute exists, or an empty dict).
        """
        if not self._is_pl_enabled:
            # Put the batch on the current device before the forward pass.
            sample_list = to_device(sample_list, get_current_device())

        output = super().__call__(sample_list, *args, **kwargs)

        # Pretrained models hand back their raw output.
        if self.is_pretrained:
            return output

        assert isinstance(
            output, collections.abc.Mapping
        ), "A dict must be returned from the forward of the model."

        if "losses" not in output:
            # The model did not compute losses itself: fall back to the
            # attached loss module, or to an empty dict when there is none.
            if hasattr(self, "losses"):
                output["losses"] = self.losses(sample_list, output)
            else:
                output["losses"] = {}
            return output

        # Losses were produced by the model; warn once and only validate.
        if not self._logged_warning["losses_present"]:
            warnings.warn(
                "'losses' already present in model output. "
                "No calculation will be done in base model."
            )
            self._logged_warning["losses_present"] = True

        assert isinstance(
            output["losses"], collections.abc.Mapping
        ), "'losses' must be a dict."

        return output
Beispiel #4
0
    def test_pretrained_model(self):
        """Check the output shape of ``UNITERModelBase`` on random inputs.

        Text tokens and image features are concatenated along the sequence
        axis in the asserted shape: 25 tokens + 100 features = 125.
        """
        batch_size, feature_count, sentence_len = 8, 100, 25
        feature_dim, position_dim = 1024, 7

        device = get_current_device()
        uniter = UNITERModelBase(img_dim=feature_dim)
        uniter.eval()
        uniter = uniter.to(device)

        token_ids = torch.ones((batch_size, sentence_len), dtype=torch.long)
        token_ids = token_ids.to(device)
        features = torch.rand((batch_size, feature_count, feature_dim)).to(device)
        feature_positions = torch.rand(
            (batch_size, feature_count, position_dim)).to(device)
        token_positions = torch.arange(
            0, token_ids.size(1), dtype=torch.long, device=device)
        token_positions = token_positions.unsqueeze(0)
        mask = torch.ones((batch_size, sentence_len + feature_count)).to(device)

        with torch.no_grad():
            encoded = uniter(token_ids, token_positions, features,
                             feature_positions, mask)
            final_layer = encoded.final_layer

        self.assertEqual(final_layer.shape, torch.Size([8, 125, 768]))
Beispiel #5
0
    def _get_sample_list(self):
        """Build a synthetic batch of text and image-feature inputs.

        Returns:
            SampleList: a batch of 8 items (25 tokens, 100 image features of
            size 2048, 3129-dim targets) moved to the current device, with
            ``dataset_name`` and ``dataset_type`` both set to ``"test"``.
        """
        bs = 8
        num_feats = 100
        max_sentence_len = 25
        img_dim = 2048
        cls_dim = 3129
        input_ids = torch.ones((bs, max_sentence_len), dtype=torch.long)
        input_mask = torch.ones((bs, max_sentence_len), dtype=torch.long)
        image_feat = torch.rand((bs, num_feats, img_dim))
        position_ids = (torch.arange(
            0, max_sentence_len, dtype=torch.long,
            device=image_feat.device).unsqueeze(0).expand(bs, -1))
        img_pos_feat = torch.rand((bs, num_feats, 7))
        attention_mask = torch.zeros((bs, max_sentence_len + num_feats),
                                     dtype=torch.long)
        image_mask = torch.zeros((bs, num_feats), dtype=torch.long)
        targets = torch.rand((bs, cls_dim))

        sample_list = SampleList()
        sample_list.add_field("input_ids", input_ids)
        sample_list.add_field("input_mask", input_mask)
        sample_list.add_field("image_feat", image_feat)
        sample_list.add_field("img_pos_feat", img_pos_feat)
        sample_list.add_field("attention_mask", attention_mask)
        sample_list.add_field("image_mask", image_mask)
        sample_list.add_field("targets", targets)
        sample_list.add_field("dataset_name", "test")
        sample_list.add_field("dataset_type", "test")
        sample_list.add_field("position_ids", position_ids)

        # Return the result of ``.to``: the call was previously made without
        # using its return value. NOTE(review): ``SampleList.to`` appears to
        # return a moved copy rather than mutate the receiver -- confirm
        # against mmf.common.sample.
        return sample_list.to(get_current_device())
Beispiel #6
0
    def _get_sample_list(self):
        """Create a synthetic batch with text ids, image features and image info.

        Returns:
            SampleList: batch of 8 items on the current device holding
            ``input_ids``, ``image_feature_0``, ``input_mask``,
            ``image_info_0``, ``targets`` and ``is_correct``.
        """
        batch_size, feature_count, sentence_len = 8, 100, 25
        feature_dim, answer_dim = 2048, 3129
        text_shape = (batch_size, sentence_len)

        token_ids = torch.ones(text_shape, dtype=torch.long)
        token_mask = torch.ones(text_shape, dtype=torch.long)
        features = torch.rand((batch_size, feature_count, feature_dim))

        # Random draws keep the order bbox -> height -> width -> targets.
        info = {
            "max_features": torch.ones((batch_size, feature_count)) * feature_count,
            "bbox": torch.randint(50, 200, (batch_size, feature_count, 4)).float(),
            "image_height": torch.randint(100, 300, (batch_size, )),
            "image_width": torch.randint(100, 300, (batch_size, )),
        }
        answer_targets = torch.rand((batch_size, answer_dim))
        correct_flags = torch.ones((batch_size, ), dtype=torch.long)

        batch = SampleList()
        named_fields = (
            ("input_ids", token_ids),
            ("image_feature_0", features),
            ("input_mask", token_mask),
            ("image_info_0", info),
            ("targets", answer_targets),
            ("is_correct", correct_flags),
        )
        for field_name, field_value in named_fields:
            batch.add_field(field_name, field_value)

        return batch.to(get_current_device())
 def __init__(self, dataset_name, config, dataset_type, num_examples, *args,
              **kwargs):
     """Set up a numeric stand-in dataset with ``num_examples`` entries.

     ``features`` and ``annotations`` are two separate lists holding
     ``0.0 .. num_examples - 1`` as floats. ``config``, ``dataset_type``
     and the extra arguments are accepted but not used in this method.
     """
     self._dataset_name = dataset_name
     self._device = get_current_device()
     self.num_examples = num_examples
     self.features = list(map(float, range(num_examples)))
     self.annotations = list(map(float, range(num_examples)))
Beispiel #8
0
    def test_vinvl_for_classification(self):
        """Build the classification model and check it reports a ``ce`` loss."""
        classifier = build_model(self.classification_config)
        classifier.eval()
        classifier = classifier.to(get_current_device())

        with torch.no_grad():
            result = classifier(self.sample_list)

        self.assertIn("losses", result)
        self.assertIn("ce", result["losses"])
Beispiel #9
0
 def test_current_device(self):
     """Check ``get_current_device`` after seed and device configuration.

     Expects ``"cuda:0"`` when CUDA is available and a CPU ``torch.device``
     otherwise.
     """
     settings = {"training": {"seed": 1}, "distributed": {"init_method": None}}
     device_mock = DeviceMock(OmegaConf.create(settings))
     device_mock.configure_seed()
     device_mock.configure_device()

     current = get_current_device()
     if torch.cuda.is_available():
         expected = "cuda:0"
     else:
         expected = torch.device(type="cpu")
     self.assertEqual(current, expected)
Beispiel #10
0
 def __init__(self, dataset_name, config, dataset_type="train", *args, **kwargs):
     """Store the dataset identity, configuration and current device.

     Args:
         dataset_name: Name kept in ``self._dataset_name``.
         config: Dataset configuration; ``None`` is replaced by an empty dict.
         dataset_type: Kept in ``self._dataset_type`` (default ``"train"``).
         *args: Accepted but unused here.
         **kwargs: Accepted but unused here.
     """
     super().__init__()
     self.config = {} if config is None else config
     self._dataset_name = dataset_name
     self._dataset_type = dataset_type
     self._global_config = registry.get("config")

     current_device = get_current_device()
     self._device = current_device
     # The device may be a string or a device object, so test its text form.
     self.use_cuda = "cuda" in str(current_device)
Beispiel #11
0
    def calculate(self,
                  sample_list,
                  model_output,
                  execute_on_master_only=True,
                  *args,
                  **kwargs):
        """Compute COCO-style detection mAP@IoU=0.50:0.95 for the predictions.

        Args:
            sample_list (SampleList): SampleList provided by DataLoader for
                                the current iteration; its ``dataset_name``
                                and ``dataset_type`` select the annotation
                                file from ``self.dataset_json_files``.
            model_output (Dict): Output returned by the model. It must expose
                                a ``prediction_report`` field holding the
                                detection predictions.
            execute_on_master_only (bool): When True, only the master process
                                evaluates and the result is broadcast to the
                                others (to avoid wasting computation and CPU
                                OOM). Default: True.

        Returns:
            torch.FloatTensor: COCO-style mAP@IoU=0.50:0.95.

        """
        # The detection mAP is computed over dataset-level predictions that
        # are *already* gathered from all nodes, so the evaluation should only
        # happen in one process and be broadcast to the rest (concurrent mAP
        # evaluation could otherwise exhaust CPU memory).
        from mmf.utils.distributed import broadcast_tensor, is_master
        from mmf.utils.general import get_current_device
        from pycocotools.coco import COCO
        from pycocotools.cocoeval import COCOeval

        device = get_current_device()

        if not execute_on_master_only or is_master():
            annotation_file = self.dataset_json_files[sample_list.dataset_name][
                sample_list.dataset_type]
            ground_truth = COCO(annotation_file)
            detections = ground_truth.loadRes(model_output.prediction_report)

            evaluator = COCOeval(ground_truth, detections, "bbox")
            evaluator.evaluate()
            evaluator.accumulate()
            evaluator.summarize()
            mean_ap = torch.tensor(evaluator.stats[0],
                                   dtype=torch.float,
                                   device=device)
        else:
            # Placeholder value that is overwritten by the broadcast below.
            mean_ap = torch.tensor(-1, dtype=torch.float, device=device)

        if execute_on_master_only:
            mean_ap = broadcast_tensor(mean_ap, src=0)
        return mean_ap
Beispiel #12
0
def compare_torchscript_transformer_models(model, vocab_size):
    """Compare eager and TorchScript scores of ``model`` on one random sample.

    Args:
        model: Model to run eagerly and after ``torch.jit.script``.
        vocab_size: Exclusive upper bound for the random ``input_ids``.

    Returns:
        bool: True when both runs produce exactly equal ``"scores"``.
    """
    seq_len = 128
    sample = Sample()
    sample.input_ids = torch.randint(low=0, high=vocab_size, size=(seq_len,)).long()
    sample.input_mask = torch.ones(seq_len).long()
    sample.segment_ids = torch.zeros(seq_len).long()
    sample.image_feature_0 = torch.rand((1, 100, 2048)).float()
    sample.image = torch.rand((3, 300, 300)).float()
    batch = SampleList([sample])

    model = model.to(get_current_device())
    batch = batch.to(get_current_device())

    with torch.no_grad():
        eager_scores = model(batch)["scores"]

    scripted = torch.jit.script(model)
    with torch.no_grad():
        scripted_scores = scripted(batch)["scores"]

    return torch.equal(eager_scores, scripted_scores)
Beispiel #13
0
    def test_vinvl_for_pretraining(self):
        """Build the pretraining model and check both pretraining losses exist."""
        pretrainer = build_model(self.pretraining_config)
        pretrainer.eval()
        pretrainer = pretrainer.to(get_current_device())

        with torch.no_grad():
            result = pretrainer(self.sample_list)

        self.assertIn("losses", result)
        reported = result["losses"]
        self.assertIn("masked_lm_loss", reported)
        self.assertIn("three_way_contrastive_loss", reported)
Beispiel #14
0
    def forward(self, image_path: str, text: dict, image_format: str = "path"):
        """Answer a question about one image.

        Args:
            image_path: Local file path when ``image_format`` is ``"path"``,
                or a URL when it is ``"url"``.
            text: Input handed to ``self.processor["text_processor"]``.
            image_format: Either ``"path"`` or ``"url"``.

        Returns:
            The answer produced by the answer processor's ``idx2word`` for the
            first entry returned by the output processor.

        Raises:
            ValueError: If ``image_format`` is not ``"path"`` or ``"url"``.
        """
        # Fail fast with a clear message; an unknown format used to surface
        # later as an UnboundLocalError on ``img``.
        if image_format not in ("path", "url"):
            raise ValueError(
                f"Unsupported image_format {image_format!r}; "
                "expected 'path' or 'url'."
            )

        text_output = self.processor["text_processor"](text)
        if image_format == "path":
            img = np.array(Image.open(image_path))
        else:
            img = np.array(
                Image.open(requests.get(image_path, stream=True).raw))
        img = torch.as_tensor(img)

        if self.model_items["config"].image_feature_encodings.type == "frcnn":
            max_detect = self.model_items[
                "config"].image_feature_encodings.params.max_detections
            image_preprocessed, sizes, scales_yx = self.processor[
                "image_processor"](img)
            image_output = self.feature_extractor(
                image_preprocessed,
                sizes=sizes,
                scales_yx=scales_yx,
                padding=None,
                max_detections=max_detect,
                return_tensors="pt",
            )
            # Only the first element of the extractor output is used.
            image_output = image_output[0]
        else:
            image_preprocessed = self.processor["image_processor"](img)
            image_output = self.feature_extractor(image_preprocessed)

        sample = Sample(text_output)
        sample.image_feature_0 = image_output
        sample_list = SampleList([sample])
        sample_list = sample_list.to(get_current_device())
        self.model = self.model.to(get_current_device())
        output = self.model(sample_list)
        # The report is given an ``id`` taken from the first input token id.
        sample_list.id = [sample_list.input_ids[0][0]]
        report = Report(sample_list, output)
        answers = self.processor["output_processor"](report)
        answer = self.processor["answer_processor"].idx2word(
            answers[0]["answer"])

        return answer
Beispiel #15
0
 def setUp(self):
     """Create the random encoder outputs and padding mask used by the tests."""
     self.k = 2
     self.batch_size = 64
     self.num_tokens = 10
     self.embedding_size = 768
     self.token_len = 10
     self.device = get_current_device()

     layer_shape = (self.batch_size, self.token_len, self.embedding_size)
     layers = []
     for _ in range(3):
         layers.append(torch.randn(*layer_shape).to(self.device))
     self.encoded_layers = layers

     mask = torch.randn(self.batch_size, self.token_len)
     self.pad_mask = mask.to(self.device)
Beispiel #16
0
    def test_uniter_for_classification(self):
        """Run the classification model on a VQA2 test batch and check its loss key."""
        device = get_current_device()
        self.model_for_classification.eval()
        self.model_for_classification = self.model_for_classification.to(device)

        batch = self._get_sample_list()
        batch.dataset_name = "vqa2"
        batch.dataset_type = "test"

        with torch.no_grad():
            result = self.model_for_classification(batch)

        self.assertIn("losses", result)
        self.assertIn("test/vqa2/logit_bce", result["losses"])
Beispiel #17
0
    def test_uniter_for_pretraining(self):
        """Run the pretraining model with ``tasks`` set to ``"wra"`` and check ``wra_loss``."""
        device = get_current_device()
        self.model_for_pretraining.eval()
        self.model_for_pretraining = self.model_for_pretraining.to(device)

        batch = self._get_sample_list()
        batch["tasks"] = "wra"
        batch.dataset_name = "vqa2"
        batch.dataset_type = "test"

        with torch.no_grad():
            result = self.model_for_pretraining(batch)

        self.assertIn("losses", result)
        self.assertIn("wra_loss", result["losses"])
Beispiel #18
0
 def _test_model_performance(self, model):
     """Classify one image with three captions and compare against recorded results.

     Each case lists the caption, the expected label, the expected
     confidence and the number of decimals the confidence must match to.
     """
     image_url = "https://i.imgur.com/tEcsk5q.jpg"
     cases = (
         ("look how many people love you", 0, 0.9993, 3),
         ("they have the privilege", 0, 0.9777, 1),
         ("hitler and jews", 1, 0.8342, 3),
     )

     model = model.to(get_current_device())
     for caption, label, confidence, places in cases:
         result = model.classify(image_url, caption)
         self.assertEqual(result["label"], label)
         np.testing.assert_almost_equal(
             result["confidence"], confidence, decimal=places)
Beispiel #19
0
    def test_pretrained_model(self):
        """Forward a random image/text batch and check the ``logit_bce`` loss key."""
        seq_shape = (1, 128)
        batch = SampleList()

        token_ids = torch.randint(low=0, high=BERT_VOCAB_SIZE, size=seq_shape).long()
        batch.add_field("input_ids", token_ids)
        batch.add_field("input_mask", torch.ones(seq_shape).long())
        batch.add_field("segment_ids", torch.zeros(1, 128).long())
        batch.add_field("image", torch.rand((1, 3, 224, 224)).float())
        batch.add_field("targets", torch.rand((1, 3129)).float())

        device = get_current_device()
        self.pretrain_model.eval()
        self.pretrain_model = self.pretrain_model.to(device)
        batch = batch.to(device)

        batch.dataset_name = "test"
        batch.dataset_type = "test"
        with torch.no_grad():
            result = self.pretrain_model(batch)

        self.assertIn("losses", result)
        self.assertIn("test/test/logit_bce", result["losses"])
Beispiel #20
0
    def test_classification_forward(self):
        """Call ``VinVLForClassification`` with the fixture tensors and check its output keys."""
        classifier = VinVLForClassification().to(get_current_device())
        classifier.eval()

        inputs = {
            "input_ids": self.input_ids,
            "img_feats": self.img_feats,
            "attention_mask": self.attention_mask,
            "token_type_ids": self.token_type_ids,
            "labels": self.labels,
        }
        with torch.no_grad():
            result = classifier(**inputs)

        self.assertIn("losses", result)
        self.assertIn("scores", result)
        self.assertIn("ce", result["losses"])
Beispiel #21
0
    def test_uniter_for_pretraining(self):
        """Run one forward pass per UNITER pretraining task and check its loss key.

        The model is built with one head per task (mlm, itm, mrc, mrfr, wra).
        The ``task`` field of the batch is switched before each forward pass
        and the matching loss name must be present in the output.
        """
        loss_name_by_task = {
            "mlm": "masked_lm_loss",
            "itm": "itm_loss",
            "mrc": "mrc_loss",
            "mrfr": "mrfr_loss",
            "wra": "wra_loss",
        }
        # One head per task, each configured with its own name as the type.
        head_configs = {task: {"type": task} for task in loss_name_by_task}

        pretrainer = UNITERForPretraining(head_configs=head_configs,
                                          tasks="mlm,itm,mrc,mrfr,wra",
                                          mask_probability=0.15)
        pretrainer.eval()
        pretrainer = pretrainer.to(get_current_device())

        batch = self._get_sample_list()
        self._enhance_sample_list_for_pretraining(batch)

        for task, expected_loss in loss_name_by_task.items():
            batch["task"] = task
            with torch.no_grad():
                result = pretrainer(batch)

            self.assertIn("losses", result)
            self.assertIn(expected_loss, result["losses"])
Beispiel #22
0
    def _enhance_sample_list_for_pretraining(self, sample_list):
        """Add the extra pretraining fields to ``sample_list`` in place.

        Adds ``is_correct``, ``task`` (initially ``"mlm"``), ``lm_label_ids``,
        ``input_ids_masked`` (the same tensor as ``input_ids``) and
        ``image_info_0`` (random ``cls_prob`` with 1601 classes), then moves
        the fields to the current device.

        Args:
            sample_list: Batch that already has ``input_ids`` and
                ``image_feat``; it is modified in place and nothing is
                returned.
        """
        bs = sample_list["input_ids"].size(0)
        sentence_len = sample_list["input_ids"].size(1)

        is_correct = torch.ones((bs, ), dtype=torch.long)
        lm_label_ids = torch.zeros((bs, sentence_len), dtype=torch.long)
        input_ids_masked = sample_list["input_ids"]
        num_feat = sample_list["image_feat"].size(1)
        cls_dim = 1601
        image_info = {"cls_prob": torch.rand((bs, num_feat, cls_dim))}
        sample_list.add_field("is_correct", is_correct)
        sample_list.add_field("task", "mlm")
        sample_list.add_field("lm_label_ids", lm_label_ids)
        sample_list.add_field("input_ids_masked", input_ids_masked)
        sample_list.add_field("image_info_0", image_info)

        # The result of ``.to`` used to be thrown away. Callers keep using the
        # object they passed in, so write the moved fields back into it.
        # NOTE(review): ``SampleList.to`` appears to return a moved copy rather
        # than mutate the receiver -- confirm against mmf.common.sample. The
        # write-back is harmless if it does mutate in place.
        moved = sample_list.to(get_current_device())
        for field in list(moved.keys()):
            sample_list[field] = moved[field]
    def change_dataloader(self):
        """Select the dataset index to iterate next and store it in ``current_index``.

        With at most one dataset the index is always 0. Otherwise the main
        process asks ``self.iteration_strategy`` for an index, retrying while
        the chosen dataset is in ``self._finished_iterators``, and the value
        is shared through ``broadcast_scalar`` with source 0.
        """
        if self.num_datasets <= 1:
            self.current_index = 0
            return

        selected = 0
        if self._is_main:
            # self._finished_iterators will always be empty in case of
            # non-proportional (equal) sampling, so the first draw is kept.
            while True:
                selected = self.iteration_strategy()
                if self.dataset_list[selected] not in self._finished_iterators:
                    break

        self.current_index = broadcast_scalar(
            selected, 0, device=get_current_device())
Beispiel #24
0
    def test_uniter_for_classification(self):
        """Build a one-head UNITER classifier and check the ``logit_bce`` loss key."""
        classifier = UNITERForClassification(
            head_configs={"test": {"type": "mlp", "num_labels": 3129}},
            loss_configs={"test": "logit_bce"},
            tasks="test",
        )
        classifier.eval()
        classifier = classifier.to(get_current_device())

        batch = self._get_sample_list()
        with torch.no_grad():
            result = classifier(batch)

        self.assertIn("losses", result)
        self.assertIn("test/test/logit_bce", result["losses"])
Beispiel #25
0
    def classify(self, image: ImageType, text: str):
        """Classifies a given image and text in it into Hateful/Non-Hateful.
        Image can be a url or a local path or you can directly pass a PIL.Image.Image
        object. Text needs to be a sentence containing all text in the image.

            >>> from mmf.models.mmbt import MMBT
            >>> model = MMBT.from_pretrained("mmbt.hateful_memes.images")
            >>> model.classify("some_url", "some_text")
            {"label": 0, "confidence": 0.56}

        Args:
            image (ImageType): Image to be classified
            text (str): Text in the image

        Returns:
            dict: ``{"label": int, "confidence": float}`` where ``label`` is
            the index of the highest softmax score (per the summary above,
            1 is hateful and 0 is non hateful) and ``confidence`` is that
            score.
        """
        if isinstance(image, str):
            if image.startswith("http"):
                # The context manager closes (and so removes) the temporary
                # file even when the download or the image load raises.
                with tempfile.NamedTemporaryFile() as temp_file:
                    download(image,
                             *os.path.split(temp_file.name),
                             disable_tqdm=True)
                    image = tv_helpers.default_loader(temp_file.name)
            else:
                image = tv_helpers.default_loader(image)

        text = self.processor_dict["text_processor"]({"text": text})
        image = self.processor_dict["image_processor"](image)

        sample = Sample()
        sample.text = text["text"]
        # Copy every processed text field when the processor produced ids.
        if "input_ids" in text:
            sample.update(text)

        sample.image = image
        sample_list = SampleList([sample])
        sample_list = sample_list.to(get_current_device())

        output = self.model(sample_list)
        scores = nn.functional.softmax(output["scores"], dim=1)
        confidence, label = torch.max(scores, dim=1)

        return {"label": label.item(), "confidence": confidence.item()}
Beispiel #26
0
    def test_pretraining_forward(self):
        """Call ``VinVLForPretraining`` with the fixture tensors and check both losses.

        The same fixture tensors are reused for the masked and the corrupt
        variants of the text inputs.
        """
        pretrainer = VinVLForPretraining().to(get_current_device())
        pretrainer.eval()

        inputs = {
            "img_feats": self.img_feats,
            "attention_mask": self.attention_mask,
            "token_type_ids": self.token_type_ids,
            "input_ids_masked": self.input_ids,
            "lm_label_ids": self.lm_label_ids,
            "contrastive_labels": self.contrastive_labels,
            "input_ids_corrupt": self.input_ids,
            "token_type_ids_corrupt": self.token_type_ids,
            "attention_mask_corrupt": self.attention_mask,
        }
        with torch.no_grad():
            result = pretrainer(**inputs)

        self.assertIn("losses", result)
        self.assertIn("masked_lm_loss", result["losses"])
        self.assertIn("three_way_contrastive_loss", result["losses"])
    def change_dataloader(self):
        """Sample the dataset index to iterate next and store it in ``current_index``.

        With at most one dataset the index is always 0. Otherwise the master
        process draws an index using ``self._dataset_probabilities``,
        redrawing while the chosen dataset is in ``self._finished_iterators``,
        and the value is shared through ``broadcast_scalar`` with source 0.
        """
        if self.num_datasets <= 1:
            self.current_index = 0
            return

        def draw_index():
            # Single weighted draw over the dataset indices.
            return np.random.choice(
                self.num_datasets, 1, p=self._dataset_probabilities
            )[0]

        selected = 0
        if self._is_master:
            selected = draw_index()

            # self._finished_iterators will always be empty in case of
            # non-proportional (equal) sampling
            while self.dataset_list[selected] in self._finished_iterators:
                selected = draw_index()

        self.current_index = broadcast_scalar(
            selected, 0, device=get_current_device()
        )
Beispiel #28
0
    def _get_sample_list(self):
        """Assemble a VinVL test batch from mocked input tensors.

        Returns:
            SampleList: batch on the current device with ``dataset_name`` and
            ``dataset_type`` set to ``"test"``. The plain, corrupt and masked
            variants of the text fields all reuse the same mocked tensors.
        """
        batch_size = 8
        feature_count = 70

        class MockObj:
            pass

        mocked = MockObj()
        mock_vinvl_input_tensors(mocked, bs=batch_size, num_feats=feature_count)

        token_mask = torch.ones_like(mocked.input_ids)
        info = {
            "max_features": torch.ones((batch_size, feature_count)) * feature_count,
            "bbox": torch.randint(50, 200, (batch_size, feature_count, 4)).float(),
            "image_height": torch.randint(100, 300, (batch_size,)),
            "image_width": torch.randint(100, 300, (batch_size,)),
        }

        named_fields = (
            ("input_ids", mocked.input_ids),
            ("input_ids_corrupt", mocked.input_ids),
            ("input_ids_masked", mocked.input_ids),
            ("image_feature_0", mocked.img_feats),
            ("image_info_0", info),
            ("input_mask", token_mask),
            ("input_mask_corrupt", token_mask),
            ("segment_ids", mocked.token_type_ids),
            ("segment_ids_corrupt", mocked.token_type_ids),
            ("labels", mocked.labels),
            ("contrastive_labels", mocked.contrastive_labels),
            ("lm_label_ids", mocked.lm_label_ids),
        )
        batch = SampleList()
        for field_name, field_value in named_fields:
            batch.add_field(field_name, field_value)

        batch = batch.to(get_current_device())
        batch.dataset_name = "test"
        batch.dataset_type = "test"
        return batch
Beispiel #29
0
    def __init__(self, trainer):
        """
        Prepare checkpoint paths and bookkeeping for ``trainer``.

        Derives the final ``.pth`` path from the save directory, an optional
        model-provided prefix and the model name, creates the ``models``
        sub-folder when missing, saves the config and, if git support is
        available and enabled in the config, opens the repository three
        directories above this file.
        """
        self.trainer = trainer
        config = trainer.config
        self.config = config

        save_dir = get_mmf_env(key="save_dir")
        self.save_dir = save_dir
        self.model_name = config.model
        self.ckpt_foldername = save_dir
        self.device = get_current_device()

        # Models may supply their own checkpoint name to prefix the file with.
        if hasattr(trainer.model, "get_ckpt_name"):
            self.ckpt_prefix = trainer.model.get_ckpt_name() + "_"
        else:
            self.ckpt_prefix = ""

        final_name = self.ckpt_prefix + self.model_name + "_final.pth"
        self.pth_filepath = os.path.join(self.ckpt_foldername, final_name)

        self.models_foldername = os.path.join(self.ckpt_foldername, "models")
        models_dir_exists = PathManager.exists(self.models_foldername)
        if not models_dir_exists:
            PathManager.mkdirs(self.models_foldername)

        self.save_config()

        self.repo_path = updir(os.path.abspath(__file__), n=3)
        self.git_repo = None
        if git and self.config.checkpoint.save_git_details:
            try:
                self.git_repo = git.Repo(self.repo_path)
            except git.exc.InvalidGitRepositoryError:
                # Not inside a git checkout: leave ``git_repo`` as None.
                pass

        self.max_to_keep = self.config.checkpoint.max_to_keep
        self.saved_iterations = []
Beispiel #30
0
    def test_forward(self):
        """Check the output shape of ``VinVLBase`` on random inputs.

        The asserted sequence length is 25 tokens + 70 image features = 95.
        """
        img_feature_dim = 2054
        bert_model_name = "bert-base-uncased"
        use_img_layernorm = True
        img_layer_norm_eps = 1e-12
        bert_config = BertConfig.from_pretrained(bert_model_name)
        # augment hf BertConfig for vinvl BertImgModel config
        bert_config.img_feature_dim = img_feature_dim
        bert_config.use_img_layernorm = use_img_layernorm
        bert_config.img_layer_norm_eps = img_layer_norm_eps
        model = VinVLBase(bert_config)

        device = get_current_device()
        model.eval()
        model = model.to(device)

        bs = 8
        num_feats = 70
        max_sentence_len = 25
        # Put the inputs on the same device as the model; they used to stay
        # on the CPU, which mismatches a model moved to a CUDA device.
        input_ids = torch.ones((bs, max_sentence_len),
                               dtype=torch.long).to(device)
        img_feat = torch.rand((bs, num_feats, img_feature_dim)).to(device)

        with torch.no_grad():
            model_output = model(input_ids, img_feat).last_hidden_state
        self.assertEqual(model_output.shape, torch.Size([8, 95, 768]))