Example 1
 def create_and_check_model(self, config, pixel_values):
     # CLIPVisionModel comes from transformers; torch_device is the device
     # helper exposed by transformers.testing_utils.
     model = CLIPVisionModel(config=config)
     model.to(torch_device)
     model.eval()
     result = model(pixel_values)
     # expected sequence length = num_patches + 1 (we add 1 for the [CLS] token)
     image_size = (self.image_size, self.image_size)
     patch_size = (self.patch_size, self.patch_size)
     num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])
     self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, num_patches + 1, self.hidden_size))
     self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size))
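For context, the helper above is typically driven with a small config and random pixel values. The sketch below reconstructs that flow as a self-contained script; the tiny config values (hidden_size=32, image_size=30, patch_size=2, and so on) are illustrative assumptions, not values taken from the example.
 import torch
 from transformers import CLIPVisionConfig, CLIPVisionModel

 # Tiny illustrative config; the exact values are assumptions for this sketch.
 config = CLIPVisionConfig(
     hidden_size=32,
     num_hidden_layers=2,
     num_attention_heads=4,
     intermediate_size=37,
     image_size=30,
     patch_size=2,
 )
 pixel_values = torch.rand(2, config.num_channels, config.image_size, config.image_size)

 model = CLIPVisionModel(config).eval()
 with torch.no_grad():
     result = model(pixel_values)

 # Sequence length is num_patches + 1 for the [CLS] token, as in the test above.
 num_patches = (config.image_size // config.patch_size) ** 2
 assert result.last_hidden_state.shape == (2, num_patches + 1, config.hidden_size)
 assert result.pooler_output.shape == (2, config.hidden_size)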
Example 2
 def get_vision_text_model(self, vision_config, text_config):
     # Pair a CLIP vision encoder with a BERT text encoder, both in eval mode.
     vision_model = CLIPVisionModel(vision_config).eval()
     text_model = BertModel(text_config).eval()
     return vision_model, text_model
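A vision/text pair like this is plausibly consumed by transformers' VisionTextDualEncoderModel, which can be built directly from the two sub-models. In the sketch below, VisionTextDualEncoderModel is the real transformers class; the tiny config values are assumptions made for illustration.
 from transformers import (
     BertConfig,
     BertModel,
     CLIPVisionConfig,
     CLIPVisionModel,
     VisionTextDualEncoderModel,
 )

 # Tiny illustrative configs (assumed values).
 vision_config = CLIPVisionConfig(
     hidden_size=32, num_hidden_layers=2, num_attention_heads=4,
     intermediate_size=37, image_size=30, patch_size=2,
 )
 text_config = BertConfig(
     hidden_size=32, num_hidden_layers=2, num_attention_heads=4, intermediate_size=37,
 )

 vision_model = CLIPVisionModel(vision_config).eval()
 text_model = BertModel(text_config).eval()

 # The dual-encoder model derives its own config from the two sub-model configs.
 model = VisionTextDualEncoderModel(vision_model=vision_model, text_model=text_model)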
Example 3
 def test_model_from_pretrained(self):
     # CLIP_PRETRAINED_MODEL_ARCHIVE_LIST enumerates the reference CLIP
     # checkpoints; only the first is loaded to keep the test fast.
     for model_name in CLIP_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
         model = CLIPVisionModel.from_pretrained(model_name)
         self.assertIsNotNone(model)
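For reference, this archive list has historically contained a single entry, openai/clip-vit-base-patch32, so an equivalent direct load looks like the sketch below (worth double-checking the list against the installed transformers version).
 from transformers import CLIPVisionModel

 # Load the vision tower of the reference CLIP checkpoint by name.
 model = CLIPVisionModel.from_pretrained("openai/clip-vit-base-patch32")
 assert model is not None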