def test_resize_and_normalize_matches_generalized_rcnn_transform(self):
    loader = TorchImageLoader(resize=True, normalize=True, size_divisibility=32)
    transform = torchvision.models.detection.transform.GeneralizedRCNNTransform(
        loader.min_size, loader.max_size, loader.pixel_mean, loader.pixel_std
    )
    loaded_image, _ = loader([self.image_fixture_path])
    raw_image, _ = TorchImageLoader(resize=False, normalize=False)(self.image_fixture_path)
    transformed_raw_image, _ = transform([raw_image])
    assert loaded_image.shape == transformed_raw_image.tensors.shape
def test_resize_and_normalize(self, device):
    loader = TorchImageLoader(resize=True, normalize=True, device=device)
    torch_device = torch.device(device)
    image, size = loader(self.image_fixture_path)
    assert image.device == torch_device
    assert size.device == torch_device
    # With resizing enabled, the fixture image's short side (the height here,
    # since the image is (channels, height, width)) is scaled up to the
    # loader's min_size, which defaults to 800.
    assert image.shape[1] == 800
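# A minimal sketch (not from the library) of the resize rule the assertion
# above relies on, mirroring torchvision's GeneralizedRCNNTransform: scale the
# image so its short side reaches `min_size`, unless that would push the long
# side past `max_size`. The 800/1333 defaults are torchvision's; the function
# name and the exact rounding are our assumptions.
def expected_resized_shape(height: int, width: int, min_size: int = 800, max_size: int = 1333):
    scale = min(min_size / min(height, width), max_size / max(height, width))
    return round(height * scale), round(width * scale)

# e.g. a 480x640 image scales by 800/480, giving roughly (800, 1067).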
def test_forward_runs(self):
    loader = TorchImageLoader(device="cuda:0")
    backbone = ResnetBackbone().to("cuda:0")
    image_pixels, image_size = loader(
        [self.FIXTURES_ROOT / "data" / "images" / "COCO_train2014_000000458752.jpg"]
    )
    result = backbone(image_pixels, image_size)
    assert tuple(result.keys()) == backbone.get_feature_names()
def test_forward_runs(self):
    loader = TorchImageLoader(resize=True, normalize=True, device="cuda:0")
    backbone = ResnetBackbone().to(device="cuda:0")
    backbone.eval()
    detector = FasterRcnnRegionDetector().to(device="cuda:0")
    detector.eval()

    image_path = self.FIXTURES_ROOT / "data" / "images" / "COCO_train2014_000000458752.jpg"
    images, sizes = loader([image_path, image_path])
    image_features = backbone(images, sizes)
    del backbone

    detections = detector(images, sizes, image_features)
    del detector

    assert len(detections.features) == 2
    assert len(detections.boxes) == 2
    assert len(detections.class_probs) == 2
    assert len(detections.class_labels) == 2
    assert detections.features[0].shape[0] >= 1
    assert detections.features[0].shape[1] == 1024
    assert (
        detections.features[0].shape[0]
        == detections.boxes[0].shape[0]
        == detections.class_probs[0].shape[0]
        == detections.class_labels[0].shape[0]
    )

    # Okay, cool, so far so good. Now let's make sure the output we got
    # actually matches exactly what we would get using the full pipeline
    # directly from torchvision.
    raw_loader = TorchImageLoader(resize=False, normalize=False, device="cuda:0")
    image, _ = raw_loader(image_path)
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to("cuda:0")
    model.eval()
    result = model([image, image])

    # We can't compare the boxes directly because the boxes here are post-processed
    # back to reference the original un-resized image. But we can compare
    # the labels and scores. They should match exactly.
    assert (result[0]["labels"] == detections.class_labels[0]).all()
    assert (result[0]["scores"] == detections.class_probs[0]).all()
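# The comment above notes that torchvision post-processes boxes back to the
# original image's coordinate frame. A minimal sketch of how one could undo
# that mismatch for a direct comparison; the helper name and the simple
# per-axis scaling assumption are ours, not part of either API.
import torch

def rescale_boxes(boxes: torch.Tensor, from_size, to_size) -> torch.Tensor:
    # `from_size` and `to_size` are (height, width) pairs. Boxes are
    # (x1, y1, x2, y2), so x coordinates scale by width and y by height.
    height_scale = to_size[0] / from_size[0]
    width_scale = to_size[1] / from_size[1]
    return boxes * boxes.new_tensor([width_scale, height_scale, width_scale, height_scale])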
def test_train_read(self):
    self.reader = Flickr30kReader(
        image_dir=FIXTURES_ROOT / "vision" / "images" / "flickr30k",
        image_loader=TorchImageLoader(),
        image_featurizer=Lazy(NullGridEmbedder),
        data_dir=FIXTURES_ROOT / "vision" / "flickr30k" / "sentences",
        region_detector=Lazy(RandomRegionDetector),
        tokenizer=WhitespaceTokenizer(),
        token_indexers={"tokens": SingleIdTokenIndexer()},
        featurize_captions=False,
        num_potential_hard_negatives=4,
    )

    instances = list(self.reader.read("test_fixtures/vision/flickr30k/test.txt"))
    assert len(instances) == 25

    instance = instances[5]
    assert len(instance.fields) == 5
    assert len(instance["caption"]) == 4
    assert len(instance["caption"][0]) == 12
    # The gold caption is repeated for each of the hard-negative images, so
    # only the entry at index 1 differs.
    assert instance["caption"][0] != instance["caption"][1]
    assert instance["caption"][0] == instance["caption"][2]
    assert instance["caption"][0] == instance["caption"][3]

    question_tokens = [t.text for t in instance["caption"][0]]
    assert question_tokens == [
        "girl", "with", "brown", "hair", "sits", "on",
        "edge", "of", "concrete", "area", "overlooking", "water",
    ]

    batch = Batch(instances)
    batch.index_instances(Vocabulary())
    tensors = batch.as_tensor_dict()

    # (batch size, num images (3 hard negatives + gold image), num boxes (fake), num features (fake))
    assert tensors["box_features"].size() == (25, 4, 2, 10)

    # (batch size, num images (3 hard negatives + gold image), num boxes (fake), 4 coords)
    assert tensors["box_coordinates"].size() == (25, 4, 2, 4)

    # (batch size, num images (3 hard negatives + gold image), num boxes (fake))
    assert tensors["box_mask"].size() == (25, 4, 2)

    # (batch size,)
    assert tensors["label"].size() == (25,)
def setup_method(self):
    from allennlp_models.vision.dataset_readers.gqa import GQAReader

    super().setup_method()
    self.reader = GQAReader(
        image_dir=FIXTURES_ROOT / "vision" / "images" / "gqa",
        image_loader=TorchImageLoader(),
        image_featurizer=Lazy(NullGridEmbedder),
        region_detector=Lazy(RandomRegionDetector),
        tokenizer=WhitespaceTokenizer(),
        token_indexers={"tokens": SingleIdTokenIndexer()},
    )
def test_read(self):
    from allennlp_models.vision.dataset_readers.vqav2 import VQAv2Reader

    reader = VQAv2Reader(
        image_dir=FIXTURES_ROOT / "vision" / "images" / "vqav2",
        image_loader=TorchImageLoader(),
        image_featurizer=Lazy(NullGridEmbedder),
        region_detector=Lazy(RandomRegionDetector),
        tokenizer=WhitespaceTokenizer(),
        token_indexers={"tokens": SingleIdTokenIndexer()},
    )
    instances = list(reader.read("unittest"))
    assert len(instances) == 3

    instance = instances[0]
    assert len(instance.fields) == 6
    assert len(instance["question"]) == 7
    question_tokens = [t.text for t in instance["question"]]
    assert question_tokens == ["What", "is", "this", "photo", "taken", "looking", "through?"]

    assert len(instance["labels"]) == 5
    labels = [field.label for field in instance["labels"].field_list]
    assert labels == ["net", "netting", "mesh", "pitcher", "orange"]
    assert torch.allclose(
        instance["label_weights"].tensor,
        torch.tensor([1.0, 1.0 / 3, 1.0 / 3, 1.0 / 3, 1.0 / 3]),
    )

    batch = Batch(instances)
    batch.index_instances(Vocabulary())
    tensors = batch.as_tensor_dict()

    # (batch size, num boxes (fake), num features (fake))
    assert tensors["box_features"].size() == (3, 2, 10)

    # (batch size, num boxes (fake), 4 coords)
    assert tensors["box_coordinates"].size() == (3, 2, 4)

    # (batch size, num boxes (fake))
    assert tensors["box_mask"].size() == (3, 2)

    # Nothing should be masked out since the number of fake boxes is the same
    # for each item in the batch.
    assert tensors["box_mask"].all()
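# The label weights checked above are consistent with the standard VQAv2
# soft-score rule: an answer's weight is min(votes / 3, 1.0), so an answer
# given by 3 or more of the 10 annotators gets full weight. A minimal sketch;
# the function name and the vote counts in the examples are hypothetical.
def vqa_answer_weight(votes: int) -> float:
    return min(votes / 3.0, 1.0)

assert vqa_answer_weight(5) == 1.0       # e.g. a majority answer like "net"
assert vqa_answer_weight(1) == 1.0 / 3   # a single-annotator answer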
def test_read(self):
    from allennlp_models.vision.dataset_readers.vgqa import VGQAReader

    reader = VGQAReader(
        image_dir=FIXTURES_ROOT / "vision" / "images" / "vgqa",
        image_loader=TorchImageLoader(),
        image_featurizer=Lazy(NullGridEmbedder),
        region_detector=Lazy(RandomRegionDetector),
        tokenizer=WhitespaceTokenizer(),
        token_indexers={"tokens": SingleIdTokenIndexer()},
    )
    instances = list(reader.read("test_fixtures/vision/vgqa/question_answers.json"))
    assert len(instances) == 8

    instance = instances[0]
    assert len(instance.fields) == 6
    assert len(instance["question"]) == 5
    question_tokens = [t.text for t in instance["question"]]
    assert question_tokens == ["What", "is", "on", "the", "curtains?"]

    assert len(instance["labels"]) == 1
    labels = [field.label for field in instance["labels"].field_list]
    assert labels == ["sailboats"]

    batch = Batch(instances)
    batch.index_instances(Vocabulary())
    tensors = batch.as_tensor_dict()

    # (batch size, num boxes (fake), num features (fake))
    assert tensors["box_features"].size() == (8, 2, 10)

    # (batch size, num boxes (fake), 4 coords)
    assert tensors["box_coordinates"].size() == (8, 2, 4)

    # (batch size, num boxes (fake))
    assert tensors["box_mask"].size() == (8, 2)

    # Nothing should be masked out since the number of fake boxes is the same
    # for each item in the batch.
    assert tensors["box_mask"].all()
def test_read(self):
    from allennlp_models.vision.dataset_readers.nlvr2 import Nlvr2Reader

    reader = Nlvr2Reader(
        image_dir=FIXTURES_ROOT / "vision" / "images" / "nlvr2",
        image_loader=TorchImageLoader(),
        image_featurizer=Lazy(NullGridEmbedder),
        region_detector=Lazy(RandomRegionDetector),
        tokenizer=WhitespaceTokenizer(),
        token_indexers={"tokens": SingleIdTokenIndexer()},
    )
    instances = list(reader.read("test_fixtures/vision/nlvr2/tiny-dev.json"))
    assert len(instances) == 8

    instance = instances[0]
    assert len(instance.fields) == 6
    # Each NLVR2 instance has two images, and the same hypothesis is paired
    # with both of them.
    assert instance["hypothesis"][0] == instance["hypothesis"][1]
    assert len(instance["hypothesis"][0]) == 18
    hypothesis_tokens = [t.text for t in instance["hypothesis"][0]]
    assert hypothesis_tokens[:6] == ["The", "right", "image", "shows", "a", "curving"]
    assert instance["label"].label == 0
    assert instances[1]["label"].label == 1
    assert instance["identifier"].metadata == "dev-850-0-0"

    batch = Batch(instances)
    batch.index_instances(Vocabulary())
    tensors = batch.as_tensor_dict()

    # (batch size, 2 images per instance, num boxes (fake), num features (fake))
    assert tensors["box_features"].size() == (8, 2, 2, 10)

    # (batch size, 2 images per instance, num boxes (fake), 4 coords)
    assert tensors["box_coordinates"].size() == (8, 2, 2, 4)

    # (batch size, 2 images per instance, num boxes (fake))
    assert tensors["box_mask"].size() == (8, 2, 2)
def test_read(self):
    from allennlp_models.vision.dataset_readers.visual_entailment import VisualEntailmentReader

    reader = VisualEntailmentReader(
        image_dir=FIXTURES_ROOT / "vision" / "images" / "visual_entailment",
        image_loader=TorchImageLoader(),
        image_featurizer=Lazy(NullGridEmbedder),
        region_detector=Lazy(RandomRegionDetector),
        tokenizer=WhitespaceTokenizer(),
        token_indexers={"tokens": SingleIdTokenIndexer()},
    )
    instances = list(reader.read("test_fixtures/vision/visual_entailment/sample_pairs.jsonl"))
    assert len(instances) == 16

    instance = instances[0]
    assert len(instance.fields) == 5
    assert len(instance["hypothesis"]) == 4
    sentence_tokens = [t.text for t in instance["hypothesis"]]
    assert sentence_tokens == ["A", "toddler", "sleeps", "outside."]
    assert instance["labels"].label == "contradiction"

    batch = Batch(instances)
    vocab = Vocabulary()
    vocab.add_tokens_to_namespace(["entailment", "contradiction", "neutral"], "labels")
    batch.index_instances(vocab)
    tensors = batch.as_tensor_dict()

    # (batch size, num boxes (fake), num features (fake))
    assert tensors["box_features"].size() == (16, 2, 10)

    # (batch size, num boxes (fake), 4 coords)
    assert tensors["box_coordinates"].size() == (16, 2, 4)

    # (batch size, num boxes (fake))
    assert tensors["box_mask"].size() == (16, 2)
def test_basic_load(self, device, loader_params):
    loader = TorchImageLoader(resize=False, normalize=False, device=device, **loader_params)
    torch_device = torch.device(device)

    # Load a batch of two images.
    images, sizes = loader([self.TEST_DIR / "image1.jpg", self.TEST_DIR / "image2.jpg"])
    assert images.device == torch_device
    assert sizes.device == torch_device
    assert images.shape[0] == 2
    assert images.shape[1] == 3
    assert sizes.shape == (2, 2)
    assert list(sizes[0]) == [7, 15]
    assert list(sizes[1]) == [9, 12]
    if loader.size_divisibility <= 1:
        # No padding beyond the largest image in the batch.
        assert images.shape[2] == 9
        assert images.shape[3] == 15
    else:
        # Both spatial dimensions should be padded up to a multiple of
        # `size_divisibility`.
        assert images.shape[2] >= 9
        assert images.shape[3] >= 15
        assert (images.shape[2] / loader.size_divisibility) % 1 == 0
        assert (images.shape[3] / loader.size_divisibility) % 1 == 0

    # Load a single image.
    image, size = loader(self.TEST_DIR / "image1.jpg")
    assert image.device == torch_device
    assert size.device == torch_device
    assert len(image.shape) == 3
    assert list(size) == [7, 15]
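# A sketch of how size-divisibility padding is typically computed (our
# illustration of the convention, not necessarily TorchImageLoader's exact
# code): each spatial dimension is rounded up to the nearest multiple of
# `size_divisibility`.
import math

def padded_dim(dim: int, size_divisibility: int) -> int:
    if size_divisibility <= 1:
        return dim
    return math.ceil(dim / size_divisibility) * size_divisibility

assert padded_dim(9, 32) == 32
assert padded_dim(15, 32) == 32
assert padded_dim(15, 4) == 16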