def test_extract_patch_for_padding(self):
    """Check patch extraction when the patch overlaps the image border.

    The requested patch is only partially inside the image; the assertions verify that the
    returned patch still has the requested size, that the out-of-bounds area is zero, and that
    the in-bounds area matches the source image.
    """
    image = Image(np.random.rand(100, 71, 3))
    patch_size = 10
    half_before = patch_size // 2
    half_after = (patch_size + 1) // 2

    # Case 1: the patch hangs outside the image on the top left.
    center_x, center_y = 3, 4
    patch = image.extract_patch(center_x, center_y, patch_size)

    # The patch keeps the requested dimensions.
    self.assertEqual(patch.width, patch_size)
    self.assertEqual(patch.height, patch_size)

    # The first row and the first two columns lie out of bounds and must be zero.
    np.testing.assert_allclose(patch.value_array[:1, :], 0)
    np.testing.assert_allclose(patch.value_array[:, :2], 0)

    # The remainder of the patch must be copied from the top-left corner of the input.
    np.testing.assert_allclose(
        patch.value_array[1:, 2:],
        image.value_array[: center_y + half_after, : center_x + half_after],
    )

    # Case 2: the patch hangs outside the image on the bottom right.
    center_x, center_y = 70, 96
    patch = image.extract_patch(center_x, center_y, patch_size)

    # The patch keeps the requested dimensions.
    self.assertEqual(patch.width, patch_size)
    self.assertEqual(patch.height, patch_size)

    # The last row and the last four columns lie out of bounds and must be zero.
    np.testing.assert_allclose(patch.value_array[-1:, :], 0)
    np.testing.assert_allclose(patch.value_array[:, -4:], 0)

    # The remainder of the patch must be copied from the bottom-right corner of the input.
    np.testing.assert_allclose(
        patch.value_array[:-1, :-4],
        image.value_array[center_y - half_before :, center_x - half_before :],
    )
def load_image(img_path: str) -> Image:
    """Load an image and its EXIF metadata from disk.

    EXIF entries are keyed by integer tag ids; each id is replaced by its human-readable name
    from the TAGS table, falling back to the GPSTAGS table, and finally to the raw id when
    neither table knows it.

    Note: no color-mode conversion is performed here, so the pixel array keeps whatever mode
    the file was stored in.

    Args:
        img_path (str): the path of image to load.

    Returns:
        Image wrapping the pixel array and the parsed EXIF dictionary.
    """
    # Use a context manager so the underlying file handle is released as soon as the pixel
    # data and the metadata have been read.
    with PILImage.open(img_path) as original_image:
        exif_data = original_image.getexif()
        if exif_data is not None:
            exif_data = {
                TAGS.get(tag, GPSTAGS.get(tag, tag)): value for tag, value in exif_data.items()
            }

        # Materialize the pixels while the file is still open.
        value_array = np.asarray(original_image)

    return Image(value_array, exif_data)
def vstack_image_list(imgs: List[Image]) -> Image:
    """Concatenate images along the vertical axis.

    The images are tiled top to bottom, in list order, into a single 3-channel uint8 image.

    Args:
        imgs: list of Images; all must have the same width and exactly 3 channels.

    Returns:
        vstack_img: new RGB image, containing vertically stacked images as tiles.
    """
    _, img_w, ch = imgs[0].value_array.shape
    assert ch == 3

    # width and number of channels must match
    assert all(img.width == img_w for img in imgs)
    assert all(img.value_array.shape[2] == ch for img in imgs)

    total_height = sum(img.height for img in imgs)
    vstack_img = np.zeros((total_height, img_w, 3), dtype=np.uint8)

    # Copy each image into its own horizontal band, tracking the next free row.
    row_offset = 0
    for img in imgs:
        next_offset = row_offset + img.height
        vstack_img[row_offset:next_offset, :, :] = img.value_array
        row_offset = next_offset

    return Image(vstack_img)
def vstack_images(image_i1: Image, image_i2: Image) -> Image:
    """Stack two images vertically, with the first image on top.

    The output is as wide as the wider input and uses the dtype of the first image. Pixels not
    covered by either input are filled with 255 for integer dtypes and 1 otherwise.

    Args:
        image_i1: 1st image to stack (placed on top).
        image_i2: 2nd image to stack (placed below).

    Returns:
        Image: stacked image
    """
    top_h, top_w = image_i1.height, image_i1.width
    bottom_h, bottom_w = image_i2.height, image_i2.width

    canvas_shape = (top_h + bottom_h, max(top_w, bottom_w), 3)
    canvas = np.ones(canvas_shape, dtype=image_i1.value_array.dtype)

    # Integer images get a 255 fill value instead of 1.
    if np.issubdtype(canvas.dtype, np.integer):
        canvas[:] = 255

    # Both images are left-aligned inside the canvas.
    canvas[:top_h, :top_w, :] = image_i1.value_array
    canvas[top_h:, :bottom_w, :] = image_i2.value_array

    return Image(canvas)
def get_image_full_res(self, index: int) -> Image:
    """Fetch the full-resolution image at the given index, together with a foreground mask.

    Args:
        index: the index to fetch.

    Raises:
        IndexError: if an out-of-bounds image index is requested.

    Returns:
        Image: the image at the query index, carrying the computed mask.
    """
    if not 0 <= index < len(self):
        raise IndexError(f"Image index {index} is invalid")

    # Read in image.
    loaded = io_utils.load_image(self._image_paths[index])

    # Generate mask to separate background deep space from foreground target body
    # based on image intensity values.
    foreground_mask = get_nonzero_intensity_mask(loaded)

    return Image(
        value_array=loaded.value_array,
        exif_data=loaded.exif_data,
        file_name=loaded.file_name,
        mask=foreground_mask,
    )
def draw_line_cv2(
    image: Image,
    x1: int,
    y1: int,
    x2: int,
    y2: int,
    line_color: Tuple[int, int, int],
    line_thickness: int = 10,
) -> Image:
    """Draw an anti-aliased line segment between two pixel coordinates.

    The image's own pixel array is handed directly to OpenCV (no copy is made here).

    Args:
        image: image to draw the line on.
        x1: x coordinate of start of the line.
        y1: y coordinate of start of the line.
        x2: x coordinate of end of the line.
        y2: y coordinate of end of the line.
        line_color: color of the line.
        line_thickness (optional): line thickness. Defaults to 10.

    Returns:
        Image: image with the line drawn on it.
    """
    start_point = (x1, y1)
    end_point = (x2, y2)
    drawn = cv.line(image.value_array, start_point, end_point, line_color, line_thickness, cv.LINE_AA)
    return Image(drawn)
def test_round_trip_images_txt(self) -> None:
    """Write a single camera pose to images.txt and read it back.

    The pose is written inside a temporary directory, recovered by parsing the written file,
    and the original and recovered wTc matrices are compared to 3 decimal places.
    """
    # Rotation 45 degrees about the z-axis.
    theta = np.pi / 4
    # fmt: off
    original_wRc = np.array(
        [
            [np.cos(theta), -np.sin(theta), 0],
            [np.sin(theta),  np.cos(theta), 0],
            [0,              0,             1],
        ]
    )
    original_wtc = np.array([3, -2, 1])
    # fmt: on

    # Setup dummy GtsfmData Object with one image
    original_wTc = Pose3(Rot3(original_wRc), original_wtc)
    default_intrinsics = Cal3Bundler(fx=100, k1=0, k2=0, u0=0, v0=0)
    gtsfm_data = GtsfmData(number_images=1)
    gtsfm_data.add_camera(0, PinholeCameraCal3Bundler(original_wTc, default_intrinsics))

    images = [Image(value_array=None, file_name="dummy_image.jpg")]

    # Perform write and read operations inside a temporary directory
    with tempfile.TemporaryDirectory() as tempdir:
        images_fpath = os.path.join(tempdir, "images.txt")

        io_utils.write_images(gtsfm_data, images, tempdir)
        wTi_list, _ = io_utils.read_images_txt(images_fpath)
        recovered_wTc = wTi_list[0]

    npt.assert_almost_equal(original_wTc.matrix(), recovered_wTc.matrix(), decimal=3)
def resize_image(image: Image, new_height: int, new_width: int) -> Image:
    """Resize the image (and its mask, if any) to given dimensions, preserving the file name.

    Pixel values are resized with bicubic interpolation. The mask is resized with
    nearest-neighbor interpolation.

    Args:
        image: image to resize.
        new_height: height of the new image.
        new_width: width of the new image.

    Returns:
        resized image.
    """
    # Note: OpenCV expects the target size as (width, height).
    target_size = (new_width, new_height)

    resized_value_array = cv.resize(image.value_array, target_size, interpolation=cv.INTER_CUBIC)

    # Resize the mask using nearest-neighbor interpolation.
    # Compare against None explicitly: the truth value of a multi-element array is ambiguous
    # and would raise a ValueError.
    if image.mask is not None:
        resized_mask = cv.resize(image.mask, target_size, interpolation=cv.INTER_NEAREST)
    else:
        resized_mask = None

    return Image(value_array=resized_value_array, file_name=image.file_name, mask=resized_mask)
def draw_circle_cv2(
    image: Image,
    x: int,
    y: int,
    color: Tuple[int, int, int],
    circle_size: int = 10,
) -> Image:
    """Draw a filled circle on the image.

    The image's own pixel array is handed directly to OpenCV (no copy is made here).

    Args:
        image: image to draw the circle on.
        x: x coordinate of the center of the circle.
        y: y coordinate of the center of the circle.
        color: RGB color of the circle.
        circle_size (optional): radius of the circle. Defaults to 10.

    Returns:
        Image: image with the circle drawn on it.
    """
    # A negative thickness asks OpenCV for a solid (filled) circle.
    filled = -1
    drawn = cv.circle(
        image.value_array,
        center=(x, y),
        radius=circle_size,
        color=color,
        thickness=filled,
    )
    return Image(drawn)
def rgb_to_gray_cv(image: Image) -> Image:
    """Convert an RGB or RGBA image to grayscale using OpenCV.

    A 2-dimensional input is treated as already grayscale and is passed through unchanged.
    The EXIF data of the input is carried over to the output.

    Args:
        image: Input RGB/RGBA image.

    Raises:
        ValueError: wrong input dimensions

    Returns:
        grayscale transformed image.
    """
    pixels = image.value_array

    # Already single-channel: nothing to convert.
    if len(pixels.shape) == 2:
        return Image(pixels, image.exif_data)

    num_channels = pixels.shape[2]
    if num_channels == 4:
        conversion_code = cv.COLOR_RGBA2GRAY
    elif num_channels == 3:
        conversion_code = cv.COLOR_RGB2GRAY
    else:
        raise ValueError("Input image dimensions are wrong")

    return Image(cv.cvtColor(pixels, conversion_code), image.exif_data)
def test_extract_patch_fully_inside(self):
    """Check patch extraction when the patch lies entirely inside the image.

    Both an even and an odd patch size are exercised; in each case the patch must have the
    requested size and match the corresponding window of the source image.
    """
    image = Image(np.random.rand(100, 71, 3))
    center_x, center_y = 21, 22

    # 10 covers the even patch size, 11 the odd one.
    for patch_size in (10, 11):
        patch = image.extract_patch(center_x, center_y, patch_size)

        # check the patch dimensions
        self.assertEqual(patch.width, patch_size)
        self.assertEqual(patch.height, patch_size)

        # check the patch contents
        half_before = patch_size // 2
        half_after = (patch_size + 1) // 2
        rows = slice(center_y - half_before, center_y + half_after)
        cols = slice(center_x - half_before, center_x + half_after)
        np.testing.assert_allclose(patch.value_array, image.value_array[rows, cols])
def test_get_intrinsics_from_exif(self, mock_init, mock_lookup):
    """Check that intrinsics built from EXIF metadata match the expected calibration.

    Args:
        mock_init: injected mock (presumably from a patch decorator outside this view).
        mock_lookup: injected mock (presumably from a patch decorator outside this view).
    """
    exif_data = {"FocalLength": 25, "Make": "testMake", "Model": "testModel"}

    # A 100 x 120 (height x width) image with 3 channels.
    pixels = np.random.randint(low=0, high=255, size=(100, 120, 3))
    image = Image(pixels, exif_data)

    expected = Cal3Bundler(fx=600.0, k1=0.0, k2=0.0, u0=60.0, v0=50.0)
    computed = image.get_intrinsics_from_exif()

    self.assertTrue(expected.equals(computed, 1e-3))
def test_get_average_point_color(self):
    """Ensure 3d point color is computed as mean of RGB per 2d measurement."""
    # random point; 2d measurements below are dummy locations (not actual projection)
    track_3d = SfmTrack(np.array([1, 2, 1]))
    track_3d.addMeasurement(idx=0, m=np.array([130, 80]))  # in camera 0
    track_3d.addMeasurement(idx=1, m=np.array([10, 60]))  # in camera 1

    # Black images with one colored pixel each; pixels are indexed as [row (y), column (x)].
    pixels_cam0 = np.zeros((100, 200, 3), dtype=np.uint8)
    pixels_cam0[80, 130] = np.array([40, 50, 60])

    pixels_cam1 = np.zeros((100, 200, 3), dtype=np.uint8)
    pixels_cam1[60, 10] = np.array([60, 70, 80])

    images = {0: Image(pixels_cam0), 1: Image(pixels_cam1)}

    r, g, b = image_utils.get_average_point_color(track_3d, images)

    # Per-channel mean of (40, 50, 60) and (60, 70, 80).
    for actual, expected in zip((r, g, b), (50, 60, 70)):
        self.assertEqual(actual, expected)
def resize_image(image: Image, new_height: int, new_width: int) -> Image:
    """Resize the image to the given dimensions using bicubic interpolation.

    Only the pixel array is carried over to the returned image.

    Args:
        image: image to resize.
        new_height: height of the new image.
        new_width: width of the new image.

    Returns:
        resized image.
    """
    # Note: OpenCV expects the target size as (width, height).
    target_size = (new_width, new_height)
    return Image(cv.resize(image.value_array, target_size, interpolation=cv.INTER_CUBIC))
def test_round_trip_cameras_txt(self) -> None:
    """Write three cameras to cameras.txt and read them back.

    The file is written inside a temporary directory, parsed again, and every recovered
    calibration is compared against the original one.
    """
    # Create multiple calibration data
    original_calibrations = [
        Cal3Bundler(fx=100, k1=0, k2=0, u0=0, v0=0),
        Cal3Bundler(fx=200, k1=0.001, k2=0, u0=1000, v0=2000),
        Cal3Bundler(fx=300, k1=0.004, k2=0.001, u0=1001, v0=2002),
    ]
    num_cameras = len(original_calibrations)

    # Populate gtsfm_data with the generated values
    gtsfm_data = GtsfmData(number_images=num_cameras)
    for idx, calibration in enumerate(original_calibrations):
        gtsfm_data.add_camera(idx, PinholeCameraCal3Bundler(Pose3(), calibration))

    # Generate dummy images (the same Image object is reused for every camera)
    dummy_image = Image(value_array=np.zeros((240, 320)), file_name="dummy_image.jpg")
    images = [dummy_image] * num_cameras

    # Round trip
    with tempfile.TemporaryDirectory() as tempdir:
        cameras_fpath = os.path.join(tempdir, "cameras.txt")

        io_utils.write_cameras(gtsfm_data, images, tempdir)
        recovered_calibrations = io_utils.read_cameras_txt(cameras_fpath)

    self.assertEqual(len(original_calibrations), len(recovered_calibrations))

    for K_ori, K_rec in zip(original_calibrations, recovered_calibrations):
        self.assertEqual(K_ori.fx(), K_rec.fx())
        self.assertEqual(K_ori.px(), K_rec.px())
        self.assertEqual(K_ori.py(), K_rec.py())
        self.assertEqual(K_ori.k1(), K_rec.k1())
        self.assertEqual(K_ori.k2(), K_rec.k2())
def load_image(img_path: str) -> Image:
    """Load the image from disk.

    Notes: EXIF is read as a map from (tag_id, value) where tag_id is an integer.
    In order to extract human-readable names, we use the lookup table TAGS or GPSTAGS.
    Tag ids found in neither table are kept as-is.
    Images will be converted to RGB if in a different format.

    Args:
        img_path (str): the path of image to load.

    Returns:
        loaded image in RGB format, with parsed EXIF data (None if unavailable) and the file's
        base name.
    """
    # Use a context manager so the underlying file handle is released as soon as the pixel
    # data and the metadata have been read.
    with PILImage.open(img_path) as original_image:
        # `_getexif` is a private helper that not every PIL image class defines; treat its
        # absence like an image without EXIF instead of failing with AttributeError.
        exif_reader = getattr(original_image, "_getexif", None)
        exif_data = exif_reader() if exif_reader is not None else None

        if exif_data is not None:
            # extract the human readable tag name
            exif_data = {
                TAGS.get(tag_id, GPSTAGS.get(tag_id, tag_id)): value
                for tag_id, value in exif_data.items()
            }

        rgb_image = original_image if original_image.mode == "RGB" else original_image.convert("RGB")

        # Materialize the pixels while the file is still open.
        value_array = np.asarray(rgb_image)

    return Image(value_array=value_array, exif_data=exif_data, file_name=Path(img_path).name)
u0=DEFAULT_IMAGE_W // 2, v0=DEFAULT_IMAGE_H // 2, ) # set default camera poses as described in GTSAM example DEFAULT_CAMERA_POSES = SFMdata.createPoses(DEFAULT_CAMERA_INTRINSICS) # set default camera instances DEFAULT_CAMERAS = [ PinholeCameraCal3_S2(DEFAULT_CAMERA_POSES[i], DEFAULT_CAMERA_INTRINSICS) for i in range(len(DEFAULT_CAMERA_POSES)) ] DEFAULT_NUM_CAMERAS = len(DEFAULT_CAMERAS) # the number of valid images should be equal to the number of cameras (with estimated pose) DEFAULT_NUM_IMAGES = DEFAULT_NUM_CAMERAS # build dummy image dictionary with default image shape DEFAULT_DUMMY_IMAGE_DICT = { i: Image(value_array=np.zeros([DEFAULT_IMAGE_H, DEFAULT_IMAGE_W, DEFAULT_IMAGE_C], dtype=int)) for i in range(DEFAULT_NUM_IMAGES) } # set camera[1] to be selected in test_get_item EXAMPLE_CAMERA_ID = 1 class TestPatchmatchNetData(unittest.TestCase): """Unit tests for the interface for PatchmatchNet.""" def setUp(self) -> None: """Set up the image dictionary and gtsfm result for the test.""" super().setUp() # set the number of images as the default number
v0=IMAGE_H // 2, ) # set dummy camera poses as described in GTSAM example CAMERA_POSES = SFMdata.createPoses(CAMERA_INTRINSICS) # set dummy camera instances CAMERAS = [ PinholeCameraCal3_S2(CAMERA_POSES[i], CAMERA_INTRINSICS) for i in range(len(CAMERA_POSES)) ] NUM_CAMERAS = len(CAMERAS) # the number of valid images should be equal to the number of cameras (with estimated pose) NUM_IMAGES = NUM_CAMERAS # build dummy image dictionary with dummy image shape DUMMY_IMAGE_DICT = { i: Image(value_array=np.zeros([IMAGE_H, IMAGE_W, IMAGE_C], dtype=int)) for i in range(NUM_IMAGES) } # a reconstructed point is consistent in geometry if it satisfies all geometric thresholds in more than 3 source views MIN_NUM_CONSISTENT_VIEWS = 3 # the reprojection error in pixel coordinates should be less than 1 MAX_GEOMETRIC_PIXEL_THRESH = 1 class TestMVSPatchmatchNet(unittest.TestCase): """Unit tests for PatchmatchNet method.""" def setUp(self) -> None: """Set up the image dictionary and gtsfm result for the test.""" super().setUp()
"""Unit tests for detector-descriptor cacher. Authors: Ayush Baid """ from pathlib import Path import unittest from unittest.mock import MagicMock, patch import numpy as np from gtsfm.frontend.cacher.detector_descriptor_cacher import DetectorDescriptorCacher from gtsfm.common.image import Image from gtsfm.common.keypoints import Keypoints DUMMY_IMAGE = Image( value_array=np.random.randint(low=0, high=255, size=(100, 120, 3))) DUMMY_KEYPOINTS = Keypoints(coordinates=np.random.rand(10, 2), scales=np.random.rand(10), responses=np.random.rand(10)) DUMMY_DESCRIPTORS = np.random.rand(len(DUMMY_KEYPOINTS), 128) ROOT_PATH = Path(__file__).resolve().parent.parent.parent.parent class TestDetectorDescriptorCacher(unittest.TestCase): """Unit tests for DetectorDescriptorCacher.""" @patch("gtsfm.utils.cache.generate_hash_for_image", return_value="img_key") @patch("gtsfm.utils.io.read_from_bz2_file", return_value=None) @patch("gtsfm.utils.io.write_to_bz2_file") def test_cache_miss(self, write_mock: MagicMock, read_mock: MagicMock,