def _getImageDtype(self, dataset):
    """Return the dtype of the images stored in the input column of ``dataset``.

    Only the first row is inspected, so this assumes that the dtype is the
    same for all images in the given dataframe. This may not be the best way
    to get the type of image, but it is one way.

    :param dataset: dataframe-like object supporting ``select(...).take(1)``
        whose input column holds image values exposing a ``mode`` field.
    :return: the ``dtype`` attribute of the image type that
        ``imageIO.imageTypeByOrdinal`` reports for the first image's mode.
    :raises IndexError: if ``dataset`` has no rows.
    """
    input_col = self.getInputCol()
    first_rows = dataset.select(input_col).take(1)
    if not first_rows:
        # Same exception type the bare ``[0]`` lookup used to produce, but
        # with a message that says what actually went wrong.
        raise IndexError(
            "Cannot determine the image dtype: dataset has no rows in column %r"
            % input_col)
    # NOTE(review): a null image in the first row would fail on ``.mode``
    # below -- presumably callers only pass non-null images; confirm.
    first_image = first_rows[0][input_col]
    return imageIO.imageTypeByOrdinal(first_image.mode).dtype
def check_image_round_trip(img_arr):
    """Assert that ``img_arr`` survives an image-struct encode/decode round trip.

    The array is converted with ``imageArrayToStruct``, the struct's ``data``
    field is coerced to ``bytes``, and the result is decoded again by
    ``exec_gfn_spimg_decode`` using the dtype looked up from the struct's
    ``mode``. The element-wise comparison of the decoded output against
    ``img_arr`` must be true everywhere.

    :param img_arr: image array to push through the round trip.
    """
    struct_fields = imageArrayToStruct(img_arr).asDict()
    # The decoder is fed plain bytes rather than whatever the struct stores.
    struct_fields['data'] = bytes(struct_fields['data'])
    decoded_dtype = imageTypeByOrdinal(struct_fields['mode']).dtype
    decoded = exec_gfn_spimg_decode(struct_fields, decoded_dtype)
    elements_match = np.all(decoded == img_arr)
    self.assertTrue(elements_match)
def test_readImages(self):
    # Test that reading images with a custom decode function produces a
    # DataFrame with an "image" column whose first non-null row matches the
    # reference `array`.
    imageDF = imageIO._readImagesWithCustomFn(
        "file/path",
        decode_f=imageIO.PIL_decode,
        numPartition=2,
        sc=self.binaryFilesMock)
    # assertIn reports the actual column names when the check fails.
    self.assertIn("image", imageDF.schema.names)

    # The DF should have 2 images and 1 null.
    self.assertEqual(imageDF.count(), 3)
    validImages = imageDF.filter(col("image").isNotNull())
    self.assertEqual(validImages.count(), 2)

    # Height, width and channel count are compared against array.shape.
    img = validImages.first().image
    self.assertEqual(img.height, array.shape[0])
    self.assertEqual(img.width, array.shape[1])
    self.assertEqual(
        imageIO.imageTypeByOrdinal(img.mode).nChannels, array.shape[2])
    # array comes out of PIL and is in RGB order
    self.assertEqual(img.data, array.tobytes())
def test_readImages(self):
    # Test that reading images with a custom decode function produces a
    # DataFrame with an "image" column whose first non-null row matches the
    # reference `array`.
    imageDF = imageIO._readImagesWithCustomFn(
        "file/path",
        decode_f=imageIO.PIL_decode,
        numPartition=2,
        sc=self.binaryFilesMock)
    # assertIn reports the actual column names when the check fails.
    self.assertIn("image", imageDF.schema.names)

    # The DF should have 2 images and 1 null.
    self.assertEqual(imageDF.count(), 3)
    validImages = imageDF.filter(col("image").isNotNull())
    self.assertEqual(validImages.count(), 2)

    # Height, width and channel count are compared against array.shape.
    img = validImages.first().image
    self.assertEqual(img.height, array.shape[0])
    self.assertEqual(img.width, array.shape[1])
    self.assertEqual(
        imageIO.imageTypeByOrdinal(img.mode).nChannels, array.shape[2])
    # array comes out of PIL and is in RGB order
    self.assertEqual(img.data, array.tobytes())