def _rowWithImage(self, img):
    """Wrap ``img`` as a one-element list of image-struct field values.

    ``img`` is cast to ``uint8`` and converted with
    ``imageIO.imageArrayToStruct``. The resulting struct's attributes are then
    read back in the field order of
    ``ImageSchema.imageSchema['image'].dataType``.

    :param img: object exposing ``astype`` (e.g. a numpy array).
    :return: a list holding a single list of field values.
    """
    struct = imageIO.imageArrayToStruct(img.astype('uint8'))
    schema_fields = ImageSchema.imageSchema['image'].dataType
    # re-order row to avoid pyspark bug
    ordered_values = [getattr(struct, f.name) for f in schema_fields]
    return [ordered_values]
def check_image_round_trip(img_arr):
    """Assert that ``img_arr`` survives struct encoding and graph decoding.

    The array is turned into an image struct, its ``data`` field is coerced to
    ``bytes``, and the result is decoded with ``exec_gfn_spimg_decode`` using
    the dtype looked up from the struct's ``mode``. The decoded array must be
    element-wise equal to the input.

    ``self`` is not a parameter; it is resolved from the enclosing scope.
    """
    fields = imageArrayToStruct(img_arr).asDict()
    fields['data'] = bytes(fields['data'])
    decode_dtype = imageTypeByOrdinal(fields['mode']).dtype
    decoded = exec_gfn_spimg_decode(fields, decode_dtype)
    self.assertTrue(np.all(decoded == img_arr))
def test_pipeline(self):
    """Composed converter + Xception graph must match Keras predictions.

    For every sample JPEG, the reference prediction comes from the Keras
    Xception model; the tested prediction comes from running the composed
    ``GraphFunction`` inside an ``IsolatedSession``, fed with the fields of
    the image struct built from the same preprocessed input.
    """
    sample_paths = glob(os.path.join(_getSampleJPEGDir(), '*.jpg'))

    keras_model = Xception(weights="imagenet")
    composed = GraphFunction.fromList([
        ('spimage', gfac.buildSpImageConverter(SparkMode.RGB_FLOAT32)),
        ('xception', GraphFunction.fromKeras(keras_model)),
    ])

    for path in sample_paths:
        # Drop the first and last input dimensions to obtain the target size.
        size = tuple(keras_model.input.shape.as_list()[1:-1])
        pil_img = load_img(path, target_size=size)
        batch = np.expand_dims(img_to_array(pil_img), axis=0)
        model_input = xcpt.preprocess_input(batch)
        expected = keras_model.predict(model_input)

        struct_fields = imageArrayToStruct(model_input).asDict()
        struct_fields['data'] = bytes(struct_fields['data'])

        with IsolatedSession() as issn:
            # Need blank import scope name so that spimg fields match the input names
            feeds, fetches = issn.importGraphFunction(composed, prefix="")
            feed_dict = {}
            for tnsr in feeds:
                feed_dict[tnsr] = struct_fields[tfx.op_name(tnsr, issn.graph)]
            actual = issn.run(fetches[0], feed_dict=feed_dict)
            # Uncomment the line below to see the graph
            # tfx.write_visualization_html(issn.graph,
            # NamedTemporaryFile(prefix="gdef", suffix=".html").name)

        self.assertTrue(np.all(actual == expected))
def test_pipeline(self):
    """Verify that chaining the image converter and Xception composes correctly.

    Each sample JPEG is preprocessed once; the Keras model's output on that
    input is the reference, and the output of the composed ``GraphFunction``
    (run in an ``IsolatedSession`` and fed the image-struct fields) must equal
    it element-wise.
    """
    jpeg_glob = os.path.join(_getSampleJPEGDir(), '*.jpg')
    img_fpaths = glob(jpeg_glob)
    reference_model = Xception(weights="imagenet")
    converter_stage = ('spimage', gfac.buildSpImageConverter(SparkMode.RGB_FLOAT32))
    model_stage = ('xception', GraphFunction.fromKeras(reference_model))
    pipeline = GraphFunction.fromList([converter_stage, model_stage])

    for fpath in img_fpaths:
        input_dims = reference_model.input.shape.as_list()
        # Strip the leading and trailing dimensions to get the resize target.
        resize_to = tuple(input_dims[1:-1])
        loaded = load_img(fpath, target_size=resize_to)
        as_batch = np.expand_dims(img_to_array(loaded), axis=0)
        prepared = xcpt.preprocess_input(as_batch)
        reference_preds = reference_model.predict(prepared)

        spimg_fields = imageArrayToStruct(prepared).asDict()
        spimg_fields['data'] = bytes(spimg_fields['data'])

        with IsolatedSession() as issn:
            # Need blank import scope name so that spimg fields match the input names
            feeds, fetches = issn.importGraphFunction(pipeline, prefix="")
            feed_dict = {
                tnsr: spimg_fields[tfx.op_name(issn.graph, tnsr)]
                for tnsr in feeds
            }
            pipeline_preds = issn.run(fetches[0], feed_dict=feed_dict)
            # Uncomment the line below to see the graph
            # tfx.write_visualization_html(issn.graph,
            # NamedTemporaryFile(prefix="gdef", suffix=".html").name)

        self.assertTrue(np.all(pipeline_preds == reference_preds))
def rowWithImage(img):
    """Return ``img`` as a single-row list of image-struct field values.

    The input is cast to ``uint8`` and converted to an image struct with
    ``imageIO.SparkMode.RGB``. The struct's attributes are read back in the
    order in which ``imageIO.imageSchema`` yields its fields.

    :param img: object exposing ``astype`` (e.g. a numpy array).
    :return: a list holding a single list of field values.
    """
    struct = imageIO.imageArrayToStruct(img.astype('uint8'), imageIO.SparkMode.RGB)
    # re-order row to avoid pyspark bug
    ordered_values = [getattr(struct, f.name) for f in imageIO.imageSchema]
    return [ordered_values]
def _test(array):
    """Check struct conversion of ``array`` and its inverse.

    Asserts that the struct's ``height``, ``width`` and ``data`` match the
    array's first two shape entries and its ``tobytes()`` output, and that
    ``imageIO.imageStructToArray`` reproduces the original array.

    ``self`` is not a parameter; it is resolved from the enclosing scope.
    """
    # Unpacking into three names requires a 3-entry shape.
    rows, cols, _ = array.shape
    struct = imageIO.imageArrayToStruct(array)
    self.assertEqual(struct.height, rows)
    self.assertEqual(struct.width, cols)
    self.assertEqual(struct.data, array.tobytes())

    round_tripped = imageIO.imageStructToArray(struct)
    np.testing.assert_array_equal(array, round_tripped)
def create_image(img_id, img_in_bytes, height, width, encoding, is_bigendian):
    """Convert a raw 3-channel ``uint8`` pixel buffer into a sparkdl image struct.

    The buffer is interpreted as ``height x width x 3`` bytes, converted with
    ``cv2.COLOR_RGB2BGR`` and passed to ``imageArrayToStruct``.

    :param img_id: accepted for interface compatibility; not used.
    :param img_in_bytes: bytes-like pixel data (``bytes``, ``bytearray``, ...).
    :param height: number of image rows.
    :param width: number of image columns.
    :param encoding: accepted for interface compatibility; not used.
    :param is_bigendian: accepted for interface compatibility; not used.
    :return: the value returned by ``imageArrayToStruct``.
    :raises ValueError: if the buffer size does not equal ``height * width * 3``.
    """
    import numpy as np
    import cv2
    from sparkdl.image.imageIO import imageArrayToStruct

    # np.fromstring's binary mode is deprecated, and on Python 3 str() of a
    # bytes-like object yields its repr rather than the raw payload.
    # bytes() keeps the Python 2 behavior (bytes is str there) and produces
    # the actual pixel bytes on Python 3.
    pixels = np.frombuffer(bytes(img_in_bytes), dtype=np.uint8)
    rgb = pixels.reshape(height, width, 3)
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    return imageArrayToStruct(bgr)
def test_imageArrayToStruct(self):
    """Exercise ``imageIO.imageArrayToStruct`` on the module-level ``array``.

    Covers the default conversion, an explicit cast to ``RGB_FLOAT32``, a
    channel-count mismatch and an unsafe cast; the last two must raise
    ``ValueError``.
    """
    SparkMode = imageIO.SparkMode
    # Unpacking into three names requires a 3-entry shape.
    rows, cols, _ = array.shape

    # Check converting with matching types
    matching = imageIO.imageArrayToStruct(array)
    self.assertEqual(matching.height, rows)
    self.assertEqual(matching.width, cols)
    self.assertEqual(matching.data, array.tobytes())

    # Check casting
    casted = imageIO.imageArrayToStruct(array, SparkMode.RGB_FLOAT32)
    self.assertEqual(casted.height, rows)
    self.assertEqual(casted.width, cols)
    self.assertEqual(len(casted.data), array.size * 4)

    # Check channel mismatch
    with self.assertRaises(ValueError):
        imageIO.imageArrayToStruct(array, SparkMode.FLOAT32)

    # Check that unsafe cast raises error
    float_pixels = np.zeros((3, 4, 3), dtype='float32')
    with self.assertRaises(ValueError):
        imageIO.imageArrayToStruct(float_pixels, SparkMode.RGB)
def create_image_dataframe(row):
    """Load the image at ``row.uri`` and pair its struct with ``row.label``.

    The struct produced by ``imageArrayToStruct`` is rebuilt as a new ``Row``
    whose ``mode`` field is overwritten with ``16``.

    :param row: object exposing ``uri`` and ``label`` attributes.
    :return: a ``(Row, label)`` tuple.
    """
    struct = imageArrayToStruct(load_image_from_uri(row.uri))
    # updated Mode to be 16 _OcvType(name="CV_8UC3", ord=16, nChannels=3, dtype="uint8"),
    # reference https://github.com/databricks/spark-deep-learning/blob/master/python/sparkdl/image/imageIO.py
    fields = struct.asDict()
    fields['mode'] = 16
    return Row(**fields), row.label
def test_resize(self):
    """Check ``imageIO._resizeFunction`` on the module-level ``array``.

    Verifies that the resized image has the requested height and width, that
    every field named in ``imageIO.imageSchema`` can be read from it, that
    resizing to the image's own size yields a row equal to the input, and
    that a 3-element size raises ``ValueError``.
    """
    imgAsRow = imageIO.imageArrayToStruct(array)
    smaller = imageIO._resizeFunction([4, 5])
    smallerImg = smaller(imgAsRow)
    # Indexing raises if a schema field is missing from the resized row.
    for n in imageIO.imageSchema.names:
        smallerImg[n]
    self.assertEqual(smallerImg.height, 4)
    self.assertEqual(smallerImg.width, 5)

    sameImage = imageIO._resizeFunction([imgAsRow.height, imgAsRow.width])(imgAsRow)
    # Compare against the input row; comparing sameImage with itself could
    # never fail and therefore verified nothing.
    self.assertEqual(imgAsRow, sameImage)

    self.assertRaises(ValueError, imageIO._resizeFunction, [1, 2, 3])
def udf_impl(spimg):
    """Run ``preprocessor`` on an image struct and return the result as a struct.

    The struct is decoded into a PIL image, written to a temporary PNG file,
    and that file's path is handed to ``preprocessor`` (resolved from the
    enclosing scope). The returned array is cast to ``uint8`` and converted
    back with ``imageArrayToStruct``.

    :param spimg: image struct exposing ``width``, ``height`` and ``data``.
    :return: the value returned by ``imageArrayToStruct``.
    :raises AssertionError: if ``preprocessor`` does not return a numpy array.
    """
    import numpy as np
    from PIL import Image
    from tempfile import NamedTemporaryFile
    from sparkdl.image.imageIO import imageArrayToStruct, imageType

    pil_mode = imageType(spimg).pilMode
    img_shape = (spimg.width, spimg.height)
    img = Image.frombytes(pil_mode, img_shape, bytes(spimg.data))

    # Warning: must use lossless format to guarantee consistency
    # The context manager closes (and thereby deletes) the temporary file
    # deterministically, even when saving or preprocessing raises.
    with NamedTemporaryFile(suffix='.png') as temp_fp:
        img.save(temp_fp, 'PNG')
        # Ensure all bytes are on disk before the file is re-read by name.
        temp_fp.flush()
        img_arr_reloaded = preprocessor(temp_fp.name)

    assert isinstance(img_arr_reloaded, np.ndarray), \
        "expect preprocessor to return a numpy array"
    img_arr_reloaded = img_arr_reloaded.astype(np.uint8)
    return imageArrayToStruct(img_arr_reloaded)
def create_image(img_in_bytes, height, width, encoding, is_bigendian):
    """Decode base64 3-channel ``uint8`` pixel data into a sparkdl image struct.

    The decoded buffer is interpreted as ``height x width x 3`` bytes,
    converted with ``cv2.COLOR_RGB2BGR`` and passed to ``imageArrayToStruct``.

    :param img_in_bytes: base64-encoded pixel data; may be empty or ``None``.
    :param height: number of image rows.
    :param width: number of image columns.
    :param encoding: accepted for interface compatibility; not used.
    :param is_bigendian: accepted for interface compatibility; not used.
    :return: the value returned by ``imageArrayToStruct``, or ``None`` when
        ``img_in_bytes`` is falsy.
    :raises ValueError: if the decoded size does not equal ``height * width * 3``.
    """
    # Imported locally, like the other dependencies of this function.
    import base64
    import numpy as np
    import cv2
    from sparkdl.image.imageIO import imageArrayToStruct

    # potentially, due to the time alignment there are image entries with empty data
    if not img_in_bytes:
        return None

    decoded = base64.standard_b64decode(img_in_bytes)
    # np.fromstring's binary mode is deprecated; frombuffer is its replacement.
    pixels = np.frombuffer(decoded, dtype=np.uint8)
    rgb = pixels.reshape(height, width, 3)
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    return imageArrayToStruct(bgr)
def udf_impl(spimg):
    """Run ``preprocessor`` on an image struct and return the result as a struct.

    The struct is turned into a PIL image via ``imageIO.imageStructToPIL``,
    written to a temporary PNG file, and that file's path is handed to
    ``preprocessor`` (resolved from the enclosing scope). The returned array
    is cast to ``uint8``, passed through ``imageIO.fixColorChannelOrdering``
    with ``currentOrder='RGB'`` and converted back with ``imageArrayToStruct``.

    :param spimg: image struct accepted by ``imageIO.imageStructToPIL``.
    :return: the value returned by ``imageArrayToStruct``.
    :raises AssertionError: if ``preprocessor`` does not return a numpy array.
    """
    import numpy as np
    from tempfile import NamedTemporaryFile
    from sparkdl.image.imageIO import imageArrayToStruct

    img = imageIO.imageStructToPIL(spimg)

    # Warning: must use lossless format to guarantee consistency
    # The context manager closes (and thereby deletes) the temporary file
    # deterministically, even when saving or preprocessing raises.
    with NamedTemporaryFile(suffix='.png') as temp_fp:
        img.save(temp_fp, 'PNG')
        # Ensure all bytes are on disk before the file is re-read by name.
        temp_fp.flush()
        img_arr_reloaded = preprocessor(temp_fp.name)

    assert isinstance(img_arr_reloaded, np.ndarray), \
        "expect preprocessor to return a numpy array"
    img_arr_reloaded = img_arr_reloaded.astype(np.uint8)
    # Keras works in RGB order, need to fix the order
    img_arr_reloaded = imageIO.fixColorChannelOrdering(
        currentOrder='RGB', imgAry=img_arr_reloaded)
    return imageArrayToStruct(img_arr_reloaded)
def test_resize(self):
    """Check ``imageIO.createResizeImageUDF`` on the module-level ``array``.

    Covers rejection of a 3-element size, the dimensions of a resized image,
    agreement with PIL's own resize, the same-size case, and the presence of
    every image-schema field in the result.
    """
    with self.assertRaises(ValueError):
        imageIO.createResizeImageUDF([1, 2, 3])

    shrink = imageIO.createResizeImageUDF([4, 5]).func
    source_row = imageIO.imageArrayToStruct(array)
    shrunk_row = shrink(source_row)
    self.assertEqual(shrunk_row.height, 4)
    self.assertEqual(shrunk_row.width, 5)

    # Compare to PIL resizing
    pil_source = PIL.Image.fromarray(obj=imageIO._reverseChannels(array))
    pil_resized = pil_source.resize((5, 4))
    expected_pixels = imageIO._reverseChannels(np.asarray(pil_resized))
    actual_pixels = imageIO.imageStructToArray(shrunk_row)
    np.testing.assert_array_equal(expected_pixels, actual_pixels)

    # Test that resize with the same size is a no-op
    keep_size = imageIO.createResizeImageUDF((source_row.height, source_row.width)).func
    unchanged_row = keep_size(source_row)
    self.assertEqual(source_row, unchanged_row)

    # Test that we have a valid image schema (all fields are in)
    for field_name in ImageSchema.imageSchema['image'].dataType.names:
        shrunk_row[field_name]
def test_resize(self):
    """Validate the resize UDF built by ``imageIO.createResizeImageUDF``.

    Uses the module-level ``array`` as input and checks, in order: a
    3-element size is rejected, the output has the requested size, the pixels
    match a PIL resize, resizing to the original size returns an equal row,
    and all image-schema fields can be read from the output.
    """
    target_height, target_width = 4, 5

    self.assertRaises(ValueError, imageIO.createResizeImageUDF, [1, 2, 3])

    resize_fn = imageIO.createResizeImageUDF([target_height, target_width]).func
    original = imageIO.imageArrayToStruct(array)
    resized = resize_fn(original)
    self.assertEqual(resized.height, target_height)
    self.assertEqual(resized.width, target_width)

    # Compare to PIL resizing
    # PIL's resize takes (width, height), hence the swapped order.
    reference_img = PIL.Image.fromarray(
        obj=imageIO._reverseChannels(array)).resize((target_width, target_height))
    reference_pixels = imageIO._reverseChannels(np.asarray(reference_img))
    np.testing.assert_array_equal(
        reference_pixels, imageIO.imageStructToArray(resized))

    # Test that resize with the same size is a no-op
    identity_fn = imageIO.createResizeImageUDF(
        (original.height, original.width)).func
    self.assertEqual(original, identity_fn(original))

    # Test that we have a valid image schema (all fields are in)
    schema_names = ImageSchema.imageSchema['image'].dataType.names
    for name in schema_names:
        resized[name]
def load_image_uri_impl(uri):
    """Load ``uri`` with ``loader`` and convert the result to an image struct.

    Any exception raised while loading or converting is swallowed and
    ``None`` is returned instead.

    :param uri: value passed through to ``loader``.
    :return: the image struct, or ``None`` on any failure.
    """
    try:
        loaded = loader(uri)
        struct = imageArrayToStruct(loaded)
        return struct
    except:  # pylint: disable=bare-except
        return None
#arr = rdd.take(1)[0] # #Image.open(BytesIO(arr)) # COMMAND ---------- from PIL import Image from io import BytesIO from pyspark.sql.types import BinaryType, StructType, StructField from functools import partial rdd = fin.flatMap( partial(msg_map, func=lambda r: r.data, conn=conn_d['/center_camera/image_color/compressed']) ) rddTuple = rdd.map(lambda x: (bytearray(x),)) schema = StructType([StructField('rawdata', BinaryType(), False)]) df = rddTuple.toDF(schema) df.cache() # COMMAND ---------- from sparkdl.image.imageIO import PIL_decode, imageArrayToStruct from pyspark.sql.functions import col from pyspark.ml.image import ImageSchema imageUdf = udf(lambda b: imageArrayToStruct(PIL_decode(b)), ImageSchema.imageSchema['image'].dataType) img = df.withColumn('image', imageUdf(col('rawdata'))) display(img.select('image'))
def do_nothing(imgRow):
    """Round-trip ``imgRow`` through an array and back into an image struct.

    :param imgRow: image struct accepted by ``imageIO.imageStructToArray``.
    :return: the value returned by ``imageIO.imageArrayToStruct``.
    """
    pixels = imageIO.imageStructToArray(imgRow)
    rebuilt = imageIO.imageArrayToStruct(pixels)
    return rebuilt
def keras_load_spimg(fpath):
    """Load ``fpath`` with ``keras_load_img`` and return it as an image struct.

    :param fpath: value passed through to ``keras_load_img``.
    :return: the value returned by ``imageArrayToStruct``.
    """
    loaded = keras_load_img(fpath)
    return imageArrayToStruct(loaded)
def pil_load_spimg(fpath):
    """Open ``fpath`` with PIL and return its ``uint8`` pixels as an image struct.

    :param fpath: path (or file object) accepted by ``PIL.Image.open``.
    :return: the value returned by ``imageArrayToStruct``.
    """
    import numpy as np
    from PIL import Image

    pil_img = Image.open(fpath)
    pixels = np.array(pil_img, dtype=np.uint8)
    return imageArrayToStruct(pixels)
def load(uri):
    """Load ``uri`` with ``loader`` and convert the result to an image struct.

    :param uri: value passed through to ``loader``.
    :return: the value returned by ``imageIO.imageArrayToStruct``.
    """
    return imageIO.imageArrayToStruct(loader(uri))
def rowWithImage(img):
    """Convert ``img`` to an image struct and return its values as one row.

    ``img`` is cast to ``uint8`` and converted with
    ``imageIO.SparkMode.RGB``; the struct's attributes are then collected in
    the order in which ``imageIO.imageSchema`` yields its fields.

    :param img: object exposing ``astype`` (e.g. a numpy array).
    :return: a list holding a single list of field values.
    """
    as_struct = imageIO.imageArrayToStruct(
        img.astype('uint8'), imageIO.SparkMode.RGB)
    # re-order row to avoid pyspark bug
    values = []
    for schema_field in imageIO.imageSchema:
        values.append(getattr(as_struct, schema_field.name))
    return [values]
def do_nothing(imgRow):
    """Round-trip ``imgRow`` through an array, keeping its spark mode.

    The image type is looked up from the row, and its ``sparkMode`` is passed
    back to ``imageIO.imageArrayToStruct`` when rebuilding the struct.

    :param imgRow: image struct accepted by ``imageIO.imageType``.
    :return: the value returned by ``imageIO.imageArrayToStruct``.
    """
    row_type = imageIO.imageType(imgRow)
    pixels = imageIO.imageStructToArray(imgRow)
    rebuilt = imageIO.imageArrayToStruct(pixels, row_type.sparkMode)
    return rebuilt
def load_image_uri_impl(uri):
    """Load ``uri``, reverse its channels and convert it to an image struct.

    Every exception (``BaseException`` and subclasses) raised while loading,
    reversing or converting is swallowed, and ``None`` is returned instead.

    :param uri: value passed through to ``loader``.
    :return: the image struct, or ``None`` on any failure.
    """
    struct = None
    try:
        struct = imageArrayToStruct(_reverseChannels(loader(uri)))
    except BaseException:  # pylint: disable=bare-except
        pass
    return struct
def pil_load_spimg(fpath):
    """Open ``fpath`` with PIL and return it as a channel-reversed image struct.

    :param fpath: path (or file object) accepted by ``PIL.Image.open``.
    :return: the value returned by ``imageArrayToStruct``.
    """
    import numpy as np
    from PIL import Image

    pil_img = Image.open(fpath)
    rgb_pixels = np.array(pil_img, dtype=np.uint8)
    # PIL is RGB, image schema is BGR => need to flip the channels
    bgr_pixels = _reverseChannels(rgb_pixels)
    return imageArrayToStruct(bgr_pixels)
def keras_load_spimg(fpath):
    """Load ``fpath`` with ``keras_load_img`` as a channel-reversed image struct.

    :param fpath: value passed through to ``keras_load_img``.
    :return: the value returned by ``imageArrayToStruct``.
    """
    rgb_pixels = keras_load_img(fpath)
    # Keras loads image in RGB order, ImageSchema expects BGR => need to flip
    bgr_pixels = _reverseChannels(rgb_pixels)
    return imageArrayToStruct(bgr_pixels)
def check_image_round_trip(img_arr):
    """Assert that ``img_arr`` survives struct encoding and graph decoding.

    The array is turned into an image struct, its ``data`` field is coerced to
    ``bytes``, and the result is decoded with ``exec_gfn_spimg_decode``, which
    also receives the struct's ``mode`` value. The decoded array must be
    element-wise equal to the input.

    ``self`` is not a parameter; it is resolved from the enclosing scope.
    """
    fields = imageArrayToStruct(img_arr).asDict()
    fields['data'] = bytes(fields['data'])
    decoded = exec_gfn_spimg_decode(fields, fields['mode'])
    matches = np.all(decoded == img_arr)
    self.assertTrue(matches)