def test_pretrained_keras_udf(self):
    """ Must be able to register a pretrained image model as UDF """
    # Register an InceptionV3 model
    fh_name = "test_keras_pretrained_iv3_model"
    registerKerasImageUDF(fh_name, InceptionV3(weights="imagenet"))
    self._assert_function_exists(fh_name)
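# --- Usage sketch (not part of the test above) ---
# Assumptions: a SparkSession `spark` is available and "/path/to/images" stands in for a
# real image directory; the view name "sample_images" is illustrative. Once registered,
# the UDF can be invoked from Spark SQL on an image-schema column.
from sparkdl import readImages

image_df = readImages("/path/to/images")
image_df.createOrReplaceTempView("sample_images")
spark.sql(
    "select image, test_keras_pretrained_iv3_model(image) as predictions "
    "from sample_images").show()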
def test_simple_keras_udf(self):
    """ Simple Keras sequential model """
    # Notice that the input layer for an image UDF model
    # must be of shape (width, height, numChannels)
    # The leading batch size is taken care of by Keras
    with IsolatedSession(using_keras=True) as issn:
        model = Sequential()
        model.add(Flatten(input_shape=(640, 480, 3)))
        model.add(Dense(units=64))
        model.add(Activation('relu'))
        model.add(Dense(units=10))
        model.add(Activation('softmax'))
        # Initialize the variables
        init_op = tf.global_variables_initializer()
        issn.run(init_op)
        makeGraphUDF(issn.graph,
                     'my_keras_model_udf',
                     model.outputs,
                     {tfx.op_name(issn.graph, model.inputs[0]): 'image_col'})
        # Run the training procedure
        # Export the graph in this IsolatedSession as a GraphFunction
        # gfn = issn.asGraphFunction(model.inputs, model.outputs)
        fh_name = "test_keras_simple_sequential_model"
        registerKerasImageUDF(fh_name, model)

    self._assert_function_exists(fh_name)
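# --- Shape sanity-check sketch (not part of the test above) ---
# Assumptions: the same Keras layers as in the test; `dummy_batch` is illustrative data.
# It demonstrates the comment about input_shape=(width, height, numChannels): Keras adds
# the leading batch dimension itself, so one 640x480 RGB image is fed as (1, 640, 480, 3)
# and the softmax head yields predictions of shape (1, 10).
import numpy as np
from keras.models import Sequential
from keras.layers import Activation, Dense, Flatten

model = Sequential()
model.add(Flatten(input_shape=(640, 480, 3)))
model.add(Dense(units=64))
model.add(Activation('relu'))
model.add(Dense(units=10))
model.add(Activation('softmax'))

dummy_batch = np.random.rand(1, 640, 480, 3)
print(model.predict(dummy_batch).shape)  # expected: (1, 10)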
def test_composite_udf(self):
    """ Composite Keras Image UDF registration """
    df = get_image_paths_df(self.sql)

    def keras_load_img(fpath):
        from keras.preprocessing.image import load_img, img_to_array
        import numpy as np
        img = load_img(fpath, target_size=(299, 299))
        return img_to_array(img).astype(np.uint8)

    def pil_load_spimg(fpath):
        from PIL import Image
        import numpy as np
        img_arr = np.array(Image.open(fpath), dtype=np.uint8)
        # PIL is RGB, image schema is BGR => need to flip the channels
        return imageArrayToStruct(_reverseChannels(img_arr))

    def keras_load_spimg(fpath):
        # Keras loads images in RGB order, ImageSchema expects BGR => need to flip
        return imageArrayToStruct(_reverseChannels(keras_load_img(fpath)))

    # Load images with Keras and store them in our image schema
    JVMAPI.registerUDF('keras_load_spimg', keras_load_spimg,
                       ImageSchema.imageSchema['image'].dataType)
    JVMAPI.registerUDF('pil_load_spimg', pil_load_spimg,
                       ImageSchema.imageSchema['image'].dataType)

    # Register an InceptionV3 model
    registerKerasImageUDF("iv3_img_pred",
                          InceptionV3(weights="imagenet"),
                          keras_load_img)

    run_sql = self.session.sql

    # Choice 1: manually chain the functions in SQL
    df1 = run_sql(
        "select iv3_img_pred(keras_load_spimg(fpath)) as preds from _test_image_paths_df")
    preds1 = np.array(df1.select("preds").rdd.collect())

    # Choice 2: build a pipelined UDF and directly use it in SQL
    JVMAPI.registerPipeline("load_img_then_iv3_pred",
                            ["keras_load_spimg", "iv3_img_pred"])
    df2 = run_sql(
        "select load_img_then_iv3_pred(fpath) as preds from _test_image_paths_df")
    preds2 = np.array(df2.select("preds").rdd.collect())

    # Choice 3: create the image tensor input table first and apply the Keras model
    df_images = run_sql(
        "select pil_load_spimg(fpath) as image from _test_image_paths_df")
    df_images.createOrReplaceTempView("_test_images_df")
    df3 = run_sql(
        "select iv3_img_pred(image) as preds from _test_images_df")
    preds3 = np.array(df3.select("preds").rdd.collect())

    self.assertTrue(len(preds1) == len(preds2))
    np.testing.assert_allclose(preds1, preds2)
    np.testing.assert_allclose(preds2, preds3)
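# --- Fixture sketch (hypothetical, not the project's actual helper) ---
# The test above assumes `get_image_paths_df` yields a DataFrame with a string column
# `fpath` of local image paths, exposed as the temp view "_test_image_paths_df" that the
# SQL statements query. A minimal stand-in could look like this; `spark` and `img_dir`
# are assumptions.
import glob

def make_image_paths_df(spark, img_dir):
    paths = [(p,) for p in glob.glob(img_dir + "/*.jpg")]
    df = spark.createDataFrame(paths, ["fpath"])
    df.createOrReplaceTempView("_test_image_paths_df")
    return df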
def test_composite_udf(self):
    """ Composite Keras Image UDF registration """
    df = get_image_paths_df(self.sql)

    def keras_load_img(fpath):
        from keras.preprocessing.image import load_img, img_to_array
        import numpy as np
        img = load_img(fpath, target_size=(299, 299))
        return img_to_array(img).astype(np.uint8)

    def pil_load_spimg(fpath):
        from PIL import Image
        import numpy as np
        img_arr = np.array(Image.open(fpath), dtype=np.uint8)
        return imageArrayToStruct(img_arr)

    def keras_load_spimg(fpath):
        return imageArrayToStruct(keras_load_img(fpath))

    # Load image with Keras and store it in our image schema
    JVMAPI.registerUDF('keras_load_spimg', keras_load_spimg, imageSchema)
    JVMAPI.registerUDF('pil_load_spimg', pil_load_spimg, imageSchema)

    # Register an InceptionV3 model
    registerKerasImageUDF("iv3_img_pred",
                          InceptionV3(weights="imagenet"),
                          keras_load_img)

    run_sql = self.session.sql

    # Choice 1: manually chain the functions in SQL
    df1 = run_sql(
        "select iv3_img_pred(keras_load_spimg(fpath)) as preds from _test_image_paths_df")
    preds1 = np.array(df1.select("preds").rdd.collect())

    # Choice 2: build a pipelined UDF and directly use it in SQL
    JVMAPI.registerPipeline("load_img_then_iv3_pred",
                            ["keras_load_spimg", "iv3_img_pred"])
    df2 = run_sql(
        "select load_img_then_iv3_pred(fpath) as preds from _test_image_paths_df")
    preds2 = np.array(df2.select("preds").rdd.collect())

    # Choice 3: create the image tensor input table first and apply the Keras model
    df_images = run_sql(
        "select pil_load_spimg(fpath) as image from _test_image_paths_df")
    df_images.createOrReplaceTempView("_test_images_df")
    df3 = run_sql(
        "select iv3_img_pred(image) as preds from _test_images_df")
    preds3 = np.array(df3.select("preds").rdd.collect())

    self.assertTrue(len(preds1) == len(preds2))
    np.testing.assert_allclose(preds1, preds2)
    np.testing.assert_allclose(preds2, preds3)
p1 = udf(_p1, DoubleType())
df = tested_df.withColumn("p_1", p1(tested_df.probability))
wrong_df = df.orderBy(expr("abs(p_1 - label)"), ascending=False)
wrong_df.select("filePath", "p_1", "label").limit(10).show()

# COMMAND ----------

from sparkdl import readImages, DeepImagePredictor

image_df = readImages(img_dir)

predictor = DeepImagePredictor(inputCol="image", outputCol="predicted_labels",
                               modelName="InceptionV3", decodePredictions=True, topK=10)
predictions_df = predictor.transform(image_df)

# COMMAND ----------

df = p_model.transform(image_df)

# COMMAND ----------

from keras.applications import InceptionV3
from sparkdl.udf.keras_image_model import registerKerasImageUDF

registerKerasImageUDF("my_keras_inception_udf", InceptionV3(weights="imagenet"))

# COMMAND ----------
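# --- Follow-up usage sketch ---
# Assumptions: the `image_df` loaded by readImages above is still in scope and `spark` is
# the active SparkSession; the view name "sample_images" is illustrative. The registered
# Keras UDF can then be applied to the image column from SQL.
image_df.createOrReplaceTempView("sample_images")
spark.sql(
    "select image, my_keras_inception_udf(image) as predictions from sample_images").show()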
model.add(Dense(units=1, activation='sigmoid'))

model_path = "simple-binary-classification"
model.save(model_path)

# Create transformer and apply it to our input data
transformer = KerasTransformer(inputCol="features", outputCol="predictions",
                               modelFile=model_path)
final_df = transformer.transform(input_df)
final_df.show()

from keras.applications import InceptionV3
from sparkdl.udf.keras_image_model import registerKerasImageUDF

registerKerasImageUDF("inceptionV3_udf", InceptionV3(weights="imagenet"))

from keras.applications import InceptionV3
from sparkdl.udf.keras_image_model import registerKerasImageUDF

def keras_load_img(fpath):
    from keras.preprocessing.image import load_img, img_to_array
    import numpy as np
    img = load_img(fpath, target_size=(299, 299))
    return img_to_array(img).astype(np.uint8)

registerKerasImageUDF("inceptionV3_udf_with_preprocessing",
                      InceptionV3(weights="imagenet"),
                      keras_load_img)
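# --- Usage sketch for the registered UDFs ---
# Assumptions: a SparkSession `spark`, Spark 2.3+ for pyspark's ImageSchema image reader,
# and "/path/to/images" as a stand-in directory; the view name "sample_images" is
# illustrative. As in the composite-UDF test above, both registered functions are applied
# to an image-schema column from SQL.
from pyspark.ml.image import ImageSchema

image_df = ImageSchema.readImages("/path/to/images")
image_df.createOrReplaceTempView("sample_images")
spark.sql(
    "select image, inceptionV3_udf(image) as preds from sample_images").show()
spark.sql(
    "select image, inceptionV3_udf_with_preprocessing(image) as preds "
    "from sample_images").show()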