def test_pretrained_keras_udf(self):
     """ Registering a pretrained image model as a UDF must succeed """
     udf_name = "test_keras_pretrained_iv3_model"
     # InceptionV3 with ImageNet weights serves as the pretrained model
     pretrained_model = InceptionV3(weights="imagenet")
     registerKerasImageUDF(udf_name, pretrained_model)
     # Delegate the existence check to the test-case helper
     self._assert_function_exists(udf_name)
    def test_simple_keras_udf(self):
        """ Register a small hand-built Keras Sequential model as an image UDF """
        udf_name = "test_keras_simple_sequential_model"

        # The input layer of an image UDF model is declared as
        # (width, height, numChannels); Keras supplies the leading batch size.
        with IsolatedSession(using_keras=True) as session:
            model = Sequential()
            layers = (
                Flatten(input_shape=(640, 480, 3)),
                Dense(units=64),
                Activation('relu'),
                Dense(units=10),
                Activation('softmax'),
            )
            for layer in layers:
                model.add(layer)

            # Initialize the variables inside this session
            session.run(tf.global_variables_initializer())

            # Build a graph UDF whose input op is fed from 'image_col'
            input_op_name = tfx.op_name(session.graph, model.inputs[0])
            makeGraphUDF(session.graph,
                         'my_keras_model_udf',
                         model.outputs,
                         {input_op_name: 'image_col'})

            # Register the same model as a Keras image UDF
            registerKerasImageUDF(udf_name, model)

        self._assert_function_exists(udf_name)
    def test_composite_udf(self):
        """ Composite Keras Image UDF registration

        Scores the same image paths with InceptionV3 through three routes --
        chained UDF calls in SQL, a registered pipeline UDF, and an image
        table built up front -- and checks that the predictions agree.
        """
        # NOTE(review): another `test_composite_udf` is defined further down;
        # if both sit in the same class the later one replaces this one --
        # confirm which version is intended.

        # Only the call matters: the returned DataFrame is not used, and the
        # SQL below refers to `_test_image_paths_df` by name.
        # NOTE(review): presumably this helper registers that view -- confirm.
        get_image_paths_df(self.sql)

        def keras_load_img(fpath):
            # Load with Keras at 299x299 and return a uint8 array
            from keras.preprocessing.image import load_img, img_to_array
            import numpy as np
            img = load_img(fpath, target_size=(299, 299))
            return img_to_array(img).astype(np.uint8)

        def pil_load_spimg(fpath):
            from PIL import Image
            import numpy as np
            img_arr = np.array(Image.open(fpath), dtype=np.uint8)
            # PIL is RGB, image schema is BGR => need to flip the channels
            return imageArrayToStruct(_reverseChannels(img_arr))

        def keras_load_spimg(fpath):
            # Keras loads image in RGB order, ImageSchema expects BGR => need to flip
            return imageArrayToStruct(_reverseChannels(keras_load_img(fpath)))

        # Register both loaders; each returns a value of the image schema's type
        image_type = ImageSchema.imageSchema['image'].dataType
        JVMAPI.registerUDF('keras_load_spimg', keras_load_spimg, image_type)
        JVMAPI.registerUDF('pil_load_spimg', pil_load_spimg, image_type)

        # Register an InceptionV3 model
        registerKerasImageUDF("iv3_img_pred", InceptionV3(weights="imagenet"),
                              keras_load_img)

        run_sql = self.session.sql

        def collect_preds(query):
            # Run `query` and gather its `preds` column into a numpy array
            return np.array(run_sql(query).select("preds").rdd.collect())

        # Choice 1: manually chain the functions in SQL
        preds1 = collect_preds(
            "select iv3_img_pred(keras_load_spimg(fpath)) as preds from _test_image_paths_df"
        )

        # Choice 2: build a pipelined UDF and directly use it in SQL
        JVMAPI.registerPipeline("load_img_then_iv3_pred",
                                ["keras_load_spimg", "iv3_img_pred"])
        preds2 = collect_preds(
            "select load_img_then_iv3_pred(fpath) as preds from _test_image_paths_df"
        )

        # Choice 3: create the image tensor input table first and apply the Keras model
        df_images = run_sql(
            "select pil_load_spimg(fpath) as image from _test_image_paths_df")
        df_images.createOrReplaceTempView("_test_images_df")
        preds3 = collect_preds(
            "select iv3_img_pred(image) as preds from _test_images_df")

        # assertEqual reports both lengths on failure, unlike assertTrue(a == b)
        self.assertEqual(len(preds1), len(preds2))
        np.testing.assert_allclose(preds1, preds2)
        np.testing.assert_allclose(preds2, preds3)
    def test_composite_udf(self):
        """ Composite Keras Image UDF registration

        Scores the same image paths with InceptionV3 through three routes --
        chained UDF calls in SQL, a registered pipeline UDF, and an image
        table built up front -- and checks that the predictions agree.
        """
        # NOTE(review): another `test_composite_udf` is defined earlier; if
        # both sit in the same class this definition replaces the earlier
        # one -- confirm which version is intended.

        # Only the call matters: the returned DataFrame is not used, and the
        # SQL below refers to `_test_image_paths_df` by name.
        # NOTE(review): presumably this helper registers that view -- confirm.
        get_image_paths_df(self.sql)

        def keras_load_img(fpath):
            # Load with Keras at 299x299 and return a uint8 array
            from keras.preprocessing.image import load_img, img_to_array
            import numpy as np
            img = load_img(fpath, target_size=(299, 299))
            return img_to_array(img).astype(np.uint8)

        def pil_load_spimg(fpath):
            from PIL import Image
            import numpy as np
            img_arr = np.array(Image.open(fpath), dtype=np.uint8)
            return imageArrayToStruct(img_arr)

        def keras_load_spimg(fpath):
            return imageArrayToStruct(keras_load_img(fpath))

        # Load image with Keras and store it in our image schema
        JVMAPI.registerUDF('keras_load_spimg', keras_load_spimg, imageSchema)
        JVMAPI.registerUDF('pil_load_spimg', pil_load_spimg, imageSchema)

        # Register an InceptionV3 model
        registerKerasImageUDF("iv3_img_pred",
                              InceptionV3(weights="imagenet"),
                              keras_load_img)

        run_sql = self.session.sql

        def collect_preds(query):
            # Run `query` and gather its `preds` column into a numpy array
            return np.array(run_sql(query).select("preds").rdd.collect())

        # Choice 1: manually chain the functions in SQL
        preds1 = collect_preds(
            "select iv3_img_pred(keras_load_spimg(fpath)) as preds from _test_image_paths_df"
        )

        # Choice 2: build a pipelined UDF and directly use it in SQL
        JVMAPI.registerPipeline("load_img_then_iv3_pred",
                                ["keras_load_spimg", "iv3_img_pred"])
        preds2 = collect_preds(
            "select load_img_then_iv3_pred(fpath) as preds from _test_image_paths_df"
        )

        # Choice 3: create the image tensor input table first and apply the Keras model
        df_images = run_sql(
            "select pil_load_spimg(fpath) as image from _test_image_paths_df")
        df_images.createOrReplaceTempView("_test_images_df")
        preds3 = collect_preds(
            "select iv3_img_pred(image) as preds from _test_images_df")

        # assertEqual reports both lengths on failure, unlike assertTrue(a == b)
        self.assertEqual(len(preds1), len(preds2))
        np.testing.assert_allclose(preds1, preds2)
        np.testing.assert_allclose(preds2, preds3)
# Wrap `_p1` as a Spark UDF that returns a double
p1 = udf(_p1, DoubleType())
# Add the UDF's result for the `probability` column as `p_1`
df = tested_df.withColumn("p_1", p1(tested_df.probability))
# Order rows by |p_1 - label|, largest first
wrong_df = df.orderBy(expr("abs(p_1 - label)").desc())
# Display the first ten rows of that ordering
wrong_df.select("filePath", "p_1", "label").limit(10).show()

# COMMAND ----------

from sparkdl import readImages, DeepImagePredictor
# Read the images at `img_dir` into a DataFrame
image_df = readImages(img_dir)
# InceptionV3 predictor: reads the "image" column and writes
# "predicted_labels", with decodePredictions enabled and topK set to 10
predictor = DeepImagePredictor(
    inputCol="image",
    outputCol="predicted_labels",
    modelName="InceptionV3",
    decodePredictions=True,
    topK=10,
)
predictions_df = predictor.transform(image_df)

# COMMAND ----------

# Transform `image_df` with `p_model` and rebind `df` to the result.
# NOTE(review): `p_model` is not defined in this part of the file --
# confirm it is created by an earlier cell.
df = p_model.transform(image_df)

# COMMAND ----------

from keras.applications import InceptionV3
from sparkdl.udf.keras_image_model import registerKerasImageUDF

# Register InceptionV3 (ImageNet weights) under the name "my_keras_inception_udf"
registerKerasImageUDF("my_keras_inception_udf",
                      InceptionV3(weights="imagenet"))

# COMMAND ----------
# Example #6
# 0
# Where the Keras model is saved, and where the transformer loads it from
model_path = "simple-binary-classification"

# Finish the model with a single sigmoid unit, then save it
model.add(Dense(units=1, activation="sigmoid"))
model.save(model_path)

# Build a transformer around the saved model file and run it over the input data
transformer = KerasTransformer(
    inputCol="features",
    outputCol="predictions",
    modelFile=model_path,
)
final_df = transformer.transform(input_df)

final_df.show()

from keras.applications import InceptionV3
from sparkdl.udf.keras_image_model import registerKerasImageUDF

# Register InceptionV3 (ImageNet weights) under the name "inceptionV3_udf"
registerKerasImageUDF(
    "inceptionV3_udf",
    InceptionV3(weights="imagenet"),
)

from keras.applications import InceptionV3
from sparkdl.udf.keras_image_model import registerKerasImageUDF


def keras_load_img(fpath):
    """Load the image at `fpath` at 299x299 and return it as a uint8 array."""
    # Imports are kept inside the function, as in the original
    from keras.preprocessing.image import load_img, img_to_array
    import numpy as np
    loaded = load_img(fpath, target_size=(299, 299))
    pixels = img_to_array(loaded)
    return pixels.astype(np.uint8)


# Register InceptionV3 (ImageNet weights) together with `keras_load_img`
# as the third argument
registerKerasImageUDF(
    "inceptionV3_udf_with_preprocessing",
    InceptionV3(weights="imagenet"),
    keras_load_img,
)