예제 #1
0
    def test_keras_consistency(self):
        """ An InceptionV3 model exported as a GraphFunction must predict like the Keras original """
        # Build the input batch: every sample JPEG is resized to 299x299,
        # preprocessed for InceptionV3 and given a leading batch axis so
        # that all of them can be stacked into a single array.
        sample_paths = glob(os.path.join(_getSampleJPEGDir(), '*.jpg'))
        single_image_batches = []
        for sample_path in sample_paths:
            pixels = img_to_array(load_img(sample_path, target_size=(299, 299)))
            single_image_batches.append(
                np.expand_dims(iv3.preprocess_input(pixels), axis=0))
        imgs_iv3_input = np.vstack(single_image_batches)

        # Reference predictions come straight from Keras.
        reference_model = InceptionV3(weights="imagenet")
        expected = reference_model.predict(imgs_iv3_input)

        # Export a second instance of the model from its own isolated session ...
        with IsolatedSession(using_keras=True) as export_issn:
            K.set_learning_phase(0)
            exported_model = InceptionV3(weights="imagenet")
            gfn = export_issn.asGraphFunction(exported_model.inputs,
                                              exported_model.outputs)

        # ... then import it into a fresh session and run the same batch.
        with IsolatedSession(using_keras=True) as import_issn:
            K.set_learning_phase(0)
            feeds, fetches = import_issn.importGraphFunction(gfn, prefix="InceptionV3")
            actual = import_issn.run(fetches[0], {feeds[0]: imgs_iv3_input})

        self.assertTrue(np.all(actual == expected))
예제 #2
0
    def test_import_export_graph_function(self):
        """ Importing a GraphFunction and exporting it again must reproduce it """
        # Reference: a minimal graph computing z = x + 3.
        with IsolatedSession() as src_issn:
            x_in = tf.placeholder(tf.double, shape=[], name="x")
            z_out = tf.add(x_in, 3, name='z')
            gfn_ref = src_issn.asGraphFunction([x_in], [z_out])

        # Target: import the reference with an empty prefix and export it again.
        with IsolatedSession() as dst_issn:
            imported_feeds, imported_fetches = dst_issn.importGraphFunction(gfn_ref, prefix="")
            gfn_tgt = dst_issn.asGraphFunction(imported_feeds, imported_fetches)

        # Input/output names and the serialized graph definition must all agree.
        for attr_name in ("input_names", "output_names"):
            self.assertEqual(getattr(gfn_tgt, attr_name), getattr(gfn_ref, attr_name))
        self.assertEqual(str(gfn_tgt.graph_def), str(gfn_ref.graph_def))
    def test_simple_keras_udf(self):
        """ A simple Keras Sequential model can be registered as an image UDF.

        Builds a small dense classifier inside an IsolatedSession, registers it
        both through makeGraphUDF and through registerKerasImageUDF, and checks
        that the function registered by the latter exists.
        """
        # Notice that the input layer for an image UDF model
        # must be of shape (width, height, numChannels)
        # The leading batch size is taken care of by Keras
        with IsolatedSession(using_keras=True) as issn:
            model = Sequential()
            model.add(Flatten(input_shape=(640,480,3)))
            model.add(Dense(units=64))
            model.add(Activation('relu'))
            model.add(Dense(units=10))
            model.add(Activation('softmax'))
            # Initialize the variables so the graph can be exported with concrete values
            init_op = tf.global_variables_initializer()
            issn.run(init_op)
            # Register the raw graph as a UDF; the dict is keyed by the op name of
            # the model input (presumably mapping it to the 'image_col' column;
            # confirm against makeGraphUDF)
            makeGraphUDF(issn.graph,
                         'my_keras_model_udf',
                         model.outputs,
                         {tfx.op_name(issn.graph, model.inputs[0]): 'image_col'})
            # NOTE: no training is run here; the model keeps its initial weights.
            # An explicit export is not needed either, since registerKerasImageUDF
            # receives the model directly. For reference, the export would be:
            # gfn = issn.asGraphFunction(model.inputs, model.outputs)
            fh_name = "test_keras_simple_sequential_model"
            registerKerasImageUDF(fh_name, model)

        self._assert_function_exists(fh_name)
예제 #4
0
    def test_tf_consistency(self):
        """ An IsolatedSession must yield the same value and graph as plain TensorFlow """
        x_val = 2702.142857

        # Reference: build and run z = x + 3 in a plain TensorFlow session.
        ref_graph = tf.Graph()
        with tf.Session(graph=ref_graph) as sess:
            x_ref = tf.placeholder(tf.double, shape=[], name="x")
            z_ref_op = tf.add(x_ref, 3, name='z')
            gdef_ref = ref_graph.as_graph_def(add_shapes=True)
            z_ref = sess.run(z_ref_op, feed_dict={x_ref: x_val})

        # Target: the very same computation inside an IsolatedSession.
        with IsolatedSession() as issn:
            x_tgt = tf.placeholder(tf.double, shape=[], name="x")
            z_tgt_op = tf.add(x_tgt, 3, name='z')
            gfn = issn.asGraphFunction([x_tgt], [z_tgt_op])
            z_tgt = issn.run(z_tgt_op, {x_tgt: x_val})

        self.assertEqual(z_ref, z_tgt)

        # The version stamp is not an essential part of the graph; drop it
        # from both definitions before comparing them.
        for gdef in (gdef_ref, gfn.graph_def):
            gdef.ClearField("versions")

        # The GraphDef held by the GraphFunction should equal the one
        # exported directly from the TensorFlow session.
        self.assertEqual(str(gfn.graph_def), str(gdef_ref))
    def test_identity_module(self):
        """ A graph consisting of a single identity op must return its input unchanged """
        with IsolatedSession() as build_issn:
            mat_ph = tf.placeholder(tf.float32, [None, None])
            passthrough = tf.identity(mat_ph, name='output')
            gfn = build_issn.asGraphFunction([mat_ph], [passthrough])

        num_trials = 10
        for _ in range(num_trials):
            # Random float32 matrix whose two dimensions are drawn from prng
            n_rows, n_cols = prng.randint(10, 1000, size=2)
            mat = prng.randn(n_rows, n_cols).astype(np.float32)

            # Every trial imports the graph function into a fresh session
            with IsolatedSession() as run_issn:
                feeds, fetches = run_issn.importGraphFunction(gfn)
                mat_out = run_issn.run(fetches[0], {feeds[0]: mat})

            self.assertTrue(np.all(mat_out == mat))
    def test_pipeline(self):
        """ A piped GraphFunction must match feeding the Keras model directly """
        img_fpaths = glob(os.path.join(_getSampleJPEGDir(), '*.jpg'))

        # Compose "image struct -> tensor" with the Xception model itself.
        xcpt_model = Xception(weights="imagenet")
        piped_model = GraphFunction.fromList([
            ('spimage', gfac.buildSpImageConverter(SparkMode.RGB_FLOAT32)),
            ('xception', GraphFunction.fromKeras(xcpt_model)),
        ])

        # Spatial size expected by the model: input shape without the
        # leading (batch) and trailing (channel) entries.
        target_size = tuple(xcpt_model.input.shape.as_list()[1:-1])

        for fpath in img_fpaths:
            # Reference: preprocess with Keras and predict directly.
            img = load_img(fpath, target_size=target_size)
            img_input = xcpt.preprocess_input(np.expand_dims(img_to_array(img), axis=0))
            preds_ref = xcpt_model.predict(img_input)

            # Target: encode the same array as an image struct and run the pipeline.
            spimg_fields = imageArrayToStruct(img_input).asDict()
            spimg_fields['data'] = bytes(spimg_fields['data'])
            with IsolatedSession() as issn:
                # Need blank import scope name so that spimg fields match the input names
                feeds, fetches = issn.importGraphFunction(piped_model, prefix="")
                feed_dict = {tnsr: spimg_fields[tfx.op_name(tnsr, issn.graph)]
                             for tnsr in feeds}
                preds_tgt = issn.run(fetches[0], feed_dict=feed_dict)
                # To inspect the graph, uncomment:
                # tfx.write_visualization_html(issn.graph,
                #                              NamedTemporaryFile(prefix="gdef", suffix=".html").name)

            self.assertTrue(np.all(preds_tgt == preds_ref))
    def test_bare_keras_module(self):
        """ GraphFunctions built from Keras models must predict like the models themselves """
        img_fpaths = glob(os.path.join(_getSampleJPEGDir(), '*.jpg'))

        model_specs = [(InceptionV3, iv3.preprocess_input),
                       (Xception, xcpt.preprocess_input),
                       (ResNet50, rsnt.preprocess_input)]

        for model_gen, preproc_fn in model_specs:
            keras_model = model_gen(weights="imagenet")
            # Spatial size: model input shape without batch and channel entries
            target_size = tuple(keras_model.input.shape.as_list()[1:-1])

            def _load_single_image_batch(fpath):
                img = load_img(fpath, target_size=target_size)
                # WARNING: the batch axis must be added before preprocessing,
                # or the ResNet50 preprocessor fails
                return preproc_fn(np.expand_dims(img_to_array(img), axis=0))

            imgs_input = np.vstack([_load_single_image_batch(fp) for fp in img_fpaths])

            # Reference predictions straight from Keras
            preds_ref = keras_model.predict(imgs_input)

            # Same batch through the GraphFunction, imported into a fresh session
            gfn_bare_keras = GraphFunction.fromKeras(keras_model)
            with IsolatedSession(using_keras=True) as issn:
                K.set_learning_phase(0)
                feeds, fetches = issn.importGraphFunction(gfn_bare_keras)
                preds_tgt = issn.run(fetches[0], {feeds[0]: imgs_input})

            self.assertTrue(np.all(preds_tgt == preds_ref))
    def test_tf_consistency(self):
        """ The graph nodes and result must match those of a pure TensorFlow run """
        x_val = 2702.142857

        # Pure TensorFlow reference for z = x + 3
        plain_graph = tf.Graph()
        with tf.Session(graph=plain_graph) as sess:
            x_plain = tf.placeholder(tf.double, shape=[], name="x")
            z_plain = tf.add(x_plain, 3, name='z')
            gdef_ref = plain_graph.as_graph_def(add_shapes=True)
            z_ref = sess.run(z_plain, feed_dict={x_plain: x_val})

        # Same computation built and exported through an IsolatedSession
        with IsolatedSession() as issn:
            x_iso = tf.placeholder(tf.double, shape=[], name="x")
            z_iso = tf.add(x_iso, 3, name='z')
            gfn = issn.asGraphFunction([x_iso], [z_iso])
            z_tgt = issn.run(z_iso, {x_iso: x_val})

        self.assertEqual(z_ref, z_tgt)

        # Only the nodes have to be equal, so strip every other field
        # from both graph definitions before comparing.
        # TODO(sid.murching) find a cleaner way of removing all fields besides "node"
        for field_name in ("versions", "version", "library"):
            gdef_ref.ClearField(field_name)
            gfn.graph_def.ClearField(field_name)

        # The GraphDef held by the GraphFunction should equal the one
        # exported directly from the TensorFlow session.
        self.assertEqual(str(gfn.graph_def), str(gdef_ref))
예제 #9
0
 def exec_gfn_spimg_decode(spimg_dict, img_dtype):
     """
     Run the 'BGR' image-converter graph function on `spimg_dict`.

     :param spimg_dict: mapping keyed by the op names of the converter's inputs
     :param img_dtype: forwarded to `gfac.buildSpImageConverter`
     :return: the value of the converter's first output
     """
     converter = gfac.buildSpImageConverter('BGR', img_dtype)
     with IsolatedSession() as issn:
         # Blank prefix, so feed op names can be used directly as dict keys
         feeds, fetches = issn.importGraphFunction(converter, prefix="")
         inputs = {tnsr: spimg_dict[tfx.op_name(tnsr, issn.graph)] for tnsr in feeds}
         decoded = issn.run(fetches[0], feed_dict=inputs)
     return decoded
예제 #10
0
def buildFlattener():
    """
    Build a GraphFunction that flattens a rank-2 float32 tensor into a rank-1 tensor.
    The input placeholder accepts any [rows, cols] shape; it is reshaped to [-1]
    and exposed through an identity op named 'output'.

    :return: the GraphFunction exported from a dedicated IsolatedSession
    """
    with IsolatedSession() as issn:
        matrix_in = tf.placeholder(tf.float32, [None, None])
        flattened = tf.reshape(matrix_in, shape=[-1])
        # The identity op only gives the result a stable name
        vector_out = tf.identity(flattened, name='output')
        flattener = issn.asGraphFunction([matrix_in], [vector_out])

    return flattener
예제 #11
0
    def test_flattener_module(self):
        """ The flattener graph must emit the input matrix as a flat vector """
        flattener = gfac.buildFlattener()

        num_trials = 10
        for _ in range(num_trials):
            n_rows, n_cols = prng.randint(10, 1000, size=2)
            mat = prng.randn(n_rows, n_cols).astype(np.float32)

            # Every trial imports the graph function into a fresh session
            with IsolatedSession() as issn:
                feeds, fetches = issn.importGraphFunction(flattener)
                vec_out = issn.run(fetches[0], {feeds[0]: mat})

            expected = mat.flatten()
            self.assertTrue(np.all(vec_out == expected))
    def test_map_rows_sql_1(self):
        """ A graph registered via makeGraphUDF must be callable from a SQL expression """
        rows = [Row(x=float(i)) for i in range(5)]
        df = self.sql.createDataFrame(rows)

        with IsolatedSession() as issn:
            # Placeholder named after column 'x'
            x_ph = tf.placeholder(tf.double, shape=[], name="x")
            # Output that adds 3 to x
            z_out = tf.add(x_ph, 3, name='z')
            # Register the computation under a SQL function name
            makeGraphUDF(issn.graph, "map_rows_sql_1", [z_out])

        # Invoke the registered function from a SQL expression in PySpark
        result_df = df.selectExpr("map_rows_sql_1(x) AS z")
        print("df2 = %s" % result_df)
        collected = result_df.collect()
        # First input row has x = 0.0, so z must be 3.0
        assert collected[0].z == 3.0, collected
예제 #13
0
    def test_get_graph_elements(self):
        """ Graph elements can be fetched by name and by other graph elements """
        with IsolatedSession() as issn:
            x_in = tf.placeholder(tf.double, shape=[], name="x")
            z_out = tf.add(x_in, 3, name='z')
            graph = issn.graph

            # Looking up a tensor by the tensor itself gives it back
            for tnsr in (z_out, x_in):
                self.assertEqual(tfx.get_tensor(graph, tnsr), tnsr)

            # Lookups agree with TensorFlow's own by-name accessors
            self.assertEqual(graph.get_tensor_by_name("x:0"), tfx.get_tensor(graph, x_in))
            self.assertEqual("x:0", tfx.tensor_name(graph, x_in))
            self.assertEqual(graph.get_operation_by_name("x"), tfx.get_op(graph, x_in))

            # Op names carry no output index ...
            for expected_op_name, tnsr in (("x", x_in), ("z", z_out)):
                self.assertEqual(expected_op_name, tfx.op_name(graph, tnsr))

            # ... while tensor names end in ":0"
            for tnsr, expected_tensor_name in ((z_out, "z:0"), (x_in, "x:0")):
                self.assertEqual(tfx.tensor_name(graph, tnsr), expected_tensor_name)
예제 #14
0
def buildSpImageConverter(channelOrder, img_dtype):
    """
    Build a GraphFunction converting an imageIO byte encoded image into a float image
    tensor with a leading dimension of size one, suitable as input to ConvNets.
    The names of the inputs ("height", "width", "nChannels", "data") must be a subset
    of those specified in `image.imageIO.imageSchema`.

    :param channelOrder: channel ordering, forwarded to `imageIO.fixColorChannelOrdering`
    :param img_dtype: the type of data the underlying image bytes represent,
                      either 'uint8' or 'float32'
    :return: GraphFunction taking [height, width, data, nChannels] and producing the image tensor
    :raises ValueError: if `img_dtype` is neither 'uint8' nor 'float32'
    """
    with IsolatedSession() as issn:
        # Scalar inputs; their names have to conform to `imageIO.imageSchema`
        height_ph = tf.placeholder(tf.int32, [], name="height")
        width_ph = tf.placeholder(tf.int32, [], name="width")
        nchannels_ph = tf.placeholder(tf.int32, [], name="nChannels")
        data_ph = tf.placeholder(tf.string, [], name="data")

        # The image is packed into bytes with height as leading dimension,
        # which is the default behavior of Python Image Library
        dims = tf.stack([height_ph, width_ph, nchannels_ph], axis=0)
        shape = tf.reshape(dims, shape=(3, ), name='shape')

        # Decode the flat byte buffer into float pixel values
        if img_dtype == 'uint8':
            raw_pixels = tf.decode_raw(data_ph, tf.uint8, name="decode_raw")
            pixels = tf.to_float(raw_pixels)
        elif img_dtype == 'float32':
            pixels = tf.decode_raw(data_ph, tf.float32, name="decode_raw")
        else:
            raise ValueError(
                '''unsupported image data type "%s", currently only know how to
            handle uint8 and float32''' % img_dtype)

        # Flat pixels -> [height, width, channels] -> requested channel order -> batch of one
        image = tf.reshape(pixels, shape, name="reshaped")
        image = imageIO.fixColorChannelOrdering(channelOrder, image)
        batched_image = tf.expand_dims(image, 0, name="image_input")
        converter = issn.asGraphFunction(
            [height_ph, width_ph, data_ph, nchannels_ph], [batched_image])

    return converter
예제 #15
0
def buildSpImageConverter(img_dtype):
    """
    Build a GraphFunction converting an imageIO byte encoded image into a float image
    tensor with a leading dimension of size one, suitable as input to ConvNets.
    The names of the inputs ("height", "width", "nChannels", "data") must be a subset
    of those specified in `image.imageIO.imageSchema`.

    :param img_dtype: the type of data the underlying image bytes represent:
                      `SparkMode.RGB` (decoded as uint8) or
                      `SparkMode.RGB_FLOAT32` (decoded as float32)
    :return: GraphFunction taking [height, width, data, nChannels] and producing the image tensor
    """
    with IsolatedSession() as issn:
        # Scalar inputs; their names have to conform to `imageIO.imageSchema`
        height_ph = tf.placeholder(tf.int32, [], name="height")
        width_ph = tf.placeholder(tf.int32, [], name="width")
        nchannels_ph = tf.placeholder(tf.int32, [], name="nChannels")
        data_ph = tf.placeholder(tf.string, [], name="data")

        # The image is packed into bytes with height as leading dimension,
        # which is the default behavior of Python Image Library
        dims = tf.stack([height_ph, width_ph, nchannels_ph], axis=0)
        shape = tf.reshape(dims, shape=(3, ), name='shape')

        if img_dtype == SparkMode.RGB:
            raw_pixels = tf.decode_raw(data_ph, tf.uint8, name="decode_raw")
            pixels = tf.to_float(raw_pixels)
        else:
            # NOTE(review): this check disappears when Python runs with -O, in which
            # case any other mode would silently be decoded as float32.
            assert img_dtype == SparkMode.RGB_FLOAT32, \
                "Unsupported dtype for image: {}".format(img_dtype)
            pixels = tf.decode_raw(data_ph, tf.float32, name="decode_raw")

        # Flat pixels -> [height, width, channels] -> batch of one
        image = tf.reshape(pixels, shape, name="reshaped")
        batched_image = tf.expand_dims(image, 0, name="image_input")
        converter = issn.asGraphFunction(
            [height_ph, width_ph, data_ph, nchannels_ph], [batched_image])

    return converter
def registerKerasImageUDF(udf_name,
                          keras_model_or_file_path,
                          preprocessor=None):
    """
    Create a Keras image model as a Spark SQL UDF.
    The UDF takes a column (formatted in :py:const:`sparkdl.image.imageIO.imageSchema`)
    and produces the output of the given Keras model (e.g.
    for `Inception V3 <https://keras.io/applications/#inceptionv3>`_
    it produces a real valued score vector over the ImageNet object categories).
    For other models, the output could have different meanings.
    Please consult the actual models specification.

    .. deprecated::
        This function emits a :py:class:`DeprecationWarning` on every call;
        the warning is attributed to the caller's line.

    The user can provide an existing model in Keras as follows.

    .. code-block:: python

        from keras.applications import InceptionV3
        registerKerasImageUDF("udf_name", InceptionV3(weights="imagenet"))

    To use a customized Keras model, we can save it and pass the file path as parameter.

    .. code-block:: python

        # Assume we have a compiled and trained Keras model
        model.save('path/to/my/model.h5')

        registerKerasImageUDF("my_custom_keras_model_udf", "path/to/my/model.h5")

    If further preprocessing steps are required to prepare the images,
    the user has the option to provide a preprocessing function :py:obj:`preprocessor`.
    The :py:obj:`preprocessor` converts a file path into an image array.
    This function is usually introduced in Keras workflow, as in the following example.

    .. warning:: There is a performance penalty to use a :py:obj:`preprocessor` as it will
                 first convert the image into a file buffer and reload it back.
                 This provides compatibility with the usual way Keras model input are preprocessed.
                 Please consider directly using Keras/TensorFlow layers for this purpose.

    .. code-block:: python

        def keras_load_img(fpath):
            from keras.preprocessing.image import load_img, img_to_array
            import numpy as np
            img = load_img(fpath, target_size=(299, 299))
            return img_to_array(img).astype(np.uint8)

        registerKerasImageUDF("my_inception_udf", InceptionV3(weights="imagenet"), keras_load_img)


    If the `preprocessor` is not provided, we assume the function will be applied to
    a (struct) column encoded in [sparkdl.image.imageIO.imageSchema].
    The output will be a single (struct) column containing the resulting tensor data.

    When a `preprocessor` is provided, two UDFs named ``<udf_name>__preprocess`` and
    ``<udf_name>__model_predict`` are registered and then chained into a pipeline
    registered under `udf_name`.

    :param udf_name: str, name of the UserDefinedFunction. If the name exists, it will be
                     overwritten.
    :param keras_model_or_file_path: str or KerasModel,
                                     either a path to the HDF5 Keras model file
                                     or an actual loaded Keras model
    :param preprocessor: function, optional, a function that
                         converts image file path to image tensor/ndarray
                         in the correct shape to be served as input to the Keras model
    :return: :py:class:`GraphFunction`, the graph function for the Keras image model
    """
    # stacklevel=2 attributes the warning to the caller rather than to this
    # module; Python's default filters only display DeprecationWarning when
    # it is attributed to code running in __main__.
    warnings.warn(
        "registerKerasImageUDF() will be removed in the next release of sparkdl. "
        "Please use Pandas UDF for distributed model inference.",
        DeprecationWarning,
        stacklevel=2)
    ordered_udf_names = []
    keras_udf_name = udf_name
    if preprocessor is not None:
        # Spill the image structure to file and reload it
        # with the user provided preprocessing function
        preproc_udf_name = '{}__preprocess'.format(udf_name)
        ordered_udf_names.append(preproc_udf_name)
        JVMAPI.registerUDF(preproc_udf_name,
                           _serialize_and_reload_with(preprocessor),
                           ImageSchema.imageSchema['image'].dataType)
        # The model UDF becomes the second stage of the pipeline, so the
        # public name stays free for the pipeline itself
        keras_udf_name = '{}__model_predict'.format(udf_name)

    # image struct -> image tensor -> model output -> flat vector
    stages = [('spimg', buildSpImageConverter('RGB', "uint8")),
              ('model', GraphFunction.fromKeras(keras_model_or_file_path)),
              ('final', buildFlattener())]
    gfn = GraphFunction.fromList(stages)

    with IsolatedSession() as issn:
        _, fetches = issn.importGraphFunction(gfn, prefix='')
        makeGraphUDF(issn.graph, keras_udf_name, fetches)
        ordered_udf_names.append(keras_udf_name)

    # More than one registered stage means a preprocessor was supplied
    if len(ordered_udf_names) > 1:
        msg = "registering pipelined UDF {udf} with stages {udfs}"
        msg = msg.format(udf=udf_name, udfs=ordered_udf_names)
        logger.info(msg)
        JVMAPI.registerPipeline(udf_name, ordered_udf_names)

    return gfn