예제 #1
0
    def test_saved_model_iomap(self):
        """
        Signature-keyed column mappings must translate into tensor-name-keyed mappings.

        A saved model is exported into a temporary directory, loaded back through
        ``TFInputGraph.fromSavedModelWithSignature`` and its translated input and
        output mappings are compared against the expected tensor-name based ones.
        """
        with _make_temp_directory() as tmp_dir:
            export_dir = os.path.join(tmp_dir, 'saved_model')
            test_graph = tf.Graph()
            with tf.Session(graph=test_graph) as sess, test_graph.as_default():
                _build_graph()
                _build_saved_model(sess, export_dir)
                # The model is loaded by signature, so both the serving tag and the
                # signature-def key have to be supplied.
                gin = TFInputGraph.fromSavedModelWithSignature(
                    export_dir, _serving_tag, _serving_sigdef_key)

                # Input side: column name -> signature key, translated to column -> tensor name
                sig_inputs = {'inputCol': _tensor_input_signature}
                actual_inputs = gin.translateInputMapping(sig_inputs)
                wanted_inputs = {'inputCol': tfx.tensor_name(_tensor_input_name)}

                # Output side: signature key -> column name, translated to tensor name -> column
                sig_outputs = {_tensor_output_signature: 'outputCol'}
                actual_outputs = gin.translateOutputMapping(sig_outputs)
                wanted_outputs = {tfx.tensor_name(_tensor_output_name): 'outputCol'}

                failure_template = (
                    "signature based input mapping {} and output mapping {} "
                    "must be translated correctly into tensor name based mappings")
                mappings_match = (actual_inputs == wanted_inputs
                                  and actual_outputs == wanted_outputs)
                assert mappings_match, failure_template.format(actual_inputs, actual_outputs)
예제 #2
0
    def test_saved_model_iomap(self):
        """
        Signature-keyed column mappings must translate into tensor-name-keyed mappings.

        A saved model is exported into a temporary directory, loaded back through
        ``TFInputGraph.fromSavedModelWithSignature`` and its translated input and
        output mappings are compared against the expected tensor-name based ones.
        """
        with _make_temp_directory() as tmp_dir:
            export_dir = os.path.join(tmp_dir, 'saved_model')
            test_graph = tf.Graph()
            with tf.Session(graph=test_graph) as sess, test_graph.as_default():
                _build_graph()
                _build_saved_model(sess, export_dir)
                # The model is loaded by signature, so both the serving tag and the
                # signature-def key have to be supplied.
                gin = TFInputGraph.fromSavedModelWithSignature(
                    export_dir, _serving_tag, _serving_sigdef_key)

                # Input side: column name -> signature key, translated to column -> tensor name
                sig_inputs = {'inputCol': _tensor_input_signature}
                actual_inputs = gin.translateInputMapping(sig_inputs)
                wanted_inputs = {'inputCol': tfx.tensor_name(_tensor_input_name)}

                # Output side: signature key -> column name, translated to tensor name -> column
                sig_outputs = {_tensor_output_signature: 'outputCol'}
                actual_outputs = gin.translateOutputMapping(sig_outputs)
                wanted_outputs = {tfx.tensor_name(_tensor_output_name): 'outputCol'}

                failure_template = (
                    "signature based input mapping {} and output mapping {} "
                    "must be translated correctly into tensor name based mappings")
                mappings_match = (actual_inputs == wanted_inputs
                                  and actual_outputs == wanted_outputs)
                assert mappings_match, failure_template.format(actual_inputs, actual_outputs)
예제 #3
0
def _gen_tensor_op_string_input_tests():
    """
    Yield test cases that feed tensor-name strings to ``tfx.op_name`` / ``tfx.tensor_name``.

    For each output index, two cases are produced from the string ``'someOp:<index>'``:
    one pairing the bare op name with ``tfx.op_name(...)`` and one pairing the full
    tensor name with ``tfx.tensor_name(...)``.
    """
    base_op = 'someOp'
    for output_index in (0, 1, 2, 3, 5, 8, 15, 17):
        full_name = '{}:{}'.format(base_op, output_index)

        op_case = TestCase(data=(base_op, tfx.op_name(full_name)),
                           description='test tensor name to op name')
        yield op_case

        tensor_case = TestCase(data=(full_name, tfx.tensor_name(full_name)),
                               description='test tensor name to tensor name')
        yield tensor_case
예제 #4
0
    def test_get_graph_elements(self):
        """ Look up tensors, ops and their names from the graph of an isolated session """

        with IsolatedSession() as issn:
            placeholder = tf.placeholder(tf.double, shape=[], name="x")
            total = tf.add(placeholder, 3, name='z')

            graph = issn.graph
            # Tensors resolve to themselves and to what the graph returns by name
            self.assertEqual(tfx.get_tensor(graph, total), total)
            self.assertEqual(tfx.get_tensor(graph, placeholder), placeholder)
            self.assertEqual(graph.get_tensor_by_name("x:0"),
                             tfx.get_tensor(graph, placeholder))
            self.assertEqual("x:0", tfx.tensor_name(graph, placeholder))
            # Ops (and op names) are reachable from the tensors they produce
            self.assertEqual(graph.get_operation_by_name("x"),
                             tfx.get_op(graph, placeholder))
            self.assertEqual("x", tfx.op_name(graph, placeholder))
            self.assertEqual("z", tfx.op_name(graph, total))
            # Tensor names carry the ':0' output index suffix
            self.assertEqual(tfx.tensor_name(graph, total), "z:0")
            self.assertEqual(tfx.tensor_name(graph, placeholder), "x:0")
예제 #5
0
def _gen_tensor_op_string_input_tests():
    """
    Yield test cases that feed tensor-name strings to ``tfx.op_name`` / ``tfx.tensor_name``.

    For each output index, two cases are produced from the string ``'someOp:<index>'``:
    one pairing the bare op name with ``tfx.op_name(...)`` and one pairing the full
    tensor name with ``tfx.tensor_name(...)``.
    """
    base_op = 'someOp'
    for output_index in (0, 1, 2, 3, 5, 8, 15, 17):
        full_name = '{}:{}'.format(base_op, output_index)

        op_case = TestCase(data=(base_op, tfx.op_name(full_name)),
                           description='test tensor name to op name')
        yield op_case

        tensor_case = TestCase(data=(full_name, tfx.tensor_name(full_name)),
                               description='test tensor name to tensor name')
        yield tensor_case
예제 #6
0
from pyspark.ml import Transformer
from pyspark.ml.param import Param, Params
from pyspark.sql.functions import udf

import sparkdl.graph.utils as tfx
import sparkdl.image.imageIO as imageIO
from sparkdl.param import (keyword_only, HasInputCol, HasOutputCol,
                           SparkDLTypeConverters, HasOutputMode)
import sparkdl.transformers.utils as utils
import sparkdl.utils.jvmapi as JVMAPI

from pyspark.ml.image import ImageSchema

# Names exported by `from <this module> import *`
__all__ = ['TFImageTransformer']

# Tensor name derived from the image input placeholder name defined in sparkdl.transformers.utils
IMAGE_INPUT_TENSOR_NAME = tfx.tensor_name(utils.IMAGE_INPUT_PLACEHOLDER_NAME)
# NOTE(review): presumably the name scope the user-supplied graph is placed under -- confirm
USER_GRAPH_NAMESPACE = 'given'
# NOTE(review): presumably the name prefix for a flattened output tensor -- confirm
NEW_OUTPUT_PREFIX = 'sdl_flattened'


class TFImageTransformer(Transformer, HasInputCol, HasOutputCol,
                         HasOutputMode):
    """
    Applies the Tensorflow graph to the image column in DataFrame.

    Restrictions of the current API:

    * Does not use minibatches, which is a major low-hanging fruit for performance.
    * Only one output node can be specified.
    * The output is expected to be an image or a 1-d vector.
    * All images in the dataframe are expected be of the same numerical data type
예제 #7
0
            ], [_tensor_output_name]))
        gin = transformer.getTFInputGraph()
        local_features = _build_local_features()
        expected = _get_expected_result(gin, local_features)
        dataset = self.session.createDataFrame(local_features)
        _check_transformer_output(transformer, dataset, expected)


# Module-level fixtures for the tests in this file.
# The name of the input tensor (passed through tfx.tensor_name below)
_tensor_input_name = "input_tensor"
# The name of the output tensor (scalar)
_tensor_output_name = "output_tensor"
# The size of the input tensor
_tensor_size = 3
# Input mapping for the Transformer: DataFrame column name -> tensor name
_input_mapping = {'inputCol': tfx.tensor_name(_tensor_input_name)}
# Output mapping for the Transformer: tensor name -> DataFrame column name
_output_mapping = {tfx.tensor_name(_tensor_output_name): 'outputCol'}
# Numerical threshold
# NOTE(review): presumably the tolerance for comparing actual and expected outputs -- confirm
_all_close_tolerance = 1e-5


def _build_transformer(gin_function):
    """
    Makes a session and a default graph, loads the simple graph into it, and then calls
    gin_function(session) to build the :py:obj:`TFInputGraph` object.
    Return the :py:obj:`TFTransformer` created from it.
    """
    graph = tf.Graph()
    with tf.Session(graph=graph) as sess, graph.as_default():
        _build_graph(sess)
예제 #8
0
def makeGraphUDF(graph,
                 udf_name,
                 fetches,
                 feeds_to_fields_map=None,
                 blocked=False,
                 register=True):
    """
    Create a Spark SQL UserDefinedFunction from a given TensorFlow Graph

    The following example creates a UDF that adds 3 to its scalar (double) input.

    .. code-block:: python

        from sparkdl.graph.tensorframes_udf import makeGraphUDF

        with IsolatedSession() as issn:
            x = tf.placeholder(tf.double, shape=[], name="input_x")
            z = tf.add(x, 3, name='z')
            makeGraphUDF(issn.graph, "my_tensorflow_udf", [z])

    Then this function can be used in a SQL query.

    .. code-block:: python

        df = spark.createDataFrame([Row(xCol=float(x)) for x in range(100)])
        df.createOrReplaceTempView("my_float_table")
        spark.sql("select my_tensorflow_udf(xCol) as zCol from my_float_table").show()

    :param graph: :py:class:`tf.Graph`, a TensorFlow Graph
    :param udf_name: str, name of the SQL UDF
    :param fetches: list, output tensors of the graph
    :param feeds_to_fields_map: a dict of str -> str,
                                The key is the name of a placeholder in the current
                                TensorFlow graph of computation.
                                The value is the name of a column in the dataframe.
                                For now, only the top-level fields in a dataframe are supported.

                                .. note:: For any placeholder that is
                                          not specified in the feed dictionary,
                                          the name of the input column is assumed to be
                                          the same as that of the placeholder.

    :param blocked: bool, if set to True, the TensorFrames will execute the function
                    over blocks/batches of rows. This should provide better performance.
                    Otherwise, the function is applied to individual rows
    :param register: bool, if set to True, the SQL UDF will be registered.
                     In this case, it will be accessible in SQL queries.
                     Otherwise the UDF is only built (via the builder's ``makeUDF``).
    :return: JVM function handle object
    """
    graph = tfx.validated_graph(graph)
    # pylint: disable=W0212
    # TODO: Work with TensorFlow's registered expansions
    # https://github.com/tensorflow/tensorflow/blob/v1.1.0/tensorflow/python/client/session.py#L74
    # TODO: Most part of this implementation might be better off moved to TensorFrames
    jvm_builder = JVMAPI.createTensorFramesModelBuilder()
    tfs.core._add_graph(graph, jvm_builder)

    # Obtain the fetches and their shapes
    fetch_names = [tfx.tensor_name(fetch, graph) for fetch in fetches]
    fetch_shapes = [tfx.get_shape(fetch, graph) for fetch in fetches]

    # Traverse the graph nodes and obtain all the placeholders and their shapes
    placeholder_names = []
    placeholder_shapes = []
    for node in graph.as_graph_def(add_shapes=True).node:
        # Only input-less 'Placeholder' nodes are of interest
        if node.input or str(node.op) != 'Placeholder':
            continue
        tnsr_name = tfx.tensor_name(node.name, graph)
        tnsr = graph.get_tensor_by_name(tnsr_name)
        try:
            tnsr_shape = tfx.get_shape(tnsr, graph)
        except ValueError:
            # Deliberate best-effort: a placeholder whose shape lookup fails is left out
            continue
        placeholder_names.append(tnsr_name)
        placeholder_shapes.append(tnsr_shape)

    # Passing fetches and placeholders to TensorFrames
    jvm_builder.shape(fetch_names + placeholder_names,
                      fetch_shapes + placeholder_shapes)
    jvm_builder.fetches(fetch_names)
    # Passing feeds to TensorFrames
    placeholder_op_names = [
        tfx.op_name(name, graph) for name in placeholder_names
    ]
    # Passing the graph input to DataFrame column mapping and additional placeholder names
    tfs.core._add_inputs(jvm_builder, feeds_to_fields_map,
                         placeholder_op_names)

    if register:
        return jvm_builder.registerUDF(udf_name, blocked)
    return jvm_builder.makeUDF(udf_name, blocked)
예제 #9
0
def _gen_valid_tensor_op_input_combos():
    """
    Yield test cases covering every supported input kind of the ``tfx`` lookup helpers.

    A single constant op is created, and ``tfx.op_name``, ``tfx.tensor_name``,
    ``tfx.get_tensor`` and ``tfx.get_op`` are each called with a tensor, a tensor
    name, an op and an op name. The name helpers are exercised both with and
    without an explicit graph. Each case's ``data`` is a pair of
    (reference value, value returned by the helper).
    """
    op_name = 'someConstOp'
    tnsr_name = '{}:0'.format(op_name)
    tnsr = tf.constant(1427.08, name=op_name)
    graph = tnsr.graph

    # Test for op_name
    yield TestCase(data=(op_name, tfx.op_name(tnsr)),
                   description='get op name from tensor (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr, graph)),
                   description='get op name from tensor (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr_name)),
                   description='get op name from tensor name (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr_name, graph)),
                   description='get op name from tensor name (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr.op)),
                   description='get op name from op (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr.op, graph)),
                   description='get op name from op (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(op_name)),
                   description='get op name from op name (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(op_name, graph)),
                   description='get op name from op name (with graph)')

    # Test for tensor_name
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr)),
                   description='get tensor name from tensor (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr, graph)),
                   description='get tensor name from tensor (with graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr_name)),
                   description='get tensor name from tensor name (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr_name, graph)),
                   description='get tensor name from tensor name (with graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr.op)),
                   description='get tensor name from op (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr.op, graph)),
                   description='get tensor name from op (with graph)')
    # These two cases must start from the bare op name; passing the tensor name
    # would merely repeat the "from tensor name" cases above.
    yield TestCase(data=(tnsr_name, tfx.tensor_name(op_name)),
                   description='get tensor name from op name (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(op_name, graph)),
                   description='get tensor name from op name (with graph)')

    # Test for get_tensor
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr, graph)),
                   description='get tensor from tensor')
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr_name, graph)),
                   description='get tensor from tensor name')
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr.op, graph)),
                   description='get tensor from op')
    yield TestCase(data=(tnsr, tfx.get_tensor(op_name, graph)),
                   description='get tensor from op name')

    # Test for get_op
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr, graph)),
                   description='get op from tensor')
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr_name, graph)),
                   description='get op from tensor name')
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr.op, graph)),
                   description='get op from op')
    yield TestCase(data=(tnsr.op, tfx.get_op(op_name, graph)),
                   description='test op from op name')
예제 #10
0
 def test_invalid_tensor_name_inputs_with_wrong_types(self, data, description):
     """ `tfx.tensor_name` has to raise a TypeError for inputs of an unsupported type """
     # `description` is attached as the failure message if no TypeError is raised
     expect_type_error = self.assertRaises(TypeError, msg=description)
     with expect_type_error:
         tfx.tensor_name(data)
                                             tf_dtype)
            gin = transformer.getTFInputGraph()
            local_features = _build_local_features(np_type)
            expected = _get_expected_result(gin, local_features)
            schema = StructType([StructField('inputCol', spark_dtype)])
            dataset = self.session.createDataFrame(local_features, schema)
            _check_transformer_output(transformer, dataset, expected)

# The name of the input tensor
# The name of the input tensor (passed through tfx.tensor_name below)
_tensor_input_name = "input_tensor"
# The name of the output tensor (scalar)
_tensor_output_name = "output_tensor"
# The size of the input tensor
_tensor_size = 3
# Input mapping for the Transformer: DataFrame column name -> tensor name
_input_mapping = {'inputCol': tfx.tensor_name(_tensor_input_name)}
# Output mapping for the Transformer: tensor name -> DataFrame column name
_output_mapping = {tfx.tensor_name(_tensor_output_name): 'outputCol'}
# Numerical threshold
# NOTE(review): presumably the tolerance for comparing actual and expected outputs -- confirm
_all_close_tolerance = 1e-5


def _build_transformer(gin_function, tf_dtype):
    """
    Makes a session and a default graph, loads the simple graph into it, and then calls
    gin_function(session) to build the :py:obj:`TFInputGraph` object.
    Return the :py:obj:`TFTransformer` created from it.
    """
    graph = tf.Graph()
    with tf.Session(graph=graph) as sess, graph.as_default():
        _build_graph(sess, tf_dtype)
예제 #12
0
from pyspark.ml import Transformer
from pyspark.ml.image import ImageSchema
from pyspark.ml.param import Param, Params
from pyspark.sql.functions import udf

import sparkdl.graph.utils as tfx
import sparkdl.image.imageIO as imageIO
from sparkdl.param import keyword_only, HasInputCol, HasOutputCol, HasOutputMode
from sparkdl.param import SparkDLTypeConverters
import sparkdl.transformers.utils as utils
import sparkdl.utils.jvmapi as JVMAPI


# Names exported by `from <this module> import *`
__all__ = ['TFImageTransformer']

# Tensor name derived from the image input placeholder name defined in sparkdl.transformers.utils
IMAGE_INPUT_TENSOR_NAME = tfx.tensor_name(utils.IMAGE_INPUT_PLACEHOLDER_NAME)
# NOTE(review): presumably the name scope the user-supplied graph is placed under -- confirm
USER_GRAPH_NAMESPACE = 'given'
# NOTE(review): presumably the name prefix for a flattened output tensor -- confirm
NEW_OUTPUT_PREFIX = 'sdl_flattened'


class TFImageTransformer(Transformer, HasInputCol, HasOutputCol, HasOutputMode):
    """
    Applies the Tensorflow graph to the image column in DataFrame.

    Restrictions of the current API:

    * Does not use minibatches, which is a major low-hanging fruit for performance.
    * Only one output node can be specified.
    * The output is expected to be an image or a 1-d vector.
    * All images in the dataframe are expected be of the same numerical data type
      (i.e. the dtype of the values in the numpy array representation is the same.)
def makeGraphUDF(graph, udf_name, fetches, feeds_to_fields_map=None, blocked=False, register=True):
    """
    Create a Spark SQL UserDefinedFunction from a given TensorFlow Graph

    The following example creates a UDF that adds 3 to its scalar (double) input.

    .. code-block:: python

        from sparkdl.graph.tensorframes_udf import makeGraphUDF

        with IsolatedSession() as issn:
            x = tf.placeholder(tf.double, shape=[], name="input_x")
            z = tf.add(x, 3, name='z')
            makeGraphUDF(issn.graph, "my_tensorflow_udf", [z])

    Then this function can be used in a SQL query.

    .. code-block:: python

        df = spark.createDataFrame([Row(xCol=float(x)) for x in range(100)])
        df.createOrReplaceTempView("my_float_table")
        spark.sql("select my_tensorflow_udf(xCol) as zCol from my_float_table").show()

    :param graph: :py:class:`tf.Graph`, a TensorFlow Graph
    :param udf_name: str, name of the SQL UDF
    :param fetches: list, output tensors of the graph
    :param feeds_to_fields_map: a dict of str -> str,
                                The key is the name of a placeholder in the current
                                TensorFlow graph of computation.
                                The value is the name of a column in the dataframe.
                                For now, only the top-level fields in a dataframe are supported.

                                .. note:: For any placeholder that is
                                          not specified in the feed dictionary,
                                          the name of the input column is assumed to be
                                          the same as that of the placeholder.

    :param blocked: bool, if set to True, the TensorFrames will execute the function
                    over blocks/batches of rows. This should provide better performance.
                    Otherwise, the function is applied to individual rows
    :param register: bool, if set to True, the SQL UDF will be registered.
                     In this case, it will be accessible in SQL queries.
                     Otherwise the UDF is only built (via the builder's ``makeUDF``).
    :return: JVM function handle object
    """
    graph = tfx.validated_graph(graph)
    # pylint: disable=W0212
    # TODO: Work with TensorFlow's registered expansions
    # https://github.com/tensorflow/tensorflow/blob/v1.1.0/tensorflow/python/client/session.py#L74
    # TODO: Most part of this implementation might be better off moved to TensorFrames
    jvm_builder = JVMAPI.createTensorFramesModelBuilder()
    tfs.core._add_graph(graph, jvm_builder)

    # Obtain the fetches and their shapes
    fetch_names = [tfx.tensor_name(graph, fetch) for fetch in fetches]
    fetch_shapes = [tfx.get_shape(graph, fetch) for fetch in fetches]

    # Traverse the graph nodes and obtain all the placeholders and their shapes
    placeholder_names = []
    placeholder_shapes = []
    for node in graph.as_graph_def(add_shapes=True).node:
        # Only input-less 'Placeholder' nodes are of interest
        if node.input or str(node.op) != 'Placeholder':
            continue
        tnsr_name = tfx.tensor_name(graph, node.name)
        tnsr = graph.get_tensor_by_name(tnsr_name)
        try:
            tnsr_shape = tfx.get_shape(graph, tnsr)
        except ValueError:
            # Deliberate best-effort: a placeholder whose shape lookup fails is left out
            continue
        placeholder_names.append(tnsr_name)
        placeholder_shapes.append(tnsr_shape)

    # Passing fetches and placeholders to TensorFrames
    jvm_builder.shape(fetch_names + placeholder_names, fetch_shapes + placeholder_shapes)
    jvm_builder.fetches(fetch_names)
    # Passing feeds to TensorFrames
    placeholder_op_names = [tfx.op_name(graph, name) for name in placeholder_names]
    # Passing the graph input to DataFrame column mapping and additional placeholder names
    tfs.core._add_inputs(jvm_builder, feeds_to_fields_map, placeholder_op_names)

    if register:
        return jvm_builder.registerUDF(udf_name, blocked)
    return jvm_builder.makeUDF(udf_name, blocked)
예제 #14
0
def _gen_valid_tensor_op_input_combos():
    """
    Yield test cases covering every supported input kind of the ``tfx`` lookup helpers.

    A single constant op is created, and ``tfx.op_name``, ``tfx.tensor_name``,
    ``tfx.get_tensor`` and ``tfx.get_op`` are each called with a tensor, a tensor
    name, an op and an op name. The name helpers are exercised both with and
    without an explicit graph. Each case's ``data`` is a pair of
    (reference value, value returned by the helper).
    """
    op_name = 'someConstOp'
    tnsr_name = '{}:0'.format(op_name)
    tnsr = tf.constant(1427.08, name=op_name)
    graph = tnsr.graph

    # Test for op_name
    yield TestCase(data=(op_name, tfx.op_name(tnsr)),
                   description='get op name from tensor (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr, graph)),
                   description='get op name from tensor (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr_name)),
                   description='get op name from tensor name (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr_name, graph)),
                   description='get op name from tensor name (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr.op)),
                   description='get op name from op (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr.op, graph)),
                   description='get op name from op (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(op_name)),
                   description='get op name from op name (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(op_name, graph)),
                   description='get op name from op name (with graph)')

    # Test for tensor_name
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr)),
                   description='get tensor name from tensor (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr, graph)),
                   description='get tensor name from tensor (with graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr_name)),
                   description='get tensor name from tensor name (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr_name, graph)),
                   description='get tensor name from tensor name (with graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr.op)),
                   description='get tensor name from op (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr.op, graph)),
                   description='get tensor name from op (with graph)')
    # These two cases must start from the bare op name; passing the tensor name
    # would merely repeat the "from tensor name" cases above.
    yield TestCase(data=(tnsr_name, tfx.tensor_name(op_name)),
                   description='get tensor name from op name (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(op_name, graph)),
                   description='get tensor name from op name (with graph)')

    # Test for get_tensor
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr, graph)),
                   description='get tensor from tensor')
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr_name, graph)),
                   description='get tensor from tensor name')
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr.op, graph)),
                   description='get tensor from op')
    yield TestCase(data=(tnsr, tfx.get_tensor(op_name, graph)),
                   description='get tensor from op name')

    # Test for get_op
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr, graph)),
                   description='get op from tensor')
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr_name, graph)),
                   description='get op from tensor name')
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr.op, graph)),
                   description='get op from op')
    yield TestCase(data=(tnsr.op, tfx.get_op(op_name, graph)),
                   description='test op from op name')
예제 #15
0
 def test_invalid_tensor_name_inputs_with_wrong_types(
         self, data, description):
     """ `tfx.tensor_name` has to raise a TypeError for inputs of an unsupported type """
     # `description` is attached as the failure message if no TypeError is raised
     expect_type_error = self.assertRaises(TypeError, msg=description)
     with expect_type_error:
         tfx.tensor_name(data)