コード例 #1
0
    def test_saved_model_iomap(self):
        """
        Signature-keyed column mappings must translate into tensor-name-keyed mappings.

        Builds the test graph, writes it out as a saved model, reloads it through
        its serving tag and signature-def key, and checks that the translated
        input and output mappings equal the ones built directly from tensor names.
        """
        with _make_temp_directory() as work_dir:
            export_dir = os.path.join(work_dir, 'saved_model')
            tf_graph = tf.Graph()
            with tf.Session(graph=tf_graph) as session, tf_graph.as_default():
                _build_graph()
                _build_saved_model(session, export_dir)
                # Loading by signature requires both the tag and the signature-def key
                input_graph = TFInputGraph.fromSavedModelWithSignature(
                    export_dir, _serving_tag, _serving_sigdef_key)

                # Input side: column name -> signature name, translated to tensor name
                actual_inputs = input_graph.translateInputMapping(
                    {'inputCol': _tensor_input_signature})
                wanted_inputs = {'inputCol': tfx.tensor_name(_tensor_input_name)}

                # Output side: signature name -> column name, translated to tensor name
                actual_outputs = input_graph.translateOutputMapping(
                    {_tensor_output_signature: 'outputCol'})
                wanted_outputs = {tfx.tensor_name(_tensor_output_name): 'outputCol'}

                failure_template = (
                    "signature based input mapping {} and output mapping {} "
                    "must be translated correctly into tensor name based mappings")
                inputs_match = actual_inputs == wanted_inputs
                assert inputs_match and actual_outputs == wanted_outputs, \
                    failure_template.format(actual_inputs, actual_outputs)
コード例 #2
0
    def test_saved_model_iomap(self):
        """
        Signature-keyed column mappings must translate into tensor-name-keyed mappings.

        Builds the test graph, writes it out as a saved model, reloads it through
        its serving tag and signature-def key, and checks that the translated
        input and output mappings equal the ones built directly from tensor names.
        """
        with _make_temp_directory() as work_dir:
            export_dir = os.path.join(work_dir, 'saved_model')
            tf_graph = tf.Graph()
            with tf.Session(graph=tf_graph) as session, tf_graph.as_default():
                _build_graph()
                _build_saved_model(session, export_dir)
                # Loading by signature requires both the tag and the signature-def key
                input_graph = TFInputGraph.fromSavedModelWithSignature(
                    export_dir, _serving_tag, _serving_sigdef_key)

                # Input side: column name -> signature name, translated to tensor name
                actual_inputs = input_graph.translateInputMapping(
                    {'inputCol': _tensor_input_signature})
                wanted_inputs = {'inputCol': tfx.tensor_name(_tensor_input_name)}

                # Output side: signature name -> column name, translated to tensor name
                actual_outputs = input_graph.translateOutputMapping(
                    {_tensor_output_signature: 'outputCol'})
                wanted_outputs = {tfx.tensor_name(_tensor_output_name): 'outputCol'}

                failure_template = (
                    "signature based input mapping {} and output mapping {} "
                    "must be translated correctly into tensor name based mappings")
                inputs_match = actual_inputs == wanted_inputs
                assert inputs_match and actual_outputs == wanted_outputs, \
                    failure_template.format(actual_inputs, actual_outputs)
コード例 #3
0
def _gen_tensor_op_string_input_tests():
    """
    Yield test cases that feed tensor-name strings to the naming helpers.

    For every output index two cases are produced: the op name paired with
    ``tfx.op_name`` of the tensor name, then the tensor name paired with
    ``tfx.tensor_name`` of the tensor name.
    """
    base_op = 'someOp'
    output_indices = (0, 1, 2, 3, 5, 8, 15, 17)
    for out_idx in output_indices:
        full_name = '{}:{}'.format(base_op, out_idx)

        derived_op_name = tfx.op_name(full_name)
        yield TestCase(data=(base_op, derived_op_name),
                       description='test tensor name to op name')

        derived_tensor_name = tfx.tensor_name(full_name)
        yield TestCase(data=(full_name, derived_tensor_name),
                       description='test tensor name to tensor name')
コード例 #4
0
    def test_get_graph_elements(self):
        """ Tensors, ops and their names can be looked up from graph elements """

        with IsolatedSession() as session:
            ph_x = tf.placeholder(tf.double, shape=[], name="x")
            sum_z = tf.add(ph_x, 3, name='z')
            graph = session.graph

            # Looking up a tensor by the tensor itself returns that tensor
            self.assertEqual(tfx.get_tensor(graph, sum_z), sum_z)
            self.assertEqual(tfx.get_tensor(graph, ph_x), ph_x)

            # Results agree with the graph's own by-name lookups
            x_by_name = graph.get_tensor_by_name("x:0")
            self.assertEqual(x_by_name, tfx.get_tensor(graph, ph_x))
            self.assertEqual("x:0", tfx.tensor_name(graph, ph_x))
            x_op_by_name = graph.get_operation_by_name("x")
            self.assertEqual(x_op_by_name, tfx.get_op(graph, ph_x))

            # Op names carry no output index, tensor names end in ":0"
            self.assertEqual("x", tfx.op_name(graph, ph_x))
            self.assertEqual("z", tfx.op_name(graph, sum_z))
            self.assertEqual(tfx.tensor_name(graph, sum_z), "z:0")
            self.assertEqual(tfx.tensor_name(graph, ph_x), "x:0")
コード例 #5
0
def _gen_tensor_op_string_input_tests():
    """
    Yield test cases that feed tensor-name strings to the naming helpers.

    For every output index two cases are produced: the op name paired with
    ``tfx.op_name`` of the tensor name, then the tensor name paired with
    ``tfx.tensor_name`` of the tensor name.
    """
    base_op = 'someOp'
    output_indices = (0, 1, 2, 3, 5, 8, 15, 17)
    for out_idx in output_indices:
        full_name = '{}:{}'.format(base_op, out_idx)

        derived_op_name = tfx.op_name(full_name)
        yield TestCase(data=(base_op, derived_op_name),
                       description='test tensor name to op name')

        derived_tensor_name = tfx.tensor_name(full_name)
        yield TestCase(data=(full_name, derived_tensor_name),
                       description='test tensor name to tensor name')
コード例 #6
0
from pyspark.ml import Transformer
from pyspark.ml.param import Param, Params
from pyspark.sql.functions import udf

import sparkdl.graph.utils as tfx
import sparkdl.image.imageIO as imageIO
from sparkdl.param import (keyword_only, HasInputCol, HasOutputCol,
                           SparkDLTypeConverters, HasOutputMode)
import sparkdl.transformers.utils as utils
import sparkdl.utils.jvmapi as JVMAPI

from pyspark.ml.image import ImageSchema

# Names exported by `from <module> import *`
__all__ = ['TFImageTransformer']

# Tensor name derived from the image input placeholder name defined in
# sparkdl.transformers.utils
IMAGE_INPUT_TENSOR_NAME = tfx.tensor_name(utils.IMAGE_INPUT_PLACEHOLDER_NAME)
# NOTE(review): presumably the name scope the user-supplied graph is placed under -- confirm
USER_GRAPH_NAMESPACE = 'given'
# NOTE(review): presumably the name prefix of the flattened output tensor -- confirm
NEW_OUTPUT_PREFIX = 'sdl_flattened'


class TFImageTransformer(Transformer, HasInputCol, HasOutputCol,
                         HasOutputMode):
    """
    Applies the Tensorflow graph to the image column in DataFrame.

    Restrictions of the current API:

    * Does not use minibatches, which is a major low-hanging fruit for performance.
    * Only one output node can be specified.
    * The output is expected to be an image or a 1-d vector.
    * All images in the dataframe are expected be of the same numerical data type
コード例 #7
0
            ], [_tensor_output_name]))
        gin = transformer.getTFInputGraph()
        local_features = _build_local_features()
        expected = _get_expected_result(gin, local_features)
        dataset = self.session.createDataFrame(local_features)
        _check_transformer_output(transformer, dataset, expected)


# Op-level name of the input tensor (converted with tfx.tensor_name below)
_tensor_input_name = "input_tensor"
# Op-level name of the output tensor (scalar)
_tensor_output_name = "output_tensor"
# The size of the input tensor
_tensor_size = 3
# Input mapping for the Transformer: DataFrame column name -> input tensor name
_input_mapping = {'inputCol': tfx.tensor_name(_tensor_input_name)}
# Output mapping for the Transformer: output tensor name -> DataFrame column name
_output_mapping = {tfx.tensor_name(_tensor_output_name): 'outputCol'}
# Numerical threshold
# NOTE(review): presumably the tolerance for comparing transformer output to expected values -- confirm
_all_close_tolerance = 1e-5


def _build_transformer(gin_function):
    """
    Makes a session and a default graph, loads the simple graph into it, and then calls
    gin_function(session) to build the :py:obj:`TFInputGraph` object.
    Return the :py:obj:`TFTransformer` created from it.
    """
    graph = tf.Graph()
    with tf.Session(graph=graph) as sess, graph.as_default():
        _build_graph(sess)
コード例 #8
0
def makeGraphUDF(graph,
                 udf_name,
                 fetches,
                 feeds_to_fields_map=None,
                 blocked=False,
                 register=True):
    """
    Create a Spark SQL UserDefinedFunction from a given TensorFlow Graph

    The following example creates a UDF from a graph that adds 3 to the
    value fed to its placeholder.

    .. code-block:: python

        from sparkdl.graph.tensorframes_udf import makeGraphUDF

        with IsolatedSession() as issn:
            x = tf.placeholder(tf.double, shape=[], name="input_x")
            z = tf.add(x, 3, name='z')
            makeGraphUDF(issn.graph, "my_tensorflow_udf", [z])

    Then this function can be used in a SQL query.

    .. code-block:: python

        df = spark.createDataFrame([Row(xCol=float(x)) for x in range(100)])
        df.createOrReplaceTempView("my_float_table")
        spark.sql("select my_tensorflow_udf(xCol) as zCol from my_float_table").show()

    :param graph: :py:class:`tf.Graph`, a TensorFlow Graph
    :param udf_name: str, name of the SQL UDF
    :param fetches: list, output tensors of the graph
    :param feeds_to_fields_map: a dict of str -> str,
                                The key is the name of a placeholder in the current
                                TensorFlow graph of computation.
                                The value is the name of a column in the dataframe.
                                For now, only the top-level fields in a dataframe are supported.

                                .. note:: For any placeholder that is
                                          not specified in the feed dictionary,
                                          the name of the input column is assumed to be
                                          the same as that of the placeholder.

    :param blocked: bool, if set to True, the TensorFrames will execute the function
                    over blocks/batches of rows. This should provide better performance.
                    Otherwise, the function is applied to individual rows
    :param register: bool, if set to True, the SQL UDF will be registered.
                     In this case, it will be accessible in SQL queries.
                     If set to False, the UDF is built without being registered.
    :return: JVM function handle object
    """
    graph = tfx.validated_graph(graph)
    # pylint: disable=W0212
    # TODO: Work with TensorFlow's registered expansions
    # https://github.com/tensorflow/tensorflow/blob/v1.1.0/tensorflow/python/client/session.py#L74
    # TODO: Most part of this implementation might be better off moved to TensorFrames
    jvm_builder = JVMAPI.createTensorFramesModelBuilder()
    tfs.core._add_graph(graph, jvm_builder)

    # Obtain the fetches and their shapes
    fetch_names = [tfx.tensor_name(fetch, graph) for fetch in fetches]
    fetch_shapes = [tfx.get_shape(fetch, graph) for fetch in fetches]

    # Traverse the graph nodes and obtain all the placeholders and their shapes
    placeholder_names = []
    placeholder_shapes = []
    for node in graph.as_graph_def(add_shapes=True).node:
        # Only nodes of op type `Placeholder` that have no inputs are collected
        if node.input or str(node.op) != 'Placeholder':
            continue
        tnsr_name = tfx.tensor_name(node.name, graph)
        tnsr = graph.get_tensor_by_name(tnsr_name)
        try:
            tnsr_shape = tfx.get_shape(tnsr, graph)
        except ValueError:
            # Deliberate best-effort: a placeholder whose shape lookup raises
            # ValueError is left out of the names/shapes passed to TensorFrames.
            continue
        placeholder_names.append(tnsr_name)
        placeholder_shapes.append(tnsr_shape)

    # Passing fetches and placeholders to TensorFrames
    jvm_builder.shape(fetch_names + placeholder_names,
                      fetch_shapes + placeholder_shapes)
    jvm_builder.fetches(fetch_names)
    # Passing feeds to TensorFrames
    placeholder_op_names = [
        tfx.op_name(name, graph) for name in placeholder_names
    ]
    # Passing the graph input to DataFrame column mapping and additional placeholder names
    tfs.core._add_inputs(jvm_builder, feeds_to_fields_map,
                         placeholder_op_names)

    if register:
        return jvm_builder.registerUDF(udf_name, blocked)
    return jvm_builder.makeUDF(udf_name, blocked)
コード例 #9
0
def _gen_valid_tensor_op_input_combos():
    """
    Yield test cases covering each valid input kind of the graph naming/lookup helpers.

    A single constant op is created and its graph is taken from the resulting
    tensor. The tensor, the tensor name, the op and the op name are then each
    passed to ``tfx.op_name``, ``tfx.tensor_name``, ``tfx.get_tensor`` and
    ``tfx.get_op``. Every case carries ``data`` as a pair of the known value and
    the value computed by the helper, plus a ``description`` of the combination.
    """
    op_name = 'someConstOp'
    tnsr_name = '{}:0'.format(op_name)
    tnsr = tf.constant(1427.08, name=op_name)
    graph = tnsr.graph

    # Test for op_name
    yield TestCase(data=(op_name, tfx.op_name(tnsr)),
                   description='get op name from tensor (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr, graph)),
                   description='get op name from tensor (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr_name)),
                   description='get op name from tensor name (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr_name, graph)),
                   description='get op name from tensor name (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr.op)),
                   description='get op name from op (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr.op, graph)),
                   description='get op name from op (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(op_name)),
                   description='get op name from op name (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(op_name, graph)),
                   description='get op name from op name (with graph)')

    # Test for tensor_name
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr)),
                   description='get tensor name from tensor (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr, graph)),
                   description='get tensor name from tensor (with graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr_name)),
                   description='get tensor name from tensor name (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr_name, graph)),
                   description='get tensor name from tensor name (with graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr.op)),
                   description='get tensor name from op (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr.op, graph)),
                   description='get tensor name from op (with graph)')
    # These two cases must pass the *op* name; passing the tensor name here
    # would merely repeat the "from tensor name" cases above.
    yield TestCase(data=(tnsr_name, tfx.tensor_name(op_name)),
                   description='get tensor name from op name (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(op_name, graph)),
                   description='get tensor name from op name (with graph)')

    # Test for get_tensor
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr, graph)),
                   description='get tensor from tensor')
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr_name, graph)),
                   description='get tensor from tensor name')
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr.op, graph)),
                   description='get tensor from op')
    yield TestCase(data=(tnsr, tfx.get_tensor(op_name, graph)),
                   description='get tensor from op name')

    # Test for get_op
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr, graph)),
                   description='get op from tensor')
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr_name, graph)),
                   description='get op from tensor name')
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr.op, graph)),
                   description='get op from op')
    yield TestCase(data=(tnsr.op, tfx.get_op(op_name, graph)),
                   description='get op from op name')
コード例 #10
0
 def test_invalid_tensor_name_inputs_with_wrong_types(self, data, description):
     """ ``tfx.tensor_name`` must raise ``TypeError`` when given a wrongly typed input """
     expecting_type_error = self.assertRaises(TypeError, msg=description)
     with expecting_type_error:
         tfx.tensor_name(data)
コード例 #11
0
                                             tf_dtype)
            gin = transformer.getTFInputGraph()
            local_features = _build_local_features(np_type)
            expected = _get_expected_result(gin, local_features)
            schema = StructType([StructField('inputCol', spark_dtype)])
            dataset = self.session.createDataFrame(local_features, schema)
            _check_transformer_output(transformer, dataset, expected)

# Op-level name of the input tensor (converted with tfx.tensor_name below)
_tensor_input_name = "input_tensor"
# Op-level name of the output tensor (scalar)
_tensor_output_name = "output_tensor"
# The size of the input tensor
_tensor_size = 3
# Input mapping for the Transformer: DataFrame column name -> input tensor name
_input_mapping = {'inputCol': tfx.tensor_name(_tensor_input_name)}
# Output mapping for the Transformer: output tensor name -> DataFrame column name
_output_mapping = {tfx.tensor_name(_tensor_output_name): 'outputCol'}
# Numerical threshold
# NOTE(review): presumably the tolerance for comparing transformer output to expected values -- confirm
_all_close_tolerance = 1e-5


def _build_transformer(gin_function, tf_dtype):
    """
    Makes a session and a default graph, loads the simple graph into it, and then calls
    gin_function(session) to build the :py:obj:`TFInputGraph` object.
    Return the :py:obj:`TFTransformer` created from it.
    """
    graph = tf.Graph()
    with tf.Session(graph=graph) as sess, graph.as_default():
        _build_graph(sess, tf_dtype)
コード例 #12
0
from pyspark.ml import Transformer
from pyspark.ml.image import ImageSchema
from pyspark.ml.param import Param, Params
from pyspark.sql.functions import udf

import sparkdl.graph.utils as tfx
import sparkdl.image.imageIO as imageIO
from sparkdl.param import keyword_only, HasInputCol, HasOutputCol, HasOutputMode
from sparkdl.param import SparkDLTypeConverters
import sparkdl.transformers.utils as utils
import sparkdl.utils.jvmapi as JVMAPI


# Names exported by `from <module> import *`
__all__ = ['TFImageTransformer']

# Tensor name derived from the image input placeholder name defined in
# sparkdl.transformers.utils
IMAGE_INPUT_TENSOR_NAME = tfx.tensor_name(utils.IMAGE_INPUT_PLACEHOLDER_NAME)
# NOTE(review): presumably the name scope the user-supplied graph is placed under -- confirm
USER_GRAPH_NAMESPACE = 'given'
# NOTE(review): presumably the name prefix of the flattened output tensor -- confirm
NEW_OUTPUT_PREFIX = 'sdl_flattened'


class TFImageTransformer(Transformer, HasInputCol, HasOutputCol, HasOutputMode):
    """
    Applies the Tensorflow graph to the image column in DataFrame.

    Restrictions of the current API:

    * Does not use minibatches, which is a major low-hanging fruit for performance.
    * Only one output node can be specified.
    * The output is expected to be an image or a 1-d vector.
    * All images in the dataframe are expected be of the same numerical data type
      (i.e. the dtype of the values in the numpy array representation is the same.)
コード例 #13
0
def makeGraphUDF(graph, udf_name, fetches, feeds_to_fields_map=None, blocked=False, register=True):
    """
    Create a Spark SQL UserDefinedFunction from a given TensorFlow Graph

    The following example creates a UDF from a graph that adds 3 to the
    value fed to its placeholder.

    .. code-block:: python

        from sparkdl.graph.tensorframes_udf import makeGraphUDF

        with IsolatedSession() as issn:
            x = tf.placeholder(tf.double, shape=[], name="input_x")
            z = tf.add(x, 3, name='z')
            makeGraphUDF(issn.graph, "my_tensorflow_udf", [z])

    Then this function can be used in a SQL query.

    .. code-block:: python

        df = spark.createDataFrame([Row(xCol=float(x)) for x in range(100)])
        df.createOrReplaceTempView("my_float_table")
        spark.sql("select my_tensorflow_udf(xCol) as zCol from my_float_table").show()

    :param graph: :py:class:`tf.Graph`, a TensorFlow Graph
    :param udf_name: str, name of the SQL UDF
    :param fetches: list, output tensors of the graph
    :param feeds_to_fields_map: a dict of str -> str,
                                The key is the name of a placeholder in the current
                                TensorFlow graph of computation.
                                The value is the name of a column in the dataframe.
                                For now, only the top-level fields in a dataframe are supported.

                                .. note:: For any placeholder that is
                                          not specified in the feed dictionary,
                                          the name of the input column is assumed to be
                                          the same as that of the placeholder.

    :param blocked: bool, if set to True, the TensorFrames will execute the function
                    over blocks/batches of rows. This should provide better performance.
                    Otherwise, the function is applied to individual rows
    :param register: bool, if set to True, the SQL UDF will be registered.
                     In this case, it will be accessible in SQL queries.
                     If set to False, the UDF is built without being registered.
    :return: JVM function handle object
    """
    graph = tfx.validated_graph(graph)
    # pylint: disable=W0212
    # TODO: Work with TensorFlow's registered expansions
    # https://github.com/tensorflow/tensorflow/blob/v1.1.0/tensorflow/python/client/session.py#L74
    # TODO: Most part of this implementation might be better off moved to TensorFrames
    jvm_builder = JVMAPI.createTensorFramesModelBuilder()
    tfs.core._add_graph(graph, jvm_builder)

    # Obtain the fetches and their shapes
    fetch_names = [tfx.tensor_name(graph, fetch) for fetch in fetches]
    fetch_shapes = [tfx.get_shape(graph, fetch) for fetch in fetches]

    # Traverse the graph nodes and obtain all the placeholders and their shapes
    placeholder_names = []
    placeholder_shapes = []
    for node in graph.as_graph_def(add_shapes=True).node:
        # Only nodes of op type `Placeholder` that have no inputs are collected
        if node.input or str(node.op) != 'Placeholder':
            continue
        tnsr_name = tfx.tensor_name(graph, node.name)
        tnsr = graph.get_tensor_by_name(tnsr_name)
        try:
            tnsr_shape = tfx.get_shape(graph, tnsr)
        except ValueError:
            # Deliberate best-effort: a placeholder whose shape lookup raises
            # ValueError is left out of the names/shapes passed to TensorFrames.
            continue
        placeholder_names.append(tnsr_name)
        placeholder_shapes.append(tnsr_shape)

    # Passing fetches and placeholders to TensorFrames
    jvm_builder.shape(fetch_names + placeholder_names, fetch_shapes + placeholder_shapes)
    jvm_builder.fetches(fetch_names)
    # Passing feeds to TensorFrames
    placeholder_op_names = [tfx.op_name(graph, name) for name in placeholder_names]
    # Passing the graph input to DataFrame column mapping and additional placeholder names
    tfs.core._add_inputs(jvm_builder, feeds_to_fields_map, placeholder_op_names)

    if register:
        return jvm_builder.registerUDF(udf_name, blocked)
    return jvm_builder.makeUDF(udf_name, blocked)
コード例 #14
0
def _gen_valid_tensor_op_input_combos():
    """
    Yield test cases covering each valid input kind of the graph naming/lookup helpers.

    A single constant op is created and its graph is taken from the resulting
    tensor. The tensor, the tensor name, the op and the op name are then each
    passed to ``tfx.op_name``, ``tfx.tensor_name``, ``tfx.get_tensor`` and
    ``tfx.get_op``. Every case carries ``data`` as a pair of the known value and
    the value computed by the helper, plus a ``description`` of the combination.
    """
    op_name = 'someConstOp'
    tnsr_name = '{}:0'.format(op_name)
    tnsr = tf.constant(1427.08, name=op_name)
    graph = tnsr.graph

    # Test for op_name
    yield TestCase(data=(op_name, tfx.op_name(tnsr)),
                   description='get op name from tensor (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr, graph)),
                   description='get op name from tensor (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr_name)),
                   description='get op name from tensor name (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr_name, graph)),
                   description='get op name from tensor name (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr.op)),
                   description='get op name from op (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr.op, graph)),
                   description='get op name from op (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(op_name)),
                   description='get op name from op name (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(op_name, graph)),
                   description='get op name from op name (with graph)')

    # Test for tensor_name
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr)),
                   description='get tensor name from tensor (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr, graph)),
                   description='get tensor name from tensor (with graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr_name)),
                   description='get tensor name from tensor name (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr_name, graph)),
                   description='get tensor name from tensor name (with graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr.op)),
                   description='get tensor name from op (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr.op, graph)),
                   description='get tensor name from op (with graph)')
    # These two cases must pass the *op* name; passing the tensor name here
    # would merely repeat the "from tensor name" cases above.
    yield TestCase(data=(tnsr_name, tfx.tensor_name(op_name)),
                   description='get tensor name from op name (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(op_name, graph)),
                   description='get tensor name from op name (with graph)')

    # Test for get_tensor
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr, graph)),
                   description='get tensor from tensor')
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr_name, graph)),
                   description='get tensor from tensor name')
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr.op, graph)),
                   description='get tensor from op')
    yield TestCase(data=(tnsr, tfx.get_tensor(op_name, graph)),
                   description='get tensor from op name')

    # Test for get_op
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr, graph)),
                   description='get op from tensor')
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr_name, graph)),
                   description='get op from tensor name')
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr.op, graph)),
                   description='get op from op')
    yield TestCase(data=(tnsr.op, tfx.get_op(op_name, graph)),
                   description='get op from op name')
コード例 #15
0
 def test_invalid_tensor_name_inputs_with_wrong_types(
         self, data, description):
     """ ``tfx.tensor_name`` must raise ``TypeError`` when given a wrongly typed input """
     expecting_type_error = self.assertRaises(TypeError, msg=description)
     with expecting_type_error:
         tfx.tensor_name(data)