def test_saved_model_iomap(self):
    """
    Signature-keyed column mappings must translate into tensor-name-keyed mappings.

    Builds the test graph, writes it out with ``_build_saved_model`` under a
    temporary directory, loads it back through
    ``TFInputGraph.fromSavedModelWithSignature`` and checks the results of
    ``translateInputMapping`` / ``translateOutputMapping``.
    """
    with _make_temp_directory() as tmp_dir:
        saved_model_dir = os.path.join(tmp_dir, 'saved_model')
        graph = tf.Graph()
        with tf.Session(graph=graph) as sess, graph.as_default():
            _build_graph()
            _build_saved_model(sess, saved_model_dir)

            # The model is loaded through its signature definition,
            # so both the serving tag and the signature key are required.
            gin = TFInputGraph.fromSavedModelWithSignature(
                saved_model_dir, _serving_tag, _serving_sigdef_key)

            # Column -> signature key, translated to column -> tensor name
            sigdef_inputs = {'inputCol': _tensor_input_signature}
            actual_inputs = gin.translateInputMapping(sigdef_inputs)
            wanted_inputs = {'inputCol': tfx.tensor_name(_tensor_input_name)}

            # Signature key -> column, translated to tensor name -> column
            sigdef_outputs = {_tensor_output_signature: 'outputCol'}
            actual_outputs = gin.translateOutputMapping(sigdef_outputs)
            wanted_outputs = {tfx.tensor_name(_tensor_output_name): 'outputCol'}

            err_msg = ("signature based input mapping {} and output mapping {} "
                       "must be translated correctly into tensor name based mappings")
            mappings_match = (actual_inputs == wanted_inputs
                              and actual_outputs == wanted_outputs)
            assert mappings_match, err_msg.format(actual_inputs, actual_outputs)
def _gen_tensor_op_string_input_tests():
    """
    Yield test cases for the string-only inputs of the name helpers.

    For several output indices, a tensor name ``'someOp:<idx>'`` is fed to
    ``tfx.op_name`` and ``tfx.tensor_name``; each case pairs the expected
    name with the value the helper returned.
    """
    base_op = 'someOp'
    # (expected-name selector, helper under test, case description)
    checks = (
        (lambda full_name: base_op, tfx.op_name, 'test tensor name to op name'),
        (lambda full_name: full_name, tfx.tensor_name, 'test tensor name to tensor name'),
    )
    for output_idx in (0, 1, 2, 3, 5, 8, 15, 17):
        full_name = ':'.join((base_op, str(output_idx)))
        for pick_expected, convert, summary in checks:
            # The helper is only invoked when this case is actually requested
            yield TestCase(data=(pick_expected(full_name), convert(full_name)),
                           description=summary)
def test_get_graph_elements(self):
    """ Fetching graph elements by names and other graph elements """
    with IsolatedSession() as issn:
        x_in = tf.placeholder(tf.double, shape=[], name="x")
        z_out = tf.add(x_in, 3, name='z')
        graph = issn.graph

        # Tensors resolve to themselves and agree with the graph's own lookup
        self.assertEqual(tfx.get_tensor(graph, z_out), z_out)
        self.assertEqual(tfx.get_tensor(graph, x_in), x_in)
        self.assertEqual(graph.get_tensor_by_name("x:0"), tfx.get_tensor(graph, x_in))
        self.assertEqual("x:0", tfx.tensor_name(graph, x_in))

        # Operations and their names
        self.assertEqual(graph.get_operation_by_name("x"), tfx.get_op(graph, x_in))
        self.assertEqual("x", tfx.op_name(graph, x_in))
        self.assertEqual("z", tfx.op_name(graph, z_out))

        # Tensor names of both elements
        self.assertEqual(tfx.tensor_name(graph, z_out), "z:0")
        self.assertEqual(tfx.tensor_name(graph, x_in), "x:0")
from pyspark.ml import Transformer from pyspark.ml.param import Param, Params from pyspark.sql.functions import udf import sparkdl.graph.utils as tfx import sparkdl.image.imageIO as imageIO from sparkdl.param import (keyword_only, HasInputCol, HasOutputCol, SparkDLTypeConverters, HasOutputMode) import sparkdl.transformers.utils as utils import sparkdl.utils.jvmapi as JVMAPI from pyspark.ml.image import ImageSchema __all__ = ['TFImageTransformer'] IMAGE_INPUT_TENSOR_NAME = tfx.tensor_name(utils.IMAGE_INPUT_PLACEHOLDER_NAME) USER_GRAPH_NAMESPACE = 'given' NEW_OUTPUT_PREFIX = 'sdl_flattened' class TFImageTransformer(Transformer, HasInputCol, HasOutputCol, HasOutputMode): """ Applies the Tensorflow graph to the image column in DataFrame. Restrictions of the current API: * Does not use minibatches, which is a major low-hanging fruit for performance. * Only one output node can be specified. * The output is expected to be an image or a 1-d vector. * All images in the dataframe are expected be of the same numerical data type
], [_tensor_output_name])) gin = transformer.getTFInputGraph() local_features = _build_local_features() expected = _get_expected_result(gin, local_features) dataset = self.session.createDataFrame(local_features) _check_transformer_output(transformer, dataset, expected) # The name of the input tensor _tensor_input_name = "input_tensor" # The name of the output tensor (scalar) _tensor_output_name = "output_tensor" # The size of the input tensor _tensor_size = 3 # Input mapping for the Transformer _input_mapping = {'inputCol': tfx.tensor_name(_tensor_input_name)} # Output mapping for the Transformer _output_mapping = {tfx.tensor_name(_tensor_output_name): 'outputCol'} # Numerical threshold _all_close_tolerance = 1e-5 def _build_transformer(gin_function): """ Makes a session and a default graph, loads the simple graph into it, and then calls gin_function(session) to build the :py:obj:`TFInputGraph` object. Return the :py:obj:`TFTransformer` created from it. """ graph = tf.Graph() with tf.Session(graph=graph) as sess, graph.as_default(): _build_graph(sess)
def makeGraphUDF(graph, udf_name, fetches, feeds_to_fields_map=None, blocked=False, register=True):
    """
    Create a Spark SQL UserDefinedFunction from a given TensorFlow Graph

    The following example creates a UDF from a graph that adds 3 to a
    scalar double placeholder.

    .. code-block:: python

        from sparkdl.graph.tensorframes_udf import makeGraphUDF

        with IsolatedSession() as issn:
            x = tf.placeholder(tf.double, shape=[], name="input_x")
            z = tf.add(x, 3, name='z')
            makeGraphUDF(issn.graph, "my_tensorflow_udf", [z])

    Then this function can be used in a SQL query.

    .. code-block:: python

        df = spark.createDataFrame([Row(xCol=float(x)) for x in range(100)])
        df.createOrReplaceTempView("my_float_table")
        spark.sql("select my_tensorflow_udf(xCol) as zCol from my_float_table").show()

    :param graph: :py:class:`tf.Graph`, a TensorFlow Graph
    :param udf_name: str, name of the SQL UDF
    :param fetches: list, output tensors of the graph
    :param feeds_to_fields_map: a dict of str -> str,
                                The key is the name of a placeholder in the current
                                TensorFlow graph of computation.
                                The value is the name of a column in the dataframe.
                                For now, only the top-level fields in a dataframe are supported.

                                .. note:: For any placeholder that is not specified in the
                                          feed dictionary, the name of the input column is
                                          assumed to be the same as that of the placeholder.

    :param blocked: bool, if set to True, the TensorFrames will execute the function
                    over blocks/batches of rows. This should provide better performance.
                    Otherwise, the function is applied to individual rows
    :param register: bool, if set to True, the SQL UDF will be registered.
                     In this case, it will be accessible in SQL queries.
    :return: JVM function handle object
    """
    graph = tfx.validated_graph(graph)

    # pylint: disable=W0212
    # TODO: Work with TensorFlow's registered expansions
    # https://github.com/tensorflow/tensorflow/blob/v1.1.0/tensorflow/python/client/session.py#L74
    # TODO: Most part of this implementation might be better off moved to TensorFrames
    jvm_builder = JVMAPI.createTensorFramesModelBuilder()
    tfs.core._add_graph(graph, jvm_builder)

    # Obtain the fetches and their shapes
    fetch_names = [tfx.tensor_name(fetch, graph) for fetch in fetches]
    fetch_shapes = [tfx.get_shape(fetch, graph) for fetch in fetches]

    # Traverse the graph nodes and obtain all the placeholders and their shapes
    placeholder_names = []
    placeholder_shapes = []
    for node in graph.as_graph_def(add_shapes=True).node:
        # Only input-less 'Placeholder' nodes are candidate feeds
        if node.input or str(node.op) != 'Placeholder':
            continue
        tnsr_name = tfx.tensor_name(node.name, graph)
        tnsr = graph.get_tensor_by_name(tnsr_name)
        try:
            # Keep the guarded region to the single call whose failure is tolerated
            tnsr_shape = tfx.get_shape(tnsr, graph)
        except ValueError:
            # Deliberate best-effort: a placeholder whose shape cannot be obtained
            # is left out of the registered inputs.
            # NOTE(review): presumably raised for shapes that are not fully
            # known -- confirm against tfx.get_shape
            continue
        placeholder_names.append(tnsr_name)
        placeholder_shapes.append(tnsr_shape)

    # Passing fetches and placeholders to TensorFrames
    jvm_builder.shape(fetch_names + placeholder_names, fetch_shapes + placeholder_shapes)
    jvm_builder.fetches(fetch_names)

    # Passing feeds to TensorFrames
    placeholder_op_names = [tfx.op_name(name, graph) for name in placeholder_names]
    # Passing the graph input to DataFrame column mapping and additional placeholder names
    tfs.core._add_inputs(jvm_builder, feeds_to_fields_map, placeholder_op_names)

    if register:
        return jvm_builder.registerUDF(udf_name, blocked)
    return jvm_builder.makeUDF(udf_name, blocked)
def _gen_valid_tensor_op_input_combos():
    """
    Yield test cases covering every valid input kind of the graph name helpers.

    A single constant op is created; ``tfx.op_name``, ``tfx.tensor_name``,
    ``tfx.get_tensor`` and ``tfx.get_op`` are then called with the tensor,
    the tensor name, the op and the op name (with and without the graph
    where the helper is exercised both ways). Each case carries
    ``data=(expected, actual)`` and a description.
    """
    op_name = 'someConstOp'
    tnsr_name = '{}:0'.format(op_name)
    tnsr = tf.constant(1427.08, name=op_name)
    graph = tnsr.graph

    # Test for op_name
    yield TestCase(data=(op_name, tfx.op_name(tnsr)),
                   description='get op name from tensor (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr, graph)),
                   description='get op name from tensor (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr_name)),
                   description='get op name from tensor name (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr_name, graph)),
                   description='get op name from tensor name (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr.op)),
                   description='get op name from op (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(tnsr.op, graph)),
                   description='get op name from op (with graph)')
    yield TestCase(data=(op_name, tfx.op_name(op_name)),
                   description='get op name from op name (no graph)')
    yield TestCase(data=(op_name, tfx.op_name(op_name, graph)),
                   description='get op name from op name (with graph)')

    # Test for tensor_name
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr)),
                   description='get tensor name from tensor (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr, graph)),
                   description='get tensor name from tensor (with graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr_name)),
                   description='get tensor name from tensor name (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr_name, graph)),
                   description='get tensor name from tensor name (with graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr.op)),
                   description='get tensor name from op (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(tnsr.op, graph)),
                   description='get tensor name from op (with graph)')
    # These two cases must feed the *op name*; passing the tensor name here
    # would merely repeat the 'from tensor name' cases above.
    yield TestCase(data=(tnsr_name, tfx.tensor_name(op_name)),
                   description='get tensor name from op name (no graph)')
    yield TestCase(data=(tnsr_name, tfx.tensor_name(op_name, graph)),
                   description='get tensor name from op name (with graph)')

    # Test for get_tensor
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr, graph)),
                   description='get tensor from tensor')
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr_name, graph)),
                   description='get tensor from tensor name')
    yield TestCase(data=(tnsr, tfx.get_tensor(tnsr.op, graph)),
                   description='get tensor from op')
    yield TestCase(data=(tnsr, tfx.get_tensor(op_name, graph)),
                   description='get tensor from op name')

    # Test for get_op
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr, graph)),
                   description='get op from tensor')
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr_name, graph)),
                   description='get op from tensor name')
    yield TestCase(data=(tnsr.op, tfx.get_op(tnsr.op, graph)),
                   description='get op from op')
    yield TestCase(data=(tnsr.op, tfx.get_op(op_name, graph)),
                   description='test op from op name')
def test_invalid_tensor_name_inputs_with_wrong_types(self, data, description):
    """ tfx.tensor_name must raise TypeError when given an input of a wrong type """
    # `description` is attached as the failure message of the expectation
    expect_type_error = self.assertRaises(TypeError, msg=description)
    with expect_type_error:
        tfx.tensor_name(data)
tf_dtype) gin = transformer.getTFInputGraph() local_features = _build_local_features(np_type) expected = _get_expected_result(gin, local_features) schema = StructType([StructField('inputCol', spark_dtype)]) dataset = self.session.createDataFrame(local_features, schema) _check_transformer_output(transformer, dataset, expected) # The name of the input tensor _tensor_input_name = "input_tensor" # The name of the output tensor (scalar) _tensor_output_name = "output_tensor" # The size of the input tensor _tensor_size = 3 # Input mapping for the Transformer _input_mapping = {'inputCol': tfx.tensor_name(_tensor_input_name)} # Output mapping for the Transformer _output_mapping = {tfx.tensor_name(_tensor_output_name): 'outputCol'} # Numerical threshold _all_close_tolerance = 1e-5 def _build_transformer(gin_function, tf_dtype): """ Makes a session and a default graph, loads the simple graph into it, and then calls gin_function(session) to build the :py:obj:`TFInputGraph` object. Return the :py:obj:`TFTransformer` created from it. """ graph = tf.Graph() with tf.Session(graph=graph) as sess, graph.as_default(): _build_graph(sess, tf_dtype)
from pyspark.ml import Transformer from pyspark.ml.image import ImageSchema from pyspark.ml.param import Param, Params from pyspark.sql.functions import udf import sparkdl.graph.utils as tfx import sparkdl.image.imageIO as imageIO from sparkdl.param import keyword_only, HasInputCol, HasOutputCol, HasOutputMode from sparkdl.param import SparkDLTypeConverters import sparkdl.transformers.utils as utils import sparkdl.utils.jvmapi as JVMAPI __all__ = ['TFImageTransformer'] IMAGE_INPUT_TENSOR_NAME = tfx.tensor_name(utils.IMAGE_INPUT_PLACEHOLDER_NAME) USER_GRAPH_NAMESPACE = 'given' NEW_OUTPUT_PREFIX = 'sdl_flattened' class TFImageTransformer(Transformer, HasInputCol, HasOutputCol, HasOutputMode): """ Applies the Tensorflow graph to the image column in DataFrame. Restrictions of the current API: * Does not use minibatches, which is a major low-hanging fruit for performance. * Only one output node can be specified. * The output is expected to be an image or a 1-d vector. * All images in the dataframe are expected be of the same numerical data type (i.e. the dtype of the values in the numpy array representation is the same.)
def makeGraphUDF(graph, udf_name, fetches, feeds_to_fields_map=None, blocked=False, register=True):
    """
    Create a Spark SQL UserDefinedFunction from a given TensorFlow Graph

    The following example creates a UDF from a graph that adds 3 to a
    scalar double placeholder.

    .. code-block:: python

        from sparkdl.graph.tensorframes_udf import makeGraphUDF

        with IsolatedSession() as issn:
            x = tf.placeholder(tf.double, shape=[], name="input_x")
            z = tf.add(x, 3, name='z')
            makeGraphUDF(issn.graph, "my_tensorflow_udf", [z])

    Then this function can be used in a SQL query.

    .. code-block:: python

        df = spark.createDataFrame([Row(xCol=float(x)) for x in range(100)])
        df.createOrReplaceTempView("my_float_table")
        spark.sql("select my_tensorflow_udf(xCol) as zCol from my_float_table").show()

    :param graph: :py:class:`tf.Graph`, a TensorFlow Graph
    :param udf_name: str, name of the SQL UDF
    :param fetches: list, output tensors of the graph
    :param feeds_to_fields_map: a dict of str -> str,
                                The key is the name of a placeholder in the current
                                TensorFlow graph of computation.
                                The value is the name of a column in the dataframe.
                                For now, only the top-level fields in a dataframe are supported.

                                .. note:: For any placeholder that is not specified in the
                                          feed dictionary, the name of the input column is
                                          assumed to be the same as that of the placeholder.

    :param blocked: bool, if set to True, the TensorFrames will execute the function
                    over blocks/batches of rows. This should provide better performance.
                    Otherwise, the function is applied to individual rows
    :param register: bool, if set to True, the SQL UDF will be registered.
                     In this case, it will be accessible in SQL queries.
    :return: JVM function handle object
    """
    graph = tfx.validated_graph(graph)

    # pylint: disable=W0212
    # TODO: Work with TensorFlow's registered expansions
    # https://github.com/tensorflow/tensorflow/blob/v1.1.0/tensorflow/python/client/session.py#L74
    # TODO: Most part of this implementation might be better off moved to TensorFrames
    jvm_builder = JVMAPI.createTensorFramesModelBuilder()
    tfs.core._add_graph(graph, jvm_builder)

    # Obtain the fetches and their shapes
    fetch_names = [tfx.tensor_name(graph, fetch) for fetch in fetches]
    fetch_shapes = [tfx.get_shape(graph, fetch) for fetch in fetches]

    # Traverse the graph nodes and obtain all the placeholders and their shapes
    placeholder_names = []
    placeholder_shapes = []
    for node in graph.as_graph_def(add_shapes=True).node:
        # Only input-less 'Placeholder' nodes are candidate feeds
        if node.input or str(node.op) != 'Placeholder':
            continue
        tnsr_name = tfx.tensor_name(graph, node.name)
        tnsr = graph.get_tensor_by_name(tnsr_name)
        try:
            # Keep the guarded region to the single call whose failure is tolerated
            tnsr_shape = tfx.get_shape(graph, tnsr)
        except ValueError:
            # Deliberate best-effort: a placeholder whose shape cannot be obtained
            # is left out of the registered inputs.
            # NOTE(review): presumably raised for shapes that are not fully
            # known -- confirm against tfx.get_shape
            continue
        placeholder_names.append(tnsr_name)
        placeholder_shapes.append(tnsr_shape)

    # Passing fetches and placeholders to TensorFrames
    jvm_builder.shape(fetch_names + placeholder_names, fetch_shapes + placeholder_shapes)
    jvm_builder.fetches(fetch_names)

    # Passing feeds to TensorFrames
    placeholder_op_names = [tfx.op_name(graph, name) for name in placeholder_names]
    # Passing the graph input to DataFrame column mapping and additional placeholder names
    tfs.core._add_inputs(jvm_builder, feeds_to_fields_map, placeholder_op_names)

    if register:
        return jvm_builder.registerUDF(udf_name, blocked)
    return jvm_builder.makeUDF(udf_name, blocked)
def test_invalid_tensor_name_inputs_with_wrong_types(self, data, description):
    """ Passing a value of an unsupported type to tfx.tensor_name must raise TypeError """
    # The case description doubles as the message reported on failure
    raises_type_error = self.assertRaises(TypeError, msg=description)
    with raises_type_error:
        tfx.tensor_name(data)