def test_apply_transform_missing_features(self):
    """Applying the saved transform to an empty feature dict raises ValueError."""
    no_features = {}
    with self.assertRaises(ValueError), \
            tf.Graph().as_default(), \
            tf.Session().as_default():
        saved_transform_io.apply_saved_transform(
            self._test_saved_model, no_features)
 def test_apply_transform_type_mismatch(self):
     """Passing a string tensor as feature 'x' is expected to raise ValueError."""
     with self.assertRaises(ValueError), \
             tf.Graph().as_default(), \
             tf.Session().as_default():
         string_features = {'x': tf.constant(['bogus'])}  # tf.string
         saved_transform_io.apply_saved_transform(
             self._test_saved_model, string_features)
 def test_apply_transform_shape_mismatch(self):
     """Passing a scalar float as feature 'x' is expected to raise ValueError."""
     with self.assertRaises(ValueError), \
             tf.Graph().as_default(), \
             tf.Session().as_default():
         scalar_features = {'x': tf.constant(1234.0)}  # tf.float32 scalar
         saved_transform_io.apply_saved_transform(
             self._test_saved_model, scalar_features)
    def test_sparse_roundtrip(self):
        """Writes a sparse transform, re-applies it, and checks the result.

        A graph computing ``input / 5.0`` on a sparse placeholder is exported.
        It is then applied in a fresh graph to ``input_sparse * 10``, so the
        output is expected to keep the indices and dense shape of the input
        while doubling its values.
        """
        import shutil  # Local import: only needed for temp-dir cleanup.

        export_root = tempfile.mkdtemp()
        # mkdtemp() never deletes its directory; remove it when the test ends.
        self.addCleanup(shutil.rmtree, export_root, ignore_errors=True)
        export_path = os.path.join(export_root, 'export')

        with tf.Graph().as_default():
            with tf.Session().as_default() as session:
                input_float = tf.sparse_placeholder(tf.float32)
                output = input_float / 5.0
                inputs = {'input': input_float}
                outputs = {'output': output}
                saved_transform_io.write_saved_transform_from_session(
                    session, inputs, outputs, export_path)

        with tf.Graph().as_default():
            with tf.Session().as_default() as session:
                indices = np.array([[3, 2, 0], [4, 5, 1]], dtype=np.int64)
                values = np.array([1.0, 2.0], dtype=np.float32)
                shape = np.array([7, 9, 2], dtype=np.int64)
                input_sparse = tf.SparseTensor(indices=indices,
                                               values=values,
                                               dense_shape=shape)

                # Using a computed input gives confidence that the graphs are
                # fused.
                inputs = {'input': input_sparse * 10}
                outputs = saved_transform_io.apply_saved_transform(
                    export_path, inputs)
                output_sparse = outputs['output']
                self.assertIsInstance(output_sparse, tf.SparseTensor)
                result = session.run(output_sparse)

                # Indices and shape are unchanged; each value v becomes
                # (v * 10) / 5 = 2 * v.
                self.assertEqual(indices.tolist(), result.indices.tolist())
                self.assertEqual([2.0, 4.0], result.values.tolist())
                self.assertEqual(shape.tolist(), result.dense_shape.tolist())
    def test_table_roundtrip(self):
        """Writes a transform containing a lookup table and re-applies it.

        The exported graph maps a string through an index table built from
        ['cat', 'dog', 'giraffe']; applying it to 'dog' is expected to yield
        index 1 once the tables have been initialized.
        """
        import shutil  # Local import: only needed for temp-dir cleanup.

        export_root = tempfile.mkdtemp()
        # mkdtemp() never deletes its directory; remove it when the test ends.
        self.addCleanup(shutil.rmtree, export_root, ignore_errors=True)
        export_path = os.path.join(export_root, 'export')

        with tf.Graph().as_default():
            with tf.Session().as_default() as session:
                input_string = tf.placeholder(tf.string)
                # Map string through a table, in this case based on a constant
                # tensor.
                table = lookup.index_table_from_tensor(
                    tf.constant(['cat', 'dog', 'giraffe']))
                output = table.lookup(input_string)
                inputs = {'input': input_string}
                outputs = {'output': output}
                saved_transform_io.write_saved_transform_from_session(
                    session, inputs, outputs, export_path)

        with tf.Graph().as_default():
            with tf.Session().as_default() as session:
                # Feed a constant string in place of the exported placeholder.
                input_string = tf.constant('dog')
                inputs = {'input': input_string}
                outputs = saved_transform_io.apply_saved_transform(
                    export_path, inputs)
                # The table must be initialized in this graph before lookup.
                session.run(tf.tables_initializer())
                result = session.run(outputs['output'])
                self.assertEqual(1, result)
Example #6
0
  def raw_training_input_fn():
    """Read raw batches, apply the saved transform, and split the outputs.

    Returns:
      A `(features, labels)` pair. `features` holds the transformed columns
      named in `transformed_feature_keys`, plus the key column when
      `key_feature_name` is set. `labels` holds the columns named in
      `transformed_label_keys`, or the single value itself when there is
      exactly one label.
    """
    read_args = (raw_data_file_pattern, training_batch_size,
                 raw_training_feature_spec, reader)
    keyed = key_feature_name is not None

    if keyed:
      keys, raw_data = tf.contrib.learn.io.read_keyed_batch_features(
          *read_args, **read_batch_features_args)
    else:
      raw_data = tf.contrib.learn.io.read_batch_features(
          *read_args, **read_batch_features_args)

    transformed_data = saved_transform_io.apply_saved_transform(
        transform_savedmodel_dir, raw_data)

    # Route every transformed column to the feature and/or label dict.
    transformed_features = {}
    transformed_labels = {}
    for name, tensor in six.iteritems(transformed_data):
      if name in transformed_feature_keys:
        transformed_features[name] = tensor
      if name in transformed_label_keys:
        transformed_labels[name] = tensor

    if convert_scalars_to_vectors:
      transformed_features = _convert_scalars_to_vectors(transformed_features)
      transformed_labels = _convert_scalars_to_vectors(transformed_labels)

    if keyed:
      transformed_features[key_feature_name] = keys

    if len(transformed_labels) == 1:
      # Unwrap a lone label so the caller gets the value, not a dict.
      transformed_labels = next(iter(transformed_labels.values()))
    return transformed_features, transformed_labels
 def test_apply_transform_extra_features_no_passthrough(self):
   """Supplying features beyond 'x' is expected to raise ValueError."""
   with self.assertRaises(ValueError), \
       tf.Graph().as_default(), \
       tf.Session().as_default():
     features_with_extras = {
         'x': tf.constant([1234.0]),  # tf.float32
         'extra_1': tf.constant('1'),
         'extra_2': tf.constant('2'),
     }
     saved_transform_io.apply_saved_transform(
         self._test_saved_model, features_with_extras)
Example #8
0
 def test_apply_saved_transform_to_tensor_outside_scope(self):
     """Applies the transform inside a name scope to a tensor made outside it.

     The input constant is created before entering `my_scope`; the transform
     is applied within the scope and its 'x_scaled' output is evaluated.
     """
     with tf.Graph().as_default():
         input_floats = tf.constant([1237.0])  # tf.float32
         with tf.name_scope('my_scope'):
             with tf.Session().as_default() as session:
                 input_features = {'x': input_floats}
                 transformed_features = saved_transform_io.apply_saved_transform(
                     self._test_saved_model, input_features)
                 # list(): on Python 3 `.keys()` is a view that never compares
                 # equal to a list, so the bare comparison would always fail.
                 self.assertEqual(
                     ['x_scaled'], list(transformed_features.keys()))
                 result_tensor = transformed_features['x_scaled']
                 self.assertAllEqual(session.run(result_tensor), [247.0])
Example #9
0
    def testImportAndExportWithTensorValueMapping(self):
        """Replaces an analyzer output with a constant and applies the model.

        Exports the function "z = x * min(y) + x + min(y)", rewrites the saved
        model so that min(y) is replaced by the constant 6, then applies the
        rewritten model to x = 5 and expects z = 41.
        """
        def preprocessing_fn(inputs):
            return {
                'z':
                api.map(lambda x, y: x * y + x + y, inputs['x'],
                        analyzers.min(inputs['y']))
            }

        input_schema = self.toSchema({
            'x': tf.FixedLenFeature((), tf.float32),
            'y': tf.FixedLenFeature((), tf.float32)
        })

        inputs, outputs = impl_helper.run_preprocessing_fn(
            preprocessing_fn, input_schema)
        saved_model_dir = os.path.join(self.get_temp_dir(), 'replace_original')
        input_columns_to_statistics = impl_helper.make_transform_fn_def(
            input_schema, inputs, outputs, saved_model_dir)
        # Materialize the keys first: on Python 3 `.keys()` returns a view,
        # which cannot be indexed with [0].
        statistic_input_names = list(input_columns_to_statistics.keys())
        self.assertEqual(len(statistic_input_names), 1)
        y_min_input_name = statistic_input_names[0]

        new_saved_model_dir = os.path.join(self.get_temp_dir(), 'replace_new')
        impl_helper.replace_tensors_with_constant_values(
            saved_model_dir, new_saved_model_dir, {
                y_min_input_name:
                impl_helper.ConstantTensorValue(6, tf.float32, ())
            })

        # Import the function, applying it to constants for x and y.
        g = tf.Graph()
        with g.as_default():
            x = tf.constant(5, tf.float32, (1, ))
            y = tf.constant(1000, tf.float32, (1, ))  #  Value is never used.
            outputs = saved_transform_io.apply_saved_transform(
                new_saved_model_dir, {
                    'x': x,
                    'y': y
                })
            z = outputs['z']

            sess = tf.Session()
            with sess.as_default():
                # Check result is 5 * 6 + 5 + 6 = 41.
                self.assertEqual(41, z.eval())
    def test_stale_asset_collections_are_cleaned(self):
        """Re-exports a model with an asset repeatedly and checks collections.

        A SavedModel whose lookup table is backed by a vocabulary file is
        written once, then loaded and written again three times. After every
        load the graph is expected to hold exactly one ASSET_FILEPATHS entry,
        an empty ASSETS_KEY collection, and asset paths that resolve to
        tensors in the graph.
        """
        import shutil  # Local import: only needed for temp-dir cleanup.

        def new_export_path():
            # mkdtemp() never deletes its directory; remove it at test end.
            export_root = tempfile.mkdtemp()
            self.addCleanup(shutil.rmtree, export_root, ignore_errors=True)
            return os.path.join(export_root, 'export')

        vocabulary_file = os.path.join(compat.as_bytes(test.get_temp_dir()),
                                       compat.as_bytes('asset'))
        file_io.write_string_to_file(vocabulary_file, 'foo bar baz')

        export_path = new_export_path()

        # create a SavedModel including assets
        with tf.Graph().as_default():
            with tf.Session().as_default() as session:
                input_string = tf.placeholder(tf.string)
                # Map string through a table loaded from an asset file
                table = lookup.index_table_from_file(vocabulary_file,
                                                     num_oov_buckets=12,
                                                     default_value=12)
                output = table.lookup(input_string)
                inputs = {'input': input_string}
                outputs = {'output': output}
                saved_transform_io.write_saved_transform_from_session(
                    session, inputs, outputs, export_path)

        # Load it and save it again repeatedly, verifying that the asset
        # collections remain valid.
        for _ in range(3):
            with tf.Graph().as_default() as g:
                with tf.Session().as_default() as session:
                    input_string = tf.constant('dog')
                    inputs = {'input': input_string}
                    outputs = saved_transform_io.apply_saved_transform(
                        export_path, inputs)

                    self.assertEqual(
                        1,
                        len(g.get_collection(ops.GraphKeys.ASSET_FILEPATHS)))
                    self.assertEqual(
                        0,
                        len(
                            g.get_collection(
                                tf.saved_model.constants.ASSETS_KEY)))

                    # Check that every ASSET_FILEPATHS refers to a Tensor in
                    # the graph. If not, get_tensor_by_name() raises KeyError.
                    for asset_path in g.get_collection(
                            ops.GraphKeys.ASSET_FILEPATHS):
                        tensor_name = asset_path.name
                        g.get_tensor_by_name(tensor_name)

                    # Write to a fresh location; the next iteration loads it.
                    export_path = new_export_path()
                    saved_transform_io.write_saved_transform_from_session(
                        session, inputs, outputs, export_path)
Example #11
0
    def testImportAndExportDense(self):
        """Exports "z = x * y + x + y" and checks it when applied and loaded."""

        def preprocessing_fn(inputs):
            # The function under test: z = x * y + x + y.
            combine = lambda x, y: x * y + x + y
            return {'z': api.map(combine, inputs['x'], inputs['y'])}

        input_schema = self.toSchema({
            name: tf.FixedLenFeature((), tf.float32) for name in ('x', 'y')
        })

        inputs, outputs = impl_helper.run_preprocessing_fn(
            preprocessing_fn, input_schema)
        saved_model_dir = os.path.join(self.get_temp_dir(), 'dense')
        impl_helper.make_transform_fn_def(
            input_schema, inputs, outputs, saved_model_dir)

        # Path 1: apply the saved function to constants for x and y.
        with tf.Graph().as_default():
            constant_inputs = {
                'x': tf.constant(5, tf.float32, (1, )),
                'y': tf.constant(6, tf.float32, (1, )),
            }
            applied = saved_transform_io.apply_saved_transform(
                saved_model_dir, constant_inputs)
            z = applied['z']

            with tf.Session().as_default():
                # Expect 5 * 6 + 5 + 6 = 41.
                self.assertEqual(41, z.eval())

        # Path 2: load the graph and feed it values for x and y.
        with tf.Graph().as_default():
            loaded_inputs, loaded_outputs = impl_helper.load_transform_fn_def(
                saved_model_dir)
            feed = {loaded_inputs['x']: [5], loaded_inputs['y']: [6]}

            session = tf.Session()
            with session.as_default():
                # Expect 5 * 6 + 5 + 6 = 41.
                self.assertEqual(41, session.run(loaded_outputs['z'], feed))
  def test_apply_saved_transform(self):
    """Applies the saved transform and checks the tensors wired into the graph.

    The only output key is expected to be 'x_scaled', its value a `tf.Tensor`
    named 'transform/truediv:0', and the input constant is expected to remain
    the graph's 'Const:0'.
    """
    with tf.Graph().as_default() as graph:
      with tf.Session().as_default():
        input_floats = tf.constant([1234.0])  # tf.float32
        input_features = {'x': input_floats}
        transformed_features = saved_transform_io.apply_saved_transform(
            self._test_saved_model, input_features)
        # list(): on Python 3 `.keys()` is a view that never compares equal to
        # a list, so the bare comparison would always fail.
        self.assertEqual(['x_scaled'], list(transformed_features.keys()))
        result_tensor = transformed_features['x_scaled']
        self.assertIsInstance(result_tensor, tf.Tensor)

        self.assertEqual(graph.get_tensor_by_name('Const:0'), input_floats)
        self.assertEqual(
            graph.get_tensor_by_name('transform/truediv:0'),
            result_tensor)
Example #13
0
    def testImportAndExportSparse(self):
        """Exports the sparse function "z = x + y" and applies it to constants.

        Two single-element sparse tensors holding 5 and 6 are added through
        the saved transform; the result is expected to hold 11 at index [0]
        with dense shape [1].
        """
        def preprocessing_fn(inputs):
            return {'z': api.map(tf.sparse_add, inputs['x'], inputs['y'])}

        input_schema = self.toSchema({
            'x': tf.VarLenFeature(tf.float32),
            'y': tf.VarLenFeature(tf.float32)
        })

        inputs, outputs = impl_helper.run_preprocessing_fn(
            preprocessing_fn, input_schema)
        saved_model_dir = os.path.join(self.get_temp_dir(), 'sparse')
        _ = impl_helper.make_transform_fn_def(input_schema, inputs, outputs,
                                              saved_model_dir)

        # Import the function, applying it to constants for x and y.
        g = tf.Graph()
        with g.as_default():
            x = tf.SparseTensor(indices=[[0]],
                                values=tf.constant(5,
                                                   shape=(1, ),
                                                   dtype=tf.float32),
                                dense_shape=[1])
            y = tf.SparseTensor(indices=[[0]],
                                values=tf.constant(6,
                                                   shape=(1, ),
                                                   dtype=tf.float32),
                                dense_shape=[1])
            outputs = saved_transform_io.apply_saved_transform(
                saved_model_dir, {
                    'x': x,
                    'y': y
                })
            z = outputs['z']

            sess = tf.Session()
            with sess.as_default():
                # Check result is 5 + 6 = 11.
                result = z.eval()
                # Compare plain lists: assertEqual on numpy arrays only works
                # by accident for single-element arrays, and raises instead of
                # reporting a diff once more than one element is involved.
                self.assertEqual([[0]], result.indices.tolist())
                self.assertEqual([11], result.values.tolist())
                self.assertEqual([1], result.dense_shape.tolist())
  def test_dense_roundtrip(self):
    """Writes a dense transform, re-applies it, and checks the result.

    A graph computing ``input / 5.0`` is exported, then applied in a fresh
    graph to ``25.0 * 2``; the expected output is 10.0.
    """
    import shutil  # Local import: only needed for temp-dir cleanup.

    export_root = tempfile.mkdtemp()
    # mkdtemp() never deletes its directory; remove it when the test ends.
    self.addCleanup(shutil.rmtree, export_root, ignore_errors=True)
    export_path = os.path.join(export_root, 'export')

    with tf.Graph().as_default():
      with tf.Session().as_default() as session:
        input_float = tf.placeholder(tf.float32)
        # show that unrelated & unmapped placeholders do not interfere
        tf.placeholder(tf.int64)
        output = input_float / 5.0
        inputs = {'input': input_float}
        outputs = {'output': output}
        saved_transform_io.write_saved_transform_from_session(
            session, inputs, outputs, export_path)

    with tf.Graph().as_default():
      with tf.Session().as_default() as session:
        # Using a computed input gives confidence that the graphs are fused.
        input_float = tf.constant(25.0) * 2
        inputs = {'input': input_float}
        outputs = saved_transform_io.apply_saved_transform(export_path, inputs)
        result = session.run(outputs['output'])
        # (25 * 2) / 5 = 10
        self.assertEqual(10.0, result)
Example #15
0
    def raw_training_input_fn():
        """Read CSV lines, batch and parse them, then apply the saved transform.

        Returns:
            A `(features, labels)` pair. `labels` holds the transformed
            columns named in `transformed_label_keys` (or the single value
            itself when there is exactly one); `features` holds every other
            transformed column.
        """
        # Accept either a single file pattern or an iterable of patterns.
        patterns = ([raw_data_file_pattern]
                    if isinstance(raw_data_file_pattern, six.string_types)
                    else raw_data_file_pattern)
        files = [
            matched
            for pattern in patterns
            for matched in file_io.get_matching_files(pattern)
        ]

        filename_queue = tf.train.string_input_producer(
            files, num_epochs=num_epochs, shuffle=randomize_input)

        csv_id, csv_lines = tf.TextLineReader().read_up_to(
            filename_queue, training_batch_size)

        queue_capacity = (
            (reader_num_threads + 3) * training_batch_size + min_after_dequeue)

        # Arguments shared by the shuffling and non-shuffling batchers.
        batch_kwargs = dict(
            tensors=[csv_id, csv_lines],
            batch_size=training_batch_size,
            capacity=queue_capacity,
            enqueue_many=True,
            num_threads=reader_num_threads)
        if randomize_input:
            _batch_csv_id, batch_csv_lines = tf.train.shuffle_batch(
                min_after_dequeue=min_after_dequeue, **batch_kwargs)
        else:
            _batch_csv_id, batch_csv_lines = tf.train.batch(**batch_kwargs)

        record_defaults = []
        for key in raw_keys:
            schema = column_schemas[key]
            default = schema.representation.default_value
            # Compare against None explicitly: falsy defaults such as '' or 0
            # are still valid default values.
            default_values = [] if default is None else [default]
            record_defaults.append(
                tf.constant(default_values, dtype=schema.domain.dtype))

        parsed_tensors = tf.decode_csv(
            batch_csv_lines, record_defaults, name='csv_to_tensors')

        raw_data = dict(zip(raw_keys, parsed_tensors))

        transformed_data = saved_transform_io.apply_saved_transform(
            transform_savedmodel_dir, raw_data)

        # Every transformed column is either a label or a feature.
        transformed_features = {}
        transformed_labels = {}
        for name, tensor in six.iteritems(transformed_data):
            if name in transformed_label_keys:
                transformed_labels[name] = tensor
            else:
                transformed_features[name] = tensor

        if convert_scalars_to_vectors:
            to_vectors = input_fn_maker._convert_scalars_to_vectors
            transformed_features = to_vectors(transformed_features)
            transformed_labels = to_vectors(transformed_labels)

        # TODO(b/35264116): remove this when all estimators accept label dict
        if len(transformed_labels) == 1:
            transformed_labels = next(iter(transformed_labels.values()))
        return transformed_features, transformed_labels