def test_legacy_dtype_compat(self):
    """Bucket indices stay int64 even when a legacy dtype is configured."""
    inputs = keras.Input(batch_size=16, shape=(4,), dtype="float32")
    # TF1-era configs may carry dtype="float32" or an explicit dtype=None;
    # both must be tolerated without changing the int64 output.
    for legacy_dtype in ("float32", None):
        layer = discretization.Discretization(
            bin_boundaries=[-.5, 0.5, 1.5], dtype=legacy_dtype)
        outputs = layer(inputs)
        self.assertAllEqual(outputs.dtype, tf.int64)
Example #2
0
    def bm_adapt_implementation(self, num_elements, batch_size):
        """Benchmark the KPL `adapt` implementation against the tf.data baseline.

        Args:
            num_elements: number of scalar examples streamed through `adapt`.
            batch_size: batch size used when batching the input dataset.
        """
        input_t = keras.Input(shape=(1,), dtype=tf.float32)
        layer = discretization.Discretization()
        _ = layer(input_t)

        num_repeats = 5
        durations = []
        for _ in range(num_repeats):
            ds = (tf.data.Dataset.range(num_elements)
                  .map(lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1))
                  .batch(batch_size))

            begin = time.time()
            # Benchmarked code begins here.
            layer.adapt(ds)
            # Benchmarked code ends here.
            durations.append(time.time() - begin)

        avg_time = np.mean(np.array(durations))
        name = f"discretization_adapt|{num_elements}_elements|batch_{batch_size}"
        baseline = self.run_dataset_implementation(num_elements, batch_size)
        # Report how the KPL implementation compares to the tf.data baseline.
        extras = {
            "tf.data implementation baseline": baseline,
            "delta seconds": (baseline - avg_time),
            "delta percent": ((baseline - avg_time) / baseline) * 100,
        }
        self.report_benchmark(
            iters=num_repeats, wall_time=avg_time, extras=extras, name=name)
    def test_layer_computation(self,
                               adapt_data,
                               test_data,
                               use_dataset,
                               expected,
                               num_bins=5,
                               epsilon=0.01):
        """Adapt a Discretization layer and check its predictions.

        Args:
            adapt_data: array the layer adapts on (shuffled in place).
            test_data: array fed to `predict`; first axis is the batch.
            use_dataset: if True, wrap both arrays in batched tf.data Datasets.
            expected: expected bucketized output for `test_data`.
            num_bins: number of bins the layer should learn.
            epsilon: quantile-approximation tolerance for the layer.
        """
        feature_shape = tuple(list(test_data.shape)[1:])
        np.random.shuffle(adapt_data)
        if use_dataset:
            # Keras APIs expect batched datasets.
            half_batch = test_data.shape[0] // 2
            adapt_data = tf.data.Dataset.from_tensor_slices(
                adapt_data).batch(half_batch)
            test_data = tf.data.Dataset.from_tensor_slices(
                test_data).batch(half_batch)

        layer = discretization.Discretization(
            epsilon=epsilon, num_bins=num_bins)
        layer.adapt(adapt_data)

        inputs = keras.Input(shape=feature_shape)
        model = keras.Model(inputs, layer(inputs))
        model._run_eagerly = test_utils.should_run_eagerly()
        self.assertAllClose(expected, model.predict(test_data))
def embedding_varlen(batch_size, max_length):
    """Benchmark a variable-length embedding.

    Compares the Keras `Discretization` layer against the equivalent
    `tf.feature_column.bucketized_column` on the same data.

    Returns:
        Tuple `(keras_avg_time, fc_avg_time)` as reported by `fc_bm`.
    """
    # Data and constants.
    max_value = 25.0
    boundaries = np.arange(1.0, max_value)
    data = fc_bm.create_data(
        max_length, batch_size * NUM_REPEATS, 100000, dtype=float)

    # Keras implementation.
    model = keras.Sequential([
        keras.Input(shape=(max_length,), name="data", dtype=tf.float32),
        discretization.Discretization(boundaries),
    ])

    # FC implementation.
    fc = tf.feature_column.bucketized_column(
        tf.feature_column.numeric_column("data"),
        boundaries=list(boundaries))

    # Wrap the FC implementation in a tf.function for a fair comparison
    @tf_function()
    def fc_fn(tensors):
        fc.transform_feature(
            tf.__internal__.feature_column.FeatureTransformationCache(tensors),
            None)

    # Benchmark runs. Both paths see the identical densified tensor.
    dense = data.to_tensor(default_value=0.0)
    k_avg_time = fc_bm.run_keras({"data": dense}, model, batch_size, NUM_REPEATS)
    fc_avg_time = fc_bm.run_fc({"data": dense}, fc_fn, batch_size, NUM_REPEATS)

    return k_avg_time, fc_avg_time
    def run_dataset_implementation(self, num_elements, batch_size):
        """Time the pure tf.data quantile pipeline; returns mean seconds."""
        input_t = keras.Input(shape=(1,))
        layer = discretization.Discretization()
        _ = layer(input_t)

        num_repeats = 5
        durations = []
        for _ in range(num_repeats):
            ds = tf.data.Dataset.range(num_elements)
            ds = ds.map(
                lambda x: tf.compat.v1.expand_dims(tf.cast(x, tf.float32), -1))
            ds = ds.batch(batch_size)

            begin = time.time()
            # Benchmarked code begins here.
            # `reduce_fn` folds the dataset into a quantile summary state,
            # which is then turned into 100 bucket boundaries.
            state = ds.reduce((np.zeros((1, 2)),), reduce_fn)
            bins = discretization.get_bucket_boundaries(state, 100)
            layer.set_weights([bins])
            # Benchmarked code ends here.
            durations.append(time.time() - begin)

        return np.mean(np.array(durations))
    def test_saved_model_tf(self):
        """Round-trip an adapted model through tf.saved_model save/load."""
        adapt_data = [[1], [2], [3]]
        predict_data = [[0.5], [1.5], [2.5]]
        expected_output = [[0], [1], [2]]

        inputs = keras.Input(shape=(1,), dtype=tf.float32)
        layer = discretization.Discretization(num_bins=3)
        layer.adapt(adapt_data)
        model = keras.Model(inputs=inputs, outputs=layer(inputs))

        self.assertAllClose(model.predict(predict_data), expected_output)

        # Save the model to disk.
        output_path = os.path.join(self.get_temp_dir(), "tf_saved_model")
        tf.saved_model.save(model, output_path)
        loaded_model = tf.saved_model.load(output_path)
        serving_fn = loaded_model.signatures["serving_default"]

        # Ensure that the loaded model is unique (so that the save/load is real)
        self.assertIsNot(model, loaded_model)

        # Validate correctness of the new model.
        reloaded_output = serving_fn(tf.constant(predict_data))["discretization"]
        self.assertAllClose(reloaded_output, expected_output)
 def test_one_hot_output_dtype(self, dtype):
     """One-hot output honors the dtype the layer was constructed with."""
     inputs = keras.Input(batch_size=16, shape=(1,), dtype="float32")
     layer = discretization.Discretization(
         bin_boundaries=[-.5, 0.5, 1.5], output_mode="one_hot", dtype=dtype)
     outputs = layer(inputs)
     self.assertAllEqual(outputs.dtype, dtype)
  def test_bucketize_with_explicit_buckets_sparse_float_input(self):
    """Sparse float inputs keep their sparsity pattern after bucketizing."""
    indices = [[0, 1], [0, 2], [1, 1]]
    sparse_input = tf.SparseTensor(
        indices=indices, values=[-1.5, 1.0, 3.4], dense_shape=[2, 3])
    expected_values = [0, 2, 3]

    inputs = keras.Input(shape=(3,), dtype=tf.float32, sparse=True)
    buckets = discretization.Discretization(
        bin_boundaries=[-.5, 0.5, 1.5])(inputs)
    model = keras.Model(inputs=inputs, outputs=buckets)

    result = model.predict(sparse_input, steps=1)
    # Indices must be untouched; only the values get bucketized.
    self.assertAllEqual(indices, result.indices)
    self.assertAllEqual(expected_values, result.values)
    def test_bucketize_with_explicit_buckets_integer(self):
        """Dense float inputs map to integer bucket indices."""
        data = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]])
        expected_output = [[0, 2, 3, 1], [1, 3, 2, 1]]
        expected_shape = [None, 4]

        inputs = keras.Input(shape=(4,))
        buckets = discretization.Discretization(
            bin_boundaries=[0., 1., 2.])(inputs)
        self.assertAllEqual(expected_shape, buckets.shape.as_list())

        model = keras.Model(inputs=inputs, outputs=buckets)
        self.assertAllEqual(expected_output, model.predict(data))
Example #10
0
  def test_merge_state(self):
    """Merging partially-adapted layers matches one full adapt."""
    data = np.arange(300)
    # Split the data into three equal tf.data shards.
    shards = [
        tf.data.Dataset.from_tensor_slices(data[start:start + 100])
        for start in (0, 100, 200)
    ]
    full_ds = shards[0].concatenate(shards[1]).concatenate(shards[2])

    # Use a higher epsilon to avoid any discrepancies from the quantile
    # approximation.
    full_layer = discretization.Discretization(num_bins=3, epsilon=0.001)
    full_layer.adapt(full_ds.batch(2))

    # Adapt one layer per shard, then merge them all into the first.
    partials = []
    for shard in shards:
      partial = discretization.Discretization(num_bins=3, epsilon=0.001)
      partial.adapt(shard.batch(2))
      partials.append(partial)
    merged_layer = partials[0]
    merged_layer.merge_state(partials[1:])

    self.assertAllClose(full_layer(data), merged_layer(data))
Example #11
0
  def test_bucketize_with_explicit_buckets_int_input(self):
    """int64 inputs are bucketized just like floats."""
    data = np.array([[-1, 1, 3, 0], [0, 3, 1, 0]], dtype=np.int64)
    expected_output = [[0, 2, 3, 1], [1, 3, 2, 1]]
    expected_shape = [None, 4]

    inputs = keras.Input(shape=(4,), dtype=tf.int64)
    # NOTE(review): uses the older `bins=` kwarg (vs `bin_boundaries=`
    # elsewhere in this file) — presumably from an earlier API version.
    buckets = discretization.Discretization(bins=[-.5, 0.5, 1.5])(inputs)
    self.assertAllEqual(expected_shape, buckets.shape.as_list())

    model = keras.Model(inputs=inputs, outputs=buckets)
    self.assertAllEqual(expected_output, model.predict(data))
    def test_bucketize_with_explicit_buckets_ragged_int_input(self):
        """Ragged int64 inputs stay ragged and are bucketized per value."""
        data = tf.ragged.constant([[-1, 1, 3, 0], [0, 3, 1]], dtype=tf.int64)
        expected_output = [[0, 2, 3, 1], [1, 3, 2]]
        expected_shape = [None, None]

        inputs = keras.Input(shape=(None,), ragged=True, dtype=tf.int64)
        buckets = discretization.Discretization(
            bin_boundaries=[-.5, 0.5, 1.5])(inputs)
        self.assertAllEqual(expected_shape, buckets.shape.as_list())
        model = keras.Model(inputs=inputs, outputs=buckets)
        self.assertAllEqual(expected_output, model.predict(data))
Example #13
0
  def test_bucketize_with_explicit_buckets_ragged_float_input(self):
    """Ragged float inputs stay ragged and are bucketized per value."""
    data = tf.ragged.constant([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3]])
    expected_output = [[0, 1, 3, 1], [0, 3, 2]]
    expected_shape = [None, None]

    inputs = keras.Input(shape=(None,), ragged=True)
    # NOTE(review): older `bins=` kwarg kept verbatim; see int-input test.
    buckets = discretization.Discretization(bins=[0., 1., 2.])(inputs)
    self.assertAllEqual(expected_shape, buckets.shape.as_list())

    model = keras.Model(inputs=inputs, outputs=buckets)
    self.assertAllEqual(expected_output, model.predict(data))
    def test_count_output(self):
        """`output_mode="count"` yields per-bin counts of the inputs."""
        data = np.array([-1.5, 1.0, 3.4, 3.5])
        expected_output = [1., 0., 1., 2.]
        expected_shape = [None, 4]

        inputs = keras.Input(shape=(4,))
        layer = discretization.Discretization(
            bin_boundaries=[0., 1., 2.], output_mode="count")
        outputs = layer(inputs)
        self.assertAllEqual(expected_shape, outputs.shape.as_list())

        model = keras.Model(inputs, outputs)
        self.assertAllEqual(expected_output, model(data))
Example #15
0
  def test_distribution(self, distribution):
    """Discretization works inside a distribution strategy scope."""
    data = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]])
    expected_output = [[0, 1, 3, 1], [0, 3, 2, 0]]
    expected_shape = [None, 4]

    tf.config.set_soft_device_placement(True)

    # Build the model under the strategy scope; predict outside of it.
    with distribution.scope():
      inputs = keras.Input(shape=(4,))
      # NOTE(review): older `bins=` kwarg kept verbatim; see int-input test.
      buckets = discretization.Discretization(bins=[0., 1., 2.])(inputs)
      self.assertAllEqual(expected_shape, buckets.shape.as_list())
      model = keras.Model(inputs=inputs, outputs=buckets)
    self.assertAllEqual(expected_output, model.predict(data))
    def test_multiple_adapts(self):
        """Re-adapting a layer updates predictions after recompiling."""
        first_adapt = [[1], [2], [3]]
        second_adapt = [[4], [5], [6]]
        predict_input = [[2], [2]]
        expected_first_output = [[2], [2]]
        expected_second_output = [[0], [0]]

        inputs = keras.Input(shape=(1,), dtype=tf.int32)
        layer = discretization.Discretization(num_bins=3)
        layer.adapt(first_adapt)
        model = keras.Model(inputs=inputs, outputs=layer(inputs))

        self.assertAllClose(
            model.predict(predict_input), expected_first_output)

        # Re-adapt the layer on new inputs.
        layer.adapt(second_adapt)
        # Re-compile the model.
        model.compile()
        # `predict` should now use the new model state.
        self.assertAllClose(
            model.predict(predict_input), expected_second_output)
Example #17
0
 def test_output_dtype(self):
     """Bucket indices come out as int64, matching the int64 input here."""
     inputs = keras.Input(batch_size=16, shape=(4,), dtype=tf.int64)
     outputs = discretization.Discretization(
         bin_boundaries=[-.5, 0.5, 1.5])(inputs)
     self.assertAllEqual(outputs.dtype, tf.int64)
Example #18
0
 def test_merge_with_unadapted_layers_fails(self):
     """Merging with a layer that was never adapted raises ValueError."""
     adapted = discretization.Discretization(num_bins=2, name="layer1")
     adapted.adapt([1, 2, 3])
     unadapted = discretization.Discretization(num_bins=2, name="layer2")
     # The error message should call out the offending layer by name.
     with self.assertRaisesRegex(ValueError, "Cannot merge.*layer2"):
         adapted.merge_state([unadapted])
 def test_output_shape(self):
     """Output shape mirrors the (batch, features) input shape."""
     inputs = keras.Input(batch_size=16, shape=(4,), dtype=tf.int64)
     outputs = discretization.Discretization(
         bin_boundaries=[-.5, 0.5, 1.5])(inputs)
     self.assertAllEqual(outputs.shape.as_list(), [16, 4])
 def test_num_bins_and_bins_set_fails(self):
     """Setting both `num_bins` and explicit boundaries is rejected."""
     expected_msg = (
         r"`num_bins` and `bin_boundaries` should not be set.*5.*\[1, 2\]")
     with self.assertRaisesRegex(ValueError, expected_msg):
         _ = discretization.Discretization(num_bins=5, bins=[1, 2])
 def test_num_bins_negative_fails(self):
     """A negative `num_bins` is rejected at construction time."""
     with self.assertRaisesRegex(ValueError,
                                 "`num_bins` must be.*num_bins=-7"):
         _ = discretization.Discretization(num_bins=-7)