def test_config_with_custom_name(self):
        """Check that a custom layer name survives a config round trip.

        The round trip is exercised twice: once with an explicit ``depth``
        argument and once without it.
        """
        constructor_kwargs = (
            {'depth': 2, 'name': 'hashing'},
            {'name': 'hashing'},
        )
        for kwargs in constructor_kwargs:
            original = category_crossing.CategoryCrossing(**kwargs)
            restored = category_crossing.CategoryCrossing.from_config(
                original.get_config())
            self.assertEqual(restored.name, original.name)
 def test_crossing_sparse_inputs_depth_tuple(self):
     """Cross three sparse string inputs with ``depth=(2, 3)`` in a model.

     The prediction must be a SparseTensor; once densified it is compared
     with the expected pairwise and three-way crosses of each row.
     """
     layer = category_crossing.CategoryCrossing(depth=(2, 3))

     # Every feature is a 3x1 sparse column with one value per row.
     row_indices = [[0, 0], [1, 0], [2, 0]]
     sparse_features = [
         sparse_tensor.SparseTensor(indices=row_indices,
                                    values=column_values,
                                    dense_shape=[3, 1])
         for column_values in (['a', 'b', 'c'],
                               ['d', 'e', 'f'],
                               ['g', 'h', 'i'])
     ]
     symbolic_inputs = [
         input_layer.Input(shape=(1, ), sparse=True, dtype=dtypes.string)
         for _ in range(3)
     ]

     crossed = layer(symbolic_inputs)
     model = training.Model(symbolic_inputs, crossed)
     prediction = model.predict(sparse_features)
     self.assertIsInstance(prediction, sparse_tensor.SparseTensor)

     dense_prediction = sparse_ops.sparse_tensor_to_dense(prediction)
     expected_rows = [
         [[b'a_X_d', b'a_X_g', b'd_X_g', b'a_X_d_X_g']],
         [[b'b_X_e', b'b_X_h', b'e_X_h', b'b_X_e_X_h']],
         [[b'c_X_f', b'c_X_i', b'f_X_i', b'c_X_f_X_i']],
     ]
     expected_out = array_ops.concat(expected_rows, axis=0)
     self.assertAllEqual(expected_out, dense_prediction)
  def bm_layer_implementation(self, batch_size):
    """Time the CategoryCrossing layer and report it next to the baseline.

    The layer is called on batches drawn from a generator-backed dataset;
    the average wall time per batch is reported together with the value
    returned by `run_dataset_implementation` for the same batch size.

    Args:
      batch_size: batch size used for the dataset; it also scales the
        shuffle buffer and is forwarded to `run_dataset_implementation`.
    """
    word_input = keras.Input(shape=(1,), dtype=dtypes.int64, name="word")
    int_input = keras.Input(shape=(1,), dtype=dtypes.int64, name="int")
    layer = category_crossing.CategoryCrossing()
    # Call the layer once on symbolic inputs before any timing starts.
    _ = layer([word_input, int_input])

    num_repeats = 5
    num_batches = 5
    element_types = (dtypes.int64, dtypes.int64)
    element_shapes = (tensor_shape.TensorShape([1]),
                      tensor_shape.TensorShape([1]))
    starts = []
    ends = []
    for _ in range(num_repeats):
      ds = (
          dataset_ops.Dataset.from_generator(
              int_gen, element_types, element_shapes)
          .shuffle(batch_size * 100)
          .batch(batch_size)
          .take(num_batches)
          .prefetch(num_batches))
      starts.append(time.time())
      # Benchmarked code begins here.
      for batch in ds:
        _ = layer([batch[0], batch[1]])
      # Benchmarked code ends here.
      ends.append(time.time())

    avg_time = np.mean(np.subtract(ends, starts)) / num_batches
    name = "category_crossing|batch_%s" % batch_size
    baseline = self.run_dataset_implementation(batch_size)
    delta = baseline - avg_time
    extras = {
        "dataset implementation baseline": baseline,
        "delta seconds": delta,
        "delta percent": (delta / baseline) * 100,
    }
    self.report_benchmark(
        iters=num_repeats, wall_time=avg_time, extras=extras, name=name)
Exemple #4
0
    def test_distribution(self, distribution):
        """Run a CategoryCrossing model built under a distribution scope.

        Two 2x2 string features are batched with `batch_wrapper`, and the
        model prediction is compared with the full cross of each row.
        """
        features = {
            'input_1': np.array([['a', 'b'], ['c', 'd']]),
            'input_2': np.array([['e', 'f'], ['g', 'h']]),
        }
        inp_dataset = batch_wrapper(
            dataset_ops.DatasetV2.from_tensor_slices(features), 2,
            distribution)

        # pyformat: disable
        expected_output = [[b'a_X_e', b'a_X_f', b'b_X_e', b'b_X_f'],
                           [b'c_X_g', b'c_X_h', b'd_X_g', b'd_X_h']]
        config.set_soft_device_placement(True)

        with distribution.scope():
            input_data = [
                keras.Input(shape=(2, ), dtype=dtypes.string, name=feature)
                for feature in ('input_1', 'input_2')
            ]
            crossing = category_crossing.CategoryCrossing()
            crossed = crossing(input_data)
            model = keras.Model(inputs=input_data, outputs=crossed)
        output_dataset = model.predict(inp_dataset)
        self.assertAllEqual(expected_output, output_dataset)
  def test_crossing_dense_inputs_depth_int(self):
    """Exercise integer `depth` values on dense string inputs.

    With depth=1 the direct layer call is expected to return the two input
    columns side by side; with depth=2 the model prediction is expected to
    carry the pairwise cross as an additional column.
    """
    depth_one_layer = category_crossing.CategoryCrossing(depth=1)
    inputs_0 = constant_op.constant([['a'], ['b'], ['c']])
    inputs_1 = constant_op.constant([['d'], ['e'], ['f']])
    uncrossed = [[b'a', b'd'], [b'b', b'e'], [b'c', b'f']]
    self.assertAllEqual(uncrossed, depth_one_layer([inputs_0, inputs_1]))

    depth_two_layer = category_crossing.CategoryCrossing(depth=2)
    symbolic_inputs = [
        input_layer.Input(shape=(1,), dtype=dtypes.string),
        input_layer.Input(shape=(1,), dtype=dtypes.string),
    ]
    model = training.Model(symbolic_inputs, depth_two_layer(symbolic_inputs))
    pairwise = [[b'a_X_d'], [b'b_X_e'], [b'c_X_f']]
    # The depth-2 result is the depth-1 columns followed by the cross column.
    expected_output = array_ops.concat([uncrossed, pairwise], axis=1)
    self.assertAllEqual(expected_output, model.predict([inputs_0, inputs_1]))
  def test_crossing_ragged_inputs_depth_int(self):
    """Exercise integer `depth` values on ragged string inputs.

    The depth=1 call must return a RaggedTensor holding both input values
    per row; the depth=2 model prediction must additionally contain the
    pairwise cross.
    """
    depth_one_layer = category_crossing.CategoryCrossing(depth=1)
    inputs_0 = ragged_factory_ops.constant([['a'], ['b'], ['c']])
    inputs_1 = ragged_factory_ops.constant([['d'], ['e'], ['f']])
    eager_result = depth_one_layer([inputs_0, inputs_1])
    uncrossed = [[b'a', b'd'], [b'b', b'e'], [b'c', b'f']]
    self.assertIsInstance(eager_result, ragged_tensor.RaggedTensor)
    self.assertAllEqual(uncrossed, eager_result)

    depth_two_layer = category_crossing.CategoryCrossing(depth=2)
    symbolic_inputs = [
        input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.string),
        input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.string),
    ]
    model = training.Model(symbolic_inputs, depth_two_layer(symbolic_inputs))
    with_cross = [
        [b'a', b'd', b'a_X_d'],
        [b'b', b'e', b'b_X_e'],
        [b'c', b'f', b'c_X_f'],
    ]
    self.assertAllEqual(with_cross, model.predict([inputs_0, inputs_1]))
    def test_crossing_with_list_inputs(self):
        """Feed plain Python lists to the layer instead of tensors.

        Both nested (rank-2) and flat (rank-1) integer lists are checked
        against their expected crossed byte strings.
        """
        layer = category_crossing.CategoryCrossing()

        # Nested lists: a single row with two values per feature.
        nested_result = layer([[[1, 2]], [[1, 3]]])
        self.assertAllEqual([[b'1_X_1', b'1_X_3', b'2_X_1', b'2_X_3']],
                            nested_result)

        # Flat lists: the expected result has one cross per element pair.
        flat_result = layer([[1, 2], [1, 3]])
        self.assertAllEqual([[b'1_X_1'], [b'2_X_3']], flat_result)
 def test_crossing_sparse_inputs_empty_sep(self):
   """Cross two sparse inputs using an empty string as the separator.

   Checks both the indices and the values of the sparse result.
   """
   layer = category_crossing.CategoryCrossing(separator='')
   left = sparse_tensor.SparseTensor(
       indices=[[0, 0], [1, 0], [1, 1]],
       values=['a', 'b', 'c'],
       dense_shape=[2, 2])
   right = sparse_tensor.SparseTensor(
       indices=[[0, 1], [1, 2]],
       values=['d', 'e'],
       dense_shape=[2, 3])
   crossed = layer([left, right])
   expected_indices = np.asarray([[0, 0], [1, 0], [1, 1]])
   self.assertAllClose(expected_indices, crossed.indices)
   self.assertAllEqual([b'ad', b'be', b'ce'], crossed.values)
 def test_crossing_sparse_inputs_depth_int(self):
     """Call a ``depth=1`` layer directly on two sparse string columns.

     The result must be a SparseTensor whose dense form holds both input
     values for every row.
     """
     layer = category_crossing.CategoryCrossing(depth=1)
     row_indices = [[0, 0], [1, 0], [2, 0]]
     sparse_features = [
         sparse_tensor.SparseTensor(indices=row_indices,
                                    values=column_values,
                                    dense_shape=[3, 1])
         for column_values in (['a', 'b', 'c'], ['d', 'e', 'f'])
     ]
     crossed = layer(sparse_features)
     self.assertIsInstance(crossed, sparse_tensor.SparseTensor)
     dense_crossed = sparse_ops.sparse_tensor_to_dense(crossed)
     expected_out = [[b'a', b'd'], [b'b', b'e'], [b'c', b'f']]
     self.assertAllEqual(expected_out, dense_crossed)
 def test_crossing_compute_output_signature(self):
     """Check the batch dimension and dtype of the computed output spec."""
     input_shapes = [
         tensor_shape.TensorShape(dims) for dims in ([2, 2], [2, 3])
     ]
     input_specs = [
         tensor_spec.TensorSpec(shape, dtypes.string)
         for shape in input_shapes
     ]
     layer = category_crossing.CategoryCrossing()
     output_spec = layer.compute_output_signature(input_specs)
     # Only the leading dimension is compared with the first input shape.
     expected_batch_dim = input_shapes[0].dims[0]
     self.assertEqual(output_spec.shape.dims[0], expected_batch_dim)
     self.assertEqual(output_spec.dtype, dtypes.string)
  def test_crossing_ragged_inputs(self):
    """Cross two ragged string inputs through a Keras model.

    Rows have different lengths in the first input; each value is expected
    to be crossed with the single value of the matching row in the second.
    """
    names = ragged_factory_ops.constant(
        [['omar', 'skywalker'], ['marlo']], dtype=dtypes.string)
    letters = ragged_factory_ops.constant(
        [['a'], ['b']], dtype=dtypes.string)
    symbolic_inputs = [
        input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.string),
        input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.string),
    ]

    non_hashed_layer = category_crossing.CategoryCrossing()
    crossed = non_hashed_layer(symbolic_inputs)
    model = training.Model(inputs=symbolic_inputs, outputs=crossed)
    expected_output = [[b'omar_X_a', b'skywalker_X_a'], [b'marlo_X_b']]
    self.assertAllEqual(expected_output, model.predict([names, letters]))
def embedding_varlen(batch_size, max_length):
    """Benchmark a variable-length embedding.

    Builds a Keras model (CategoryCrossing followed by Hashing) and a
    feature-column `crossed_column` over the same two string features, then
    times both through the `fc_bm` helpers.

    Args:
        batch_size: batch size handed to the `fc_bm` runners; it also sizes
            the generated data and the first dimension of the dense tensors.
        max_length: forwarded to `fc_bm.create_string_data` and used as the
            second dimension of the dense tensors.

    Returns:
        A `(k_avg_time, fc_avg_time)` tuple with the values returned by
        `fc_bm.run_keras` and `fc_bm.run_fc` respectively.
    """
    # Data and constants.
    num_buckets = 10000
    vocab = fc_bm.create_vocabulary(32768)
    num_examples = batch_size * NUM_REPEATS
    data_a = fc_bm.create_string_data(
        max_length, num_examples, vocab, pct_oov=0.0)
    data_b = fc_bm.create_string_data(
        max_length, num_examples, vocab, pct_oov=0.0)

    def _dense_features():
        """Call `to_tensor` on both data sets with an empty-string default."""
        dense_shape = (batch_size, max_length)
        return {
            "data_a": data_a.to_tensor(default_value="", shape=dense_shape),
            "data_b": data_b.to_tensor(default_value="", shape=dense_shape),
        }

    # Keras implementation
    keras_inputs = [
        keras.Input(shape=(None, ), name=feature_name, dtype=dt.string)
        for feature_name in ("data_a", "data_b")
    ]
    crossed_data = category_crossing.CategoryCrossing()(keras_inputs)
    hashed_data = hashing.Hashing(num_buckets)(crossed_data)
    model = keras.Model(keras_inputs, hashed_data)

    # FC implementation
    fc = fcv2.crossed_column(["data_a", "data_b"], num_buckets)

    # Wrap the FC implementation in a tf.function for a fair comparison
    @tf_function()
    def fc_fn(tensors):
        fc.transform_feature(fcv2.FeatureTransformationCache(tensors), None)

    # Benchmark runs; each runner gets its own freshly built feature dict.
    k_avg_time = fc_bm.run_keras(
        _dense_features(), model, batch_size, NUM_REPEATS)
    fc_avg_time = fc_bm.run_fc(
        _dense_features(), fc_fn, batch_size, NUM_REPEATS)

    return k_avg_time, fc_avg_time
 def test_crossing_dense_inputs_depth_tuple(self):
   """Cross three dense string inputs with ``depth=[2, 3]`` in a model.

   The prediction is compared with the expected pairwise and three-way
   crosses of each row.
   """
   layer = category_crossing.CategoryCrossing(depth=[2, 3])
   dense_features = [
       constant_op.constant(column)
       for column in ([['a'], ['b'], ['c']],
                      [['d'], ['e'], ['f']],
                      [['g'], ['h'], ['i']])
   ]
   symbolic_inputs = [
       input_layer.Input(shape=(1,), dtype=dtypes.string) for _ in range(3)
   ]
   crossed = layer(symbolic_inputs)
   model = training.Model(symbolic_inputs, crossed)
   expected_rows = [
       [[b'a_X_d', b'a_X_g', b'd_X_g', b'a_X_d_X_g']],
       [[b'b_X_e', b'b_X_h', b'e_X_h', b'b_X_e_X_h']],
       [[b'c_X_f', b'c_X_i', b'f_X_i', b'c_X_f_X_i']],
   ]
   expected_output = array_ops.concat(expected_rows, axis=0)
   self.assertAllEqual(expected_output, model.predict(dense_features))
 def test_crossing_with_dense_inputs(self):
     """Call the layer on two 1x2 integer NumPy arrays and check the cross."""
     layer = category_crossing.CategoryCrossing()
     left = np.asarray([[1, 2]])
     right = np.asarray([[1, 3]])
     crossed = layer([left, right])
     expected = [[b'1_X_1', b'1_X_3', b'2_X_1', b'2_X_3']]
     self.assertAllEqual(expected, crossed)