def test_build_dataset(self):
  config = data_lib.DataConfig(split='train', fake_data=True)
  dataset = data_lib.build_dataset(
      config, batch_size=8, is_training=False, fake_training=False)

  # Check output_shapes.
  features_shapes, label_shape = tf.compat.v1.data.get_output_shapes(dataset)
  self.assertEqual([None], label_shape.as_list())
  expected_keys = [
      data_lib.feature_name(i)
      for i in range(1, data_lib.NUM_TOTAL_FEATURES + 1)
  ]
  self.assertSameElements(expected_keys, list(features_shapes.keys()))
  for key, shape in six.iteritems(features_shapes):
    self.assertEqual([None], shape.as_list(), 'Unexpected shape at key=' + key)

  # Check output_types.
  features_types, label_type = tf.compat.v1.data.get_output_types(dataset)
  self.assertEqual(tf.float32, label_type)
  for idx in data_lib.INT_FEATURE_INDICES:
    self.assertEqual(tf.float32, features_types[data_lib.feature_name(idx)])
  for idx in data_lib.CAT_FEATURE_INDICES:
    self.assertEqual(tf.string, features_types[data_lib.feature_name(idx)])
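

# A minimal consumption sketch (not part of the original tests), assuming
# data_lib.DataConfig and data_lib.build_dataset behave as exercised in
# test_build_dataset above: the dataset yields (features, label) batches where
# `features` is a dict keyed by data_lib.feature_name(i) and `label` is float32.
def _example_iterate_dataset():
  config = data_lib.DataConfig(split='train', fake_data=True)
  dataset = data_lib.build_dataset(
      config, batch_size=8, is_training=False, fake_training=False)
  for features, label in dataset.take(1):
    print(sorted(features.keys()), label.dtype)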
def _make_input_layers():
  out = {}
  for idx in range(1, data_lib.NUM_TOTAL_FEATURES + 1):
    dtype = tf.int32 if idx <= data_lib.NUM_INT_FEATURES else tf.string
    name = data_lib.feature_name(idx)
    out[name] = keras.layers.Input([], dtype=dtype, name=name)
  return out
def make_input_layers():
  """Defines input layers for the Keras model, with int32 and string dtypes."""
  out = {}
  for idx in range(1, data_lib.NUM_TOTAL_FEATURES + 1):
    dtype = tf.int32 if idx <= data_lib.NUM_INT_FEATURES else tf.string
    name = data_lib.feature_name(idx)
    out[name] = keras.layers.Input([], dtype=dtype, name=name)
  return out
def make_feature_columns(opts):
  """Builds feature_columns for converting features to a dense vector."""
  tffc = tf.feature_column

  out_cat = []
  for idx in data_lib.CAT_FEATURE_INDICES:
    name = data_lib.feature_name(idx)
    # Categorical feature indices start after the integer features.
    cat_idx = idx - data_lib.NUM_INT_FEATURES - 1
    num_buckets = opts.num_hash_buckets[cat_idx]
    num_embed_dims = opts.num_embed_dims[cat_idx]
    hash_col = tffc.categorical_column_with_hash_bucket(name, num_buckets)
    # Embed the hashed column if an embedding size is given; otherwise one-hot.
    cat_col = (
        tffc.embedding_column(hash_col, num_embed_dims)
        if num_embed_dims else tffc.indicator_column(hash_col))
    out_cat.append(cat_col)

  out_int = []
  for idx in data_lib.INT_FEATURE_INDICES:
    name = data_lib.feature_name(idx)
    out_int.append(tffc.numeric_column(name))

  return out_int, out_cat
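

# A minimal sketch (not part of the original library) of how make_input_layers
# and make_feature_columns could be wired into a Keras model. `make_model`, the
# layer sizes, and the assumption that `opts` carries `num_hash_buckets` and
# `num_embed_dims` (as read in make_feature_columns) are illustrative only.
def make_model(opts):
  input_layers = make_input_layers()
  int_cols, cat_cols = make_feature_columns(opts)
  # DenseFeatures applies the feature columns to the raw int/string inputs,
  # producing a single dense float vector per example.
  dense = keras.layers.DenseFeatures(int_cols + cat_cols)(input_layers)
  hidden = keras.layers.Dense(64, activation='relu')(dense)
  logits = keras.layers.Dense(1)(hidden)
  return keras.Model(inputs=list(input_layers.values()), outputs=logits)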