def test_get_sequence_dense_tensor(self):
  """Checks the multi-hot dense tensor built from a sequence id column."""
  num_ids = 3
  # ids per example: 0 -> [2], 1 -> [0, 1], 2 -> [], 3 -> [1]
  ids_input = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (1, 0), (1, 1), (3, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(4, 2))
  expected = [
      # example 0, ids [2]
      [[0., 0., 1.], [0., 0., 0.]],
      # example 1, ids [0, 1]
      [[1., 0., 0.], [0., 1., 0.]],
      # example 2, ids []
      [[0., 0., 0.], [0., 0., 0.]],
      # example 3, ids [1]
      [[0., 1., 0.], [0., 0., 0.]],
  ]

  column = fc.indicator_column(
      sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=num_ids))
  dense_tensor, _ = column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': ids_input}))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected, dense_tensor.eval(session=sess))
def test_sequence_length_with_empty_rows(self):
  """Tests _sequence_length when some examples do not have ids."""
  bucket_count = 3
  # ids per example: 0 -> [], 1 -> [2], 2 -> [0, 1], 3 -> [], 4 -> [1],
  # 5 -> []
  ids_input = sparse_tensor.SparseTensorValue(
      indices=((1, 0), (2, 0), (2, 1), (4, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(6, 2))
  expected_lengths = [0, 1, 2, 0, 1, 0]

  column = fc.indicator_column(
      sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=bucket_count))
  _, seq_length = column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': ids_input}))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected_lengths, seq_length.eval(session=sess))
def test_indicator_column(self):
  """Tests sequence_input_layer with two sequence indicator columns."""
  vocab_a = 3
  # ids per example: 0 -> [2], 1 -> [0, 1]
  input_a = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  vocab_b = 2
  # ids per example: 0 -> [1], 1 -> [1, 0]
  input_b = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (1, 0), (1, 1)),
      values=(1, 1, 0),
      dense_shape=(2, 2))
  expected_input_layer = [
      # example 0, ids_a [2], ids_b [1]
      [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
      # example 1, ids_a [0, 1], ids_b [1, 0]
      [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]],
  ]
  expected_sequence_length = [1, 2]

  column_a = fc.indicator_column(
      sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=vocab_a))
  column_b = fc.indicator_column(
      sfc.sequence_categorical_column_with_identity(
          key='bbb', num_buckets=vocab_b))

  input_layer, sequence_length = sfc.sequence_input_layer(
      features={'aaa': input_a, 'bbb': input_b},
      # Test that columns are reordered alphabetically.
      feature_columns=[column_b, column_a])

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
    self.assertAllEqual(
        expected_sequence_length, sequence_length.eval(session=sess))
def test_indicator_column(self):
  """Verifies indicator columns flow through sequence_input_layer correctly."""
  vocabulary_size_a = 3
  sparse_a = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  vocabulary_size_b = 2
  sparse_b = sparse_tensor.SparseTensorValue(
      # example 0, ids [1]
      # example 1, ids [1, 0]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(1, 1, 0),
      dense_shape=(2, 2))

  # Columns 'aaa' (3-wide) and 'bbb' (2-wide) concatenate to depth 5.
  want_layer = [
      # example 0, ids_a [2], ids_b [1]
      [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
      # example 1, ids_a [0, 1], ids_b [1, 0]
      [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]],
  ]
  want_lengths = [1, 2]

  indicator_a = fc.indicator_column(
      sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=vocabulary_size_a))
  indicator_b = fc.indicator_column(
      sfc.sequence_categorical_column_with_identity(
          key='bbb', num_buckets=vocabulary_size_b))

  layer, lengths = sfc.sequence_input_layer(
      features={
          'aaa': sparse_a,
          'bbb': sparse_b,
      },
      # Test that columns are reordered alphabetically.
      feature_columns=[indicator_b, indicator_a])

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(want_layer, layer.eval(session=sess))
    self.assertAllEqual(want_lengths, lengths.eval(session=sess))
def testTrainEvaluateAndPredictWithIndicatorColumn(self):
  """Trains a depth-1 tree; the indicator feature predicts labels exactly."""
  categorical = feature_column.categorical_column_with_vocabulary_list(
      key='categorical', vocabulary_list=('bad', 'good', 'ok'))
  feature_indicator = feature_column.indicator_column(categorical)
  bucketized_col = feature_column.bucketized_column(
      feature_column.numeric_column(
          'an_uninformative_feature', dtype=dtypes.float32),
      BUCKET_BOUNDARIES)

  labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], dtype=np.float32)
  # Our categorical feature defines the labels perfectly
  input_fn = numpy_io.numpy_input_fn(
      x={
          'an_uninformative_feature': np.array([1, 1, 1, 1, 1]),
          'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']),
      },
      y=labels,
      batch_size=5,
      shuffle=False)

  # Train depth 1 tree.
  est = boosted_trees.BoostedTreesRegressor(
      feature_columns=[bucketized_col, feature_indicator],
      n_batches_per_layer=1,
      n_trees=1,
      learning_rate=1.0,
      max_depth=1)

  est.train(input_fn, steps=1)
  ensemble = self._assert_checkpoint_and_return_model(
      est.model_dir, global_step=1, finalized_trees=1, attempted_layers=1)

  # We learnt perfectly.
  eval_res = est.evaluate(input_fn=input_fn, steps=1)
  self.assertAllClose(eval_res['loss'], 0)
  predictions = list(est.predict(input_fn))
  self.assertAllClose(labels, [pred['predictions'] for pred in predictions])

  tree = ensemble.trees[0]
  self.assertEqual(3, len(tree.nodes))
  # Check that the split happened on 'good' value, which will be encoded as
  # feature with index 2 (0-numeric, 1 - 'bad')
  self.assertEqual(2, tree.nodes[0].bucketized_split.feature_id)
  self.assertEqual(0, tree.nodes[0].bucketized_split.threshold)
def test_forward_in_exported_sparse(self):
  """Exports a model that forwards a sparse feature, then verifies serving."""
  feature_cols = [
      fc.indicator_column(
          fc.categorical_column_with_vocabulary_list('x', range(10)))
  ]
  est = linear.LinearClassifier(feature_columns=feature_cols)

  def train_input_fn():
    ds = dataset_ops.Dataset.from_tensors({
        'x':
            sparse_tensor.SparseTensor(
                values=[1, 2, 3],
                indices=[[0, 0], [1, 0], [1, 1]],
                dense_shape=[2, 2]),
        'labels': [[0], [1]]
    })

    def _pop_labels(features):
      labels = features.pop('labels')
      return features, labels

    return ds.map(_pop_labels)

  est.train(train_input_fn, max_steps=1)
  # Forward 'x' through to predictions, densifying missing entries with 0.
  est = extenders.forward_features(
      est, keys=['x'], sparse_default_values={'x': 0})

  def serving_input_fn():
    x_ph = array_ops.placeholder(dtype=dtypes.int32, name='x', shape=[None])
    return estimator_lib.export.ServingInputReceiver(
        {'x': layers.dense_to_sparse(x_ph)}, {'x': x_ph})

  export_dir, tmpdir = self._export_estimator(est, serving_input_fn)
  prediction_fn = from_saved_model(export_dir, signature_def_key='predict')

  features = (0, 2)
  prediction = prediction_fn({'x': features})
  self.assertIn('x', prediction)
  self.assertEqual(features, tuple(prediction['x']))
  gfile.DeleteRecursively(tmpdir)
def testTrainEvaluateAndPredictWithIndicatorColumn(self):
  """Checks a one-layer boosted tree splits on the informative indicator."""
  vocab_col = feature_column.categorical_column_with_vocabulary_list(
      key='categorical', vocabulary_list=('bad', 'good', 'ok'))
  indicator_col = feature_column.indicator_column(vocab_col)
  noise_col = feature_column.bucketized_column(
      feature_column.numeric_column(
          'an_uninformative_feature', dtype=dtypes.float32),
      BUCKET_BOUNDARIES)

  labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], dtype=np.float32)
  # Our categorical feature defines the labels perfectly
  input_fn = numpy_io.numpy_input_fn(
      x={
          'an_uninformative_feature': np.array([1, 1, 1, 1, 1]),
          'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']),
      },
      y=labels,
      batch_size=5,
      shuffle=False)

  # Train depth 1 tree.
  regressor = boosted_trees.BoostedTreesRegressor(
      feature_columns=[noise_col, indicator_col],
      n_batches_per_layer=1,
      n_trees=1,
      learning_rate=1.0,
      max_depth=1)

  num_steps = 1
  regressor.train(input_fn, steps=num_steps)
  ensemble = self._assert_checkpoint_and_return_model(
      regressor.model_dir,
      global_step=1,
      finalized_trees=1,
      attempted_layers=1)

  # We learnt perfectly.
  eval_res = regressor.evaluate(input_fn=input_fn, steps=1)
  self.assertAllClose(eval_res['loss'], 0)
  self.assertAllClose(
      labels,
      [pred['predictions'] for pred in regressor.predict(input_fn)])

  root = ensemble.trees[0].nodes[0]
  self.assertEqual(3, len(ensemble.trees[0].nodes))
  # Check that the split happened on 'good' value, which will be encoded as
  # feature with index 2 (0-numeric, 1 - 'bad')
  self.assertEqual(2, root.bucketized_split.feature_id)
  self.assertEqual(0, root.bucketized_split.threshold)
def test_indicator_column(self):
  """Tests that error is raised for sequence indicator column."""
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]; example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))

  seq_column = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=3)
  indicator = fc.indicator_column(seq_column)

  # The non-sequence input_layer must reject sequence categorical columns.
  with self.assertRaisesRegexp(
      ValueError,
      r'In indicator_column: aaa_indicator\. categorical_column must not be '
      r'of type _SequenceCategoricalColumn\.'):
    _ = fc.input_layer(
        features={'aaa': sparse_input}, feature_columns=[indicator])
def test_sequence_length(self):
  """Checks both the values and the int64 dtype of _sequence_length."""
  bucket_count = 3
  # ids per example: 0 -> [2], 1 -> [0, 1]
  ids_input = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  expected_lengths = [1, 2]

  column = fc.indicator_column(
      sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=bucket_count))
  _, length_tensor = column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': ids_input}))

  with monitored_session.MonitoredSession() as sess:
    actual_lengths = sess.run(length_tensor)
    self.assertAllEqual(expected_lengths, actual_lengths)
    self.assertEqual(np.int64, actual_lengths.dtype)
def test_indicator_column(self):
  """Tests that error is raised for sequence indicator column."""
  vocabulary_size = 3
  ids = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))

  indicator_a = fc.indicator_column(
      sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=vocabulary_size))

  expected_message = (
      r'In indicator_column: aaa_indicator\. categorical_column must not be '
      r'of type _SequenceCategoricalColumn\.')
  with self.assertRaisesRegexp(ValueError, expected_message):
    _ = fc.input_layer(
        features={'aaa': ids}, feature_columns=[indicator_a])
def test_forward_in_exported_sparse(self):
  """Trains, exports with a forwarded sparse key, and queries the SavedModel."""
  columns = [
      fc.indicator_column(
          fc.categorical_column_with_vocabulary_list('x', range(10)))
  ]
  classifier = linear.LinearClassifier(feature_columns=columns)

  def train_input_fn():
    raw = {
        'x':
            sparse_tensor.SparseTensor(
                values=[1, 2, 3],
                indices=[[0, 0], [1, 0], [1, 1]],
                dense_shape=[2, 2]),
        'labels': [[0], [1]]
    }

    def _split(x):
      labels = x.pop('labels')
      return x, labels

    return dataset_ops.Dataset.from_tensors(raw).map(_split)

  classifier.train(train_input_fn, max_steps=1)
  classifier = extenders.forward_features(
      classifier, keys=['x'], sparse_default_values={'x': 0})

  def serving_input_fn():
    # Serving receives a dense int vector and converts it to sparse to match
    # the training-time feature representation.
    features_ph = array_ops.placeholder(
        dtype=dtypes.int32, name='x', shape=[None])
    features = {'x': layers.dense_to_sparse(features_ph)}
    return estimator_lib.export.ServingInputReceiver(
        features, {'x': features_ph})

  export_dir, tmpdir = self._export_estimator(classifier, serving_input_fn)
  prediction_fn = from_saved_model(export_dir, signature_def_key='predict')

  features = (0, 2)
  prediction = prediction_fn({'x': features})
  self.assertIn('x', prediction)
  self.assertEqual(features, tuple(prediction['x']))
  gfile.DeleteRecursively(tmpdir)
def _sequence_indicator_column(categorical_column):
  """Returns a feature column that represents sequences of multi-hot tensors.

  Use this to convert sequence categorical data into dense representation for
  input to sequence NN, such as RNN.

  Example:

  ```python
  colors = sequence_categorical_column_with_vocabulary_list(
      key='colors', vocabulary_list=('R', 'G', 'B', 'Y'))
  colors_indicator = _sequence_indicator_column(colors)
  columns = [colors_indicator]

  features = tf.parse_example(..., features=make_parse_example_spec(columns))
  input_layer, sequence_length = sequence_input_layer(features, columns)

  rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
  outputs, state = tf.nn.dynamic_rnn(
      rnn_cell, inputs=input_layer, sequence_length=sequence_length)
  ```

  Args:
    categorical_column: A `_SequenceCategoricalColumn` created with a
      `sequence_categorical_column_with_*` function.

  Returns:
    A `_SequenceCategoricalToDenseColumn` wrapping an indicator column over
    `categorical_column`.

  Raises:
    ValueError: If `categorical_column` is not the right type.
  """
  # Fail fast with a descriptive message rather than letting the wrapped
  # indicator column surface a confusing error later.
  if not isinstance(categorical_column, _SequenceCategoricalColumn):
    raise ValueError(
        'categorical_column must be of type _SequenceCategoricalColumn. '
        'Given (type {}): {}'.format(
            type(categorical_column), categorical_column))
  return _SequenceCategoricalToDenseColumn(
      fc.indicator_column(categorical_column))
def testMultiExamplesMultiFeatures(self):
  """Tests examples with multiple sequential feature columns.

  Intermediate values are rounded for ease in reading.
  input_layer = [[[1, 0, 10], [0, 1, 5]], [[1, 0, 2], [0, 0, 0]]]
  initial_state = [[0, 0], [0, 0]]
  rnn_output_timestep_1 = [[tanh(.5*1 + 1*0 + .1*10 + .2*0 + .3*0 +.2),
                            tanh(-.5*1 - 1*0 - .2*10 - .3*0 - .4*0 +.5)],
                           [tanh(.5*1 + 1*0 + .1*2 + .2*0 + .3*0 +.2),
                            tanh(-.5*1 - 1*0 - .2*2 - .3*0 - .4*0 +.5)]]
                        = [[0.94, -0.96], [0.72, -0.38]]
  rnn_output_timestep_2 = [[tanh(.5*0 + 1*1 + .1*5 + .2*.94 - .3*.96 +.2),
                            tanh(-.5*0 - 1*1 - .2*5 - .3*.94 + .4*.96 +.5)],
                           [<ignored-padding>]]
                        = [[0.92, -0.88], [<ignored-padding>]]
  logits = [[-1*0.92 - 1*0.88 + 0.3],
            [-1*0.72 - 1*0.38 + 0.3]]
         = [[-1.5056], [-0.7962]]
  """
  base_global_step = 100
  create_checkpoint(
      # FeatureColumns are sorted alphabetically, so on_sale weights are
      # inserted before price.
      rnn_weights=[[.5, -.5], [1., -1.], [.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=base_global_step,
      model_dir=self._model_dir)

  def features_fn():
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2.],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
        'on_sale':
            sparse_tensor.SparseTensor(
                values=[0, 1, 0],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
    }

  seq_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,)),
      fc.indicator_column(
          seq_fc.sequence_categorical_column_with_identity(
              'on_sale', num_buckets=2)),
  ]

  all_modes = [
      model_fn.ModeKeys.TRAIN,
      model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT,
  ]
  for mode in all_modes:
    self._test_logits(
        mode,
        rnn_units=[2],
        logits_dimension=1,
        features_fn=features_fn,
        sequence_feature_columns=seq_columns,
        context_feature_columns=[],
        expected_logits=[[-1.5056], [-0.7962]])
# NOTE(review): this snippet is truncated -- the final `expirement_fn` (sic,
# presumably "experiment_fn") definition is cut off mid-statement, and
# `input_data` / `input_label` are not defined in this view.
from tensorflow.contrib.learn import LinearRegressor, pandas_input_fn, DNNRegressor, Experiment
from tensorflow.python.feature_column.feature_column import categorical_column_with_hash_bucket, numeric_column, \
    categorical_column_with_vocabulary_list, embedding_column, indicator_column

# Feature definitions: a hashed categorical, a scalar numeric, and a
# vocabulary-list categorical.
make = categorical_column_with_hash_bucket('make', 100)
horsepower = numeric_column('horsepower', shape=[])
cylinders = categorical_column_with_vocabulary_list(
    'num-of-cylinders', ['two', 'three', 'four', 'six', 'eight'])

###############
# DNN variant: categorical features must be wrapped for dense input.
regressor = DNNRegressor(
    feature_columns=[
        embedding_column(make, 10),
        horsepower,
        # NOTE(review): indicator_column takes a single categorical column;
        # the extra positional `3` here would raise a TypeError -- confirm.
        indicator_column(cylinders, 3)
    ],
    hidden_units=[50, 30, 10])

################
# Linear variant: raw categorical columns are accepted directly.
regressor = LinearRegressor(feature_columns=[make, horsepower, cylinders])

# any python generator
train_input_fn = pandas_input_fn(
    x=input_data, y=input_label, batch_size=64, shuffle=True, num_epochs=None)
regressor.train(train_input_fn, steps=10000)

# NOTE(review): definition truncated below; body continues past this view.
def expirement_fn(run_config, hparams):
    regressor = DNNRegressor(..., config=run_config,
def test_complete_flow(self):
  """End-to-end train/evaluate/predict/export for a replicated DNNClassifier.

  Builds numeric + indicator feature columns, replicates the model_fn across
  three GPUs, trains for a fixed number of steps, checks evaluation metrics
  and prediction shapes, and finally exports and locates a SavedModel.
  """
  n_classes = 3
  input_dimension = 2
  batch_size = 12

  data = np.linspace(
      0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
  x_data = data.reshape(batch_size, input_dimension)
  # BUG FIX: np.random.random_integers is deprecated (removed in modern
  # NumPy). randint with an exclusive upper bound of len(x_data) + 1 draws
  # from the same inclusive range [0, len(x_data)].
  categorical_data = np.random.randint(0, len(x_data) + 1, size=len(x_data))
  y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))

  train_input_fn = numpy_io.numpy_input_fn(
      x={'x': x_data, 'categories': categorical_data},
      y=y_data,
      batch_size=batch_size,
      num_epochs=None,
      shuffle=True)
  eval_input_fn = numpy_io.numpy_input_fn(
      x={'x': x_data, 'categories': categorical_data},
      y=y_data,
      batch_size=batch_size,
      shuffle=False)
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'x': x_data, 'categories': categorical_data},
      batch_size=batch_size,
      shuffle=False)

  feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,)),
      feature_column.indicator_column(
          feature_column.categorical_column_with_vocabulary_list(
              'categories',
              vocabulary_list=np.linspace(
                  0., len(x_data), len(x_data), dtype=np.int64)))
  ]

  estimator = dnn.DNNClassifier(
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      n_classes=n_classes,
      model_dir=self._model_dir)

  def optimizer_fn():
    return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05)

  # Wrap the canned model_fn so each tower runs on a separate GPU.
  estimator = estimator_lib.Estimator(
      model_fn=replicate_model_fn.replicate_model_fn(
          estimator.model_fn,
          optimizer_fn,
          devices=['/gpu:0', '/gpu:1', '/gpu:2']),
      model_dir=estimator.model_dir,
      config=estimator.config,
      params=estimator.params)

  num_steps = 10
  estimator.train(train_input_fn, steps=num_steps)

  scores = estimator.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  predicted_proba = np.array([
      x[prediction_keys.PredictionKeys.PROBABILITIES]
      for x in estimator.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = estimator.export_savedmodel(
      tempfile.mkdtemp(), serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def testMultiExamplesMultiFeatures(self):
  """Tests examples with multiple sequential feature columns.

  Intermediate values are rounded for ease in reading.
  input_layer = [[[1, 0, 10], [0, 1, 5]], [[1, 0, 2], [0, 0, 0]]]
  initial_state = [[0, 0], [0, 0]]
  rnn_output_timestep_1 = [[tanh(.5*1 + 1*0 + .1*10 + .2*0 + .3*0 +.2),
                            tanh(-.5*1 - 1*0 - .2*10 - .3*0 - .4*0 +.5)],
                           [tanh(.5*1 + 1*0 + .1*2 + .2*0 + .3*0 +.2),
                            tanh(-.5*1 - 1*0 - .2*2 - .3*0 - .4*0 +.5)]]
                        = [[0.94, -0.96], [0.72, -0.38]]
  rnn_output_timestep_2 = [[tanh(.5*0 + 1*1 + .1*5 + .2*.94 - .3*.96 +.2),
                            tanh(-.5*0 - 1*1 - .2*5 - .3*.94 + .4*.96 +.5)],
                           [<ignored-padding>]]
                        = [[0.92, -0.88], [<ignored-padding>]]
  logits = [[-1*0.92 - 1*0.88 + 0.3],
            [-1*0.72 - 1*0.38 + 0.3]]
         = [[-1.5056], [-0.7962]]
  """
  create_checkpoint(
      # FeatureColumns are sorted alphabetically, so on_sale weights are
      # inserted before price.
      rnn_weights=[[.5, -.5], [1., -1.], [.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=100,
      model_dir=self._model_dir)

  def features_fn():
    price = sparse_tensor.SparseTensor(
        values=[10., 5., 2.],
        indices=[[0, 0], [0, 1], [1, 0]],
        dense_shape=[2, 2])
    on_sale = sparse_tensor.SparseTensor(
        values=[0, 1, 0],
        indices=[[0, 0], [0, 1], [1, 0]],
        dense_shape=[2, 2])
    return {'price': price, 'on_sale': on_sale}

  price_column = seq_fc.sequence_numeric_column('price', shape=(1,))
  on_sale_column = fc.indicator_column(
      seq_fc.sequence_categorical_column_with_identity(
          'on_sale', num_buckets=2))
  sequence_feature_columns = [price_column, on_sale_column]
  context_feature_columns = []

  for mode in [
      model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT
  ]:
    self._test_logits(
        mode,
        rnn_units=[2],
        logits_dimension=1,
        features_fn=features_fn,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        expected_logits=[[-1.5056], [-0.7962]])
def _build_feature_columns(self):
  """Builds all tf.feature_column groups from the data/model configuration.

  Populates `self._feature_mapping`, keyed by integer group id:
    0: multi-hot categorical (vocabulary-list) columns
    1: hash-bucket categorical columns
    2: continuous numeric columns
    3: crossed columns
    4: bucketized columns (derived from the continuous columns)
    5: embedding columns over the hash-bucket categoricals
    6: indicator (dense multi-hot) columns
  then delegates to `self._build_feature_columns_for_model()`.
  """
  multi_hot_feature_columns = {}
  multi_hot_feature_columns_deep = {}
  multi_category_feature_columns = {}
  continuous_feature_columns = {}
  crossed_feature_columns = []
  bucketized_feature_columns = []
  embedding_feature_columns = []

  if self._data_conf.multi_hot_columns is not None:
    for column in self._data_conf.multi_hot_columns:
      multi_hot_feature_columns[column] = (
          categorical_column_with_vocabulary_list(
              column,
              self._data_conf.multi_hot_columns[column],
              dtype=tf.string))
      # Dense (indicator) representation of the same column for deep models.
      multi_hot_feature_columns_deep[column] = indicator_column(
          multi_hot_feature_columns[column])

  if self._data_conf.multi_category_columns is not None:
    multi_category_feature_columns = {
        column: categorical_column_with_hash_bucket(
            column, hash_bucket_size=1000)
        for column in self._data_conf.multi_category_columns
    }

  if self._data_conf.continuous_columns is not None:
    continuous_feature_columns = {
        column: numeric_column(column)
        for column in self._data_conf.continuous_columns
    }

  if self._data_conf.crossed_columns is not None:
    crossed_feature_columns = [
        crossed_column(keys, hash_bucket_size=100000)
        for keys in self._data_conf.crossed_columns
    ]

  if self._data_conf.bucketized_columns is not None:
    # BUG FIX: `.items` was missing its call parentheses, so the loop
    # iterated over a bound method and raised a TypeError at runtime. Also
    # replaced the side-effect list comprehension with a plain loop.
    for column, boundary in self._data_conf.bucketized_columns.items():
      bucketized_feature_columns.append(
          bucketized_column(
              continuous_feature_columns[column], boundaries=boundary))

  if multi_category_feature_columns:
    embedding_feature_columns = [
        col_embedding for col_embedding in (
            embedding_column(
                col, dimension=self._model_conf.embedding_dimension)
            for col in multi_category_feature_columns.values())
    ]

  self._feature_mapping = {
      0: list(multi_hot_feature_columns.values()),
      1: list(multi_category_feature_columns.values()),
      2: list(continuous_feature_columns.values()),
      3: crossed_feature_columns,
      4: bucketized_feature_columns,
      5: embedding_feature_columns,
      6: list(multi_hot_feature_columns_deep.values()),
  }
  self._build_feature_columns_for_model()