def test_get_sequence_dense_tensor(self):
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        # example 2, ids []
        # example 3, ids [1]
        indices=((0, 0), (1, 0), (1, 1), (3, 0)),
        values=(2, 0, 1, 1),
        dense_shape=(4, 2))

    expected_lookups = [
        # example 0, ids [2]
        [[0., 0., 1.], [0., 0., 0.]],
        # example 1, ids [0, 1]
        [[1., 0., 0.], [0., 1., 0.]],
        # example 2, ids []
        [[0., 0., 0.], [0., 0., 0.]],
        # example 3, ids [1]
        [[0., 1., 0.], [0., 0., 0.]],
    ]

    categorical_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column = fc.indicator_column(categorical_column)

    indicator_tensor, _ = indicator_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(expected_lookups, indicator_tensor.eval(session=sess))
  def test_sequence_length_with_empty_rows(self):
    """Tests _sequence_length when some examples do not have ids."""
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids []
        # example 1, ids [2]
        # example 2, ids [0, 1]
        # example 3, ids []
        # example 4, ids [1]
        # example 5, ids []
        indices=((1, 0), (2, 0), (2, 1), (4, 0)),
        values=(2, 0, 1, 1),
        dense_shape=(6, 2))
    expected_sequence_length = [0, 1, 2, 0, 1, 0]

    categorical_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column = fc.indicator_column(categorical_column)

    _, sequence_length = indicator_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(
          expected_sequence_length, sequence_length.eval(session=sess))
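Both assertions can be reproduced by hand: each (row, step) entry in the SparseTensorValue turns into a one-hot row of the dense tensor, and the sequence length of a row is one past its largest step index. A minimal NumPy sketch of that mapping (illustrative only, not the library's implementation):

import numpy as np

def dense_lookup(indices, values, dense_shape, num_buckets):
  """One-hot encodes each (row, step) id; absent steps stay all-zero."""
  batch, max_len = dense_shape
  out = np.zeros((batch, max_len, num_buckets), dtype=np.float32)
  lengths = np.zeros(batch, dtype=np.int64)
  for (row, step), value in zip(indices, values):
    out[row, step, value] = 1.
    lengths[row] = max(lengths[row], step + 1)
  return out, lengths

lookups, lengths = dense_lookup(
    indices=((1, 0), (2, 0), (2, 1), (4, 0)), values=(2, 0, 1, 1),
    dense_shape=(6, 2), num_buckets=3)
print(lengths)  # [0 1 2 0 1 0], matching expected_sequence_length above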
Example 3
  def test_indicator_column(self):
    vocabulary_size_a = 3
    sparse_input_a = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))
    vocabulary_size_b = 2
    sparse_input_b = sparse_tensor.SparseTensorValue(
        # example 0, ids [1]
        # example 1, ids [1, 0]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(1, 1, 0),
        dense_shape=(2, 2))

    expected_input_layer = [
        # example 0, ids_a [2], ids_b [1]
        [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
        # example 1, ids_a [0, 1], ids_b [1, 0]
        [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]],
    ]
    expected_sequence_length = [1, 2]

    categorical_column_a = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size_a)
    indicator_column_a = fc.indicator_column(categorical_column_a)
    categorical_column_b = sfc.sequence_categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size_b)
    indicator_column_b = fc.indicator_column(categorical_column_b)
    input_layer, sequence_length = sfc.sequence_input_layer(
        features={
            'aaa': sparse_input_a,
            'bbb': sparse_input_b,
        },
        # Test that columns are reordered alphabetically.
        feature_columns=[indicator_column_b, indicator_column_a])

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
      self.assertAllEqual(
          expected_sequence_length, sequence_length.eval(session=sess))
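The 5-wide rows above come from concatenating the per-column indicators in alphabetical key order ('aaa' before 'bbb'), which is why passing the columns as [indicator_column_b, indicator_column_a] changes nothing. A hand-rolled sketch of the layout for example 0, timestep 0:

import numpy as np

one_hot_a = np.array([0., 0., 1.])  # 'aaa' id 2, vocabulary size 3
one_hot_b = np.array([0., 1.])      # 'bbb' id 1, vocabulary size 2
row = np.concatenate([one_hot_a, one_hot_b])
print(row)  # [0. 0. 1. 0. 1.], the first row of expected_input_layer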
Example 7
  def testTrainEvaluateAndPredictWithIndicatorColumn(self):
    categorical = feature_column.categorical_column_with_vocabulary_list(
        key='categorical', vocabulary_list=('bad', 'good', 'ok'))
    feature_indicator = feature_column.indicator_column(categorical)
    bucketized_col = feature_column.bucketized_column(
        feature_column.numeric_column(
            'an_uninformative_feature', dtype=dtypes.float32),
        BUCKET_BOUNDARIES)

    labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], dtype=np.float32)
    # Our categorical feature defines the labels perfectly
    input_fn = numpy_io.numpy_input_fn(
        x={
            'an_uninformative_feature': np.array([1, 1, 1, 1, 1]),
            'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']),
        },
        y=labels,
        batch_size=5,
        shuffle=False)

    # Train depth 1 tree.
    est = boosted_trees.BoostedTreesRegressor(
        feature_columns=[bucketized_col, feature_indicator],
        n_batches_per_layer=1,
        n_trees=1,
        learning_rate=1.0,
        max_depth=1)

    num_steps = 1
    est.train(input_fn, steps=num_steps)
    ensemble = self._assert_checkpoint_and_return_model(
        est.model_dir, global_step=1, finalized_trees=1, attempted_layers=1)

    # We learnt perfectly.
    eval_res = est.evaluate(input_fn=input_fn, steps=1)
    self.assertAllClose(eval_res['loss'], 0)

    predictions = list(est.predict(input_fn))
    self.assertAllClose(
        labels,
        [pred['predictions'] for pred in predictions])

    self.assertEqual(3, len(ensemble.trees[0].nodes))

    # Check that the split happened on the 'good' value, which is encoded as
    # the feature with index 2 (0 - numeric, 1 - 'bad').
    self.assertEqual(2, ensemble.trees[0].nodes[0].bucketized_split.feature_id)
    self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold)
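The two feature_id/threshold assertions rest on how the estimator flattens its inputs: with feature columns sorted by name, index 0 is the bucketized numeric column and indices 1, 2, 3 are the indicator slots for 'bad', 'good', 'ok' in vocabulary-list order. A sketch of that assumed layout:

# Assumed flattened feature-index layout behind the assertions above:
feature_ids = {
    'an_uninformative_feature': 0,  # bucketized numeric column
    'bad': 1,                       # indicator slots follow, in
    'good': 2,                      # vocabulary-list order
    'ok': 3,
}
assert feature_ids['good'] == 2  # the split feature checked above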
Example 8
  def test_sequence_length(self):
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))
    expected_sequence_length = [1, 2]

    categorical_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column = fc.indicator_column(categorical_column)

    _, sequence_length = indicator_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    with monitored_session.MonitoredSession() as sess:
      sequence_length = sess.run(sequence_length)
      self.assertAllEqual(expected_sequence_length, sequence_length)
      self.assertEqual(np.int64, sequence_length.dtype)
  def test_indicator_column(self):
    """Tests that error is raised for sequence indicator column."""
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))

    categorical_column_a = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column_a = fc.indicator_column(categorical_column_a)

    with self.assertRaisesRegexp(
        ValueError,
        r'In indicator_column: aaa_indicator\. categorical_column must not be '
        r'of type _SequenceCategoricalColumn\.'):
      _ = fc.input_layer(
          features={'aaa': sparse_input},
          feature_columns=[indicator_column_a])
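The error message points at the fix: sequence categorical columns must be fed through sequence_input_layer rather than the plain input_layer, as in the earlier examples. For instance:

# Correct routing for the same column (mirrors the examples above):
input_layer, sequence_length = sfc.sequence_input_layer(
    features={'aaa': sparse_input},
    feature_columns=[indicator_column_a])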
Example 11
  def test_forward_in_exported_sparse(self):
    features_columns = [fc.indicator_column(
        fc.categorical_column_with_vocabulary_list('x', range(10)))]

    classifier = linear.LinearClassifier(feature_columns=features_columns)

    def train_input_fn():
      dataset = dataset_ops.Dataset.from_tensors({
          'x': sparse_tensor.SparseTensor(
              values=[1, 2, 3],
              indices=[[0, 0], [1, 0], [1, 1]],
              dense_shape=[2, 2]),
          'labels': [[0], [1]]
      })
      def _split(x):
        labels = x.pop('labels')
        return x, labels
      dataset = dataset.map(_split)
      return dataset

    classifier.train(train_input_fn, max_steps=1)

    classifier = extenders.forward_features(
        classifier, keys=['x'], sparse_default_values={'x': 0})

    def serving_input_fn():
      features_ph = array_ops.placeholder(dtype=dtypes.int32, name='x',
                                          shape=[None])
      features = {'x': layers.dense_to_sparse(features_ph)}
      return estimator_lib.export.ServingInputReceiver(features,
                                                       {'x': features_ph})
    export_dir, tmpdir = self._export_estimator(classifier, serving_input_fn)
    prediction_fn = from_saved_model(export_dir, signature_def_key='predict')

    features = (0, 2)
    prediction = prediction_fn({'x': features})

    self.assertIn('x', prediction)
    self.assertEqual(features, tuple(prediction['x']))
    gfile.DeleteRecursively(tmpdir)
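forward_features copies the raw 'x' input through to the prediction dict; since predictions are dense tensors, forwarding a SparseTensor requires sparse_default_values to densify it. A minimal sketch of that densify step (assumed behavior, not the extender's actual code):

import tensorflow as tf

sp = tf.SparseTensor(values=[1, 2, 3], indices=[[0, 0], [1, 0], [1, 1]],
                     dense_shape=[2, 2])
dense = tf.sparse_tensor_to_dense(sp, default_value=0)  # [[1, 0], [2, 3]]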
def _sequence_indicator_column(categorical_column):
  """Returns a feature column that represents sequences of multi-hot tensors.

  Use this to convert sequence categorical data into dense representation for
  input to sequence NN, such as RNN.

  Example:

  ```python
  colors = sequence_categorical_column_with_vocabulary_list(
      key='colors', vocabulary_list=('R', 'G', 'B', 'Y'))
  colors_indicator = _sequence_indicator_column(colors)
  columns = [colors_indicator]

  features = tf.parse_example(..., features=make_parse_example_spec(columns))
  input_layer, sequence_length = sequence_input_layer(features, columns)

  rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
  outputs, state = tf.nn.dynamic_rnn(
      rnn_cell, inputs=input_layer, sequence_length=sequence_length)
  ```

  Args:
    categorical_column: A `_SequenceCategoricalColumn` created with a
      `sequence_categorical_column_with_*` function.

  Returns:
    A `_SequenceCategoricalToDenseColumn`.

  Raises:
    ValueError: If `categorical_column` is not the right type.
  """
  if not isinstance(categorical_column, _SequenceCategoricalColumn):
    raise ValueError(
        'categorical_column must be of type _SequenceCategoricalColumn. '
        'Given (type {}): {}'.format(
            type(categorical_column), categorical_column))
  return _SequenceCategoricalToDenseColumn(
      fc.indicator_column(categorical_column))
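The isinstance check above means a plain (non-sequence) categorical column is rejected up front; a hypothetical misuse and the resulting error:

# Hypothetical misuse that trips the ValueError above:
plain = fc.categorical_column_with_identity(key='colors', num_buckets=4)
try:
  _sequence_indicator_column(plain)
except ValueError as e:
  print(e)  # categorical_column must be of type _SequenceCategoricalColumn...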
Example 14
from tensorflow.contrib.learn import LinearRegressor, pandas_input_fn, DNNRegressor, Experiment
from tensorflow.python.feature_column.feature_column import categorical_column_with_hash_bucket, numeric_column, \
    categorical_column_with_vocabulary_list, embedding_column, indicator_column

make = categorical_column_with_hash_bucket('make', 100)
horsepower = numeric_column('horsepower', shape=[])
cylinders = categorical_column_with_vocabulary_list(
    'num-of-cylinders', ['two', 'three', 'four', 'six', 'eight'])

###############
regressor = DNNRegressor(
    feature_columns=[
        embedding_column(make, 10),
        horsepower,
        indicator_column(cylinders),  # indicator_column takes no dimension arg
    ],
    hidden_units=[50, 30, 10])
################
regressor = LinearRegressor(feature_columns=[make, horsepower, cylinders])

# pandas_input_fn wraps a pandas DataFrame as a batched input function
train_input_fn = pandas_input_fn(x=input_data,
                                 y=input_label,
                                 batch_size=64,
                                 shuffle=True,
                                 num_epochs=None)

regressor.train(train_input_fn, steps=10000)


def experiment_fn(run_config, hparams):
    regressor = DNNRegressor(...,
                             config=run_config,
  def test_complete_flow(self):
    n_classes = 3
    input_dimension = 2
    batch_size = 12

    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
    x_data = data.reshape(batch_size, input_dimension)
    categorical_data = np.random.random_integers(
        0, len(x_data), size=len(x_data))
    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data,
           'categories': categorical_data},
        y=y_data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data,
           'categories': categorical_data},
        y=y_data,
        batch_size=batch_size,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data,
           'categories': categorical_data},
        batch_size=batch_size,
        shuffle=False)

    feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,)),
        feature_column.indicator_column(
            feature_column.categorical_column_with_vocabulary_list(
                'categories',
                vocabulary_list=np.linspace(
                    0., len(x_data), len(x_data), dtype=np.int64)))
    ]

    estimator = dnn.DNNClassifier(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        n_classes=n_classes,
        model_dir=self._model_dir)

    def optimizer_fn():
      return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05)

    estimator = estimator_lib.Estimator(
        model_fn=replicate_model_fn.replicate_model_fn(
            estimator.model_fn,
            optimizer_fn,
            devices=['/gpu:0', '/gpu:1', '/gpu:2']),
        model_dir=estimator.model_dir,
        config=estimator.config,
        params=estimator.params)

    num_steps = 10
    estimator.train(train_input_fn, steps=num_steps)

    scores = estimator.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    predicted_proba = np.array([
        x[prediction_keys.PredictionKeys.PROBABILITIES]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                             serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example 16
  def testMultiExamplesMultiFeatures(self):
    """Tests examples with multiple sequential feature columns.

    Intermediate values are rounded for ease in reading.
    input_layer = [[[1, 0, 10], [0, 1, 5]], [[1, 0, 2], [0, 0, 0]]]
    initial_state = [[0, 0], [0, 0]]
    rnn_output_timestep_1 = [[tanh(.5*1 + 1*0 + .1*10 + .2*0 + .3*0 +.2),
                              tanh(-.5*1 - 1*0 - .2*10 - .3*0 - .4*0 +.5)],
                             [tanh(.5*1 + 1*0 + .1*2 + .2*0 + .3*0 +.2),
                              tanh(-.5*1 - 1*0 - .2*2 - .3*0 - .4*0 +.5)]]
                          = [[0.94, -0.96], [0.72, -0.38]]
    rnn_output_timestep_2 = [[tanh(.5*0 + 1*1 + .1*5 + .2*.94 - .3*.96 +.2),
                              tanh(-.5*0 - 1*1 - .2*5 - .3*.94 + .4*.96 +.5)],
                             [<ignored-padding>]]
                          = [[0.92, -0.88], [<ignored-padding>]]
    logits = [[-1*0.92 - 1*0.88 + 0.3],
              [-1*0.72 - 1*0.38 + 0.3]]
           = [[-1.5056], [-0.7962]]
    """
    base_global_step = 100
    create_checkpoint(
        # FeatureColumns are sorted alphabetically, so on_sale weights are
        # inserted before price.
        rnn_weights=[[.5, -.5], [1., -1.], [.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1.], [1.]],
        logits_biases=[0.3],
        global_step=base_global_step,
        model_dir=self._model_dir)

    def features_fn():
      return {
          'price':
              sparse_tensor.SparseTensor(
                  values=[10., 5., 2.],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2]),
          'on_sale':
              sparse_tensor.SparseTensor(
                  values=[0, 1, 0],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2]),
      }

    price_column = seq_fc.sequence_numeric_column('price', shape=(1,))
    on_sale_column = fc.indicator_column(
        seq_fc.sequence_categorical_column_with_identity(
            'on_sale', num_buckets=2))
    sequence_feature_columns = [price_column, on_sale_column]
    context_feature_columns = []

    for mode in [
        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
        model_fn.ModeKeys.PREDICT
    ]:
      self._test_logits(
          mode,
          rnn_units=[2],
          logits_dimension=1,
          features_fn=features_fn,
          sequence_feature_columns=sequence_feature_columns,
          context_feature_columns=context_feature_columns,
          expected_logits=[[-1.5056], [-0.7962]])
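The hand-computed logits in the docstring check out numerically; a standalone NumPy verification of the same arithmetic (independent of the estimator code):

import numpy as np

W = np.array([[.5, -.5], [1., -1.], [.1, -.2], [.2, -.3], [.3, -.4]])
b = np.array([.2, .5])
w_logits, b_logits = np.array([[-1.], [1.]]), np.array([.3])

def step(x, h):
  """One BasicRNNCell step: tanh([inputs, state] @ W + b)."""
  return np.tanh(np.concatenate([x, h]) @ W + b)

h0 = step(np.array([1., 0., 10.]), np.zeros(2))  # example 0, timestep 1
h0 = step(np.array([0., 1., 5.]), h0)            # example 0, timestep 2
h1 = step(np.array([1., 0., 2.]), np.zeros(2))   # example 1, timestep 1
print(h0 @ w_logits + b_logits)  # approx [-1.5056]
print(h1 @ w_logits + b_logits)  # approx [-0.7962]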
Example 17
    def _build_feature_columns(self):
        multi_hot_feature_columns = {}
        multi_hot_feature_columns_deep = {}
        multi_category_feature_columns = {}
        continuous_feature_columns = {}
        crossed_feature_columns = []
        bucketized_feature_columns = []
        embedding_feature_columns = []

        if self._data_conf.multi_hot_columns is not None:
            for column in self._data_conf.multi_hot_columns:
                multi_hot_feature_columns[
                    column] = categorical_column_with_vocabulary_list(
                        column,
                        self._data_conf.multi_hot_columns[column],
                        dtype=tf.string)
                multi_hot_feature_columns_deep[column] = indicator_column(
                    multi_hot_feature_columns[column])

        if self._data_conf.multi_category_columns is not None:
            multi_category_feature_columns = {
                column:
                categorical_column_with_hash_bucket(column,
                                                    hash_bucket_size=1000)
                for column in self._data_conf.multi_category_columns
            }

        if self._data_conf.continuous_columns is not None:
            continuous_feature_columns = {
                column: numeric_column(column)
                for column in self._data_conf.continuous_columns
            }

        if self._data_conf.crossed_columns is not None:
            crossed_feature_columns = [
                crossed_column(_, hash_bucket_size=100000)
                for _ in self._data_conf.crossed_columns
            ]

        if self._data_conf.bucketized_columns is not None:
            for column, boundary in self._data_conf.bucketized_columns.items():
                bucketized_feature_columns.append(
                    bucketized_column(continuous_feature_columns[column],
                                      boundaries=boundary))

        if len(multi_category_feature_columns) > 0:
            embedding_feature_columns = [
                embedding_column(
                    _, dimension=self._model_conf.embedding_dimension)
                for _ in multi_category_feature_columns.values()
            ]

        self._feature_mapping = {
            0: list(multi_hot_feature_columns.values()),
            1: list(multi_category_feature_columns.values()),
            2: list(continuous_feature_columns.values()),
            3: crossed_feature_columns,
            4: bucketized_feature_columns,
            5: embedding_feature_columns,
            6: list(multi_hot_feature_columns_deep.values())
        }

        self._build_feature_columns_for_model()
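A sketch of how a type-indexed mapping like self._feature_mapping is typically consumed, splitting the groups between the wide (linear) and deep (DNN) parts of a combined model; the group selection here is hypothetical, since _build_feature_columns_for_model is not shown:

# Hypothetical split over the mapping built above: sparse/crossed groups
# feed the linear part, dense/embedded groups feed the DNN part.
wide_columns = (self._feature_mapping[0] + self._feature_mapping[3]
                + self._feature_mapping[4])
deep_columns = (self._feature_mapping[2] + self._feature_mapping[5]
                + self._feature_mapping[6])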