Code example #1
  def test_sequence_length_with_empty_rows(self):
    """Tests _sequence_length when some examples do not have ids."""
    vocabulary_size = 3
    sparse_input_a = sparse_tensor.SparseTensorValue(
        # example 0, ids []
        # example 1, ids [2]
        # example 2, ids [0, 1]
        # example 3, ids []
        # example 4, ids [1]
        # example 5, ids []
        indices=((1, 0), (2, 0), (2, 1), (4, 0)),
        values=(2, 0, 1, 1),
        dense_shape=(6, 2))
    expected_sequence_length_a = [0, 1, 2, 0, 1, 0]
    categorical_column_a = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)

    sparse_input_b = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids []
        # example 2, ids []
        # example 3, ids []
        # example 4, ids [1]
        # example 5, ids [0, 1]
        indices=((0, 0), (4, 0), (5, 0), (5, 1)),
        values=(2, 1, 0, 1),
        dense_shape=(6, 2))
    expected_sequence_length_b = [1, 0, 0, 0, 1, 2]
    categorical_column_b = sfc.sequence_categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size)

    shared_embedding_columns = fc.shared_embedding_columns(
        [categorical_column_a, categorical_column_b], dimension=2)

    sequence_length_a = shared_embedding_columns[0]._get_sequence_dense_tensor(
        _LazyBuilder({
            'aaa': sparse_input_a
        }))[1]
    sequence_length_b = shared_embedding_columns[1]._get_sequence_dense_tensor(
        _LazyBuilder({
            'bbb': sparse_input_b
        }))[1]

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(
          expected_sequence_length_a, sequence_length_a.eval(session=sess))
      self.assertAllEqual(
          expected_sequence_length_b, sequence_length_b.eval(session=sess))
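The snippets in this listing are taken from TensorFlow's sequence feature column and RNN estimator test files and omit their imports. A minimal import block that should make most of them runnable against a TensorFlow 1.x checkout is sketched below; the exact module paths, in particular the contrib sequence_feature_column module aliased as sfc and the internal _LazyBuilder helper, are assumptions based on the 1.x source layout and may need adjusting for your version. The rnn_test.py snippet additionally relies on test-local helpers (seq_fc, model_fn, create_checkpoint, self._test_logits) that this block does not cover.

# Assumed imports for the snippets in this listing (TensorFlow 1.x layout;
# module paths are best guesses, not the verbatim header of the original tests).
import numpy as np

from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc
from tensorflow.python.feature_column import feature_column as fc
from tensorflow.python.feature_column.feature_column import _LazyBuilder
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.training import monitored_session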
Code example #2
  def test_get_sequence_dense_tensor(self):
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        # example 2, ids []
        # example 3, ids [1]
        indices=((0, 0), (1, 0), (1, 1), (3, 0)),
        values=(2, 0, 1, 1),
        dense_shape=(4, 2))

    expected_lookups = [
        # example 0, ids [2]
        [[0., 0., 1.], [0., 0., 0.]],
        # example 1, ids [0, 1]
        [[1., 0., 0.], [0., 1., 0.]],
        # example 2, ids []
        [[0., 0., 0.], [0., 0., 0.]],
        # example 3, ids [1]
        [[0., 1., 0.], [0., 0., 0.]],
    ]

    categorical_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column = fc.indicator_column(categorical_column)

    indicator_tensor, _ = indicator_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(expected_lookups, indicator_tensor.eval(session=sess))
Code example #3
    def test_sequence_length_with_empty_rows(self):
        """Tests _sequence_length when some examples do not have ids."""
        vocabulary_size = 3
        sparse_input = sparse_tensor.SparseTensorValue(
            # example 0, ids []
            # example 1, ids [2]
            # example 2, ids [0, 1]
            # example 3, ids []
            # example 4, ids [1]
            # example 5, ids []
            indices=((1, 0), (2, 0), (2, 1), (4, 0)),
            values=(2, 0, 1, 1),
            dense_shape=(6, 2))
        expected_sequence_length = [0, 1, 2, 0, 1, 0]

        categorical_column = sfc.sequence_categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        indicator_column = sfc._sequence_indicator_column(categorical_column)

        _, sequence_length = indicator_column._get_sequence_dense_tensor(
            _LazyBuilder({'aaa': sparse_input}))

        with monitored_session.MonitoredSession() as sess:
            self.assertAllEqual(expected_sequence_length,
                                sequence_length.eval(session=sess))
Code example #4
  def test_sequence_length_with_empty_rows(self):
    """Tests _sequence_length when some examples do not have ids."""
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids []
        # example 1, ids [2]
        # example 2, ids [0, 1]
        # example 3, ids []
        # example 4, ids [1]
        # example 5, ids []
        indices=((1, 0), (2, 0), (2, 1), (4, 0)),
        values=(2, 0, 1, 1),
        dense_shape=(6, 2))
    expected_sequence_length = [0, 1, 2, 0, 1, 0]

    categorical_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column = fc.indicator_column(categorical_column)

    _, sequence_length = indicator_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(
          expected_sequence_length, sequence_length.eval(session=sess))
Code example #5
    def test_get_sequence_dense_tensor(self):
        vocabulary_size = 3
        sparse_input = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            # example 3, ids [1]
            indices=((0, 0), (1, 0), (1, 1), (3, 0)),
            values=(2, 0, 1, 1),
            dense_shape=(4, 2))

        expected_lookups = [
            # example 0, ids [2]
            [[0., 0., 1.], [0., 0., 0.]],
            # example 1, ids [0, 1]
            [[1., 0., 0.], [0., 1., 0.]],
            # example 2, ids []
            [[0., 0., 0.], [0., 0., 0.]],
            # example 3, ids [1]
            [[0., 1., 0.], [0., 0., 0.]],
        ]

        categorical_column = sfc.sequence_categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        indicator_column = sfc._sequence_indicator_column(categorical_column)

        indicator_tensor, _ = indicator_column._get_sequence_dense_tensor(
            _LazyBuilder({'aaa': sparse_input}))

        with monitored_session.MonitoredSession() as sess:
            self.assertAllEqual(expected_lookups,
                                indicator_tensor.eval(session=sess))
Code example #6
    def test_indicator_column(self):
        vocabulary_size_a = 3
        sparse_input_a = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            indices=((0, 0), (1, 0), (1, 1)),
            values=(2, 0, 1),
            dense_shape=(2, 2))
        vocabulary_size_b = 2
        sparse_input_b = sparse_tensor.SparseTensorValue(
            # example 0, ids [1]
            # example 1, ids [1, 0]
            indices=((0, 0), (1, 0), (1, 1)),
            values=(1, 1, 0),
            dense_shape=(2, 2))

        expected_input_layer = [
            # example 0, ids_a [2], ids_b [1]
            [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
            # example 1, ids_a [0, 1], ids_b [1, 0]
            [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]],
        ]
        expected_sequence_length = [1, 2]

        categorical_column_a = sfc.sequence_categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size_a)
        indicator_column_a = sfc._sequence_indicator_column(
            categorical_column_a)
        categorical_column_b = sfc.sequence_categorical_column_with_identity(
            key='bbb', num_buckets=vocabulary_size_b)
        indicator_column_b = sfc._sequence_indicator_column(
            categorical_column_b)
        input_layer, sequence_length = sfc.sequence_input_layer(
            features={
                'aaa': sparse_input_a,
                'bbb': sparse_input_b,
            },
            # Test that columns are reordered alphabetically.
            feature_columns=[indicator_column_b, indicator_column_a])

        with monitored_session.MonitoredSession() as sess:
            self.assertAllEqual(expected_input_layer,
                                input_layer.eval(session=sess))
            self.assertAllEqual(expected_sequence_length,
                                sequence_length.eval(session=sess))
Code example #7
  def test_indicator_column(self):
    vocabulary_size_a = 3
    sparse_input_a = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))
    vocabulary_size_b = 2
    sparse_input_b = sparse_tensor.SparseTensorValue(
        # example 0, ids [1]
        # example 1, ids [1, 0]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(1, 1, 0),
        dense_shape=(2, 2))

    expected_input_layer = [
        # example 0, ids_a [2], ids_b [1]
        [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
        # example 1, ids_a [0, 1], ids_b [1, 0]
        [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]],
    ]
    expected_sequence_length = [1, 2]

    categorical_column_a = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size_a)
    indicator_column_a = fc.indicator_column(categorical_column_a)
    categorical_column_b = sfc.sequence_categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size_b)
    indicator_column_b = fc.indicator_column(categorical_column_b)
    input_layer, sequence_length = sfc.sequence_input_layer(
        features={
            'aaa': sparse_input_a,
            'bbb': sparse_input_b,
        },
        # Test that columns are reordered alphabetically.
        feature_columns=[indicator_column_b, indicator_column_a])

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
      self.assertAllEqual(
          expected_sequence_length, sequence_length.eval(session=sess))
Code example #8
  def test_sequence_length(self):
    vocabulary_size = 3

    sparse_input_a = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))
    expected_sequence_length_a = [1, 2]
    categorical_column_a = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)

    sparse_input_b = sparse_tensor.SparseTensorValue(
        # example 0, ids [0, 2]
        # example 1, ids [1]
        indices=((0, 0), (0, 1), (1, 0)),
        values=(0, 2, 1),
        dense_shape=(2, 2))
    expected_sequence_length_b = [2, 1]
    categorical_column_b = sfc.sequence_categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size)
    shared_embedding_columns = fc.shared_embedding_columns(
        [categorical_column_a, categorical_column_b], dimension=2)

    sequence_length_a = shared_embedding_columns[0]._get_sequence_dense_tensor(
        _LazyBuilder({
            'aaa': sparse_input_a
        }))[1]
    sequence_length_b = shared_embedding_columns[1]._get_sequence_dense_tensor(
        _LazyBuilder({
            'bbb': sparse_input_b
        }))[1]

    with monitored_session.MonitoredSession() as sess:
      sequence_length_a = sess.run(sequence_length_a)
      self.assertAllEqual(expected_sequence_length_a, sequence_length_a)
      self.assertEqual(np.int64, sequence_length_a.dtype)
      sequence_length_b = sess.run(sequence_length_b)
      self.assertAllEqual(expected_sequence_length_b, sequence_length_b)
      self.assertEqual(np.int64, sequence_length_b.dtype)
Code example #9
    def test_get_sequence_dense_tensor(self):
        vocabulary_size = 3
        sparse_input = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            # example 3, ids [1]
            indices=((0, 0), (1, 0), (1, 1), (3, 0)),
            values=(2, 0, 1, 1),
            dense_shape=(4, 2))

        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.)  # id 2
        )

        def _initializer(shape, dtype, partition_info):
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(dtypes.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        expected_lookups = [
            # example 0, ids [2]
            [[7., 11.], [0., 0.]],
            # example 1, ids [0, 1]
            [[1., 2.], [3., 5.]],
            # example 2, ids []
            [[0., 0.], [0., 0.]],
            # example 3, ids [1]
            [[3., 5.], [0., 0.]],
        ]

        categorical_column = sfc.sequence_categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        embedding_column = sfc._sequence_embedding_column(
            categorical_column,
            dimension=embedding_dimension,
            initializer=_initializer)

        embedding_lookup, _ = embedding_column._get_sequence_dense_tensor(
            _LazyBuilder({'aaa': sparse_input}))

        global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        self.assertItemsEqual(('embedding_weights:0', ),
                              tuple([v.name for v in global_vars]))
        with monitored_session.MonitoredSession() as sess:
            self.assertAllEqual(embedding_values,
                                global_vars[0].eval(session=sess))
            self.assertAllEqual(expected_lookups,
                                embedding_lookup.eval(session=sess))
Code example #10
  def test_sequence_length_with_zeros(self):
    column = sfc.sequence_categorical_column_with_identity(
        'aaa', num_buckets=3)
    inputs = sparse_tensor.SparseTensorValue(
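        # example 0, ids []
        # example 1, ids [1]
        # example 2, ids []
        # example 3, ids [2, 0]
        # example 4, ids []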
        indices=((1, 0), (3, 0), (3, 1)),
        values=(1, 2, 0),
        dense_shape=(5, 2))
    expected_sequence_length = [0, 1, 0, 2, 0]

    sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs}))

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(
          expected_sequence_length, sequence_length.eval(session=sess))
Code example #11
  def test_get_sequence_dense_tensor(self):
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        # example 2, ids []
        # example 3, ids [1]
        indices=((0, 0), (1, 0), (1, 1), (3, 0)),
        values=(2, 0, 1, 1),
        dense_shape=(4, 2))

    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.)  # id 2
    )
    def _initializer(shape, dtype, partition_info):
      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return embedding_values

    expected_lookups = [
        # example 0, ids [2]
        [[7., 11.], [0., 0.]],
        # example 1, ids [0, 1]
        [[1., 2.], [3., 5.]],
        # example 2, ids []
        [[0., 0.], [0., 0.]],
        # example 3, ids [1]
        [[3., 5.], [0., 0.]],
    ]

    categorical_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    embedding_column = fc.embedding_column(
        categorical_column, dimension=embedding_dimension,
        initializer=_initializer)

    embedding_lookup, _ = embedding_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
    self.assertItemsEqual(
        ('embedding_weights:0',), tuple([v.name for v in global_vars]))
    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess))
      self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess))
Code example #12
  def test_sequence_length(self):
    column = sfc.sequence_categorical_column_with_identity(
        'aaa', num_buckets=3)
    inputs = sparse_tensor.SparseTensorValue(
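        # example 0, ids [1]
        # example 1, ids [2, 0]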
        indices=((0, 0), (1, 0), (1, 1)),
        values=(1, 2, 0),
        dense_shape=(2, 2))
    expected_sequence_length = [1, 2]

    sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs}))

    with monitored_session.MonitoredSession() as sess:
      sequence_length = sess.run(sequence_length)
      self.assertAllEqual(expected_sequence_length, sequence_length)
      self.assertEqual(np.int64, sequence_length.dtype)
Code example #13
    def test_sequence_length(self):
        column = sfc.sequence_categorical_column_with_identity(
            'aaa', num_buckets=3)
        inputs = sparse_tensor.SparseTensorValue(
            indices=((0, 0), (1, 0), (1, 1)),
            values=(1, 2, 0),
            dense_shape=(2, 2))
        expected_sequence_length = [1, 2]

        sequence_length = column._sequence_length(
            _LazyBuilder({'aaa': inputs}))

        with monitored_session.MonitoredSession() as sess:
            sequence_length = sess.run(sequence_length)
            self.assertAllEqual(expected_sequence_length, sequence_length)
            self.assertEqual(np.int64, sequence_length.dtype)
Code example #14
  def _build_feature_columns(self):
    col = fc.categorical_column_with_identity(
        'int_ctx', num_buckets=100)
    ctx_cols = [
        fc.embedding_column(col, dimension=10),
        fc.numeric_column('float_ctx')]

    identity_col = sfc.sequence_categorical_column_with_identity(
        'int_list', num_buckets=10)
    bucket_col = sfc.sequence_categorical_column_with_hash_bucket(
        'bytes_list', hash_bucket_size=100)
    seq_cols = [
        fc.embedding_column(identity_col, dimension=10),
        fc.embedding_column(bucket_col, dimension=20)]

    return ctx_cols, seq_cols
Code example #15
    def test_get_sparse_tensors_inputs3d(self):
        """Tests _get_sparse_tensors when the input is already 3D Tensor."""
        column = sfc.sequence_categorical_column_with_identity(
            'aaa', num_buckets=3)
        inputs = sparse_tensor.SparseTensorValue(
            indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
            values=(1, 2, 0),
            dense_shape=(2, 2, 1))

        with self.assertRaisesRegexp(
                errors.InvalidArgumentError,
                r'Column aaa expected ID tensor of rank 2\.\s*'
                r'id_tensor shape:\s*\[2 2 1\]'):
            id_weight_pair = column._get_sparse_tensors(
                _LazyBuilder({'aaa': inputs}))
            with monitored_session.MonitoredSession() as sess:
                id_weight_pair.id_tensor.eval(session=sess)
Code example #16
  def test_get_sparse_tensors_inputs3d(self):
    """Tests _get_sparse_tensors when the input is already 3D Tensor."""
    column = sfc.sequence_categorical_column_with_identity(
        'aaa', num_buckets=3)
    inputs = sparse_tensor.SparseTensorValue(
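        # example 0, ids [[1]]
        # example 1, ids [[2], [0]]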
        indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
        values=(1, 2, 0),
        dense_shape=(2, 2, 1))

    with self.assertRaisesRegexp(
        errors.InvalidArgumentError,
        r'Column aaa expected ID tensor of rank 2\.\s*'
        r'id_tensor shape:\s*\[2 2 1\]'):
      id_weight_pair = column._get_sparse_tensors(
          _LazyBuilder({'aaa': inputs}))
      with monitored_session.MonitoredSession() as sess:
        id_weight_pair.id_tensor.eval(session=sess)
Code example #17
    def test_get_sparse_tensors(self):
        column = sfc.sequence_categorical_column_with_identity(
            'aaa', num_buckets=3)
        inputs = sparse_tensor.SparseTensorValue(
            indices=((0, 0), (1, 0), (1, 1)),
            values=(1, 2, 0),
            dense_shape=(2, 2))
        expected_sparse_ids = sparse_tensor.SparseTensorValue(
            indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
            values=np.array((1, 2, 0), dtype=np.int64),
            dense_shape=(2, 2, 1))

        id_weight_pair = column._get_sparse_tensors(
            _LazyBuilder({'aaa': inputs}))

        self.assertIsNone(id_weight_pair.weight_tensor)
        with monitored_session.MonitoredSession() as sess:
            _assert_sparse_tensor_value(
                self, expected_sparse_ids,
                id_weight_pair.id_tensor.eval(session=sess))
Code example #18
    def test_indicator_column(self):
        """Tests that error is raised for sequence indicator column."""
        vocabulary_size = 3
        sparse_input = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            indices=((0, 0), (1, 0), (1, 1)),
            values=(2, 0, 1),
            dense_shape=(2, 2))

        categorical_column_a = sfc.sequence_categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        indicator_column_a = fc.indicator_column(categorical_column_a)

        with self.assertRaisesRegexp(
                ValueError,
                r'In indicator_column: aaa_indicator\. categorical_column must not be '
                r'of type _SequenceCategoricalColumn\.'):
            _ = fc.input_layer(features={'aaa': sparse_input},
                               feature_columns=[indicator_column_a])
Code example #19
  def test_get_sparse_tensors(self):
    column = sfc.sequence_categorical_column_with_identity(
        'aaa', num_buckets=3)
    inputs = sparse_tensor.SparseTensorValue(
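        # example 0, ids [1]
        # example 1, ids [2, 0]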
        indices=((0, 0), (1, 0), (1, 1)),
        values=(1, 2, 0),
        dense_shape=(2, 2))
    expected_sparse_ids = sparse_tensor.SparseTensorValue(
        indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
        values=np.array((1, 2, 0), dtype=np.int64),
        dense_shape=(2, 2, 1))

    id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))

    self.assertIsNone(id_weight_pair.weight_tensor)
    with monitored_session.MonitoredSession() as sess:
      _assert_sparse_tensor_value(
          self,
          expected_sparse_ids,
          id_weight_pair.id_tensor.eval(session=sess))
Code example #20
  def test_indicator_column(self):
    """Tests that error is raised for sequence indicator column."""
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))

    categorical_column_a = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column_a = fc.indicator_column(categorical_column_a)

    with self.assertRaisesRegexp(
        ValueError,
        r'In indicator_column: aaa_indicator\. categorical_column must not be '
        r'of type _SequenceCategoricalColumn\.'):
      _ = fc.input_layer(
          features={'aaa': sparse_input},
          feature_columns=[indicator_column_a])
Code example #21
  def test_sequence_length(self):
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))
    expected_sequence_length = [1, 2]

    categorical_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column = fc.indicator_column(categorical_column)

    _, sequence_length = indicator_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    with monitored_session.MonitoredSession() as sess:
      sequence_length = sess.run(sequence_length)
      self.assertAllEqual(expected_sequence_length, sequence_length)
      self.assertEqual(np.int64, sequence_length.dtype)
Code example #22
    def test_sequence_length(self):
        vocabulary_size = 3
        sparse_input = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            indices=((0, 0), (1, 0), (1, 1)),
            values=(2, 0, 1),
            dense_shape=(2, 2))
        expected_sequence_length = [1, 2]

        categorical_column = sfc.sequence_categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        indicator_column = sfc._sequence_indicator_column(categorical_column)

        _, sequence_length = indicator_column._get_sequence_dense_tensor(
            _LazyBuilder({'aaa': sparse_input}))

        with monitored_session.MonitoredSession() as sess:
            sequence_length = sess.run(sequence_length)
            self.assertAllEqual(expected_sequence_length, sequence_length)
            self.assertEqual(np.int64, sequence_length.dtype)
Code example #23
    def testMultiExamplesMultiFeatures(self):
        """Tests examples with multiple sequential feature columns.

    Intermediate values are rounded for ease in reading.
    input_layer = [[[1, 0, 10], [0, 1, 5]], [[1, 0, 2], [0, 0, 0]]]
    initial_state = [[0, 0], [0, 0]]
    rnn_output_timestep_1 = [[tanh(.5*1 + 1*0 + .1*10 + .2*0 + .3*0 +.2),
                              tanh(-.5*1 - 1*0 - .2*10 - .3*0 - .4*0 +.5)],
                             [tanh(.5*1 + 1*0 + .1*2 + .2*0 + .3*0 +.2),
                              tanh(-.5*1 - 1*0 - .2*2 - .3*0 - .4*0 +.5)]]
                          = [[0.94, -0.96], [0.72, -0.38]]
    rnn_output_timestep_2 = [[tanh(.5*0 + 1*1 + .1*5 + .2*.94 - .3*.96 +.2),
                              tanh(-.5*0 - 1*1 - .2*5 - .3*.94 + .4*.96 +.5)],
                             [<ignored-padding>]]
                          = [[0.92, -0.88], [<ignored-padding>]]
    logits = [[-1*0.92 - 1*0.88 + 0.3],
              [-1*0.72 - 1*0.38 + 0.3]]
           = [[-1.5056], [-0.7962]]
    """
        base_global_step = 100
        create_checkpoint(
            # FeatureColumns are sorted alphabetically, so on_sale weights are
            # inserted before price.
            rnn_weights=[[.5, -.5], [1., -1.], [.1, -.2],
                         [.2, -.3], [.3, -.4]],
            rnn_biases=[.2, .5],
            logits_weights=[[-1.], [1.]],
            logits_biases=[0.3],
            global_step=base_global_step,
            model_dir=self._model_dir)

        def features_fn():
            return {
                'price':
                sparse_tensor.SparseTensor(values=[10., 5., 2.],
                                           indices=[[0, 0], [0, 1], [1, 0]],
                                           dense_shape=[2, 2]),
                'on_sale':
                sparse_tensor.SparseTensor(values=[0, 1, 0],
                                           indices=[[0, 0], [0, 1], [1, 0]],
                                           dense_shape=[2, 2]),
            }

        price_column = seq_fc.sequence_numeric_column('price', shape=(1, ))
        on_sale_column = fc.indicator_column(
            seq_fc.sequence_categorical_column_with_identity('on_sale',
                                                             num_buckets=2))
        sequence_feature_columns = [price_column, on_sale_column]
        context_feature_columns = []

        for mode in [
                model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
                model_fn.ModeKeys.PREDICT
        ]:
            self._test_logits(
                mode,
                rnn_units=[2],
                logits_dimension=1,
                features_fn=features_fn,
                sequence_feature_columns=sequence_feature_columns,
                context_feature_columns=context_feature_columns,
                expected_logits=[[-1.5056], [-0.7962]])
Code example #24
File: rnn_test.py  Project: ThunderQi/tensorflow
  def testMultiExamplesMultiFeatures(self):
    """Tests examples with multiple sequential feature columns.

    Intermediate values are rounded for ease in reading.
    input_layer = [[[1, 0, 10], [0, 1, 5]], [[1, 0, 2], [0, 0, 0]]]
    initial_state = [[0, 0], [0, 0]]
    rnn_output_timestep_1 = [[tanh(.5*1 + 1*0 + .1*10 + .2*0 + .3*0 +.2),
                              tanh(-.5*1 - 1*0 - .2*10 - .3*0 - .4*0 +.5)],
                             [tanh(.5*1 + 1*0 + .1*2 + .2*0 + .3*0 +.2),
                              tanh(-.5*1 - 1*0 - .2*2 - .3*0 - .4*0 +.5)]]
                          = [[0.94, -0.96], [0.72, -0.38]]
    rnn_output_timestep_2 = [[tanh(.5*0 + 1*1 + .1*5 + .2*.94 - .3*.96 +.2),
                              tanh(-.5*0 - 1*1 - .2*5 - .3*.94 + .4*.96 +.5)],
                             [<ignored-padding>]]
                          = [[0.92, -0.88], [<ignored-padding>]]
    logits = [[-1*0.92 - 1*0.88 + 0.3],
              [-1*0.72 - 1*0.38 + 0.3]]
           = [[-1.5056], [-0.7962]]
    """
    base_global_step = 100
    create_checkpoint(
        # FeatureColumns are sorted alphabetically, so on_sale weights are
        # inserted before price.
        rnn_weights=[[.5, -.5], [1., -1.], [.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1.], [1.]],
        logits_biases=[0.3],
        global_step=base_global_step,
        model_dir=self._model_dir)

    def features_fn():
      return {
          'price':
              sparse_tensor.SparseTensor(
                  values=[10., 5., 2.],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2]),
          'on_sale':
              sparse_tensor.SparseTensor(
                  values=[0, 1, 0],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2]),
      }

    price_column = seq_fc.sequence_numeric_column('price', shape=(1,))
    on_sale_column = fc.indicator_column(
        seq_fc.sequence_categorical_column_with_identity(
            'on_sale', num_buckets=2))
    sequence_feature_columns = [price_column, on_sale_column]
    context_feature_columns = []

    for mode in [
        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
        model_fn.ModeKeys.PREDICT
    ]:
      self._test_logits(
          mode,
          rnn_units=[2],
          logits_dimension=1,
          features_fn=features_fn,
          sequence_feature_columns=sequence_feature_columns,
          context_feature_columns=context_feature_columns,
          expected_logits=[[-1.5056], [-0.7962]])
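As a sanity check on the docstring above, the same forward pass can be reproduced with a few lines of NumPy. This is a hedged, standalone sketch of what the docstring describes (a single tanh RNN cell followed by a dense logits layer, reading the output at each example's last valid timestep); it is not the estimator code under test, and the variable names are illustrative only.

# Standalone NumPy re-computation of expected_logits from the docstring above.
import numpy as np

# Checkpoint values from create_checkpoint; input rows are ordered
# [on_sale_0, on_sale_1, price, state_0, state_1] because the feature
# columns are sorted alphabetically.
rnn_weights = np.array([[.5, -.5], [1., -1.], [.1, -.2], [.2, -.3], [.3, -.4]])
rnn_biases = np.array([.2, .5])
logits_weights = np.array([[-1.], [1.]])
logits_biases = np.array([.3])

# input_layer from the docstring: 2 examples x 2 timesteps x [on_sale one-hot, price].
inputs = np.array([[[1., 0., 10.], [0., 1., 5.]],
                   [[1., 0., 2.], [0., 0., 0.]]])
sequence_length = [2, 1]  # example 1's second timestep is padding

state = np.zeros((2, 2))
last_state = np.zeros((2, 2))
for t in range(inputs.shape[1]):
  concat = np.concatenate([inputs[:, t, :], state], axis=1)
  state = np.tanh(concat.dot(rnn_weights) + rnn_biases)
  for i, length in enumerate(sequence_length):
    if t < length:  # ignore padded timesteps when picking the final state
      last_state[i] = state[i]

logits = last_state.dot(logits_weights) + logits_biases
print(logits)  # approximately [[-1.5056], [-0.7962]]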
Code example #25
  def test_embedding_column(self):
    vocabulary_size = 3
    sparse_input_a = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))
    sparse_input_b = sparse_tensor.SparseTensorValue(
        # example 0, ids [1]
        # example 1, ids [2, 0]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(1, 2, 0),
        dense_shape=(2, 2))

    embedding_dimension_a = 2
    embedding_values_a = (
        (1., 2.),  # id 0
        (3., 4.),  # id 1
        (5., 6.)  # id 2
    )
    embedding_dimension_b = 3
    embedding_values_b = (
        (11., 12., 13.),  # id 0
        (14., 15., 16.),  # id 1
        (17., 18., 19.)  # id 2
    )
    def _get_initializer(embedding_dimension, embedding_values):
      def _initializer(shape, dtype, partition_info):
        self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
        self.assertEqual(dtypes.float32, dtype)
        self.assertIsNone(partition_info)
        return embedding_values
      return _initializer

    expected_input_layer = [
        # example 0, ids_a [2], ids_b [1]
        [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
        # example 1, ids_a [0, 1], ids_b [2, 0]
        [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],
    ]
    expected_sequence_length = [1, 2]

    categorical_column_a = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    embedding_column_a = fc.embedding_column(
        categorical_column_a, dimension=embedding_dimension_a,
        initializer=_get_initializer(embedding_dimension_a, embedding_values_a))
    categorical_column_b = sfc.sequence_categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size)
    embedding_column_b = fc.embedding_column(
        categorical_column_b, dimension=embedding_dimension_b,
        initializer=_get_initializer(embedding_dimension_b, embedding_values_b))

    input_layer, sequence_length = sfc.sequence_input_layer(
        features={
            'aaa': sparse_input_a,
            'bbb': sparse_input_b,
        },
        # Test that columns are reordered alphabetically.
        feature_columns=[embedding_column_b, embedding_column_a])

    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
    self.assertItemsEqual(
        ('sequence_input_layer/aaa_embedding/embedding_weights:0',
         'sequence_input_layer/bbb_embedding/embedding_weights:0'),
        tuple([v.name for v in global_vars]))
    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(embedding_values_a, global_vars[0].eval(session=sess))
      self.assertAllEqual(embedding_values_b, global_vars[1].eval(session=sess))
      self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
      self.assertAllEqual(
          expected_sequence_length, sequence_length.eval(session=sess))
Code example #26
    def test_embedding_column(self):
        vocabulary_size = 3
        sparse_input_a = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            indices=((0, 0), (1, 0), (1, 1)),
            values=(2, 0, 1),
            dense_shape=(2, 2))
        sparse_input_b = sparse_tensor.SparseTensorValue(
            # example 0, ids [1]
            # example 1, ids [2, 0]
            indices=((0, 0), (1, 0), (1, 1)),
            values=(1, 2, 0),
            dense_shape=(2, 2))

        embedding_dimension_a = 2
        embedding_values_a = (
            (1., 2.),  # id 0
            (3., 4.),  # id 1
            (5., 6.)  # id 2
        )
        embedding_dimension_b = 3
        embedding_values_b = (
            (11., 12., 13.),  # id 0
            (14., 15., 16.),  # id 1
            (17., 18., 19.)  # id 2
        )

        def _get_initializer(embedding_dimension, embedding_values):
            def _initializer(shape, dtype, partition_info):
                self.assertAllEqual((vocabulary_size, embedding_dimension),
                                    shape)
                self.assertEqual(dtypes.float32, dtype)
                self.assertIsNone(partition_info)
                return embedding_values

            return _initializer

        expected_input_layer = [
            # example 0, ids_a [2], ids_b [1]
            [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
            # example 1, ids_a [0, 1], ids_b [2, 0]
            [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],
        ]
        expected_sequence_length = [1, 2]

        categorical_column_a = sfc.sequence_categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        embedding_column_a = sfc._sequence_embedding_column(
            categorical_column_a,
            dimension=embedding_dimension_a,
            initializer=_get_initializer(embedding_dimension_a,
                                         embedding_values_a))
        categorical_column_b = sfc.sequence_categorical_column_with_identity(
            key='bbb', num_buckets=vocabulary_size)
        embedding_column_b = sfc._sequence_embedding_column(
            categorical_column_b,
            dimension=embedding_dimension_b,
            initializer=_get_initializer(embedding_dimension_b,
                                         embedding_values_b))

        input_layer, sequence_length = sfc.sequence_input_layer(
            features={
                'aaa': sparse_input_a,
                'bbb': sparse_input_b,
            },
            # Test that columns are reordered alphabetically.
            feature_columns=[embedding_column_b, embedding_column_a])

        global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        self.assertItemsEqual(
            ('sequence_input_layer/aaa_embedding/embedding_weights:0',
             'sequence_input_layer/bbb_embedding/embedding_weights:0'),
            tuple([v.name for v in global_vars]))
        with monitored_session.MonitoredSession() as sess:
            self.assertAllEqual(embedding_values_a,
                                global_vars[0].eval(session=sess))
            self.assertAllEqual(embedding_values_b,
                                global_vars[1].eval(session=sess))
            self.assertAllEqual(expected_input_layer,
                                input_layer.eval(session=sess))
            self.assertAllEqual(expected_sequence_length,
                                sequence_length.eval(session=sess))