Example no. 1
import tensorflow as tf
from tensorflow.python.feature_column.feature_column import (
    _DenseColumn, _LazyBuilder, _normalize_feature_columns)


def myself_input_layer(features,
                       feature_columns,
                       weight_collections=None,
                       trainable=True,
                       cols_to_vars=None):
    """Returns a dict mapping each column's name to its dense `Tensor`.

    Unlike `tf.feature_column.input_layer`, the per-column tensors are
    returned separately rather than concatenated along axis 1.
    """
    feature_columns = _normalize_feature_columns(feature_columns)
    for column in feature_columns:
        if not isinstance(column, _DenseColumn):
            raise ValueError(
                'Items of feature_columns must be a _DenseColumn. '
                'You can wrap a categorical column with an '
                'embedding_column or indicator_column. Given: {}'.format(
                    column))
    weight_collections = list(weight_collections or [])
    if tf.GraphKeys.GLOBAL_VARIABLES not in weight_collections:
        weight_collections.append(tf.GraphKeys.GLOBAL_VARIABLES)
    if tf.GraphKeys.MODEL_VARIABLES not in weight_collections:
        weight_collections.append(tf.GraphKeys.MODEL_VARIABLES)

    # A non-None `scope` allows variable reuse, e.g. when this function is
    # wrapped by a `make_template`.
    with tf.variable_scope(None,
                           default_name='myself_input_layer',
                           values=features.values()):
        builder = _LazyBuilder(features)
        output_tensors = {}
        for column in feature_columns:
            with tf.variable_scope(None, default_name=column._var_scope_name):
                tensor = column._get_dense_tensor(
                    builder,
                    weight_collections=weight_collections,
                    trainable=trainable)
                num_elements = column._variable_shape.num_elements()  # pylint: disable=protected-access
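                # Flatten each column's output to rank 2:
                # [batch_size, num_elements].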
                batch_size = tf.shape(tensor)[0]
                output_tensors[column.name] = tf.reshape(tensor,
                                                         shape=(batch_size,
                                                                num_elements))
                if cols_to_vars is not None:
                    # Retrieve any variables created inside this column's
                    # scope (some _DenseColumn's don't create variables, in
                    # which case an empty list is returned). This must run
                    # inside the column's variable scope so the scope name
                    # matches the variables just created.
                    cols_to_vars[column] = tf.get_collection(
                        tf.GraphKeys.GLOBAL_VARIABLES,
                        scope=tf.get_variable_scope().name)
    return output_tensors
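
A minimal smoke test for the function above, assuming TensorFlow 1.x graph
mode; the feature names and constant values here are made up for illustration
and are not part of the original example:

price = tf.feature_column.numeric_column('price')
size = tf.feature_column.numeric_column('size')
features = {
    'price': tf.constant([[1.0], [5.0]]),
    'size': tf.constant([[2.0], [3.0]]),
}
cols_to_vars = {}
outputs = myself_input_layer(features, [price, size],
                             cols_to_vars=cols_to_vars)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(outputs['price']))  # [[1.], [5.]]
    print(sess.run(outputs['size']))   # [[2.], [3.]]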

Example no. 2

from tensorflow.python.feature_column import feature_column as fc
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variable_scope


def sequence_input_layer(
    features,
    feature_columns,
    weight_collections=None,
    trainable=True):
  """"Builds input layer for sequence input.

  All `feature_columns` must be sequence dense columns with the same
  `sequence_length`. The output of this method can be fed into sequence
  networks, such as RNN.

  The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`.
  `T` is the maximum sequence length for this batch, which could differ from
  batch to batch.

  If multiple `feature_columns` are given with `Di` `num_elements` each, their
  outputs are concatenated. So, the final `Tensor` has shape
  `[batch_size, T, D0 + D1 + ... + Dn]`.

  Example:

  ```python
  rating = sequence_numeric_column('rating')
  watches = sequence_categorical_column_with_identity(
      'watches', num_buckets=1000)
  watches_embedding = embedding_column(watches, dimension=10)
  columns = [rating, watches_embedding]

  features = tf.parse_example(..., features=make_parse_example_spec(columns))
  input_layer, sequence_length = sequence_input_layer(features, columns)

  rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
  outputs, state = tf.nn.dynamic_rnn(
      rnn_cell, inputs=input_layer, sequence_length=sequence_length)
  ```

  Args:
    features: A dict mapping keys to tensors.
    feature_columns: An iterable of dense sequence columns. Valid columns are
      - `embedding_column` that wraps a `sequence_categorical_column_with_*`
      - `sequence_numeric_column`.
    weight_collections: A list of collection names to which the Variable will be
      added. Note that variables will also be added to collections
      `tf.GraphKeys.GLOBAL_VARIABLES` and `tf.GraphKeys.MODEL_VARIABLES`.
    trainable: If `True` also add the variable to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES`.

  Returns:
    An `(input_layer, sequence_length)` tuple where:
    - input_layer: A float `Tensor` of shape `[batch_size, T, D]`.
        `T` is the maximum sequence length for this batch, which could differ
        from batch to batch. `D` is the sum of `num_elements` for all
        `feature_columns`.
    - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence
        length for each example.

  Raises:
    ValueError: If any of the `feature_columns` is the wrong type.
  """
  feature_columns = fc._normalize_feature_columns(feature_columns)
  for c in feature_columns:
    if not isinstance(c, fc._SequenceDenseColumn):
      raise ValueError(
          'All feature_columns must be of type _SequenceDenseColumn. '
          'You can wrap a sequence_categorical_column with an embedding_column '
          'or indicator_column. '
          'Given (type {}): {}'.format(type(c), c))

  with variable_scope.variable_scope(
      None, default_name='sequence_input_layer', values=features.values()):
    builder = fc._LazyBuilder(features)
    output_tensors = []
    sequence_lengths = []
    ordered_columns = []

    for column in sorted(feature_columns, key=lambda x: x.name):
      ordered_columns.append(column)
      with variable_scope.variable_scope(
          None, default_name=column._var_scope_name):
        dense_tensor, sequence_length = column._get_sequence_dense_tensor(
            builder,
            weight_collections=weight_collections,
            trainable=trainable)
        # Flattens the final dimension to produce a 3D Tensor.
        num_elements = column._variable_shape.num_elements()
        shape = array_ops.shape(dense_tensor)
        target_shape = [shape[0], shape[1], num_elements]
        output_tensors.append(
            array_ops.reshape(dense_tensor, shape=target_shape))
        sequence_lengths.append(sequence_length)

    fc._verify_static_batch_size_equality(output_tensors, ordered_columns)
    fc._verify_static_batch_size_equality(sequence_lengths, ordered_columns)
    sequence_length = _assert_all_equal_and_return(sequence_lengths)

    return array_ops.concat(output_tensors, -1), sequence_length
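
The snippet calls `_assert_all_equal_and_return`, which is not defined above.
A sketch of that helper, matching the version in `tf.contrib.feature_column`'s
`sequence_feature_column.py`:

from tensorflow.python.framework import ops
from tensorflow.python.ops import check_ops


def _assert_all_equal_and_return(tensors, name=None):
  """Asserts that all tensors are equal and returns the first one."""
  with ops.name_scope(name, 'assert_all_equal', values=tensors):
    if len(tensors) == 1:
      return tensors[0]
    # Add an assertion per remaining tensor, then gate the returned value
    # on those assertions so they actually run.
    assert_equal_ops = []
    for t in tensors[1:]:
      assert_equal_ops.append(check_ops.assert_equal(tensors[0], t))
    with ops.control_dependencies(assert_equal_ops):
      return array_ops.identity(tensors[0])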