Example #1
def _model_variable_getter(getter,
                           name,
                           shape=None,
                           dtype=None,
                           initializer=None,
                           regularizer=None,
                           trainable=True,
                           collections=None,
                           caching_device=None,
                           partitioner=None,
                           rename=None,
                           use_resource=None,
                           **_):
    """Getter that uses model_variable for compatibility with core layers."""
    short_name = name.split('/')[-1]
    if rename and short_name in rename:
        name_components = name.split('/')
        name_components[-1] = rename[short_name]
        name = '/'.join(name_components)
    return variables.model_variable(name,
                                    shape=shape,
                                    dtype=dtype,
                                    initializer=initializer,
                                    regularizer=regularizer,
                                    collections=collections,
                                    trainable=trainable,
                                    caching_device=caching_device,
                                    partitioner=partitioner,
                                    custom_getter=getter,
                                    use_resource=use_resource)
def bow_encoder(ids,
                vocab_size,
                embed_dim,
                sparse_lookup=True,
                initializer=None,
                regularizer=None,
                trainable=True,
                scope=None,
                reuse=None):
    """Maps a sequence of symbols to a vector per example by averaging embeddings.

  Args:
    ids: `[batch_size, doc_length]` `Tensor` or `SparseTensor` of type
      `int32` or `int64` with symbol ids.
    vocab_size: Integer number of symbols in vocabulary.
    embed_dim: Integer number of dimensions for embedding matrix.
    sparse_lookup: `bool`, if `True`, converts ids to a `SparseTensor`
        and performs a sparse embedding lookup. This is usually faster,
        but not desirable if padding tokens should have an embedding. Empty rows
        are assigned a special embedding.
    initializer: An initializer for the embeddings, if `None` default for
        current scope is used.
    regularizer: Optional regularizer for the embeddings.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional string specifying the variable scope for the op, required
        if `reuse=True`.
    reuse: If `True`, variables inside the op will be reused.

  Returns:
    Encoding `Tensor` `[batch_size, embed_dim]` produced by
    averaging embeddings.

  Raises:
    ValueError: If `embed_dim` or `vocab_size` are not specified.
  """
    if not vocab_size or not embed_dim:
        raise ValueError('Must specify vocab size and embedding dimension')
    with variable_scope.variable_scope(scope,
                                       'bow_encoder', [ids],
                                       reuse=reuse):
        embeddings = variables.model_variable('embeddings',
                                              shape=[vocab_size, embed_dim],
                                              initializer=initializer,
                                              regularizer=regularizer,
                                              trainable=trainable)
        if sparse_lookup:
            if isinstance(ids, sparse_tensor.SparseTensor):
                sparse_ids = ids
            else:
                sparse_ids = sparse_ops.dense_to_sparse_tensor(ids)
            return contrib_embedding_ops.safe_embedding_lookup_sparse(
                [embeddings], sparse_ids, combiner='mean', default_id=0)
        else:
            if isinstance(ids, sparse_tensor.SparseTensor):
                raise TypeError('ids are expected to be a dense Tensor, '
                                'got: %s' % ids)
            return math_ops.reduce_mean(embedding_ops.embedding_lookup(
                embeddings, ids),
                                        axis=1)
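A minimal usage sketch for bow_encoder, assuming TensorFlow 1.x with tf.contrib available; the ids below are illustrative.

import tensorflow as tf

ids = tf.constant([[1, 2, 3, 0], [4, 5, 0, 0]], dtype=tf.int64)  # [batch_size, doc_length]
encoded = tf.contrib.layers.bow_encoder(ids, vocab_size=10, embed_dim=4)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(encoded).shape)  # (2, 4): one averaged embedding per example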
def embed_sequence(ids,
                   vocab_size=None,
                   embed_dim=None,
                   unique=False,
                   initializer=None,
                   regularizer=None,
                   trainable=True,
                   scope=None,
                   reuse=None):
    """Maps a sequence of symbols to a sequence of embeddings.

  A typical use case is reusing embeddings between an encoder and decoder.

  Args:
    ids: `[batch_size, doc_length]` `Tensor` of type `int32` or `int64`
      with symbol ids.
    vocab_size: Integer number of symbols in vocabulary.
    embed_dim: Integer number of dimensions for embedding matrix.
    unique: If `True`, will first compute the unique set of indices, and then
         lookup each embedding once, repeating them in the output as needed.
    initializer: An initializer for the embeddings, if `None` default for
        current scope is used.
    regularizer: Optional regularizer for the embeddings.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    scope: Optional string specifying the variable scope for the op, required
        if `reuse=True`.
    reuse: If `True`, variables inside the op will be reused.

  Returns:
    `Tensor` of `[batch_size, doc_length, embed_dim]` with embedded sequences.

  Raises:
    ValueError: if `embed_dim` or `vocab_size` are not specified when
      `reuse` is `None` or `False`.
  """
    if not (reuse or (vocab_size and embed_dim)):
        raise ValueError(
            'Must specify vocab size and embedding dimension when not '
            'reusing. Got vocab_size=%s and embed_dim=%s' %
            (vocab_size, embed_dim))
    with variable_scope.variable_scope(scope,
                                       'EmbedSequence', [ids],
                                       reuse=reuse):
        shape = [vocab_size, embed_dim]
        if reuse and (vocab_size is None or embed_dim is None):
            shape = None
        embeddings = variables.model_variable('embeddings',
                                              shape=shape,
                                              initializer=initializer,
                                              regularizer=regularizer,
                                              trainable=trainable)
        if unique:
            return contrib_embedding_ops.embedding_lookup_unique(
                embeddings, ids)
        return embedding_ops.embedding_lookup(embeddings, ids)
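A sketch of the encoder/decoder reuse pattern mentioned in the docstring, assuming TensorFlow 1.x with tf.contrib; the scope name and shapes are illustrative.

import tensorflow as tf

encoder_ids = tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.int64)
decoder_ids = tf.constant([[7, 8, 9], [1, 0, 2]], dtype=tf.int64)

# First call creates the [vocab_size, embed_dim] embedding table.
enc_emb = tf.contrib.layers.embed_sequence(
    encoder_ids, vocab_size=10, embed_dim=4, scope='shared_embed')
# Second call reuses the same table; vocab_size/embed_dim may be omitted.
dec_emb = tf.contrib.layers.embed_sequence(
    decoder_ids, scope='shared_embed', reuse=True)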
Example #4
def _create_joint_embedding_lookup(columns_to_tensors,
                                   embedding_lookup_arguments, num_outputs,
                                   trainable, weight_collections):
    """Creates an embedding lookup for all columns sharing a single weight."""
    for arg in embedding_lookup_arguments:
        assert arg.weight_tensor is None, (
            'Joint sums for weighted sparse columns are not supported. '
            'Please use weighted_sum_from_feature_columns instead.')
        assert arg.combiner == 'sum', (
            'Combiners other than sum are not supported for joint sums. '
            'Please use weighted_sum_from_feature_columns instead.')
    assert len(embedding_lookup_arguments) >= 1, (
        'At least one column must be in the model.')
    prev_size = 0
    sparse_tensors = []
    for a in embedding_lookup_arguments:
        t = a.input_tensor
        values = t.values + prev_size
        prev_size += a.vocab_size
        sparse_tensors.append(
            sparse_tensor_py.SparseTensor(t.indices, values, t.dense_shape))
    sparse_tensor = sparse_ops.sparse_concat(1, sparse_tensors)
    with variable_scope.variable_scope(None,
                                       default_name='linear_weights',
                                       values=columns_to_tensors.values()):
        variable = contrib_variables.model_variable(
            name='weights',
            shape=[prev_size, num_outputs],
            dtype=dtypes.float32,
            initializer=init_ops.zeros_initializer(),
            trainable=trainable,
            collections=weight_collections)
        if fc._is_variable(variable):  # pylint: disable=protected-access
            variable = [variable]
        else:
            variable = variable._get_variable_list()  # pylint: disable=protected-access
        predictions = embedding_ops.safe_embedding_lookup_sparse(
            variable,
            sparse_tensor,
            sparse_weights=None,
            combiner='sum',
            name='_weights')
        return variable, predictions
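A plain-Python sketch of the id-offsetting trick used above: each column's sparse ids are shifted by the running vocabulary size so that a single joint weight matrix can serve all columns. The vocabulary sizes and ids here are hypothetical.

vocab_sizes = [3, 5]                # hypothetical per-column vocabulary sizes
ids_per_column = [[0, 2], [1, 4]]   # hypothetical sparse ids for each column

prev_size = 0
joint_ids = []
for vocab_size, ids in zip(vocab_sizes, ids_per_column):
    # Shift this column's ids into its own slice of the joint id space.
    joint_ids.extend(i + prev_size for i in ids)
    prev_size += vocab_size
print(joint_ids)  # [0, 2, 4, 7] -> row indices into a [prev_size, num_outputs] weight matrix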
Example #5
    def testSeparableConvWithResourceVar(self):
        graph = ops.Graph()
        with graph.as_default():
            with variable_scope.variable_scope('', use_resource=True):
                batch_size, height, width, depth = 5, 128, 128, 3
                input1 = array_ops.zeros((batch_size, height, width, depth))
                kernel_size, depth_multiplier = 3, 1
                depthwise_shape = [
                    kernel_size, kernel_size, depth, depth_multiplier
                ]
                depthwise_weights = variables.model_variable(
                    'depthwise_weights', shape=depthwise_shape)
                strides = [1, 1, 1, 1]
                with variable_scope.variable_scope('depthwise_conv_1'):
                    conv1 = nn.depthwise_conv2d(input1,
                                                depthwise_weights,
                                                strides,
                                                padding='SAME')
                with variable_scope.variable_scope('depthwise_conv_2'):
                    conv2 = nn.depthwise_conv2d(conv1,
                                                depthwise_weights,
                                                strides,
                                                padding='SAME')
                    math_ops.add(conv2, input1, name='add')

        quantize.Quantize(graph, True)

        # Test that the weights and activations of all convs have been quantized.
        quant_node_name = 'FakeQuantWithMinMaxVars'
        weights_quant = graph.get_operation_by_name(
            'depthwise_conv_1/weights_quant/' + quant_node_name)
        self.assertEqual(weights_quant.type, quant_node_name)
        act_quant = graph.get_operation_by_name('depthwise_conv_1/act_quant/' +
                                                quant_node_name)
        self.assertEqual(act_quant.type, quant_node_name)

        weights_quant = graph.get_operation_by_name(
            'depthwise_conv_2/weights_quant/' + quant_node_name)
        self.assertEqual(weights_quant.type, quant_node_name)
        act_quant = graph.get_operation_by_name('depthwise_conv_2/act_quant/' +
                                                quant_node_name)
        self.assertEqual(act_quant.type, quant_node_name)
Example #6
def _create_embedding_lookup(column, columns_to_tensors,
                             embedding_lookup_arguments, num_outputs,
                             trainable, weight_collections):
    """Creates variables and returns predictions for linear weights in a model.

  Args:
    column: the column we're working on.
    columns_to_tensors: a map from column name to tensors.
    embedding_lookup_arguments: arguments for embedding lookup.
    num_outputs: how many outputs.
    trainable: whether the variable we create is trainable.
    weight_collections: weights will be placed here.

  Returns:
    variables: the created embeddings.
    predictions: the computed predictions.
  """
    with variable_scope.variable_scope(None,
                                       default_name=column.name,
                                       values=columns_to_tensors.values()):
        variable = contrib_variables.model_variable(
            name='weights',
            shape=[embedding_lookup_arguments.vocab_size, num_outputs],
            dtype=dtypes.float32,
            initializer=embedding_lookup_arguments.initializer,
            trainable=trainable,
            collections=weight_collections)
        if fc._is_variable(variable):  # pylint: disable=protected-access
            variable = [variable]
        else:
            variable = variable._get_variable_list()  # pylint: disable=protected-access
        predictions = embedding_ops.safe_embedding_lookup_sparse(
            variable,
            embedding_lookup_arguments.input_tensor,
            sparse_weights=embedding_lookup_arguments.weight_tensor,
            combiner=embedding_lookup_arguments.combiner,
            name=column.name + '_weights')
        return variable, predictions
def instance_norm(inputs,
                  center=True,
                  scale=True,
                  epsilon=1e-6,
                  activation_fn=None,
                  param_initializers=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  data_format=DATA_FORMAT_NHWC,
                  scope=None):
    """Functional interface for the instance normalization layer.

  Reference: https://arxiv.org/abs/1607.08022.

    "Instance Normalization: The Missing Ingredient for Fast Stylization"
    Dmitry Ulyanov, Andrea Vedaldi, Victor Lempitsky

  Args:
    inputs: A tensor with 2 or more dimensions, where the first dimension has
      `batch_size`. The normalization is over all but the last dimension if
      `data_format` is `NHWC` and the second dimension if `data_format` is
      `NCHW`.
    center: If True, add offset of `beta` to normalized tensor. If False, `beta`
      is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is not used. When
      the next layer is linear (this also applies to, e.g., `nn.relu`), this
      can be disabled since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta, gamma, moving mean and
      moving variance.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer, `scope` must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    scope: Optional scope for `variable_scope`.

  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If rank or channels dimension of `inputs` is undefined.
  """
    inputs = ops.convert_to_tensor(inputs)
    inputs_shape = inputs.shape
    inputs_rank = inputs.shape.ndims

    if inputs_rank is None:
        raise ValueError('Inputs %s has undefined rank.' % inputs.name)
    if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
        raise ValueError('data_format has to be either NCHW or NHWC.')

    with variable_scope.variable_scope(scope,
                                       'InstanceNorm', [inputs],
                                       reuse=reuse) as sc:
        if data_format == DATA_FORMAT_NCHW:
            reduction_axis = 1
            # For NCHW format, rather than relying on implicit broadcasting, we
            # explicitly reshape the params to params_shape_broadcast when computing
            # the moments and the batch normalization.
            params_shape_broadcast = list([1, inputs_shape[1].value] +
                                          [1 for _ in range(2, inputs_rank)])
        else:
            reduction_axis = inputs_rank - 1
            params_shape_broadcast = None
        moments_axes = list(range(inputs_rank))
        del moments_axes[reduction_axis]
        del moments_axes[0]
        params_shape = inputs_shape[reduction_axis:reduction_axis + 1]
        if not params_shape.is_fully_defined():
            raise ValueError('Inputs %s has undefined channels dimension %s.' %
                             (inputs.name, params_shape))

        # Allocate parameters for the beta and gamma of the normalization.
        beta, gamma = None, None
        dtype = inputs.dtype.base_dtype
        if param_initializers is None:
            param_initializers = {}
        if center:
            beta_collections = utils.get_variable_collections(
                variables_collections, 'beta')
            beta_initializer = param_initializers.get(
                'beta', init_ops.zeros_initializer())
            beta = variables.model_variable('beta',
                                            shape=params_shape,
                                            dtype=dtype,
                                            initializer=beta_initializer,
                                            collections=beta_collections,
                                            trainable=trainable)
            if params_shape_broadcast:
                beta = array_ops.reshape(beta, params_shape_broadcast)
        if scale:
            gamma_collections = utils.get_variable_collections(
                variables_collections, 'gamma')
            gamma_initializer = param_initializers.get(
                'gamma', init_ops.ones_initializer())
            gamma = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=gamma_initializer,
                                             collections=gamma_collections,
                                             trainable=trainable)
            if params_shape_broadcast:
                gamma = array_ops.reshape(gamma, params_shape_broadcast)

        # Calculate the moments (instance activations).
        mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)

        # Compute instance normalization.
        outputs = nn.batch_normalization(inputs,
                                         mean,
                                         variance,
                                         beta,
                                         gamma,
                                         epsilon,
                                         name='instancenorm')
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           outputs)
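A minimal usage sketch, assuming TensorFlow 1.x with tf.contrib; the input shape is illustrative.

import tensorflow as tf

images = tf.random_normal([8, 32, 32, 3])             # NHWC input
normalized = tf.contrib.layers.instance_norm(images)  # per-sample, per-channel statistics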
def group_norm(inputs,
               groups=32,
               channels_axis=-1,
               reduction_axes=(-3, -2),
               center=True,
               scale=True,
               epsilon=1e-6,
               activation_fn=None,
               param_initializers=None,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               scope=None,
               mean_close_to_zero=False):
    """Functional interface for the group normalization layer.

  Reference: https://arxiv.org/abs/1803.08494.

    "Group Normalization", Yuxin Wu, Kaiming He

  Args:
    inputs: A Tensor with at least 2 dimensions, one of which is channels. All
      shape dimensions except for batch must be fully defined.
    groups: Integer. Divide the channels into this number of groups over which
      normalization statistics are computed. This number must be commensurate
      with the number of channels in `inputs`.
    channels_axis: An integer. Specifies the index of the channels axis, which
      will be broken into `groups`, over each of which statistics are computed.
      Must be mutually exclusive with `reduction_axes`. Preferred usage is to
      specify negative integers to be agnostic as to whether a batch dimension
      is included.
    reduction_axes: Tuple of integers. Specifies the dimensions over which
      statistics will be accumulated. Must be mutually exclusive with
      `channels_axis`. Statistics will not be accumulated across axes not
      specified in `reduction_axes` or `channels_axis`. Preferred usage is to
      specify negative integers to be agnostic to whether a batch dimension is
      included.

      Some sample usage cases:
        NHWC format: channels_axis=-1, reduction_axes=[-3, -2]
        NCHW format: channels_axis=-3, reduction_axes=[-2, -1]

    center: If True, add offset of `beta` to normalized tensor. If False, `beta`
      is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is not used. When
      the next layer is linear (this also applies to, e.g., `nn.relu`), this
      can be disabled since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta, gamma, moving mean and
      moving variance.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer, `scope` must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    scope: Optional scope for `variable_scope`.
    mean_close_to_zero: The mean of `input` before ReLU will be close to zero
      when batch size >= 4k for Resnet-50 on TPU. If `True`, use
      `nn.sufficient_statistics` and `nn.normalize_moments` to calculate the
      variance. This is the same behavior as `fused=True` in batch
      normalization. If `False`, use `nn.moments` to calculate the variance.
      When `mean` is close to zero (e.g. 1e-4), using `mean` to calculate the
      variance may give poor results due to repeated round-off error and
      denormalization of `mean`. When `mean` is large (e.g. 1e2),
      sum(`input`^2) is so large that only the high-order digits of the
      elements are accumulated. In that case, computing the variance as
      sum((`input` - `mean`)^2)/n is more accurate than
      sum(`input`^2)/n - `mean`^2.


  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If rank or channels dimension of `inputs` is undefined.
    ValueError: If number of groups is not commensurate with number of channels.
    ValueError: If reduction_axes or channels_axis are out of bounds.
    ValueError: If reduction_axes are not mutually exclusive with channels_axis.
  """
    # TODO(shlens): Support partially defined shapes for the inputs.
    inputs = ops.convert_to_tensor(inputs)

    if inputs.shape.ndims is None:
        raise ValueError('Inputs %s has undefined rank.' % inputs.name)
    if channels_axis > (inputs.shape.ndims - 1):
        raise ValueError('Axis is out of bounds.')

    # Use dynamic shape for not fully defined dimensions in the inputs.
    dynamic_shape = array_ops.shape(inputs)
    input_shape_list = []
    for i, dim in enumerate(inputs.shape):
        if dim.value is None:
            input_shape_list.append(dynamic_shape[i])
        else:
            input_shape_list.append(dim)

    # Standardize the channels_axis to be positive and identify # of channels.
    if channels_axis < 0:
        channels_axis = inputs.shape.ndims + channels_axis
    channels = inputs.shape[channels_axis].value

    if channels is None:
        raise ValueError('Inputs %s has undefined channel dimension: %d.' %
                         (inputs.name, channels_axis))

    # Standardize the reduction_axes to be positive.
    reduction_axes = list(reduction_axes)
    for i in range(len(reduction_axes)):
        if reduction_axes[i] < 0:
            reduction_axes[i] += inputs.shape.ndims

    for a in reduction_axes:
        if a >= inputs.shape.ndims:
            raise ValueError('Axis is out of bounds.')
        if inputs.shape[a].value is None:
            raise ValueError('Inputs %s has undefined dimensions %d.' %
                             (inputs.name, a))
        if channels_axis == a:
            raise ValueError('reduction_axis must be mutually exclusive '
                             'with channels_axis')
    if groups > channels:
        raise ValueError('Invalid groups %d for %d channels.' %
                         (groups, channels))
    if channels % groups != 0:
        raise ValueError('%d channels is not commensurate with %d groups.' %
                         (channels, groups))

    # Determine axes before channels. Some examples of common image formats:
    #  'NCHW': before = [N], after = [HW]
    #  'NHWC': before = [NHW], after = []
    axes_before_channels = input_shape_list[:channels_axis]
    axes_after_channels = input_shape_list[channels_axis + 1:]

    # Manually broadcast the parameters to conform to the number of groups.
    params_shape_broadcast = ([1] * len(axes_before_channels) +
                              [groups, channels // groups] +
                              [1] * len(axes_after_channels))

    # Reshape the input by the group within the channel dimension.
    inputs_shape = (axes_before_channels + [groups, channels // groups] +
                    axes_after_channels)
    inputs = array_ops.reshape(inputs, inputs_shape)

    # Determine the dimensions across which moments are calculated.
    moments_axes = [channels_axis + 1]
    for a in reduction_axes:
        if a > channels_axis:
            moments_axes.append(a + 1)
        else:
            moments_axes.append(a)

    with variable_scope.variable_scope(scope,
                                       'GroupNorm', [inputs],
                                       reuse=reuse) as sc:
        # Note that params_shape is always the number of channels.
        params_shape = [channels]

        # Allocate parameters for the beta and gamma of the normalization.
        beta, gamma = None, None
        dtype = inputs.dtype.base_dtype
        if param_initializers is None:
            param_initializers = {}
        if center:
            beta_collections = utils.get_variable_collections(
                variables_collections, 'beta')
            beta_initializer = param_initializers.get(
                'beta', init_ops.zeros_initializer())
            beta = variables.model_variable('beta',
                                            shape=params_shape,
                                            dtype=dtype,
                                            initializer=beta_initializer,
                                            collections=beta_collections,
                                            trainable=trainable)
            beta = array_ops.reshape(beta, params_shape_broadcast)

        if scale:
            gamma_collections = utils.get_variable_collections(
                variables_collections, 'gamma')
            gamma_initializer = param_initializers.get(
                'gamma', init_ops.ones_initializer())
            gamma = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=gamma_initializer,
                                             collections=gamma_collections,
                                             trainable=trainable)
            gamma = array_ops.reshape(gamma, params_shape_broadcast)

        # Calculate the moments.
        if mean_close_to_zero:
            # The one-pass algorithm gives a better result when the mean is close to zero.
            counts, means_ss, variance_ss, _ = nn.sufficient_statistics(
                inputs, moments_axes, keep_dims=True)
            mean, variance = nn.normalize_moments(counts,
                                                  means_ss,
                                                  variance_ss,
                                                  shift=None)
        else:
            mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)

        # Compute normalization.
        # TODO(shlens): Fix nn.batch_normalization to handle the 5-D Tensor
        # appropriately so that this operation may be faster.
        gain = math_ops.rsqrt(variance + epsilon)
        offset = -mean * gain
        if gamma is not None:
            gain *= gamma
            offset *= gamma
        if beta is not None:
            offset += beta
        outputs = inputs * gain + offset

        # Collapse the groups into the channel dimension.
        outputs = array_ops.reshape(outputs, input_shape_list)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           outputs)
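A minimal usage sketch, assuming TensorFlow 1.x with tf.contrib; the shape and group count are illustrative (groups must divide the channel count).

import tensorflow as tf

features = tf.random_normal([8, 32, 32, 64])  # NHWC, 64 channels
normalized = tf.contrib.layers.group_norm(
    features, groups=8, channels_axis=-1, reduction_axes=(-3, -2))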
Example #9
def weighted_sum_from_feature_columns(columns_to_tensors,
                                      feature_columns,
                                      num_outputs,
                                      weight_collections=None,
                                      trainable=True,
                                      scope=None):
    """A tf.contrib.layers style linear prediction builder based on FeatureColumn.

  Generally a single example in training data is described with feature columns.
  This function generates a weighted sum for each of `num_outputs` outputs. The
  weighted sum corresponds to logits in classification problems and to the
  prediction itself in linear regression problems.

  Example:

    ```
    # Building model for training
    feature_columns = (
        real_valued_column("my_feature1"),
        ...
    )
    columns_to_tensor = tf.io.parse_example(...)
    logits = weighted_sum_from_feature_columns(
        columns_to_tensors=columns_to_tensor,
        feature_columns=feature_columns,
        num_outputs=1)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                   logits=logits)
    ```

  Args:
    columns_to_tensors: A mapping from feature column to tensors. A 'string'
      key means a base (untransformed) feature. It can also have a
      FeatureColumn as a key, meaning that the FeatureColumn was already
      transformed by the input pipeline. For example, `inflow` may have
      handled transformations.
    feature_columns: A set containing all the feature columns. All items in the
      set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying the number of outputs.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:

      * A Tensor which represents predictions of a linear model.
      * A dictionary which maps feature_column to corresponding Variable.
      * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.
  """
    columns_to_tensors = columns_to_tensors.copy()
    check_feature_columns(feature_columns)
    with variable_scope.variable_scope(
            scope,
            default_name='weighted_sum_from_feature_columns',
            values=columns_to_tensors.values()):
        output_tensors = []
        column_to_variable = {}
        transformer = _Transformer(columns_to_tensors)
        # pylint: disable=protected-access
        for column in sorted(set(feature_columns), key=lambda x: x.key):
            transformed_tensor = transformer.transform(column)
            try:
                embedding_lookup_arguments = column._wide_embedding_lookup_arguments(
                    transformed_tensor)
                variable, predictions = _create_embedding_lookup(
                    column, columns_to_tensors, embedding_lookup_arguments,
                    num_outputs, trainable, weight_collections)
            except NotImplementedError:
                with variable_scope.variable_scope(
                        None,
                        default_name=column.name,
                        values=columns_to_tensors.values()):
                    tensor = column._to_dense_tensor(transformed_tensor)
                    tensor = _maybe_reshape_input_tensor(tensor,
                                                         column.name,
                                                         output_rank=2)
                    variable = [
                        contrib_variables.model_variable(
                            name='weight',
                            shape=[tensor.get_shape()[1], num_outputs],
                            initializer=init_ops.zeros_initializer(),
                            trainable=trainable,
                            collections=weight_collections)
                    ]
                    predictions = math_ops.matmul(tensor,
                                                  variable[0],
                                                  name='matmul')
            except ValueError as ee:
                raise ValueError(
                    'Error creating weighted sum for column: {}.\n'
                    '{}'.format(column.name, ee))
            output_tensors.append(
                array_ops.reshape(predictions, shape=(-1, num_outputs)))
            column_to_variable[column] = variable
            _log_variable(variable)
            fc._maybe_restore_from_checkpoint(column._checkpoint_path(),
                                              variable)  # pylint: disable=protected-access
        # pylint: enable=protected-access
        predictions_no_bias = math_ops.add_n(output_tensors)
        bias = contrib_variables.model_variable(
            'bias_weight',
            shape=[num_outputs],
            initializer=init_ops.zeros_initializer(),
            trainable=trainable,
            collections=_add_variable_collection(weight_collections))
        _log_variable(bias)
        predictions = nn_ops.bias_add(predictions_no_bias, bias)

        return predictions, column_to_variable, bias
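A self-contained sketch with a single real-valued column, assuming TensorFlow 1.x contrib feature columns; the column name and values are illustrative.

import tensorflow as tf

price = tf.contrib.layers.real_valued_column('price')
columns_to_tensors = {'price': tf.constant([[10.0], [20.0]])}
logits, column_to_variable, bias = tf.contrib.layers.weighted_sum_from_feature_columns(
    columns_to_tensors=columns_to_tensors,
    feature_columns=(price,),
    num_outputs=1)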
Example #10
def joint_weighted_sum_from_feature_columns(columns_to_tensors,
                                            feature_columns,
                                            num_outputs,
                                            weight_collections=None,
                                            trainable=True,
                                            scope=None):
    """A restricted linear prediction builder based on FeatureColumns.

  As long as all feature columns are unweighted sparse columns, this computes the
  prediction of a linear model that stores all weights in a single variable.

  Args:
    columns_to_tensors: A mapping from feature column to tensors. A 'string'
      key means a base (untransformed) feature. It can also have a
      FeatureColumn as a key, meaning that the FeatureColumn was already
      transformed by the input pipeline. For example, `inflow` may have
      handled transformations.
    feature_columns: A set containing all the feature columns. All items in the
      set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying the number of outputs.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:

    * A Tensor which represents predictions of a linear model.
    * A list of Variables storing the weights.
    * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.

  """
    columns_to_tensors = columns_to_tensors.copy()
    check_feature_columns(feature_columns)
    with variable_scope.variable_scope(
            scope,
            default_name='joint_weighted_sum_from_feature_columns',
            values=columns_to_tensors.values()):
        transformer = _Transformer(columns_to_tensors)
        embedding_lookup_arguments = []
        for column in sorted(set(feature_columns), key=lambda x: x.key):
            transformed_tensor = transformer.transform(column)
            try:
                embedding_lookup_arguments.append(
                    column._wide_embedding_lookup_arguments(
                        transformed_tensor))  # pylint: disable=protected-access
            except NotImplementedError:
                raise NotImplementedError(
                    'Real-valued columns are not supported. '
                    'Use weighted_sum_from_feature_columns '
                    'instead, or bucketize these columns.')

        variable, predictions_no_bias = _create_joint_embedding_lookup(
            columns_to_tensors, embedding_lookup_arguments, num_outputs,
            trainable, weight_collections)
        bias = contrib_variables.model_variable(
            'bias_weight',
            shape=[num_outputs],
            initializer=init_ops.zeros_initializer(),
            trainable=trainable,
            collections=_add_variable_collection(weight_collections))
        _log_variable(bias)
        predictions = nn_ops.bias_add(predictions_no_bias, bias)

        return predictions, variable, bias
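A minimal sketch of a joint linear model over one unweighted sparse column, assuming TensorFlow 1.x contrib feature columns; the feature name, keys, and values are illustrative.

import tensorflow as tf

language = tf.contrib.layers.sparse_column_with_keys(
    'language', keys=['en', 'fr', 'de'])
columns_to_tensors = {
    'language': tf.SparseTensor(indices=[[0, 0], [1, 0]],
                                values=['en', 'de'],
                                dense_shape=[2, 1])
}
logits, weights, bias = tf.contrib.layers.joint_weighted_sum_from_feature_columns(
    columns_to_tensors=columns_to_tensors,
    feature_columns=(language,),
    num_outputs=1)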