def _model_variable_getter(getter,
                           name,
                           shape=None,
                           dtype=None,
                           initializer=None,
                           regularizer=None,
                           trainable=True,
                           collections=None,
                           caching_device=None,
                           partitioner=None,
                           rename=None,
                           use_resource=None,
                           **_):
  """Getter that uses model_variable for compatibility with core layers.

  Args:
    getter: The underlying getter; forwarded as `custom_getter`.
    name: Full variable name, possibly containing '/'-separated scopes.
    shape: Forwarded unchanged to `variables.model_variable`.
    dtype: Forwarded unchanged to `variables.model_variable`.
    initializer: Forwarded unchanged to `variables.model_variable`.
    regularizer: Forwarded unchanged to `variables.model_variable`.
    trainable: Forwarded unchanged to `variables.model_variable`.
    collections: Forwarded unchanged to `variables.model_variable`.
    caching_device: Forwarded unchanged to `variables.model_variable`.
    partitioner: Forwarded unchanged to `variables.model_variable`.
    rename: Optional mapping from the last '/'-separated component of `name`
      to the component that should replace it.
    use_resource: Forwarded unchanged to `variables.model_variable`.
    **_: Any further keyword arguments are accepted and ignored.

  Returns:
    The result of `variables.model_variable` for the (possibly renamed) name.
  """
  # Only the trailing component of the name is eligible for renaming; the
  # enclosing scope prefix (and its separator, if any) is kept verbatim.
  scope_prefix, separator, leaf_name = name.rpartition('/')
  if rename and leaf_name in rename:
    name = scope_prefix + separator + rename[leaf_name]
  return variables.model_variable(
      name,
      shape=shape,
      dtype=dtype,
      initializer=initializer,
      regularizer=regularizer,
      collections=collections,
      trainable=trainable,
      caching_device=caching_device,
      partitioner=partitioner,
      custom_getter=getter,
      use_resource=use_resource)
def bow_encoder(ids,
                vocab_size,
                embed_dim,
                sparse_lookup=True,
                initializer=None,
                regularizer=None,
                trainable=True,
                scope=None,
                reuse=None):
  """Maps a sequence of symbols to a vector per example by averaging embeddings.

  Args:
    ids: `[batch_size, doc_length]` `Tensor` or `SparseTensor` of type
      `int32` or `int64` with symbol ids.
    vocab_size: Integer number of symbols in vocabulary.
    embed_dim: Integer number of dimensions for embedding matrix.
    sparse_lookup: `bool`, if `True`, converts ids to a `SparseTensor`
      and performs a sparse embedding lookup. This is usually faster,
      but not desirable if padding tokens should have an embedding. Empty rows
      are assigned a special embedding.
    initializer: An initializer for the embeddings, if `None` default for
      current scope is used.
    regularizer: Optional regularizer for the embeddings.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional string specifying the variable scope for the op, required
      if `reuse=True`.
    reuse: If `True`, variables inside the op will be reused.

  Returns:
    Encoding `Tensor` `[batch_size, embed_dim]` produced by
    averaging embeddings.

  Raises:
    ValueError: If `embed_dim` or `vocab_size` are not specified.
    TypeError: If `sparse_lookup` is false and `ids` is a `SparseTensor`.
  """
  if not vocab_size or not embed_dim:
    raise ValueError('Must specify vocab size and embedding dimension')
  with variable_scope.variable_scope(
      scope, 'bow_encoder', [ids], reuse=reuse):
    embeddings = variables.model_variable(
        'embeddings',
        shape=[vocab_size, embed_dim],
        initializer=initializer,
        regularizer=regularizer,
        trainable=trainable)
    if sparse_lookup:
      if isinstance(ids, sparse_tensor.SparseTensor):
        sparse_ids = ids
      else:
        sparse_ids = sparse_ops.dense_to_sparse_tensor(ids)
      return contrib_embedding_ops.safe_embedding_lookup_sparse(
          [embeddings], sparse_ids, combiner='mean', default_id=0)
    if isinstance(ids, sparse_tensor.SparseTensor):
      # Format the offending value into the message. Passing it as a second
      # exception argument (as before) left the '%s' placeholder unfilled.
      raise TypeError(
          'ids are expected to be dense Tensor, got: %s' % (ids,))
    return math_ops.reduce_mean(
        embedding_ops.embedding_lookup(embeddings, ids), axis=1)
def embed_sequence(ids,
                   vocab_size=None,
                   embed_dim=None,
                   unique=False,
                   initializer=None,
                   regularizer=None,
                   trainable=True,
                   scope=None,
                   reuse=None):
  """Embeds each symbol id of a sequence, yielding a sequence of embeddings.

  A typical use is sharing one embedding matrix between an encoder and a
  decoder.

  Args:
    ids: `[batch_size, doc_length]` `Tensor` of type `int32` or `int64`
      with symbol ids.
    vocab_size: Integer number of symbols in vocabulary.
    embed_dim: Integer number of dimensions for embedding matrix.
    unique: If `True`, the lookup goes through
      `contrib_embedding_ops.embedding_lookup_unique` instead of
      `embedding_ops.embedding_lookup`.
    initializer: An initializer for the embeddings, if `None` default for
      current scope is used.
    regularizer: Optional regularizer for the embeddings.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    scope: Optional string specifying the variable scope for the op, required
      if `reuse=True`.
    reuse: If `True`, variables inside the op will be reused.

  Returns:
    `Tensor` of `[batch_size, doc_length, embed_dim]` with embedded sequences.

  Raises:
    ValueError: if `embed_dim` or `vocab_size` are not specified when
      `reuse` is `None` or `False`.
  """
  if not reuse and not (vocab_size and embed_dim):
    raise ValueError(
        'Must specify vocab size and embedding dimension when not '
        'reusing. Got vocab_size=%s and embed_dim=%s' % (vocab_size,
                                                         embed_dim))
  with variable_scope.variable_scope(
      scope, 'EmbedSequence', [ids], reuse=reuse):
    # The grouping below is exactly how Python parses the un-parenthesised
    # `a and b or c` form. Thanks to the guard above, `embed_dim is None` can
    # only be reached while reusing, so the shape is dropped only on reuse.
    if (reuse and vocab_size is None) or embed_dim is None:
      embedding_shape = None
    else:
      embedding_shape = [vocab_size, embed_dim]
    embeddings = variables.model_variable(
        'embeddings',
        shape=embedding_shape,
        initializer=initializer,
        regularizer=regularizer,
        trainable=trainable)
    lookup_fn = (contrib_embedding_ops.embedding_lookup_unique
                 if unique else embedding_ops.embedding_lookup)
    return lookup_fn(embeddings, ids)
def _create_joint_embedding_lookup(columns_to_tensors,
                                   embedding_lookup_arguments,
                                   num_outputs,
                                   trainable,
                                   weight_collections):
  """Creates an embedding lookup for all columns sharing a single weight.

  Args:
    columns_to_tensors: Mapping whose values are passed as the `values` of the
      variable scope.
    embedding_lookup_arguments: Sequence of per-column lookup arguments. Each
      must have `weight_tensor` equal to `None` and `combiner == 'sum'`; the
      `input_tensor` and `vocab_size` attributes are read as well.
    num_outputs: Second dimension of the shared weight variable.
    trainable: Forwarded to `contrib_variables.model_variable`.
    weight_collections: Forwarded as `collections` of the weight variable.

  Returns:
    A `(variable_list, predictions)` pair: the list of variables backing the
    shared weight and the result of the joint sparse lookup.
  """
  for lookup_arg in embedding_lookup_arguments:
    assert lookup_arg.weight_tensor is None, (
        'Joint sums for weighted sparse columns are not supported. '
        'Please use weighted_sum_from_feature_columns instead.')
    assert lookup_arg.combiner == 'sum', (
        'Combiners other than sum are not supported for joint sums. '
        'Please use weighted_sum_from_feature_columns instead.')
  assert len(embedding_lookup_arguments) >= 1, (
      'At least one column must be in the model.')

  # Shift every column's ids by the total vocabulary size of the columns
  # before it, so each column addresses its own slice of the shared weight.
  vocab_offset = 0
  shifted_inputs = []
  for lookup_arg in embedding_lookup_arguments:
    column_input = lookup_arg.input_tensor
    shifted_values = column_input.values + vocab_offset
    vocab_offset += lookup_arg.vocab_size
    shifted_inputs.append(
        sparse_tensor_py.SparseTensor(column_input.indices, shifted_values,
                                      column_input.dense_shape))
  joint_input = sparse_ops.sparse_concat(1, shifted_inputs)

  with variable_scope.variable_scope(
      None, default_name='linear_weights', values=columns_to_tensors.values()):
    weights = contrib_variables.model_variable(
        name='weights',
        shape=[vocab_offset, num_outputs],
        dtype=dtypes.float32,
        initializer=init_ops.zeros_initializer(),
        trainable=trainable,
        collections=weight_collections)
    # pylint: disable=protected-access
    variable = ([weights]
                if fc._is_variable(weights) else weights._get_variable_list())
    # pylint: enable=protected-access
    predictions = embedding_ops.safe_embedding_lookup_sparse(
        variable,
        joint_input,
        sparse_weights=None,
        combiner='sum',
        name='_weights')
    return variable, predictions
def testSeparableConvWithResourceVar(self):
  """Checks quantization of two depthwise convs sharing one resource variable.

  Builds two chained `nn.depthwise_conv2d` ops that use the same weights
  (created under a `use_resource=True` variable scope), adds the second conv's
  output to the input, runs `quantize.Quantize` on the graph, and asserts that
  weight and activation fake-quant ops exist under both conv scopes.
  """
  graph = ops.Graph()
  with graph.as_default(), variable_scope.variable_scope(
      '', use_resource=True):
    batch_size, height, width, depth = 5, 128, 128, 3
    input1 = array_ops.zeros((batch_size, height, width, depth))
    kernel_size, depth_multiplier = 3, 1
    depthwise_weights = variables.model_variable(
        'depthwise_weights',
        shape=[kernel_size, kernel_size, depth, depth_multiplier])
    strides = [1, 1, 1, 1]
    with variable_scope.variable_scope('depthwise_conv_1'):
      conv1 = nn.depthwise_conv2d(
          input1, depthwise_weights, strides, padding='SAME')
    with variable_scope.variable_scope('depthwise_conv_2'):
      conv2 = nn.depthwise_conv2d(
          conv1, depthwise_weights, strides, padding='SAME')
      # NOTE(review): block nesting was reconstructed from whitespace-collapsed
      # source; confirm that 'add' belongs inside the 'depthwise_conv_2' scope.
      math_ops.add(conv2, input1, name='add')

  quantize.Quantize(graph, True)

  # Test that the weights and activations of all convs have been quantized.
  quant_node_name = 'FakeQuantWithMinMaxVars'
  expected_quant_scopes = (
      'depthwise_conv_1/weights_quant/',
      'depthwise_conv_1/act_quant/',
      'depthwise_conv_2/weights_quant/',
      'depthwise_conv_2/act_quant/',
  )
  for quant_scope in expected_quant_scopes:
    quant_op = graph.get_operation_by_name(quant_scope + quant_node_name)
    self.assertEqual(quant_op.type, quant_node_name)
def _create_embedding_lookup(column,
                             columns_to_tensors,
                             embedding_lookup_arguments,
                             num_outputs,
                             trainable,
                             weight_collections):
  """Builds the linear weight variable for one column and looks it up.

  Args:
    column: the column we're working on; its `name` is used as the default
      variable-scope name and as the prefix of the lookup op name.
    columns_to_tensors: a map from column name to tensors; its values are
      passed as the `values` of the variable scope.
    embedding_lookup_arguments: arguments for embedding lookup. The
      `vocab_size`, `initializer`, `input_tensor`, `weight_tensor` and
      `combiner` attributes are read.
    num_outputs: how many outputs.
    trainable: whether the variable we create is trainable.
    weight_collections: weights will be placed here.

  Returns:
    variables: the created embeddings, as a list of variables.
    predictions: the computed predictions.
  """
  lookup_args = embedding_lookup_arguments
  with variable_scope.variable_scope(
      None, default_name=column.name, values=columns_to_tensors.values()):
    weights = contrib_variables.model_variable(
        name='weights',
        shape=[lookup_args.vocab_size, num_outputs],
        dtype=dtypes.float32,
        initializer=lookup_args.initializer,
        trainable=trainable,
        collections=weight_collections)
    # pylint: disable=protected-access
    variable = ([weights]
                if fc._is_variable(weights) else weights._get_variable_list())
    # pylint: enable=protected-access
    predictions = embedding_ops.safe_embedding_lookup_sparse(
        variable,
        lookup_args.input_tensor,
        sparse_weights=lookup_args.weight_tensor,
        combiner=lookup_args.combiner,
        name=column.name + '_weights')
    return variable, predictions
def instance_norm(inputs,
                  center=True,
                  scale=True,
                  epsilon=1e-6,
                  activation_fn=None,
                  param_initializers=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  data_format=DATA_FORMAT_NHWC,
                  scope=None):
  """Functional interface for the instance normalization layer.

  Reference: https://arxiv.org/abs/1607.08022.

    "Instance Normalization: The Missing Ingredient for Fast Stylization"
    Dmitry Ulyanov, Andrea Vedaldi, Victor Lempitsky

  Args:
    inputs: A tensor with 2 or more dimensions, where the first dimension has
      `batch_size`. Moments are computed over every axis except axis 0 and
      the channels axis, which is the last axis for `NHWC` and axis 1 for
      `NCHW`.
    center: If True, add offset of `beta` to normalized tensor. If False,
      `beta` is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is not used. When
      the next layer is linear (also e.g. `nn.relu`), this can be disabled
      since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional dict of initializers; the keys `'beta'` and
      `'gamma'` are looked up, defaulting to zeros and ones respectively.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    scope: Optional scope for `variable_scope`.

  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
    ValueError: If the channels dimension of `inputs` is undefined.
  """
  inputs = ops.convert_to_tensor(inputs)
  inputs_shape = inputs.shape
  inputs_rank = inputs.shape.ndims

  if inputs_rank is None:
    raise ValueError('Inputs %s has undefined rank.' % inputs.name)
  if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
    raise ValueError('data_format has to be either NCHW or NHWC.')

  with variable_scope.variable_scope(
      scope, 'InstanceNorm', [inputs], reuse=reuse) as sc:
    if data_format == DATA_FORMAT_NCHW:
      reduction_axis = 1
      # For NCHW format, rather than relying on implicit broadcasting, we
      # explicitly reshape the params to params_shape_broadcast when computing
      # the moments and the batch normalization.
      params_shape_broadcast = (
          [1, inputs_shape[1].value] + [1] * (inputs_rank - 2))
    else:
      reduction_axis = inputs_rank - 1
      params_shape_broadcast = None

    # Every axis except the channels axis and the batch axis (axis 0).
    moments_axes = list(range(inputs_rank))
    del moments_axes[reduction_axis]
    del moments_axes[0]

    params_shape = inputs_shape[reduction_axis:reduction_axis + 1]
    if not params_shape.is_fully_defined():
      raise ValueError('Inputs %s has undefined channels dimension %s.' %
                       (inputs.name, params_shape))

    dtype = inputs.dtype.base_dtype
    if param_initializers is None:
      param_initializers = {}

    def _make_param(param_name, default_initializer_fn):
      """Creates one per-channel parameter, reshaped for NCHW if needed."""
      param_collections = utils.get_variable_collections(
          variables_collections, param_name)
      param_initializer = param_initializers.get(param_name,
                                                 default_initializer_fn())
      param = variables.model_variable(
          param_name,
          shape=params_shape,
          dtype=dtype,
          initializer=param_initializer,
          collections=param_collections,
          trainable=trainable)
      if params_shape_broadcast:
        param = array_ops.reshape(param, params_shape_broadcast)
      return param

    # Allocate parameters for the beta and gamma of the normalization.
    beta = _make_param('beta', init_ops.zeros_initializer) if center else None
    gamma = _make_param('gamma', init_ops.ones_initializer) if scale else None

    # Calculate the moments (instance activations).
    mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)

    # Compute instance normalization.
    outputs = nn.batch_normalization(
        inputs, mean, variance, beta, gamma, epsilon, name='instancenorm')
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
def group_norm(inputs,
               groups=32,
               channels_axis=-1,
               reduction_axes=(-3, -2),
               center=True,
               scale=True,
               epsilon=1e-6,
               activation_fn=None,
               param_initializers=None,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               scope=None,
               mean_close_to_zero=False):
  """Functional interface for the group normalization layer.

  Reference: https://arxiv.org/abs/1803.08494.

    "Group Normalization", Yuxin Wu, Kaiming He

  Args:
    inputs: A Tensor with at least 2 dimensions one which is channels. All
      shape dimensions except for batch must be fully defined.
    groups: Integer. Divide the channels into this number of groups over which
      normalization statistics are computed. This number must be commensurate
      with the number of channels in `inputs`.
    channels_axis: An integer. Specifies index of channels axis which will be
      broken into `groups`, each of which whose statistics will be computed
      across. Must be mutually exclusive with `reduction_axes`. Preferred usage
      is to specify negative integers to be agnostic as to whether a batch
      dimension is included.
    reduction_axes: Tuple of integers. Specifies dimensions over which
      statistics will be accumulated. Must be mutually exclusive with
      `channels_axis`. Statistics will not be accumulated across axes not
      specified in `reduction_axes` nor `channels_axis`. Preferred usage is to
      specify negative integers to be agnostic to whether a batch dimension is
      included.

      Some sample usage cases:
        NHWC format: channels_axis=-1, reduction_axes=[-3, -2]
        NCHW format: channels_axis=-3, reduction_axes=[-2, -1]

    center: If True, add offset of `beta` to normalized tensor. If False,
      `beta` is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is not used. When
      the next layer is linear (also e.g. `nn.relu`), this can be disabled
      since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional dict of initializers; the keys `'beta'` and
      `'gamma'` are looked up, defaulting to zeros and ones respectively.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    scope: Optional scope for `variable_scope`.
    mean_close_to_zero: The mean of `input` before ReLU will be close to zero
      when batch size >= 4k for Resnet-50 on TPU. If `True`, use
      `nn.sufficient_statistics` and `nn.normalize_moments` to calculate the
      variance. This is the same behavior as `fused` equals `True` in batch
      normalization. If `False`, use `nn.moments` to calculate the variance.
      When `mean` is close to zero, like 1e-4, use `mean` to calculate the
      variance may have poor result due to repeated roundoff error and
      denormalization in `mean`. When `mean` is large, like 1e2,
      sum(`input`^2) is so large that only the high-order digits of the
      elements are being accumulated. Thus, use sum(`input` - `mean`)^2/n to
      calculate the variance has better accuracy compared to
      (sum(`input`^2)/n - `mean`^2) when `mean` is large.

  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If rank or channels dimension of `inputs` is undefined.
    ValueError: If number of groups is not positive or is not commensurate
      with number of channels.
    ValueError: If reduction_axes or channels_axis are out of bounds.
    ValueError: If reduction_axes are not mutually exclusive with
      channels_axis.
  """
  # TODO(shlens): Support partially defined shapes for the inputs.
  inputs = ops.convert_to_tensor(inputs)

  # Rank of the original (not yet regrouped) inputs; used for all axis checks.
  ndims = inputs.shape.ndims
  if ndims is None:
    raise ValueError('Inputs %s has undefined rank.' % inputs.name)
  # Reject axes on both sides of the valid range [-ndims, ndims - 1]; a too
  # negative axis would otherwise stay negative after standardization and
  # silently select the wrong dimension.
  if channels_axis > (ndims - 1) or channels_axis < -ndims:
    raise ValueError('Axis is out of bounds.')

  # Use dynamic shape for not fully defined dimensions in the inputs.
  dynamic_shape = array_ops.shape(inputs)
  input_shape_list = [
      dynamic_shape[i] if dim.value is None else dim
      for i, dim in enumerate(inputs.shape)
  ]

  # Standardize the channels_axis to be positive and identify # of channels.
  if channels_axis < 0:
    channels_axis = ndims + channels_axis
  channels = inputs.shape[channels_axis].value

  if channels is None:
    raise ValueError('Inputs %s has undefined channel dimension: %d.' %
                     (inputs.name, channels_axis))

  # Standardize the reduction_axes to be positive.
  reduction_axes = [a + ndims if a < 0 else a for a in reduction_axes]
  for a in reduction_axes:
    # Valid standardized axes are 0 .. ndims - 1. The previous `a > ndims`
    # test let `a == ndims` (and too-negative axes) through to the shape
    # indexing below instead of raising the documented ValueError.
    if a < 0 or a >= ndims:
      raise ValueError('Axis is out of bounds.')
    if inputs.shape[a].value is None:
      raise ValueError('Inputs %s has undefined dimensions %d.' %
                       (inputs.name, a))
    if channels_axis == a:
      raise ValueError('reduction_axis must be mutually exclusive '
                       'with channels_axis')

  # A non-positive group count can never split the channels; fail clearly
  # rather than with a ZeroDivisionError or an invalid reshape further down.
  if groups < 1 or groups > channels:
    raise ValueError('Invalid groups %d for %d channels.' % (groups, channels))
  if channels % groups != 0:
    raise ValueError('%d channels is not commensurate with %d groups.' %
                     (channels, groups))

  # Determine axes before channels. Some examples of common image formats:
  #  'NCHW': before = [N], after = [HW]
  #  'NHWC': before = [NHW], after = []
  axes_before_channels = input_shape_list[:channels_axis]
  axes_after_channels = input_shape_list[channels_axis + 1:]

  # Manually broadcast the parameters to conform to the number of groups.
  params_shape_broadcast = ([1] * len(axes_before_channels) +
                            [groups, channels // groups] +
                            [1] * len(axes_after_channels))

  # Reshape the input by the group within the channel dimension.
  inputs_shape = (
      axes_before_channels + [groups, channels // groups] +
      axes_after_channels)
  inputs = array_ops.reshape(inputs, inputs_shape)

  # Determine the dimensions across which moments are calculated. Splitting
  # the channels axis in two shifts every later axis up by one.
  moments_axes = [channels_axis + 1]
  for a in reduction_axes:
    moments_axes.append(a + 1 if a > channels_axis else a)

  with variable_scope.variable_scope(
      scope, 'GroupNorm', [inputs], reuse=reuse) as sc:
    # Note that the params_shape is the number of channels always.
    params_shape = [channels]

    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    dtype = inputs.dtype.base_dtype
    if param_initializers is None:
      param_initializers = {}
    if center:
      beta_collections = utils.get_variable_collections(
          variables_collections, 'beta')
      beta_initializer = param_initializers.get('beta',
                                                init_ops.zeros_initializer())
      beta = variables.model_variable(
          'beta',
          shape=params_shape,
          dtype=dtype,
          initializer=beta_initializer,
          collections=beta_collections,
          trainable=trainable)
      beta = array_ops.reshape(beta, params_shape_broadcast)

    if scale:
      gamma_collections = utils.get_variable_collections(
          variables_collections, 'gamma')
      gamma_initializer = param_initializers.get('gamma',
                                                 init_ops.ones_initializer())
      gamma = variables.model_variable(
          'gamma',
          shape=params_shape,
          dtype=dtype,
          initializer=gamma_initializer,
          collections=gamma_collections,
          trainable=trainable)
      gamma = array_ops.reshape(gamma, params_shape_broadcast)

    # Calculate the moments.
    if mean_close_to_zero:
      # One pass algorithm returns better result when mean is close to zero.
      counts, means_ss, variance_ss, _ = nn.sufficient_statistics(
          inputs, moments_axes, keep_dims=True)
      mean, variance = nn.normalize_moments(
          counts, means_ss, variance_ss, shift=None)
    else:
      mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)

    # Compute normalization.
    # TODO(shlens): Fix nn.batch_normalization to handle the 5-D Tensor
    # appropriately so that this operation may be faster.
    gain = math_ops.rsqrt(variance + epsilon)
    offset = -mean * gain
    if gamma is not None:
      gain *= gamma
      offset *= gamma
    if beta is not None:
      offset += beta
    outputs = inputs * gain + offset

    # Collapse the groups into the channel dimension.
    outputs = array_ops.reshape(outputs, input_shape_list)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
def weighted_sum_from_feature_columns(columns_to_tensors,
                                      feature_columns,
                                      num_outputs,
                                      weight_collections=None,
                                      trainable=True,
                                      scope=None):
  """A tf.contrib.layers style linear prediction builder based on FeatureColumn.

  Generally a single example in training data is described with feature
  columns. This function generates weighted sum for each num_outputs. Weighted
  sum refers to logits in classification problems. It refers to prediction
  itself for linear regression problems.

  Example:

    ```
    # Building model for training
    feature_columns = (
        real_valued_column("my_feature1"),
        ...
    )
    columns_to_tensor = tf.io.parse_example(...)
    logits = weighted_sum_from_feature_columns(
        columns_to_tensors=columns_to_tensor,
        feature_columns=feature_columns,
        num_outputs=1)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                   logits=logits)
    ```

  Args:
    columns_to_tensors: A mapping from feature column to tensors. 'string' key
      means a base feature (not-transformed). It can have FeatureColumn as a
      key too. That means that FeatureColumn is already transformed by input
      pipeline. For example, `inflow` may have handled transformations. The
      mapping is copied before use.
    feature_columns: A set containing all the feature columns. All items in
      the set should be instances of classes derived from FeatureColumn.
      Columns are de-duplicated and processed in order of their `key`.
    num_outputs: An integer specifying number of outputs.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:

      * A Tensor which represents predictions of a linear model.
      * A dictionary which maps feature_column to corresponding Variable.
      * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.
  """
  columns_to_tensors = columns_to_tensors.copy()
  check_feature_columns(feature_columns)
  with variable_scope.variable_scope(
      scope,
      default_name='weighted_sum_from_feature_columns',
      values=columns_to_tensors.values()):
    per_column_outputs = []
    column_to_variable = {}
    transformer = _Transformer(columns_to_tensors)
    # pylint: disable=protected-access
    ordered_columns = sorted(set(feature_columns), key=lambda col: col.key)
    for column in ordered_columns:
      transformed = transformer.transform(column)
      try:
        lookup_args = column._wide_embedding_lookup_arguments(transformed)
        variable, predictions = _create_embedding_lookup(
            column, columns_to_tensors, lookup_args, num_outputs, trainable,
            weight_collections)
      except NotImplementedError:
        # The column offers no wide embedding lookup: fall back to a dense
        # tensor multiplied by a freshly created weight matrix.
        with variable_scope.variable_scope(
            None,
            default_name=column.name,
            values=columns_to_tensors.values()):
          dense = column._to_dense_tensor(transformed)
          dense = _maybe_reshape_input_tensor(
              dense, column.name, output_rank=2)
          weight = contrib_variables.model_variable(
              name='weight',
              shape=[dense.get_shape()[1], num_outputs],
              initializer=init_ops.zeros_initializer(),
              trainable=trainable,
              collections=weight_collections)
          variable = [weight]
          predictions = math_ops.matmul(dense, weight, name='matmul')
      except ValueError as ee:
        raise ValueError('Error creating weighted sum for column: {}.\n'
                         '{}'.format(column.name, ee))
      per_column_outputs.append(
          array_ops.reshape(predictions, shape=(-1, num_outputs)))
      column_to_variable[column] = variable
      _log_variable(variable)
      fc._maybe_restore_from_checkpoint(column._checkpoint_path(), variable)
    # pylint: enable=protected-access

    predictions_no_bias = math_ops.add_n(per_column_outputs)
    bias = contrib_variables.model_variable(
        'bias_weight',
        shape=[num_outputs],
        initializer=init_ops.zeros_initializer(),
        trainable=trainable,
        collections=_add_variable_collection(weight_collections))
    _log_variable(bias)
    predictions = nn_ops.bias_add(predictions_no_bias, bias)

    return predictions, column_to_variable, bias
def joint_weighted_sum_from_feature_columns(columns_to_tensors,
                                            feature_columns,
                                            num_outputs,
                                            weight_collections=None,
                                            trainable=True,
                                            scope=None):
  """A restricted linear prediction builder based on FeatureColumns.

  As long as all feature columns are unweighted sparse columns this computes
  the prediction of a linear model which stores all weights in a single
  variable.

  Args:
    columns_to_tensors: A mapping from feature column to tensors. 'string' key
      means a base feature (not-transformed). It can have FeatureColumn as a
      key too. That means that FeatureColumn is already transformed by input
      pipeline. For example, `inflow` may have handled transformations. The
      mapping is copied before use.
    feature_columns: A set containing all the feature columns. All items in
      the set should be instances of classes derived from FeatureColumn.
      Columns are de-duplicated and processed in order of their `key`.
    num_outputs: An integer specifying number of outputs.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:

      * A Tensor which represents predictions of a linear model.
      * A list of Variables storing the weights.
      * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.
    NotImplementedError: if a column does not implement
      `_wide_embedding_lookup_arguments`.
  """
  columns_to_tensors = columns_to_tensors.copy()
  check_feature_columns(feature_columns)
  with variable_scope.variable_scope(
      scope,
      default_name='joint_weighted_sum_from_feature_columns',
      values=columns_to_tensors.values()):
    transformer = _Transformer(columns_to_tensors)
    embedding_lookup_arguments = []
    ordered_columns = sorted(set(feature_columns), key=lambda col: col.key)
    for column in ordered_columns:
      transformed = transformer.transform(column)
      try:
        # pylint: disable=protected-access
        lookup_args = column._wide_embedding_lookup_arguments(transformed)
        # pylint: enable=protected-access
      except NotImplementedError:
        raise NotImplementedError('Real-valued columns are not supported. '
                                  'Use weighted_sum_from_feature_columns '
                                  'instead, or bucketize these columns.')
      else:
        embedding_lookup_arguments.append(lookup_args)

    variable, predictions_no_bias = _create_joint_embedding_lookup(
        columns_to_tensors, embedding_lookup_arguments, num_outputs, trainable,
        weight_collections)
    bias = contrib_variables.model_variable(
        'bias_weight',
        shape=[num_outputs],
        initializer=init_ops.zeros_initializer(),
        trainable=trainable,
        collections=_add_variable_collection(weight_collections))
    _log_variable(bias)
    predictions = nn_ops.bias_add(predictions_no_bias, bias)

    return predictions, variable, bias