def weighted_sum_from_feature_columns(columns_to_tensors, feature_columns, num_outputs, weight_collections=None, trainable=True, scope=None): """A tf.contrib.layer style linear prediction builder based on FeatureColumns. Generally a single example in training data is described with feature columns. This function generates weighted sum for each num_outputs. Weighted sum refers to logits in classification problems. It refers to prediction itself for linear regression problems. An example usage of weighted_sum_from_feature_columns is as follows: # Building model for training columns_to_tensor = tf.parse_example(...) logits = weighted_sum_from_feature_columns( columns_to_tensors=columns_to_tensor, feature_columns=feature_columns, num_outputs=1) loss = tf.nn.sigmoid_cross_entropy_with_logits(logits, labels) where feature_columns can be defined as follows: occupation = sparse_column_with_hash_bucket(column_name="occupation", hash_bucket_size=1000) occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16, combiner="sum") age = real_valued_column("age") age_buckets = bucketized_column( source_column=age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]) occupation_x_age = crossed_column(columns=[occupation, age_buckets], hash_bucket_size=10000) feature_columns=[occupation_emb, occupation_x_age] Args: columns_to_tensors: A mapping from feature column to tensors. 'string' key means a base feature (not-transformed). It can have FeatureColumn as a key too. That means that FeatureColumn is already transformed by input pipeline. For example, `inflow` may have handled transformations. feature_columns: A set containing all the feature columns. All items in the set should be instances of classes derived from FeatureColumn. num_outputs: An integer specifying number of outputs. Default value is 1. weight_collections: List of graph collections to which weights are added. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for variable_scope. Returns: A tuple of followings: * A Tensor which represents predictions of a linear model. * A dictionary which maps feature_column to corresponding Variable. * A Variable which is used for bias. Raises: ValueError: if FeatureColumn cannot be used for linear predictions. """ check_feature_columns(feature_columns) with variable_scope.variable_scope( scope, default_name='weighted_sum_from_feature_columns', values=columns_to_tensors.values()): output_tensors = [] column_to_variable = dict() transformer = _Transformer(columns_to_tensors) for column in sorted(set(feature_columns), key=lambda x: x.key): with variable_scope.variable_scope( None, default_name=column.name, values=columns_to_tensors.values()): try: transformed_tensor = transformer.transform(column) predictions, variable = column.to_weighted_sum(transformed_tensor, num_outputs, weight_collections, trainable) except ValueError as e: raise ValueError('Error creating weighted sum for column: {}.\n' '{}'.format(column.name, e)) output_tensors.append(predictions) column_to_variable[column] = variable _log_variable(variable) predictions_no_bias = math_ops.add_n(output_tensors) bias = contrib_variables.model_variable( 'bias_weight', shape=[num_outputs], initializer=init_ops.zeros_initializer, collections=fc._add_variable_collection(weight_collections)) # pylint: disable=protected-access _log_variable(bias) predictions = nn_ops.bias_add(predictions_no_bias, bias) return predictions, column_to_variable, bias
def weighted_sum_from_feature_columns(columns_to_tensors, feature_columns, num_outputs, weight_collections=None, trainable=True, scope=None): """A tf.contrib.layer style linear prediction builder based on FeatureColumns. Generally a single example in training data is described with feature columns. This function generates weighted sum for each num_outputs. Weighted sum refers to logits in classification problems. It refers to prediction itself for linear regression problems. An example usage of weighted_sum_from_feature_columns is as follows: # Building model for training columns_to_tensor = tf.parse_example(...) logits = weighted_sum_from_feature_columns( columns_to_tensors=columns_to_tensor, feature_columns=feature_columns, num_outputs=1) loss = tf.nn.sigmoid_cross_entropy_with_logits(logits, labels) where feature_columns can be defined as follows: occupation = sparse_column_with_hash_bucket(column_name="occupation", hash_bucket_size=1000) occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16, combiner="sum") age = real_valued_column("age") age_buckets = bucketized_column( source_column=age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]) occupation_x_age = crossed_column(columns=[occupation, age_buckets], hash_bucket_size=10000) feature_columns=[occupation_emb, occupation_x_age] Args: columns_to_tensors: A mapping from feature column to tensors. 'string' key means a base feature (not-transformed). It can have FeatureColumn as a key too. That means that FeatureColumn is already transformed by input pipeline. For example, `inflow` may have handled transformations. feature_columns: A set containing all the feature columns. All items in the set should be instances of classes derived from FeatureColumn. num_outputs: An integer specifying number of outputs. Default value is 1. weight_collections: List of graph collections to which weights are added. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for variable_scope. Returns: A tuple of followings: * A Tensor which represents predictions of a linear model. * A dictionary which maps feature_column to corresponding Variable. * A Variable which is used for bias. Raises: ValueError: if FeatureColumn cannot be used for linear predictions. """ check_feature_columns(feature_columns) with variable_scope.variable_scope( scope, default_name='weighted_sum_from_feature_columns', values=columns_to_tensors.values()): output_tensors = [] column_to_variable = dict() transformer = _Transformer(columns_to_tensors) for column in sorted(set(feature_columns), key=lambda x: x.key): transformed_tensor = transformer.transform(column) try: embedding_lookup_arguments = column._to_embedding_lookup_arguments( # pylint: disable=protected-access transformed_tensor) variable, predictions = _create_embedding_lookup( column, columns_to_tensors, embedding_lookup_arguments, num_outputs, trainable, weight_collections) except NotImplementedError: with variable_scope.variable_scope( None, default_name=column.name, values=columns_to_tensors.values()): tensor = column._to_dense_tensor(transformed_tensor) # pylint: disable=protected-access variable = [ contrib_variables.model_variable( name='weight', shape=[tensor.get_shape()[1], num_outputs], initializer=init_ops.zeros_initializer, collections=weight_collections) ] predictions = math_ops.matmul(tensor, variable[0], name='matmul') except ValueError as ee: raise ValueError( 'Error creating weighted sum for column: {}.\n' '{}'.format(column.name, ee)) output_tensors.append(predictions) column_to_variable[column] = variable _log_variable(variable) _maybe_restore_from_checkpoint(column._checkpoint_path(), variable) # pylint: disable=protected-access predictions_no_bias = math_ops.add_n(output_tensors) bias = contrib_variables.model_variable( 'bias_weight', shape=[num_outputs], initializer=init_ops.zeros_initializer, collections=fc._add_variable_collection(weight_collections)) # pylint: disable=protected-access _log_variable(bias) predictions = nn_ops.bias_add(predictions_no_bias, bias) return predictions, column_to_variable, bias
def joint_weighted_sum_from_feature_columns(columns_to_tensors, feature_columns, num_outputs, weight_collections=None, trainable=True, scope=None): """A restricted linear prediction builder based on FeatureColumns. As long as all feature columns are unweighted sparse columns this computes the prediction of a linear model which stores all weights in a single variable. Args: columns_to_tensors: A mapping from feature column to tensors. 'string' key means a base feature (not-transformed). It can have FeatureColumn as a key too. That means that FeatureColumn is already transformed by input pipeline. For example, `inflow` may have handled transformations. feature_columns: A set containing all the feature columns. All items in the set should be instances of classes derived from FeatureColumn. num_outputs: An integer specifying number of outputs. Default value is 1. weight_collections: List of graph collections to which weights are added. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for variable_scope. Returns: A tuple of followings: * A Tensor which represents predictions of a linear model. * A list of Variables storing the weights. * A Variable which is used for bias. Raises: ValueError: if FeatureColumn cannot be used for linear predictions. """ check_feature_columns(feature_columns) with variable_scope.variable_scope( scope, default_name='joint_weighted_sum_from_feature_columns', values=columns_to_tensors.values()): transformer = _Transformer(columns_to_tensors) embedding_lookup_arguments = [] for column in sorted(set(feature_columns), key=lambda x: x.key): transformed_tensor = transformer.transform(column) try: embedding_lookup_arguments.append( column._to_embedding_lookup_arguments(transformed_tensor)) # pylint: disable=protected-access except NotImplementedError: raise NotImplementedError( 'Real-valued columns are not supported. ' 'Use weighted_sum_from_feature_columns ' 'instead, or bucketize these columns.') variable, predictions_no_bias = _create_joint_embedding_lookup( columns_to_tensors, embedding_lookup_arguments, num_outputs, trainable, weight_collections) bias = contrib_variables.model_variable( 'bias_weight', shape=[num_outputs], initializer=init_ops.zeros_initializer, collections=fc._add_variable_collection(weight_collections)) # pylint: disable=protected-access _log_variable(bias) predictions = nn_ops.bias_add(predictions_no_bias, bias) return predictions, variable, bias
def weighted_sum_from_feature_columns(columns_to_tensors, feature_columns, num_outputs, weight_collections=None, name=None, trainable=True): """A tf.contrib.layer style linear prediction builder based on FeatureColumns. Generally a single example in training data is described with feature columns. This function generates weighted sum for each num_outputs. Weighted sum refers to logits in classification problems. It refers to prediction itself for linear regression problems. An example usage of weighted_sum_from_feature_columns is as follows: # Building model for training columns_to_tensor = tf.parse_example(...) logits = weighted_sum_from_feature_columns( columns_to_tensor, feature_columns=feature_columns, num_outputs=1) loss = tf.nn.sigmoid_cross_entropy_with_logits(logits, labels) where feature_columns can be defined as follows: query_word = sparse_column_with_hash_bucket( 'query_word', hash_bucket_size=int(1e6)) query_embedding = embedding_column(query_word, dimension=16) age_bucket = bucketized_column(real_valued_column('age'), boundaries=[18, 21, 30, 50, 70]) query_age = crossed_column([query_word, age_bucket], hash_bucket_size=1e6) feature_columns=[query_embedding, query_age] Args: columns_to_tensors: A mapping from feature column to tensors. 'string' key means a base feature (not-transformed). It can have FeatureColumn as a key too. That means that FeatureColumn is already transformed by input pipeline. For example, `inflow` may have handled transformations. feature_columns: A set containing all the feature columns. All items in the set should be instances of classes derived from FeatureColumn. num_outputs: An integer specifying number of outputs. Default value is 1. weight_collections: List of graph collections to which weights are added. name: The name for this operation is used to name operations and to find variables. If specified it must be unique for this scope, otherwise a unique name starting with "fully_connected" will be created. See `tf.variable_op_scope` for details. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). Returns: A tuple of followings: * A Tensor which represents predictions of a linear model. * A dictionary which maps feature_column to corresponding Variable. * A Variable which is used for bias. Raises: ValueError: if FeatureColumn cannot be used for linear predictions. """ with variable_scope.variable_op_scope(columns_to_tensors.values(), name, 'weighted_sum_from_feature_columns'): output_tensors = [] column_to_variable = dict() transformer = _Transformer(columns_to_tensors) for column in sorted(set(feature_columns), key=lambda x: x.key): transformed_tensor = transformer.transform(column) predictions, variable = column.to_weighted_sum(transformed_tensor, num_outputs, weight_collections, trainable) output_tensors.append(predictions) column_to_variable[column] = variable _log_variable(variable) predictions_no_bias = math_ops.add_n(output_tensors) bias = variables.Variable( array_ops.zeros([num_outputs]), collections=fc._add_variable_collection(weight_collections), # pylint: disable=protected-access name='bias_weight') _log_variable(bias) predictions = nn_ops.bias_add(predictions_no_bias, bias) return predictions, column_to_variable, bias
def weighted_sum_from_feature_columns(columns_to_tensors, feature_columns, num_outputs, weight_collections=None, name=None, trainable=True): """A tf.contrib.layer style linear prediction builder based on FeatureColumns. Generally a single example in training data is described with feature columns. This function generates weighted sum for each num_outputs. Weighted sum refers to logits in classification problems. It refers to prediction itself for linear regression problems. An example usage of weighted_sum_from_feature_columns is as follows: # Building model for training columns_to_tensor = tf.parse_example(...) logits = weighted_sum_from_feature_columns( columns_to_tensors=columns_to_tensor, feature_columns=feature_columns, num_outputs=1) loss = tf.nn.sigmoid_cross_entropy_with_logits(logits, labels) where feature_columns can be defined as follows: occupation = sparse_column_with_hash_bucket(column_name="occupation", hash_bucket_size=1000) occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16, combiner="sum") age = real_valued_column("age") age_buckets = bucketized_column( source_column=age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]) occupation_x_age = crossed_column(columns=[occupation, age_buckets], hash_bucket_size=10000) feature_columns=[occupation_emb, occupation_x_age] Args: columns_to_tensors: A mapping from feature column to tensors. 'string' key means a base feature (not-transformed). It can have FeatureColumn as a key too. That means that FeatureColumn is already transformed by input pipeline. For example, `inflow` may have handled transformations. feature_columns: A set containing all the feature columns. All items in the set should be instances of classes derived from FeatureColumn. num_outputs: An integer specifying number of outputs. Default value is 1. weight_collections: List of graph collections to which weights are added. name: The name for this operation is used to name operations and to find variables. If specified it must be unique for this scope, otherwise a unique name starting with "fully_connected" will be created. See `tf.variable_op_scope` for details. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). Returns: A tuple of followings: * A Tensor which represents predictions of a linear model. * A dictionary which maps feature_column to corresponding Variable. * A Variable which is used for bias. Raises: ValueError: if FeatureColumn cannot be used for linear predictions. """ check_feature_columns(feature_columns) with variable_scope.variable_op_scope(columns_to_tensors.values(), name, 'weighted_sum_from_feature_columns'): output_tensors = [] column_to_variable = dict() transformer = _Transformer(columns_to_tensors) for column in sorted(set(feature_columns), key=lambda x: x.key): transformed_tensor = transformer.transform(column) predictions, variable = column.to_weighted_sum(transformed_tensor, num_outputs, weight_collections, trainable) output_tensors.append(predictions) column_to_variable[column] = variable _log_variable(variable) predictions_no_bias = math_ops.add_n(output_tensors) bias = variables.Variable( array_ops.zeros([num_outputs]), collections=fc._add_variable_collection(weight_collections), # pylint: disable=protected-access name='bias_weight') _log_variable(bias) predictions = nn_ops.bias_add(predictions_no_bias, bias) return predictions, column_to_variable, bias
def joint_weighted_sum_from_feature_columns(columns_to_tensors, feature_columns, num_outputs, weight_collections=None, trainable=True, scope=None): """A restricted linear prediction builder based on FeatureColumns. As long as all feature columns are unweighted sparse columns this computes the prediction of a linear model which stores all weights in a single variable. Args: columns_to_tensors: A mapping from feature column to tensors. 'string' key means a base feature (not-transformed). It can have FeatureColumn as a key too. That means that FeatureColumn is already transformed by input pipeline. For example, `inflow` may have handled transformations. feature_columns: A set containing all the feature columns. All items in the set should be instances of classes derived from FeatureColumn. num_outputs: An integer specifying number of outputs. Default value is 1. weight_collections: List of graph collections to which weights are added. trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). scope: Optional scope for variable_scope. Returns: A tuple of followings: * A Tensor which represents predictions of a linear model. * A list of Variables storing the weights. * A Variable which is used for bias. Raises: ValueError: if FeatureColumn cannot be used for linear predictions. """ check_feature_columns(feature_columns) with variable_scope.variable_scope( scope, default_name='joint_weighted_sum_from_feature_columns', values=columns_to_tensors.values()): transformer = _Transformer(columns_to_tensors) embedding_lookup_arguments = [] for column in sorted(set(feature_columns), key=lambda x: x.key): transformed_tensor = transformer.transform(column) try: embedding_lookup_arguments.append( column._to_embedding_lookup_arguments(transformed_tensor)) # pylint: disable=protected-access except NotImplementedError: raise NotImplementedError('Real-valued columns are not supported. ' 'Use weighted_sum_from_feature_columns ' 'instead, or bucketize these columns.') variable, predictions_no_bias = _create_joint_embedding_lookup( columns_to_tensors, embedding_lookup_arguments, num_outputs, trainable, weight_collections) bias = contrib_variables.model_variable( 'bias_weight', shape=[num_outputs], initializer=init_ops.zeros_initializer, collections=fc._add_variable_collection(weight_collections)) # pylint: disable=protected-access _log_variable(bias) predictions = nn_ops.bias_add(predictions_no_bias, bias) return predictions, variable, bias