def build_model(self, features, feature_columns, is_training): """See base class.""" self._feature_columns = feature_columns partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=self._num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_scope( self._scope, values=features.values(), partitioner=partitioner) as scope: if self._joint_weights: logits, _, _ = layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=self._get_feature_columns(), num_outputs=self._num_label_columns, weight_collections=[self._scope], scope=scope) else: logits, _, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=self._get_feature_columns(), num_outputs=self._num_label_columns, weight_collections=[self._scope], scope=scope) return logits
def _get_train_ops(self, features, targets): """See base class.""" self._validate_linear_feature_columns(features) if not isinstance(self._linear_optimizer, sdca_optimizer.SDCAOptimizer): return super(LinearClassifier, self)._get_train_ops(features, targets) # SDCA currently supports binary classification only. if self._target_column.num_label_columns > 2: raise ValueError( "SDCA does not currently support multi-class classification.") global_step = contrib_variables.get_global_step() assert global_step logits, columns_to_variables, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=self._linear_feature_columns, num_outputs=self._target_column.num_label_columns, weight_collections=[self._linear_weight_collection], name="linear") with ops.control_dependencies([self._centered_bias()]): loss = self._loss(logits, targets, features) logging_ops.scalar_summary("loss", loss) train_ops = self._linear_optimizer.get_train_step( self._linear_feature_columns, self._target_column.weight_column_name, "logistic_loss", features, targets, columns_to_variables, global_step) return train_ops, loss
def _get_train_ops(self, features, targets): """See base class.""" if not isinstance(self._linear_optimizer, sdca_optimizer.SDCAOptimizer): return super(LinearRegressor, self)._get_train_ops(features, targets) global_step = contrib_variables.get_or_create_global_step() logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=self._linear_feature_columns, num_outputs=self._target_column.num_label_columns, weight_collections=[self._linear_model.get_scope_name()], scope=self._linear_model.get_scope_name())) with ops.control_dependencies([self._centered_bias()]): loss = self._target_column.loss(logits, targets, features) logging_ops.scalar_summary("loss", loss) train_feature_columns = _maybe_add_bias_column( self._linear_feature_columns, features, bias, targets, self._enable_centered_bias, columns_to_variables) train_op = self._linear_optimizer.get_train_step( train_feature_columns, self._target_column.weight_column_name, self._loss_type(), features, targets, columns_to_variables, global_step) return train_op, loss
def _get_train_ops(self, features, targets): """See base class.""" if not isinstance(self._linear_optimizer, sdca_optimizer.SDCAOptimizer): return super(LinearRegressor, self)._get_train_ops(features, targets) assert not self._joint_weights, ("_joint_weights is incompatible with" " SDCAOptimizer.") global_step = contrib_variables.get_or_create_global_step() logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=self._linear_feature_columns, num_outputs=self._head.logits_dimension, weight_collections=[self._linear_model.get_scope_name()], scope=self._linear_model.get_scope_name())) _add_bias_column(self._linear_feature_columns, features, bias, targets, columns_to_variables) def _train_op_fn(unused_loss): sdca_model, train_op = self._linear_optimizer.get_train_step( columns_to_variables, self._weight_column_name, self._loss_type(), features, targets, global_step) return sdca_model.update_weights(train_op) model_fn_ops = self._head.head_ops(features, targets, estimator.ModeKeys.TRAIN, _train_op_fn, logits=logits) return model_fn_ops.training_op, model_fn_ops.loss
def _get_train_ops(self, features, targets): """See base class.""" if not isinstance(self._linear_optimizer, sdca_optimizer.SDCAOptimizer): return super(LinearRegressor, self)._get_train_ops(features, targets) assert not self._joint_weights, ("_joint_weights is incompatible with" " SDCAOptimizer.") global_step = contrib_variables.get_or_create_global_step() logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=self._linear_feature_columns, num_outputs=self._target_column.num_label_columns, weight_collections=[self._linear_model.get_scope_name()], scope=self._linear_model.get_scope_name())) with ops.control_dependencies([self._centered_bias()]): loss = self._target_column.loss(logits, targets, features) logging_ops.scalar_summary("loss", loss) _add_bias_column(self._linear_feature_columns, features, bias, targets, columns_to_variables) train_op = self._linear_optimizer.get_train_step( columns_to_variables, self._target_column.weight_column_name, self._loss_type(), features, targets, global_step) return train_op, loss
def _linear_logits(self, features): logits, _, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=self._get_linear_feature_columns(), num_outputs=self._num_label_columns(), weight_collections=[self._linear_weight_collection], name="linear") return logits
def build_model(self, features, feature_columns, is_training): """See base class.""" features = self._get_feature_dict(features) self._feature_columns = feature_columns logits, _, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=self._get_feature_columns(), num_outputs=self._num_label_columns, weight_collections=[self._weight_collection_name], name="linear") return logits
def sdca_classifier_model_fn(features, targets, mode, params): """Estimator's linear model_fn.""" feature_columns = params["feature_columns"] optimizer = params["optimizer"] weight_column_name = params["weight_column_name"] loss_type = params["loss_type"] enable_centered_bias = params.get("enable_centered_bias", True) if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer): raise ValueError("Optimizer must be of type SDCAOptimizer") loss_fn = { "logistic_loss": _log_loss_with_two_classes, "hinge_loss": _hinge_loss, }[loss_type] logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=1)) train_feature_columns = _maybe_add_bias_column(feature_columns, features, bias, targets, enable_centered_bias, columns_to_variables) loss = None if mode != estimator.ModeKeys.INFER: loss = math_ops.reduce_mean(loss_fn(logits, targets), name="loss") train_op = None if mode == estimator.ModeKeys.TRAIN: global_step = contrib_variables.get_global_step() # TODO(zoy): Combine linear_feature_columns and columns_to_variables. train_op = optimizer.get_train_step(train_feature_columns, weight_column_name, loss_type, features, targets, columns_to_variables, global_step) predictions = {} predictions[_LOGISTIC] = math_ops.sigmoid(logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) return predictions, loss, train_op
def sdca_classifier_model_fn(features, targets, mode, params): """Estimator's linear model_fn.""" feature_columns = params["feature_columns"] optimizer = params["optimizer"] weight_column_name = params["weight_column_name"] loss_type = params["loss_type"] enable_centered_bias = params.get("enable_centered_bias", True) if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer): raise ValueError("Optimizer must be of type SDCAOptimizer") loss_fn = { "logistic_loss": _log_loss_with_two_classes, "hinge_loss": _hinge_loss, }[loss_type] logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=1)) if enable_centered_bias: _add_bias_column(feature_columns, features, bias, targets, columns_to_variables) loss = None if mode != estimator.ModeKeys.INFER: loss = math_ops.reduce_mean(loss_fn(logits, targets), name="loss") logging_ops.scalar_summary("loss", loss) train_op = None if mode == estimator.ModeKeys.TRAIN: global_step = contrib_variables.get_global_step() train_op = optimizer.get_train_step( columns_to_variables, weight_column_name, loss_type, features, targets, global_step) predictions = {} predictions[_LOGISTIC] = math_ops.sigmoid(logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) return predictions, loss, train_op
def build_linear_logits(features, linear_feature_columns, num_ps_replicas, logits_dimension, linear_parent_scope): linear_partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_scope( linear_parent_scope, values=tuple(six.itervalues(features)), partitioner=linear_partitioner) as scope: linear_logits, _, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=logits_dimension, weight_collections=[linear_parent_scope], scope=scope) return linear_logits
def linear_layer(inputs, features, outputs, weight_collections, scope): """ Generates an output by multiplying each feature value by a weight Args: inputs: Input dictionary containing the data. features: List of columns to read. outputs: Number of outputs/classes. weight_collections: Collection where to add the trainable variables. By default added to GraphKeys.VARIABLES. scope: Name of the scope where to add the variables. Returns: Output of the linear model. """ with tf.variable_scope(scope) as sc: logits, _, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=inputs, feature_columns=features, num_outputs=outputs, weight_collections=weight_collections, scope=sc) return logits
def _get_linear_train_and_loss_ops(features, target, linear_feature_columns, target_column, linear_optimizer, loss_type, centered_bias, scope_name): """Returns train and loss ops for SDCAOptimizer.""" global_step = contrib_variables.get_global_step() assert global_step logits, columns_to_variables, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=target_column.num_label_columns, weight_collections=[scope_name], scope=scope_name) with ops.control_dependencies([centered_bias]): loss = target_column.loss(logits, target, features) logging_ops.scalar_summary("loss", loss) train_op = linear_optimizer.get_train_step(linear_feature_columns, target_column.weight_column_name, loss_type, features, target, columns_to_variables, global_step) return train_op, loss
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None): """Deep Neural Net and Linear combined model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. * linear_feature_columns: An iterable containing all the feature columns used by the Linear model. * linear_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the Linear model. Defaults to the Ftrl optimizer. * joint_linear_weights: If True a single (possibly partitioned) variable will be used to store the linear model weights. It's faster, but requires all columns are sparse and have the 'sum' combiner. * dnn_feature_columns: An iterable containing all the feature columns used by the DNN model. * dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN model. Defaults to the Adagrad optimizer. * dnn_hidden_units: List of hidden units per DNN layer. * dnn_activation_fn: Activation function applied to each DNN layer. If `None`, will use `tf.nn.relu`. * dnn_dropout: When not `None`, the probability we will drop out a given DNN coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * num_ps_replicas: The number of parameter server replicas. * embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to multiply with learning rate for the embedding variables. * input_layer_min_slice_size: Optional. The min slice size of input layer partitions. If not provided, will use the default of 64M. config: `RunConfig` object to configure the runtime settings. Returns: `ModelFnOps` Raises: ValueError: If both `linear_feature_columns` and `dnn_features_columns` are empty at the same time. """ head = params["head"] linear_feature_columns = params.get("linear_feature_columns") linear_optimizer = params.get("linear_optimizer") or "Ftrl" joint_linear_weights = params.get("joint_linear_weights") dnn_feature_columns = params.get("dnn_feature_columns") dnn_optimizer = params.get("dnn_optimizer") or "Adagrad" dnn_hidden_units = params.get("dnn_hidden_units") dnn_activation_fn = params.get("dnn_activation_fn") dnn_dropout = params.get("dnn_dropout") gradient_clip_norm = params.get("gradient_clip_norm") input_layer_min_slice_size = (params.get("input_layer_min_slice_size") or 64 << 20) num_ps_replicas = config.num_ps_replicas if config else 0 embedding_lr_multipliers = params.get("embedding_lr_multipliers", {}) if not linear_feature_columns and not dnn_feature_columns: raise ValueError( "Either linear_feature_columns or dnn_feature_columns must be defined." ) features = _get_feature_dict(features) # Build DNN Logits. dnn_parent_scope = "dnn" if not dnn_feature_columns: dnn_logits = None else: input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=input_layer_min_slice_size)) input_layer_scope = dnn_parent_scope + "/input_from_feature_columns" with variable_scope.variable_scope( input_layer_scope, values=features.values(), partitioner=input_layer_partitioner) as scope: net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=scope) hidden_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( dnn_parent_scope + "/hiddenlayer_%d" % layer_id, values=[net], partitioner=hidden_layer_partitioner) as scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout)) # TODO(b/31209633): Consider adding summary before dropout. _add_hidden_layer_summary(net, scope.name) with variable_scope.variable_scope( dnn_parent_scope + "/logits", values=[net], partitioner=hidden_layer_partitioner) as scope: dnn_logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=scope) _add_hidden_layer_summary(dnn_logits, scope.name) # Build Linear logits. linear_parent_scope = "linear" if not linear_feature_columns: linear_logits = None else: linear_partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_scope( linear_parent_scope, values=features.values(), partitioner=linear_partitioner) as scope: if joint_linear_weights: linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) else: linear_logits, _, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) # Combine logits and build full model. if dnn_logits is not None and linear_logits is not None: logits = dnn_logits + linear_logits elif dnn_logits is not None: logits = dnn_logits else: logits = linear_logits def _make_training_op(training_loss): """Training op for the DNN linear combined model.""" train_ops = [] if dnn_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=contrib_variables.get_global_step(), learning_rate=_DNN_LEARNING_RATE, optimizer=_get_optimizer(dnn_optimizer), gradient_multipliers=_extract_embedding_lr_multipliers( # pylint: disable=protected-access embedding_lr_multipliers, dnn_parent_scope, input_layer_scope), clip_gradients=gradient_clip_norm, variables=ops.get_collection(dnn_parent_scope), name=dnn_parent_scope, # Empty summaries, because head already logs "loss" summary. summaries=[])) if linear_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=contrib_variables.get_global_step(), learning_rate=_linear_learning_rate( len(linear_feature_columns)), optimizer=_get_optimizer(linear_optimizer), clip_gradients=gradient_clip_norm, variables=ops.get_collection(linear_parent_scope), name=linear_parent_scope, # Empty summaries, because head already logs "loss" summary. summaries=[])) return control_flow_ops.group(*train_ops) return head.create_model_fn_ops(features, labels, mode, _make_training_op, logits=logits)
def sdca_model_fn(features, labels, mode, params): """A model_fn for linear models that use the SDCA optimizer. Args: features: A dict of `Tensor` keyed by column name. labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. Type must be one of `_BinarySvmHead`, `_RegressionHead` or `_BinaryLogisticHead`. * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: An `SDCAOptimizer` instance. * weight_column_name: A string defining the weight feature column, or None if there are no weights. * update_weights_hook: A `SessionRunHook` object or None. Used to update model weights. Returns: A `ModelFnOps` instance. Raises: ValueError: If `optimizer` is not an `SDCAOptimizer` instance. ValueError: If the type of head is neither `_BinarySvmHead`, nor `_RegressionHead` nor `_MultiClassHead`. ValueError: If mode is not any of the `ModeKeys`. """ head = params["head"] feature_columns = params["feature_columns"] optimizer = params["optimizer"] weight_column_name = params["weight_column_name"] update_weights_hook = params.get("update_weights_hook", None) if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer): raise ValueError("Optimizer must be of type SDCAOptimizer") if isinstance(head, head_lib._BinarySvmHead): # pylint: disable=protected-access loss_type = "hinge_loss" elif isinstance(head, head_lib._BinaryLogisticHead): # pylint: disable=protected-access loss_type = "logistic_loss" elif isinstance(head, head_lib._RegressionHead): # pylint: disable=protected-access assert head.logits_dimension == 1, ("SDCA only applies for " "logits_dimension=1.") loss_type = "squared_loss" else: raise ValueError("Unsupported head type: {}".format(head)) parent_scope = "linear" with variable_scope.variable_op_scope( features.values(), parent_scope) as scope: logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=1, scope=scope)) _add_bias_column(feature_columns, features, bias, columns_to_variables) def _train_op_fn(unused_loss): global_step = contrib_variables.get_global_step() sdca_model, train_op = optimizer.get_train_step(columns_to_variables, weight_column_name, loss_type, features, labels, global_step) if update_weights_hook is not None: update_weights_hook.set_parameters(sdca_model, train_op) return train_op model_fn_ops = head.create_model_fn_ops( features=features, labels=labels, mode=mode, train_op_fn=_train_op_fn, logits=logits) if update_weights_hook is not None: return model_fn_ops._replace( training_chief_hooks=(model_fn_ops.training_chief_hooks + [update_weights_hook])) return model_fn_ops
def _linear_model_fn(features, labels, mode, params, config=None): """A model_fn for linear models that use a gradient-based optimizer. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. If `None`, will use a FTRL optimizer. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * joint_weights: If True, the weights for all columns will be stored in a single (possibly partitioned) variable. It's more efficient, but it's incompatible with SDCAOptimizer, and requires all feature columns are sparse and use the 'sum' combiner. config: `RunConfig` object to configure the runtime settings. Returns: A `ModelFnOps` instance. Raises: ValueError: If mode is not any of the `ModeKeys`. """ head = params["head"] feature_columns = params["feature_columns"] optimizer = params.get("optimizer") or _get_default_optimizer(feature_columns) gradient_clip_norm = params.get("gradient_clip_norm", None) num_ps_replicas = config.num_ps_replicas if config else 0 joint_weights = params.get("joint_weights", False) if not isinstance(features, dict): features = {"": features} parent_scope = "linear" partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_scope( parent_scope, values=tuple(six.itervalues(features)), partitioner=partitioner) as scope: if joint_weights: logits, _, _ = ( layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=head.logits_dimension, weight_collections=[parent_scope], scope=scope)) else: logits, _, _ = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=head.logits_dimension, weight_collections=[parent_scope], scope=scope)) def _train_op_fn(loss): global_step = contrib_variables.get_global_step() my_vars = ops.get_collection(parent_scope) grads = gradients.gradients(loss, my_vars) if gradient_clip_norm: grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm) return (_get_optimizer(optimizer).apply_gradients( zip(grads, my_vars), global_step=global_step)) return head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)
def sdca_classifier_model_fn(features, targets, mode, params): """Linear classifier model_fn that uses the SDCA optimizer. Args: features: A dict of `Tensor` keyed by column name. targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: An `SDCAOptimizer` instance. * weight_column_name: A string defining the weight feature column, or None if there are no weights. * loss_type: A string. Must be either "logistic_loss" or "hinge_loss". * update_weights_hook: A `SessionRunHook` object or None. Used to update model weights. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. Raises: ValueError: If `optimizer` is not an `SDCAOptimizer` instance. ValueError: If mode is not any of the `ModeKeys`. """ feature_columns = params["feature_columns"] optimizer = params["optimizer"] weight_column_name = params["weight_column_name"] loss_type = params["loss_type"] update_weights_hook = params.get("update_weights_hook") if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer): raise ValueError("Optimizer must be of type SDCAOptimizer") loss_fn = { "logistic_loss": _log_loss_with_two_classes, "hinge_loss": _hinge_loss, }[loss_type] logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=1)) _add_bias_column(feature_columns, features, bias, targets, columns_to_variables) loss = None if mode != estimator.ModeKeys.INFER: loss = math_ops.reduce_mean(loss_fn(logits, targets), name="loss") logging_ops.scalar_summary("loss", loss) train_op = None if mode == estimator.ModeKeys.TRAIN: global_step = contrib_variables.get_global_step() sdca_model, train_op = optimizer.get_train_step(columns_to_variables, weight_column_name, loss_type, features, targets, global_step) if update_weights_hook is not None: update_weights_hook.set_parameters(sdca_model, train_op) predictions = {} predictions[_LOGISTIC] = math_ops.sigmoid(logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) return predictions, loss, train_op
def _linear_classifier_model_fn(features, targets, mode, params): """Linear classifier model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * feature_columns: An iterable containing all the feature columns used by the model. * n_classes: number of target classes. * weight_column_name: A string defining the weight feature column, or None if there are no weights. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * enable_centered_bias: A bool. If True, estimator will learn a centered bias variable for each class. Rest of the model structure learns the residual after centered bias. * num_ps_replicas: The number of parameter server replicas. * joint_weights: If True, the weights for all columns will be stored in a single (possibly partitioned) variable. It's more efficient, but it's incompatible with SDCAOptimizer, and requires all feature columns are sparse and use the 'sum' combiner. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. Raises: ValueError: If mode is not any of the `ModeKeys`. """ feature_columns = params["feature_columns"] optimizer = params["optimizer"] gradient_clip_norm = params.get("gradient_clip_norm", None) num_ps_replicas = params.get("num_ps_replicas", 0) joint_weights = params.get("joint_weights", False) head = params.get("head", None) if not head: # TODO(zakaria): Remove these params and make head mandatory head = head_lib._multi_class_head( # pylint: disable=protected-access params.get("n_classes"), weight_column_name=params["weight_column_name"], enable_centered_bias=params.get("enable_centered_bias", False)) if not isinstance(features, dict): features = {"": features} parent_scope = "linear" partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_op_scope( features.values(), parent_scope, partitioner=partitioner) as scope: if joint_weights: logits, _, _ = ( layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=head.logits_dimension, weight_collections=[parent_scope], scope=scope)) else: logits, _, _ = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=head.logits_dimension, weight_collections=[parent_scope], scope=scope)) def _train_op_fn(loss): global_step = contrib_variables.get_global_step() my_vars = ops.get_collection("linear") grads = gradients.gradients(loss, my_vars) if gradient_clip_norm: grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm) return (optimizer.apply_gradients( zip(grads, my_vars), global_step=global_step)) return head.head_ops(features, targets, mode, _train_op_fn, logits)
def _linear_classifier_model_fn(features, targets, mode, params): """Linear classifier model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * feature_columns: An iterable containing all the feature columns used by the model. * n_classes: number of target classes. * weight_column_name: A string defining the weight feature column, or None if there are no weights. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * enable_centered_bias: A bool. If True, estimator will learn a centered bias variable for each class. Rest of the model structure learns the residual after centered bias. * num_ps_replicas: The number of parameter server replicas. * joint_weights: If True, the weights for all columns will be stored in a single (possibly partitioned) variable. It's more efficient, but it's incompatible with SDCAOptimizer, and requires all feature columns are sparse and use the 'sum' combiner. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. Raises: ValueError: If mode is not any of the `ModeKeys`. """ feature_columns = params["feature_columns"] n_classes = params["n_classes"] weight_column_name = params["weight_column_name"] optimizer = params["optimizer"] gradient_clip_norm = params.get("gradient_clip_norm", None) enable_centered_bias = params.get("enable_centered_bias", True) num_ps_replicas = params.get("num_ps_replicas", 0) joint_weights = params.get("joint_weights", False) if not isinstance(features, dict): features = {"": features} parent_scope = "linear" num_label_columns = 1 if n_classes == 2 else n_classes loss_fn = _softmax_cross_entropy_loss if n_classes == 2: loss_fn = _log_loss_with_two_classes partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_op_scope( features.values(), parent_scope, partitioner=partitioner) as scope: if joint_weights: logits, _, _ = ( layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=num_label_columns, weight_collections=[parent_scope], scope=scope)) else: logits, _, _ = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=num_label_columns, weight_collections=[parent_scope], scope=scope)) if enable_centered_bias: logits = nn.bias_add(logits, _centered_bias(num_label_columns)) loss = None if mode != estimator.ModeKeys.INFER: loss = loss_fn(logits, targets) if weight_column_name: weight_tensor = array_ops.reshape( math_ops.to_float(features[weight_column_name]), shape=(-1,)) loss = _weighted_loss(loss, weight_tensor) else: loss = math_ops.reduce_mean(loss, name="loss") logging_ops.scalar_summary("loss", loss) train_ops = [] if mode == estimator.ModeKeys.TRAIN: global_step = contrib_variables.get_global_step() my_vars = ops.get_collection("linear") grads = gradients.gradients(loss, my_vars) if gradient_clip_norm: grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm) train_ops.append(optimizer.apply_gradients( zip(grads, my_vars), global_step=global_step)) if enable_centered_bias: train_ops.append( _centered_bias_step(targets, loss_fn, num_label_columns)) predictions = {} if n_classes == 2: predictions[_LOGISTIC] = math_ops.sigmoid(logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) return predictions, loss, control_flow_ops.group(*train_ops)
def sdca_model_fn(features, labels, mode, params, config=None): """A model_fn for linear models that use the SDCA optimizer. Args: features: A dict of `Tensor` keyed by column name. labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` with values in the set {0, 1}. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. Type must be one of `_BinarySvmHead`, `_RegressionHead` or `_BinaryLogisticHead`. * feature_columns: An iterable containing all the feature columns used by the model. * l1_regularization: Global (across all examples) L1-regularization parameter. * l2_regularization: Global (across all examples) L2-regularization parameter. * num_loss_partitions: Number of partitions of the global loss function optimized by `SDCAOptimizer`. * weight_column_name: A string defining the weight feature column, or None if there are no weights. * update_weights_hook: A `SessionRunHook` object or None. Used to update model weights. config: `RunConfig` object to configure the runtime settings. Returns: A `ModelFnOps` instance. Raises: ValueError: If the type of head is not one of `_BinarySvmHead`, `_RegressionHead` or `_MultiClassHead`. ValueError: If mode is not any of the `ModeKeys`. """ head = params["head"] feature_columns = params["feature_columns"] example_id_column = params["example_id_column"] l1_regularization = params["l1_regularization"] l2_regularization = params["l2_regularization"] num_loss_partitions = params["num_loss_partitions"] weight_column_name = params["weight_column_name"] update_weights_hook = params.get("update_weights_hook", None) partitioner = params["partitioner"] loss_type = None if isinstance(head, head_lib._BinarySvmHead): # pylint: disable=protected-access loss_type = "hinge_loss" elif isinstance(head, head_lib._BinaryLogisticHead): # pylint: disable=protected-access loss_type = "logistic_loss" elif isinstance(head, head_lib._RegressionHead): # pylint: disable=protected-access loss_type = "squared_loss" else: raise ValueError("Unsupported head type: {}".format(type(head))) assert head.logits_dimension == 1, ( "SDCA only applies to logits_dimension=1.") # Update num_loss_partitions based on number of workers. n_loss_partitions = num_loss_partitions or max(1, config.num_worker_replicas) optimizer = sdca_optimizer.SDCAOptimizer( example_id_column=example_id_column, num_loss_partitions=n_loss_partitions, symmetric_l1_regularization=l1_regularization, symmetric_l2_regularization=l2_regularization, partitioner=partitioner) parent_scope = "linear" with variable_scope.variable_scope( values=features.values(), name_or_scope=parent_scope, partitioner=partitioner) as scope: features = features.copy() features.update(layers.transform_features(features, feature_columns)) logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=1, scope=scope)) _add_bias_column(feature_columns, features, bias, columns_to_variables) def _train_op_fn(unused_loss): global_step = training_util.get_global_step() sdca_model, train_op = optimizer.get_train_step( columns_to_variables, weight_column_name, loss_type, features, labels, global_step) if update_weights_hook is not None: update_weights_hook.set_parameters(sdca_model, train_op) return train_op model_fn_ops = head.create_model_fn_ops( features=features, labels=labels, mode=mode, train_op_fn=_train_op_fn, logits=logits) if update_weights_hook is not None: return model_fn_ops._replace(training_chief_hooks=( model_fn_ops.training_chief_hooks + [update_weights_hook])) return model_fn_ops
def sdca_model_fn(features, labels, mode, params, config=None): """A model_fn for linear models that use the SDCA optimizer. Args: features: A dict of `Tensor` keyed by column name. labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` with values in the set {0, 1}. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. Type must be one of `_BinarySvmHead`, `_RegressionHead` or `_BinaryLogisticHead`. * feature_columns: An iterable containing all the feature columns used by the model. * l1_regularization: Global (across all examples) L1-regularization parameter. * l2_regularization: Global (across all examples) L2-regularization parameter. * num_loss_partitions: Number of partitions of the global loss function optimized by `SDCAOptimizer`. * weight_column_name: A string defining the weight feature column, or None if there are no weights. * update_weights_hook: A `SessionRunHook` object or None. Used to update model weights. config: `RunConfig` object to configure the runtime settings. Returns: A `ModelFnOps` instance. Raises: ValueError: If the type of head is not one of `_BinarySvmHead`, `_RegressionHead` or `_MultiClassHead`. ValueError: If mode is not any of the `ModeKeys`. """ head = params["head"] feature_columns = params["feature_columns"] example_id_column = params["example_id_column"] l1_regularization = params["l1_regularization"] l2_regularization = params["l2_regularization"] num_loss_partitions = params["num_loss_partitions"] weight_column_name = params["weight_column_name"] update_weights_hook = params.get("update_weights_hook", None) loss_type = None if isinstance(head, head_lib._BinarySvmHead): # pylint: disable=protected-access loss_type = "hinge_loss" elif isinstance(head, head_lib._BinaryLogisticHead): # pylint: disable=protected-access loss_type = "logistic_loss" elif isinstance(head, head_lib._RegressionHead): # pylint: disable=protected-access loss_type = "squared_loss" else: raise ValueError("Unsupported head type: {}".format(type(head))) assert head.logits_dimension == 1, ( "SDCA only applies to logits_dimension=1.") # Update num_loss_partitions based on number of workers. n_loss_partitions = num_loss_partitions or max(1, config.num_worker_replicas) optimizer = sdca_optimizer.SDCAOptimizer( example_id_column=example_id_column, num_loss_partitions=n_loss_partitions, symmetric_l1_regularization=l1_regularization, symmetric_l2_regularization=l2_regularization) parent_scope = "linear" with variable_scope.variable_op_scope(features.values(), parent_scope) as scope: features = features.copy() features.update(layers.transform_features(features, feature_columns)) logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=1, scope=scope)) _add_bias_column(feature_columns, features, bias, columns_to_variables) def _train_op_fn(unused_loss): global_step = contrib_variables.get_global_step() sdca_model, train_op = optimizer.get_train_step( columns_to_variables, weight_column_name, loss_type, features, labels, global_step) if update_weights_hook is not None: update_weights_hook.set_parameters(sdca_model, train_op) return train_op model_fn_ops = head.create_model_fn_ops( features=features, labels=labels, mode=mode, train_op_fn=_train_op_fn, logits=logits) if update_weights_hook is not None: return model_fn_ops._replace(training_chief_hooks=( model_fn_ops.training_chief_hooks + [update_weights_hook])) return model_fn_ops
def _linear_classifier_model_fn(features, targets, mode, params): """Linear classifier model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * feature_columns: An iterable containing all the feature columns used by the model. * n_classes: number of target classes. * weight_column_name: A string defining the weight feature column, or None if there are no weights. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * enable_centered_bias: A bool. If True, estimator will learn a centered bias variable for each class. Rest of the model structure learns the residual after centered bias. * num_ps_replicas: The number of parameter server replicas. * joint_weights: If True, the weights for all columns will be stored in a single (possibly partitioned) variable. It's more efficient, but it's incompatible with SDCAOptimizer, and requires all feature columns are sparse and use the 'sum' combiner. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. Raises: ValueError: If mode is not any of the `ModeKeys`. """ feature_columns = params["feature_columns"] optimizer = params["optimizer"] gradient_clip_norm = params.get("gradient_clip_norm", None) num_ps_replicas = params.get("num_ps_replicas", 0) joint_weights = params.get("joint_weights", False) head = params.get("head", None) if not head: # TODO(zakaria): Remove these params and make head mandatory head = head_lib._multi_class_head( # pylint: disable=protected-access params.get("n_classes"), weight_column_name=params["weight_column_name"], enable_centered_bias=params.get("enable_centered_bias", False)) if not isinstance(features, dict): features = {"": features} parent_scope = "linear" partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_op_scope(features.values(), parent_scope, partitioner=partitioner) as scope: if joint_weights: logits, _, _ = (layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=head.logits_dimension, weight_collections=[parent_scope], scope=scope)) else: logits, _, _ = (layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=head.logits_dimension, weight_collections=[parent_scope], scope=scope)) def _train_op_fn(loss): global_step = contrib_variables.get_global_step() my_vars = ops.get_collection("linear") grads = gradients.gradients(loss, my_vars) if gradient_clip_norm: grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm) return (optimizer.apply_gradients(zip(grads, my_vars), global_step=global_step)) return head.head_ops(features, targets, mode, _train_op_fn, logits)
def sdca_classifier_model_fn(features, targets, mode, params): """Linear classifier model_fn that uses the SDCA optimizer. Args: features: A dict of `Tensor` keyed by column name. targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: An `SDCAOptimizer` instance. * weight_column_name: A string defining the weight feature column, or None if there are no weights. * loss_type: A string. Must be either "logistic_loss" or "hinge_loss". * update_weights_hook: A `SessionRunHook` object or None. Used to update model weights. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. Raises: ValueError: If `optimizer` is not an `SDCAOptimizer` instance. ValueError: If mode is not any of the `ModeKeys`. """ feature_columns = params["feature_columns"] optimizer = params["optimizer"] weight_column_name = params["weight_column_name"] loss_type = params.get("loss_type", None) update_weights_hook = params.get("update_weights_hook") if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer): raise ValueError("Optimizer must be of type SDCAOptimizer") logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=1)) _add_bias_column(feature_columns, features, bias, targets, columns_to_variables) if loss_type is "hinge_loss": head = head_lib._binary_svm_head( # pylint: disable=protected-access weight_column_name=weight_column_name, enable_centered_bias=False) else: # pylint: disable=protected-access head = head_lib._multi_class_head( 2, # pylint: disable=protected-access weight_column_name=weight_column_name, enable_centered_bias=False) def _train_op_fn(unused_loss): global_step = contrib_variables.get_global_step() sdca_model, train_op = optimizer.get_train_step( columns_to_variables, weight_column_name, loss_type, features, targets, global_step) if update_weights_hook is not None: update_weights_hook.set_parameters(sdca_model, train_op) return train_op return head.head_ops(features, targets, mode, _train_op_fn, logits)
def sdca_classifier_model_fn(features, targets, mode, params): """Linear classifier model_fn that uses the SDCA optimizer. Args: features: A dict of `Tensor` keyed by column name. targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: An `SDCAOptimizer` instance. * weight_column_name: A string defining the weight feature column, or None if there are no weights. * loss_type: A string. Must be either "logistic_loss" or "hinge_loss". * update_weights_hook: A `SessionRunHook` object or None. Used to update model weights. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. Raises: ValueError: If `optimizer` is not an `SDCAOptimizer` instance. ValueError: If mode is not any of the `ModeKeys`. """ feature_columns = params["feature_columns"] optimizer = params["optimizer"] weight_column_name = params["weight_column_name"] loss_type = params.get("loss_type", None) update_weights_hook = params.get("update_weights_hook") if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer): raise ValueError("Optimizer must be of type SDCAOptimizer") logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=1)) _add_bias_column(feature_columns, features, bias, targets, columns_to_variables) if loss_type is "hinge_loss": head = head_lib._binary_svm_head( # pylint: disable=protected-access weight_column_name=weight_column_name, enable_centered_bias=False) else: # pylint: disable=protected-access head = head_lib._multi_class_head(2, # pylint: disable=protected-access weight_column_name=weight_column_name, enable_centered_bias=False) def _train_op_fn(unused_loss): global_step = contrib_variables.get_global_step() sdca_model, train_op = optimizer.get_train_step(columns_to_variables, weight_column_name, loss_type, features, targets, global_step) if update_weights_hook is not None: update_weights_hook.set_parameters(sdca_model, train_op) return train_op return head.head_ops(features, targets, mode, _train_op_fn, logits)
def _linear_classifier_model_fn(features, targets, mode, params): """Estimator's linear model_fn.""" n_classes = params["n_classes"] weight_column_name = params["weight_column_name"] feature_columns = params["feature_columns"] optimizer = params["optimizer"] gradient_clip_norm = params.get("gradient_clip_norm", None) enable_centered_bias = params.get("enable_centered_bias", True) num_ps_replicas = params.get("num_ps_replicas", 0) if not isinstance(features, dict): features = {"": features} num_label_columns = 1 if n_classes == 2 else n_classes loss_fn = _softmax_cross_entropy_loss if n_classes == 2: loss_fn = _log_loss_with_two_classes feat_values = features.values() if isinstance(features, dict) else [features] partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_op_scope(feat_values, "linear", partitioner=partitioner) as scope: logits, _, _ = (layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=num_label_columns, weight_collections=["linear"], scope=scope)) if enable_centered_bias: logits = nn.bias_add(logits, _centered_bias(num_label_columns)) loss = None if mode != estimator.ModeKeys.INFER: loss = loss_fn(logits, targets) if weight_column_name: weight_tensor = array_ops.reshape(math_ops.to_float( features[weight_column_name]), shape=(-1, )) loss = _weighted_loss(loss, weight_tensor) else: loss = math_ops.reduce_mean(loss, name="loss") train_ops = [] if mode == estimator.ModeKeys.TRAIN: global_step = contrib_variables.get_global_step() my_vars = ops.get_collection("linear") grads = gradients.gradients(loss, my_vars) if gradient_clip_norm: grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm) train_ops.append( optimizer.apply_gradients(zip(grads, my_vars), global_step=global_step)) if enable_centered_bias: train_ops.append( _centered_bias_step(targets, loss_fn, num_label_columns)) predictions = {} if n_classes == 2: predictions[_LOGISTIC] = math_ops.sigmoid(logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) return predictions, loss, control_flow_ops.group(*train_ops)
def sdca_model_fn(features, labels, mode, params): """A model_fn for linear models that use the SDCA optimizer. Args: features: A dict of `Tensor` keyed by column name. labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. Type must be one of `_BinarySvmHead`, `_RegressionHead` or `_MultiClassHead`. * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: An `SDCAOptimizer` instance. * weight_column_name: A string defining the weight feature column, or None if there are no weights. * update_weights_hook: A `SessionRunHook` object or None. Used to update model weights. Returns: A `ModelFnOps` instance. Raises: ValueError: If `optimizer` is not an `SDCAOptimizer` instance. ValueError: If the type of head is neither `_BinarySvmHead`, nor `_RegressionHead` nor `_MultiClassHead`. ValueError: If mode is not any of the `ModeKeys`. """ head = params["head"] feature_columns = params["feature_columns"] optimizer = params["optimizer"] weight_column_name = params["weight_column_name"] update_weights_hook = params.get("update_weights_hook", None) if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer): raise ValueError("Optimizer must be of type SDCAOptimizer") # pylint: disable=protected-access if isinstance(head, head_lib._BinarySvmHead): loss_type = "hinge_loss" elif isinstance( head, (head_lib._MultiClassHead, head_lib._BinaryLogisticHead)): loss_type = "logistic_loss" elif isinstance(head, head_lib._RegressionHead): loss_type = "squared_loss" else: raise ValueError("Unsupported head type: {}".format(head)) # pylint: enable=protected-access parent_scope = "linear" with variable_scope.variable_op_scope( features.values(), parent_scope) as scope: logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=1, scope=scope)) _add_bias_column(feature_columns, features, bias, labels, columns_to_variables) def _train_op_fn(unused_loss): global_step = contrib_variables.get_global_step() sdca_model, train_op = optimizer.get_train_step(columns_to_variables, weight_column_name, loss_type, features, labels, global_step) if update_weights_hook is not None: update_weights_hook.set_parameters(sdca_model, train_op) return train_op model_fn_ops = head.head_ops(features, labels, mode, _train_op_fn, logits) if update_weights_hook is not None: return model_fn_ops._replace( training_chief_hooks=(model_fn_ops.training_chief_hooks + [update_weights_hook])) return model_fn_ops
def _linear_model_fn(features, labels, mode, params, config=None): """A model_fn for linear models that use a gradient-based optimizer. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. If `None`, will use a FTRL optimizer. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * num_ps_replicas: The number of parameter server replicas. * joint_weights: If True, the weights for all columns will be stored in a single (possibly partitioned) variable. It's more efficient, but it's incompatible with SDCAOptimizer, and requires all feature columns are sparse and use the 'sum' combiner. config: `RunConfig` object to configure the runtime settings. Returns: A `ModelFnOps` instance. Raises: ValueError: If mode is not any of the `ModeKeys`. """ head = params["head"] feature_columns = params["feature_columns"] optimizer = params.get("optimizer") or _get_default_optimizer(feature_columns) gradient_clip_norm = params.get("gradient_clip_norm", None) num_ps_replicas = config.num_ps_replicas if config else 0 joint_weights = params.get("joint_weights", False) if not isinstance(features, dict): features = {"": features} parent_scope = "linear" partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_scope( parent_scope, values=features.values(), partitioner=partitioner) as scope: if joint_weights: logits, _, _ = ( layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=head.logits_dimension, weight_collections=[parent_scope], scope=scope)) else: logits, _, _ = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=head.logits_dimension, weight_collections=[parent_scope], scope=scope)) def _train_op_fn(loss): global_step = contrib_variables.get_global_step() my_vars = ops.get_collection("linear") grads = gradients.gradients(loss, my_vars) if gradient_clip_norm: grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm) return (_get_optimizer(optimizer).apply_gradients( zip(grads, my_vars), global_step=global_step)) return head.head_ops(features, labels, mode, _train_op_fn, logits)
def _linear_classifier_model_fn(features, targets, mode, params): """Linear classifier model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * feature_columns: An iterable containing all the feature columns used by the model. * n_classes: number of target classes. * weight_column_name: A string defining the weight feature column, or None if there are no weights. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * enable_centered_bias: A bool. If True, estimator will learn a centered bias variable for each class. Rest of the model structure learns the residual after centered bias. * num_ps_replicas: The number of parameter server replicas. * joint_weights: If True, the weights for all columns will be stored in a single (possibly partitioned) variable. It's more efficient, but it's incompatible with SDCAOptimizer, and requires all feature columns are sparse and use the 'sum' combiner. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. Raises: ValueError: If mode is not any of the `ModeKeys`. """ feature_columns = params["feature_columns"] n_classes = params["n_classes"] weight_column_name = params["weight_column_name"] optimizer = params["optimizer"] gradient_clip_norm = params.get("gradient_clip_norm", None) enable_centered_bias = params.get("enable_centered_bias", True) num_ps_replicas = params.get("num_ps_replicas", 0) joint_weights = params.get("joint_weights", False) if not isinstance(features, dict): features = {"": features} parent_scope = "linear" num_label_columns = 1 if n_classes == 2 else n_classes loss_fn = _softmax_cross_entropy_loss if n_classes == 2: loss_fn = _log_loss_with_two_classes partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_op_scope(features.values(), parent_scope, partitioner=partitioner) as scope: if joint_weights: logits, _, _ = (layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=num_label_columns, weight_collections=[parent_scope], scope=scope)) else: logits, _, _ = (layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=num_label_columns, weight_collections=[parent_scope], scope=scope)) if enable_centered_bias: logits = nn.bias_add(logits, _centered_bias(num_label_columns)) loss = None if mode != estimator.ModeKeys.INFER: loss = loss_fn(logits, targets) if weight_column_name: weight_tensor = array_ops.reshape(math_ops.to_float( features[weight_column_name]), shape=(-1, )) loss = _weighted_loss(loss, weight_tensor) else: loss = math_ops.reduce_mean(loss, name="loss") logging_ops.scalar_summary("loss", loss) train_ops = [] if mode == estimator.ModeKeys.TRAIN: global_step = contrib_variables.get_global_step() my_vars = ops.get_collection("linear") grads = gradients.gradients(loss, my_vars) if gradient_clip_norm: grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm) train_ops.append( optimizer.apply_gradients(zip(grads, my_vars), global_step=global_step)) if enable_centered_bias: train_ops.append( _centered_bias_step(targets, loss_fn, num_label_columns)) predictions = {} if n_classes == 2: predictions[_LOGISTIC] = math_ops.sigmoid(logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) return predictions, loss, control_flow_ops.group(*train_ops)
def _linear_classifier_model_fn(features, targets, mode, params): """Estimator's linear model_fn.""" n_classes = params["n_classes"] weight_column_name = params["weight_column_name"] feature_columns = params["feature_columns"] optimizer = params["optimizer"] gradient_clip_norm = params.get("gradient_clip_norm", None) enable_centered_bias = params.get("enable_centered_bias", True) num_ps_replicas = params.get("num_ps_replicas", 0) joint_weights = params.get("joint_weights", False) if not isinstance(features, dict): features = {"": features} num_label_columns = 1 if n_classes == 2 else n_classes loss_fn = _softmax_cross_entropy_loss if n_classes == 2: loss_fn = _log_loss_with_two_classes feat_values = (features.values() if isinstance(features, dict) else [features]) partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_op_scope( feat_values, "linear", partitioner=partitioner) as scope: if joint_weights: logits, _, _ = ( layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=num_label_columns, weight_collections=["linear"], scope=scope)) else: logits, _, _ = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=num_label_columns, weight_collections=["linear"], scope=scope)) if enable_centered_bias: logits = nn.bias_add(logits, _centered_bias(num_label_columns)) loss = None if mode != estimator.ModeKeys.INFER: loss = loss_fn(logits, targets) if weight_column_name: weight_tensor = array_ops.reshape( math_ops.to_float(features[weight_column_name]), shape=(-1,)) loss = _weighted_loss(loss, weight_tensor) else: loss = math_ops.reduce_mean(loss, name="loss") logging_ops.scalar_summary("loss", loss) train_ops = [] if mode == estimator.ModeKeys.TRAIN: global_step = contrib_variables.get_global_step() my_vars = ops.get_collection("linear") grads = gradients.gradients(loss, my_vars) if gradient_clip_norm: grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm) train_ops.append(optimizer.apply_gradients( zip(grads, my_vars), global_step=global_step)) if enable_centered_bias: train_ops.append( _centered_bias_step(targets, loss_fn, num_label_columns)) predictions = {} if n_classes == 2: predictions[_LOGISTIC] = math_ops.sigmoid(logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) return predictions, loss, control_flow_ops.group(*train_ops)
def sdca_classifier_model_fn(features, targets, mode, params): """Linear classifier model_fn that uses the SDCA optimizer. Args: features: A dict of `Tensor` keyed by column name. targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: An `SDCAOptimizer` instance. * weight_column_name: A string defining the weight feature column, or None if there are no weights. * loss_type: A string. Must be either "logistic_loss" or "hinge_loss". * update_weights_hook: A `SessionRunHook` object or None. Used to update model weights. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. Raises: ValueError: If `optimizer` is not an `SDCAOptimizer` instance. ValueError: If mode is not any of the `ModeKeys`. """ feature_columns = params["feature_columns"] optimizer = params["optimizer"] weight_column_name = params["weight_column_name"] loss_type = params["loss_type"] update_weights_hook = params.get("update_weights_hook") if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer): raise ValueError("Optimizer must be of type SDCAOptimizer") loss_fn = { "logistic_loss": _log_loss_with_two_classes, "hinge_loss": _hinge_loss, }[loss_type] logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=1)) _add_bias_column(feature_columns, features, bias, targets, columns_to_variables) loss = None if mode != estimator.ModeKeys.INFER: loss = math_ops.reduce_mean(loss_fn(logits, targets), name="loss") logging_ops.scalar_summary("loss", loss) train_op = None if mode == estimator.ModeKeys.TRAIN: global_step = contrib_variables.get_global_step() sdca_model, train_op = optimizer.get_train_step( columns_to_variables, weight_column_name, loss_type, features, targets, global_step) if update_weights_hook is not None: update_weights_hook.set_parameters(sdca_model, train_op) predictions = {} predictions[_LOGISTIC] = math_ops.sigmoid(logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) return predictions, loss, train_op
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None): """Deep Neural Net and Linear combined model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. * linear_feature_columns: An iterable containing all the feature columns used by the Linear model. * linear_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the Linear model. Defaults to the Ftrl optimizer. * joint_linear_weights: If True a single (possibly partitioned) variable will be used to store the linear model weights. It's faster, but requires all columns are sparse and have the 'sum' combiner. * dnn_feature_columns: An iterable containing all the feature columns used by the DNN model. * dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN model. Defaults to the Adagrad optimizer. * dnn_hidden_units: List of hidden units per DNN layer. * dnn_activation_fn: Activation function applied to each DNN layer. If `None`, will use `tf.nn.relu`. * dnn_dropout: When not `None`, the probability we will drop out a given DNN coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to multiply with learning rate for the embedding variables. * input_layer_partitioner: Optional. Partitioner for input layer. config: `RunConfig` object to configure the runtime settings. Returns: `ModelFnOps` Raises: ValueError: If both `linear_feature_columns` and `dnn_features_columns` are empty at the same time, or `input_layer_partitioner` is missing. """ head = params["head"] linear_feature_columns = params.get("linear_feature_columns") linear_optimizer = params.get("linear_optimizer") or "Ftrl" joint_linear_weights = params.get("joint_linear_weights") dnn_feature_columns = params.get("dnn_feature_columns") dnn_optimizer = params.get("dnn_optimizer") or "Adagrad" dnn_hidden_units = params.get("dnn_hidden_units") dnn_activation_fn = params.get("dnn_activation_fn") or nn.relu dnn_dropout = params.get("dnn_dropout") gradient_clip_norm = params.get("gradient_clip_norm") num_ps_replicas = config.num_ps_replicas if config else 0 input_layer_partitioner = params.get("input_layer_partitioner") or ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) embedding_lr_multipliers = params.get("embedding_lr_multipliers", {}) fix_global_step_increment_bug = params.get( "fix_global_step_increment_bug", True) if not linear_feature_columns and not dnn_feature_columns: raise ValueError( "Either linear_feature_columns or dnn_feature_columns must be defined.") features = _get_feature_dict(features) linear_optimizer = _get_optimizer(linear_optimizer) _check_no_sync_replicas_optimizer(linear_optimizer) dnn_optimizer = _get_optimizer(dnn_optimizer) _check_no_sync_replicas_optimizer(dnn_optimizer) # Build DNN Logits. dnn_parent_scope = "dnn" if not dnn_feature_columns: dnn_logits = None else: if not dnn_hidden_units: raise ValueError( "dnn_hidden_units must be defined when dnn_feature_columns is " "specified.") dnn_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) with variable_scope.variable_scope( dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as dnn_input_scope: if all( isinstance(fc, feature_column_lib._FeatureColumn) # pylint: disable=protected-access for fc in dnn_feature_columns ): net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=dnn_input_scope) else: net = fc_core.input_layer( features=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope]) for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(net,)) as dnn_hidden_layer_scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=dnn_hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout( net, keep_prob=(1.0 - dnn_dropout)) # TODO(b/31209633): Consider adding summary before dropout. _add_layer_summary(net, dnn_hidden_layer_scope.name) with variable_scope.variable_scope( "logits", values=(net,)) as dnn_logits_scope: dnn_logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=dnn_logits_scope) _add_layer_summary(dnn_logits, dnn_logits_scope.name) # Build Linear logits. linear_parent_scope = "linear" if not linear_feature_columns: linear_logits = None else: linear_partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_scope( linear_parent_scope, values=tuple(six.itervalues(features)), partitioner=linear_partitioner) as scope: if all(isinstance(fc, feature_column_lib._FeatureColumn) # pylint: disable=protected-access for fc in linear_feature_columns): if joint_linear_weights: linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) else: linear_logits, _, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) else: linear_logits = fc_core.linear_model( features=features, feature_columns=linear_feature_columns, units=head.logits_dimension, weight_collections=[linear_parent_scope]) _add_layer_summary(linear_logits, scope.name) # Combine logits and build full model. if dnn_logits is not None and linear_logits is not None: logits = dnn_logits + linear_logits elif dnn_logits is not None: logits = dnn_logits else: logits = linear_logits def _make_training_op(training_loss): """Training op for the DNN linear combined model.""" train_ops = [] global_step = training_util.get_global_step() if dnn_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=global_step, learning_rate=_DNN_LEARNING_RATE, optimizer=dnn_optimizer, gradient_multipliers=_extract_embedding_lr_multipliers( # pylint: disable=protected-access embedding_lr_multipliers, dnn_parent_scope, dnn_input_scope.name), clip_gradients=gradient_clip_norm, variables=ops.get_collection(dnn_parent_scope), name=dnn_parent_scope, # Empty summaries, because head already logs "loss" summary. summaries=[], increment_global_step=not fix_global_step_increment_bug)) if linear_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=global_step, learning_rate=_linear_learning_rate(len(linear_feature_columns)), optimizer=linear_optimizer, clip_gradients=gradient_clip_norm, variables=ops.get_collection(linear_parent_scope), name=linear_parent_scope, # Empty summaries, because head already logs "loss" summary. summaries=[], increment_global_step=not fix_global_step_increment_bug)) train_op = control_flow_ops.group(*train_ops) if fix_global_step_increment_bug: with ops.control_dependencies([train_op]): with ops.colocate_with(global_step): return state_ops.assign_add(global_step, 1).op return train_op return head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_make_training_op, logits=logits)
def _dnn_linear_combined_model_fn(features, labels, mode, params): """Deep Neural Net and Linear combined model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. * linear_feature_columns: An iterable containing all the feature columns used by the Linear model. * linear_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the Linear model. * joint_linear_weights: If True a single (possibly partitioned) variable will be used to store the linear model weights. It's faster, but requires all columns are sparse and have the 'sum' combiner. * dnn_feature_columns: An iterable containing all the feature columns used by the DNN model. * dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN model. * dnn_hidden_units: List of hidden units per DNN layer. * dnn_activation_fn: Activation function applied to each DNN layer. If `None`, will use `tf.nn.relu`. * dnn_dropout: When not `None`, the probability we will drop out a given DNN coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * num_ps_replicas: The number of parameter server replicas. Returns: `estimator.ModelFnOps` Raises: ValueError: If both `linear_feature_columns` and `dnn_features_columns` are empty at the same time. """ head = params["head"] linear_feature_columns = params.get("linear_feature_columns") linear_optimizer = params.get("linear_optimizer") joint_linear_weights = params.get("joint_linear_weights") dnn_feature_columns = params.get("dnn_feature_columns") dnn_optimizer = params.get("dnn_optimizer") dnn_hidden_units = params.get("dnn_hidden_units") dnn_activation_fn = params.get("dnn_activation_fn") dnn_dropout = params.get("dnn_dropout") gradient_clip_norm = params.get("gradient_clip_norm") num_ps_replicas = params["num_ps_replicas"] if not linear_feature_columns and not dnn_feature_columns: raise ValueError( "Either linear_feature_columns or dnn_feature_columns must be defined.") features = _get_feature_dict(features) # Build DNN Logits. dnn_parent_scope = "dnn" if not dnn_feature_columns: dnn_logits = None else: input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) with variable_scope.variable_scope( dnn_parent_scope + "/input_from_feature_columns", values=features.values(), partitioner=input_layer_partitioner) as scope: net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=scope) hidden_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( dnn_parent_scope + "/hiddenlayer_%d" % layer_id, values=[net], partitioner=hidden_layer_partitioner) as scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=scope) if dnn_dropout is not None and mode == estimator.ModeKeys.TRAIN: net = layers.dropout( net, keep_prob=(1.0 - dnn_dropout)) # TODO(b/31209633): Consider adding summary before dropout. _add_hidden_layer_summary(net, scope.name) with variable_scope.variable_scope( dnn_parent_scope + "/logits", values=[net], partitioner=hidden_layer_partitioner) as scope: dnn_logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=scope) _add_hidden_layer_summary(dnn_logits, scope.name) # Build Linear logits. linear_parent_scope = "linear" if not linear_feature_columns: linear_logits = None else: linear_partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_scope( linear_parent_scope, values=features.values(), partitioner=linear_partitioner) as scope: if joint_linear_weights: linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) else: linear_logits, _, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) # Combine logits and build full model. if dnn_logits is not None and linear_logits is not None: logits = dnn_logits + linear_logits elif dnn_logits is not None: logits = dnn_logits else: logits = linear_logits def _make_training_op(training_loss): """Training op for the DNN linear combined model.""" train_ops = [] if dnn_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=contrib_variables.get_global_step(), learning_rate=_DNN_LEARNING_RATE, optimizer=_get_optimizer(dnn_optimizer), clip_gradients=gradient_clip_norm, variables=ops.get_collection(dnn_parent_scope), name=dnn_parent_scope, # Empty summaries, because head already logs "loss" summary. summaries=[])) if linear_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=contrib_variables.get_global_step(), learning_rate=_linear_learning_rate(len(linear_feature_columns)), optimizer=_get_optimizer(linear_optimizer), clip_gradients=gradient_clip_norm, variables=ops.get_collection(linear_parent_scope), name=linear_parent_scope, # Empty summaries, because head already logs "loss" summary. summaries=[])) return control_flow_ops.group(*train_ops) return head.head_ops( features, labels, mode, _make_training_op, logits=logits)