def build_model(self, features, feature_columns, is_training):
  """See base class."""
  self._feature_columns = feature_columns
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=self._num_ps_replicas,
      min_slice_size=64 << 20)
  with variable_scope.variable_scope(
      self._scope,
      values=features.values(),
      partitioner=partitioner) as scope:
    if self._joint_weights:
      logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=self._get_feature_columns(),
          num_outputs=self._num_label_columns,
          weight_collections=[self._scope],
          scope=scope)
    else:
      logits, _, _ = layers.weighted_sum_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=self._get_feature_columns(),
          num_outputs=self._num_label_columns,
          weight_collections=[self._scope],
          scope=scope)
  return logits
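
# The partitioner above shards the linear weights across parameter servers,
# but never below 64 MB (64 << 20 bytes) per slice. The helper below is an
# illustrative sketch of that trade-off, not the actual
# min_max_variable_partitioner implementation.
def _estimate_num_shards(total_bytes, max_partitions, min_slice_size=64 << 20):
  """Roughly how many shards a variable of `total_bytes` would be split into.

  >>> _estimate_num_shards(512 << 20, max_partitions=16)
  8
  >>> _estimate_num_shards(16 << 20, max_partitions=16)
  1
  """
  return max(1, min(max_partitions, total_bytes // min_slice_size))
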
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model. Defaults to the
          Ftrl optimizer.
      * joint_linear_weights: If True a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires all columns are sparse and have the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns
          used by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the DNN model. Defaults to the
          Adagrad optimizer.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn`
          to a `float` multiplier. Multiplier will be used to multiply with
          learning rate for the embedding variables.
      * input_layer_partitioner: Optional. Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or `input_layer_partitioner` is missing.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer") or "Ftrl"
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn") or nn.relu
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = params.get("input_layer_partitioner") or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})
  fix_global_step_increment_bug = params.get(
      "fix_global_step_increment_bug", True)

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  linear_optimizer = _get_optimizer(linear_optimizer)
  _check_no_sync_replicas_optimizer(linear_optimizer)
  dnn_optimizer = _get_optimizer(dnn_optimizer)
  _check_no_sync_replicas_optimizer(dnn_optimizer)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    if not dnn_hidden_units:
      raise ValueError(
          "dnn_hidden_units must be defined when dnn_feature_columns is "
          "specified.")
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      with variable_scope.variable_scope(
          "input_from_feature_columns",
          values=tuple(six.itervalues(features)),
          partitioner=input_layer_partitioner) as dnn_input_scope:
        if all(
            isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
            for fc in dnn_feature_columns
        ):
          net = layers.input_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope],
              scope=dnn_input_scope)
        else:
          net = fc_core.input_layer(
              features=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope])

      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            "hiddenlayer_%d" % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = layers.fully_connected(
              net,
              num_hidden_units,
              activation_fn=dnn_activation_fn,
              variables_collections=[dnn_parent_scope],
              scope=dnn_hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = layers.dropout(
                net,
                keep_prob=(1.0 - dnn_dropout))
        # TODO(b/31209633): Consider adding summary before dropout.
        _add_layer_summary(net, dnn_hidden_layer_scope.name)

      with variable_scope.variable_scope(
          "logits",
          values=(net,)) as dnn_logits_scope:
        dnn_logits = layers.fully_connected(
            net,
            head.logits_dimension,
            activation_fn=None,
            variables_collections=[dnn_parent_scope],
            scope=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=linear_partitioner) as scope:
      if all(isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
             for fc in linear_feature_columns):
        if joint_linear_weights:
          linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
        else:
          linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
      else:
        linear_logits = fc_core.linear_model(
            features=features,
            feature_columns=linear_feature_columns,
            units=head.logits_dimension,
            weight_collections=[linear_parent_scope])

      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=dnn_optimizer,
              gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, dnn_parent_scope,
                  dnn_input_scope.name),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=linear_optimizer,
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))

    train_op = control_flow_ops.group(*train_ops)
    if fix_global_step_increment_bug:
      with ops.control_dependencies([train_op]):
        with ops.colocate_with(global_step):
          return state_ops.assign_add(global_step, 1).op
    return train_op

  return head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_make_training_op,
      logits=logits)
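
# The if/elif/else that merges the two towers above can be read as a tiny
# None-aware sum. Purely illustrative sketch operating on plain floats rather
# than Tensors:
def _combine_tower_logits(dnn_logits, linear_logits):
  """Merge DNN and linear logits; either tower may be absent (None).

  >>> _combine_tower_logits(0.5, -1.25)
  -0.75
  >>> _combine_tower_logits(None, -1.25)
  -1.25
  """
  if dnn_logits is not None and linear_logits is not None:
    return dnn_logits + linear_logits
  return dnn_logits if dnn_logits is not None else linear_logits
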
def _linear_classifier_model_fn(features, targets, mode, params):
  """Estimator's linear model_fn."""
  n_classes = params["n_classes"]
  weight_column_name = params["weight_column_name"]
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  enable_centered_bias = params.get("enable_centered_bias", True)
  num_ps_replicas = params.get("num_ps_replicas", 0)
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  num_label_columns = 1 if n_classes == 2 else n_classes
  loss_fn = _softmax_cross_entropy_loss
  if n_classes == 2:
    loss_fn = _log_loss_with_two_classes

  feat_values = (features.values() if isinstance(features, dict)
                 else [features])
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)
  with variable_scope.variable_op_scope(
      feat_values, "linear", partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=["linear"],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=["linear"],
              scope=scope))

  if enable_centered_bias:
    logits = nn.bias_add(logits, _centered_bias(num_label_columns))

  loss = None
  if mode != estimator.ModeKeys.INFER:
    loss = loss_fn(logits, targets)
    if weight_column_name:
      weight_tensor = array_ops.reshape(
          math_ops.to_float(features[weight_column_name]), shape=(-1,))
      loss = _weighted_loss(loss, weight_tensor)
    else:
      loss = math_ops.reduce_mean(loss, name="loss")
    logging_ops.scalar_summary("loss", loss)

  train_ops = []
  if mode == estimator.ModeKeys.TRAIN:
    global_step = contrib_variables.get_global_step()

    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    train_ops.append(optimizer.apply_gradients(
        zip(grads, my_vars), global_step=global_step))
    if enable_centered_bias:
      train_ops.append(
          _centered_bias_step(targets, loss_fn, num_label_columns))

  predictions = {}
  if n_classes == 2:
    predictions[_LOGISTIC] = math_ops.sigmoid(logits)
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
  predictions[_PROBABILITIES] = nn.softmax(logits)
  predictions[_CLASSES] = math_ops.argmax(logits, 1)

  return predictions, loss, control_flow_ops.group(*train_ops)
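
# For the binary case above, concatenating a zero column before the softmax is
# what keeps _PROBABILITIES consistent with _LOGISTIC: softmax over [0, z]
# equals [1 - sigmoid(z), sigmoid(z)]. Small self-contained check (pure Python,
# no TensorFlow):
import math


def _sigmoid(z):
  return 1.0 / (1.0 + math.exp(-z))


def _softmax(values):
  exps = [math.exp(v) for v in values]
  total = sum(exps)
  return [e / total for e in exps]


assert abs(_softmax([0.0, 1.7])[1] - _sigmoid(1.7)) < 1e-12
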
def _linear_model_fn(features, labels, mode, params, config=None):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * feature_columns: An iterable containing all the feature columns used
          by the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use a FTRL optimizer.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in
          a single (possibly partitioned) variable. It's more efficient, but
          it's incompatible with SDCAOptimizer, and requires all feature
          columns are sparse and use the 'sum' combiner.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or _get_default_optimizer(
      feature_columns)
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  num_ps_replicas = config.num_ps_replicas if config else 0
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)
  with variable_scope.variable_scope(
      parent_scope,
      values=features.values(),
      partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))

  def _train_op_fn(loss):
    global_step = contrib_variables.get_global_step()
    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    return (_get_optimizer(optimizer).apply_gradients(
        zip(grads, my_vars), global_step=global_step))

  return head.head_ops(features, labels, mode, _train_op_fn, logits)
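
# Sketch of what clip_ops.clip_by_global_norm does to the gradient list in
# _train_op_fn above: if the combined L2 norm of all gradients exceeds the
# threshold, every gradient is rescaled by clip_norm / global_norm.
# Illustrative pure-Python version (the real op also handles Tensors,
# IndexedSlices, and a precomputed use_norm):
import math


def _clip_by_global_norm_sketch(grads, clip_norm):
  global_norm = math.sqrt(sum(g * g for g in grads))
  if global_norm <= clip_norm:
    return list(grads), global_norm
  scale = clip_norm / global_norm
  return [g * scale for g in grads], global_norm


# Global norm is 5.0, so every gradient is scaled by 2.5 / 5.0 = 0.5.
assert _clip_by_global_norm_sketch([3.0, 4.0], clip_norm=2.5) == ([1.5, 2.0], 5.0)
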
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model. Defaults to the
          Ftrl optimizer.
      * joint_linear_weights: If True a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires all columns are sparse and have the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns
          used by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the DNN model. Defaults to the
          Adagrad optimizer.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.
      * embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn`
          to a `float` multiplier. Multiplier will be used to multiply with
          learning rate for the embedding variables.
      * input_layer_min_slice_size: Optional. The min slice size of input layer
          partitions. If not provided, will use the default of 64M.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer") or "Ftrl"
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn")
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  input_layer_min_slice_size = (
      params.get("input_layer_min_slice_size") or 64 << 20)
  num_ps_replicas = config.num_ps_replicas if config else 0
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=input_layer_min_slice_size))
    input_layer_scope = dnn_parent_scope + "/input_from_feature_columns"
    with variable_scope.variable_scope(
        input_layer_scope,
        values=features.values(),
        partitioner=input_layer_partitioner) as scope:
      net = layers.input_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=dnn_feature_columns,
          weight_collections=[dnn_parent_scope],
          scope=scope)

    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
      with variable_scope.variable_scope(
          dnn_parent_scope + "/hiddenlayer_%d" % layer_id,
          values=[net],
          partitioner=hidden_layer_partitioner) as scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=dnn_activation_fn,
            variables_collections=[dnn_parent_scope],
            scope=scope)
        if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
      # TODO(b/31209633): Consider adding summary before dropout.
      _add_hidden_layer_summary(net, scope.name)

    with variable_scope.variable_scope(
        dnn_parent_scope + "/logits",
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      dnn_logits = layers.fully_connected(
          net,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[dnn_parent_scope],
          scope=scope)
    _add_hidden_layer_summary(dnn_logits, scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=features.values(),
        partitioner=linear_partitioner) as scope:
      if joint_linear_weights:
        linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)
      else:
        linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=contrib_variables.get_global_step(),
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=_get_optimizer(dnn_optimizer),
              gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, dnn_parent_scope,
                  input_layer_scope),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[]))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=contrib_variables.get_global_step(),
              learning_rate=_linear_learning_rate(
                  len(linear_feature_columns)),
              optimizer=_get_optimizer(linear_optimizer),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[]))

    return control_flow_ops.group(*train_ops)

  return head.create_model_fn_ops(
      features, labels, mode, _make_training_op, logits=logits)
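
# _linear_learning_rate and _DNN_LEARNING_RATE are module-level helpers that
# are not part of this excerpt. A plausible sketch of the linear rate, assuming
# it caps a 1/sqrt(num_columns) default at a fixed base rate (the 0.2 constant
# below is an assumption, not taken from this code):
import math

_ASSUMED_BASE_LINEAR_LEARNING_RATE = 0.2


def _sketch_linear_learning_rate(num_linear_feature_columns):
  default_learning_rate = 1.0 / math.sqrt(num_linear_feature_columns)
  return min(_ASSUMED_BASE_LINEAR_LEARNING_RATE, default_learning_rate)


assert _sketch_linear_learning_rate(4) == 0.2    # capped at the base rate
assert _sketch_linear_learning_rate(100) == 0.1  # 1 / sqrt(100)
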
def _linear_model_fn(features, labels, mode, params, config=None):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * feature_columns: An iterable containing all the feature columns used
          by the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use a FTRL optimizer.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * joint_weights: If True, the weights for all columns will be stored in
          a single (possibly partitioned) variable. It's more efficient, but
          it's incompatible with SDCAOptimizer, and requires all feature
          columns are sparse and use the 'sum' combiner.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or _get_default_optimizer(
      feature_columns)
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  num_ps_replicas = config.num_ps_replicas if config else 0
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)
  with variable_scope.variable_scope(
      parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))

  def _train_op_fn(loss):
    global_step = contrib_variables.get_global_step()
    my_vars = ops.get_collection(parent_scope)
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    return (_get_optimizer(optimizer).apply_gradients(
        zip(grads, my_vars), global_step=global_step))

  return head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
def _linear_classifier_model_fn(features, targets, mode, params):
  """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used
          by the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in
          a single (possibly partitioned) variable. It's more efficient, but
          it's incompatible with SDCAOptimizer, and requires all feature
          columns are sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  weight_column_name = params["weight_column_name"]
  optimizer = params["optimizer"]
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  enable_centered_bias = params.get("enable_centered_bias", True)
  num_ps_replicas = params.get("num_ps_replicas", 0)
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"
  num_label_columns = 1 if n_classes == 2 else n_classes
  loss_fn = _softmax_cross_entropy_loss
  if n_classes == 2:
    loss_fn = _log_loss_with_two_classes

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)
  with variable_scope.variable_op_scope(
      features.values(), parent_scope, partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=num_label_columns,
              weight_collections=[parent_scope],
              scope=scope))

  if enable_centered_bias:
    logits = nn.bias_add(logits, _centered_bias(num_label_columns))

  loss = None
  if mode != estimator.ModeKeys.INFER:
    loss = loss_fn(logits, targets)
    if weight_column_name:
      weight_tensor = array_ops.reshape(
          math_ops.to_float(features[weight_column_name]), shape=(-1,))
      loss = _weighted_loss(loss, weight_tensor)
    else:
      loss = math_ops.reduce_mean(loss, name="loss")
    logging_ops.scalar_summary("loss", loss)

  train_ops = []
  if mode == estimator.ModeKeys.TRAIN:
    global_step = contrib_variables.get_global_step()

    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    train_ops.append(
        optimizer.apply_gradients(zip(grads, my_vars),
                                  global_step=global_step))
    if enable_centered_bias:
      train_ops.append(
          _centered_bias_step(targets, loss_fn, num_label_columns))

  predictions = {}
  if n_classes == 2:
    predictions[_LOGISTIC] = math_ops.sigmoid(logits)
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
  predictions[_PROBABILITIES] = nn.softmax(logits)
  predictions[_CLASSES] = math_ops.argmax(logits, 1)

  return predictions, loss, control_flow_ops.group(*train_ops)
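
# _weighted_loss is another helper that is not shown in this excerpt. A
# plausible sketch, assuming it turns the per-example losses and the
# per-example weights from weight_column_name into a weighted mean (pure
# Python; the function name and normalization are assumptions):
def _sketch_weighted_loss(per_example_losses, weights):
  weighted = [l * w for l, w in zip(per_example_losses, weights)]
  return sum(weighted) / float(sum(weights))


# An example with weight 3.0 counts three times as much as a weight-1.0 one.
assert _sketch_weighted_loss([1.0, 2.0], [3.0, 1.0]) == 1.25
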
def _linear_classifier_model_fn(features, targets, mode, params):
  """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used
          by the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in
          a single (possibly partitioned) variable. It's more efficient, but
          it's incompatible with SDCAOptimizer, and requires all feature
          columns are sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  num_ps_replicas = params.get("num_ps_replicas", 0)
  joint_weights = params.get("joint_weights", False)
  head = params.get("head", None)

  if not head:
    # TODO(zakaria): Remove these params and make head mandatory.
    head = head_lib._multi_class_head(  # pylint: disable=protected-access
        params.get("n_classes"),
        weight_column_name=params["weight_column_name"],
        enable_centered_bias=params.get("enable_centered_bias", False))

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)
  with variable_scope.variable_op_scope(
      features.values(), parent_scope, partitioner=partitioner) as scope:
    if joint_weights:
      logits, _, _ = (
          layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))
    else:
      logits, _, _ = (
          layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[parent_scope],
              scope=scope))

  def _train_op_fn(loss):
    global_step = contrib_variables.get_global_step()
    my_vars = ops.get_collection("linear")
    grads = gradients.gradients(loss, my_vars)
    if gradient_clip_norm:
      grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
    return (optimizer.apply_gradients(
        zip(grads, my_vars), global_step=global_step))

  return head.head_ops(features, targets, mode, _train_op_fn, logits)
def _dnn_linear_combined_model_fn(features, labels, mode, params):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model.
      * joint_linear_weights: If True a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires all columns are sparse and have the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns
          used by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the DNN model.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.

  Returns:
    `estimator.ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer")
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer")
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn")
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = params["num_ps_replicas"]

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))
    with variable_scope.variable_scope(
        dnn_parent_scope + "/input_from_feature_columns",
        values=features.values(),
        partitioner=input_layer_partitioner) as scope:
      net = layers.input_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=dnn_feature_columns,
          weight_collections=[dnn_parent_scope],
          scope=scope)

    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
      with variable_scope.variable_scope(
          dnn_parent_scope + "/hiddenlayer_%d" % layer_id,
          values=[net],
          partitioner=hidden_layer_partitioner) as scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=dnn_activation_fn,
            variables_collections=[dnn_parent_scope],
            scope=scope)
        if dnn_dropout is not None and mode == estimator.ModeKeys.TRAIN:
          net = layers.dropout(
              net,
              keep_prob=(1.0 - dnn_dropout))
      # TODO(b/31209633): Consider adding summary before dropout.
      _add_hidden_layer_summary(net, scope.name)

    with variable_scope.variable_scope(
        dnn_parent_scope + "/logits",
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      dnn_logits = layers.fully_connected(
          net,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[dnn_parent_scope],
          scope=scope)
    _add_hidden_layer_summary(dnn_logits, scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=features.values(),
        partitioner=linear_partitioner) as scope:
      if joint_linear_weights:
        linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)
      else:
        linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=contrib_variables.get_global_step(),
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=_get_optimizer(dnn_optimizer),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[]))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=contrib_variables.get_global_step(),
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=_get_optimizer(linear_optimizer),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[]))

    return control_flow_ops.group(*train_ops)

  return head.head_ops(
      features, labels, mode, _make_training_op, logits=logits)
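
# layers.dropout above is given keep_prob = 1.0 - dnn_dropout and is only
# applied in TRAIN mode. It follows the usual "inverted dropout" convention:
# kept activations are scaled up by 1/keep_prob so the expected value of each
# unit is unchanged. Illustrative sketch with a fixed mask instead of a random
# one:
def _sketch_inverted_dropout(activations, mask, keep_prob):
  """Zero out units where mask is 0, rescale the survivors by 1/keep_prob."""
  return [a * m / keep_prob for a, m in zip(activations, mask)]


# With dnn_dropout = 0.5 (keep_prob = 0.5), surviving units are doubled.
assert _sketch_inverted_dropout([1.0, 2.0, 3.0, 4.0],
                                [1, 0, 1, 0],
                                keep_prob=0.5) == [2.0, 0.0, 6.0, 0.0]
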