def sequence_classifier(decoding, labels, sampling_decoding=None, name=None): """Returns predictions and loss for sequence of predictions. Args: decoding: List of Tensors with predictions. labels: List of Tensors with labels. sampling_decoding: Optional, List of Tensors with predictions to be used in sampling. E.g., they shouldn't have a dependency on outputs. If not provided, decoding is used. name: Operation name. Returns: Prediction and loss tensors. """ with ops.op_scope([decoding, labels], name, "sequence_classifier"): predictions, xent_list = [], [] for i, pred in enumerate(decoding): xent_list.append(nn.softmax_cross_entropy_with_logits( pred, labels[i], name="sequence_loss/xent_raw{0}".format(i))) if sampling_decoding: predictions.append(nn.softmax(sampling_decoding[i])) else: predictions.append(nn.softmax(pred)) xent = math_ops.add_n(xent_list, name="sequence_loss/xent") loss = math_ops.reduce_sum(xent, name="sequence_loss") return array_ops.expand_concat(1, predictions), loss
def call(self, inputs, **kwargs): x1, x2 = inputs[0], inputs[1] e = K.batch_dot(x1, x2, axes=[2, 2]) e1 = softmax(e, 2) e2 = softmax(e, 1) xe1 = K.batch_dot(e1, x2, axes=[2, 1]) xe2 = K.batch_dot(e2, x1, axes=[2, 1]) return [xe1, xe2]
def align(input_1, input_2): attention = Dot(axes=-1)([input_1, input_2]) w_att_1 = Lambda(lambda x: softmax(x, axis=1))(attention) w_att_2 = Permute((2, 1))(Lambda(lambda x: softmax(x, axis=2))(attention)) in1_aligned = Dot(axes=1)([w_att_1, input_1]) in2_aligned = Dot(axes=1)([w_att_2, input_2]) in1_aligned = add([in1_aligned, input_1]) in2_aligned = add([in2_aligned, input_2]) return in1_aligned, in2_aligned
def apply_attention_scores(self, scores, value, value_mask=None): """Applies attention scores to the given value tensor. To use this method in your attention layer, follow the steps: * Use `query` tensor of shape `[batch_size, Tq]` and `key` tensor of shape `[batch_size, Tv]` to calculate the attention `scores`. * Pass `scores` and `value` tensors to this method. The method applies `value_mask`, calculates `attention_distribution = softmax(scores)`, then returns `matmul(attention_distribution, value)`. * Apply `query_mask` and return the result. Args: scores: Scores float tensor of shape `[batch_size, Tq, Tv]`. value: Value tensor of shape `[batch_size, Tv, dim]`. value_mask: A boolean mask `Tensor` of shape `[batch_size, Tv]`. If given, will apply the mask such that values at positions where `mask==False` do not contribute to the result. Returns: Tensor of shape `[batch_size, Tq, dim]`. """ if value_mask is not None: # Mask of shape [batch_size, 1, Tv] that is True in padding positions. padding_mask = array_ops.expand_dims( math_ops.logical_not(value_mask), axis=1) # Bias so padding positions do not contribute to attention distribution. scores -= 1.e9 * math_ops.cast(padding_mask, dtype=K.floatx()) attention_distribution = nn.softmax(scores) return math_ops.matmul(attention_distribution, value)
def _apply_scores(self, scores, value, value_mask=None): """Applies attention scores to the given value tensor. To use this method in your attention layer, follow the steps: * Use `query` tensor of shape `[batch_size, Tq]` and `key` tensor of shape `[batch_size, Tv]` to calculate the attention `scores`. * Pass `scores` and `value` tensors to this method. The method applies `value_mask`, calculates `attention_distribution = softmax(scores)`, then returns `matmul(attention_distribution, value)`. * Apply `query_mask` and return the result. Args: scores: Scores float tensor of shape `[batch_size, Tq, Tv]`. value: Value tensor of shape `[batch_size, Tv, dim]`. value_mask: A boolean mask `Tensor` of shape `[batch_size, Tv]`. If given, will apply the mask such that values at positions where `mask==False` do not contribute to the result. Returns: Tensor of shape `[batch_size, Tq, dim]`. """ if value_mask is not None: # Mask of shape [batch_size, 1, Tv] that is True in padding positions. padding_mask = array_ops.expand_dims( math_ops.logical_not(value_mask), axis=1) # Bias so padding positions do not contribute to attention distribution. scores -= 1.e9 * math_ops.cast(padding_mask, dtype=K.floatx()) attention_distribution = nn.softmax(scores) return math_ops.matmul(attention_distribution, value)
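# A minimal standalone sketch (not the layer method above) of how masked
# attention scores are typically applied to a value tensor, written with
# public TF 2.x ops. Shapes and the 1e9 masking constant mirror the
# docstring above; the tensors here are made-up example inputs.
import tensorflow as tf

batch, Tq, Tv, dim = 2, 3, 4, 8
scores = tf.random.normal((batch, Tq, Tv))
value = tf.random.normal((batch, Tv, dim))
value_mask = tf.constant([[True, True, True, False],
                          [True, True, False, False]])  # False marks padding

# Bias padding positions so they get ~zero attention weight after softmax.
padding_mask = tf.expand_dims(tf.logical_not(value_mask), axis=1)   # [batch, 1, Tv]
scores -= 1.e9 * tf.cast(padding_mask, scores.dtype)
attention_distribution = tf.nn.softmax(scores)                      # sums to 1 over Tv
result = tf.matmul(attention_distribution, value)                   # [batch, Tq, dim]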
def _classifier_model(self, features, targets, mode): logits, loss, train_op = self._logits_fn(features, targets, mode) return { 'classes': math_ops.argmax(logits, len(logits.get_shape()) - 1), 'probabilities': nn.softmax(logits) }, loss, train_op
def softmax_classifier(tensor_in, labels, weights, biases, class_weight=None, name=None): """Returns prediction and loss for softmax classifier. Args: tensor_in: Input tensor, [batch_size, feature_size], features. labels: Tensor, [batch_size, n_classes], labels of the output classes. weights: Tensor, [batch_size, feature_size], linear transformation matrix. biases: Tensor, [batch_size], biases. class_weight: Tensor, optional, [n_classes], weight for each class. If not given, all classes are supposed to have weight one. name: Operation name. Returns: Prediction and loss tensors. """ with ops.name_scope(name, "softmax_classifier", [tensor_in, labels]): logits = nn.xw_plus_b(tensor_in, weights, biases) if class_weight is not None: logits = math_ops.mul(logits, class_weight) return nn.softmax(logits), loss_ops.softmax_cross_entropy(logits, labels)
def create_estimator_spec(self): # Predict. with ops.name_scope('head'): with ops.name_scope(None, 'predictions', (self.logits_combine,)): logits = head_lib._check_logits(self.logits_combine, self.logits_dimension) logistic = math_ops.sigmoid(logits, name='logistic') two_class_logits = array_ops.concat( (array_ops.zeros_like(logits), logits), 1, name='two_class_logits') scores = nn.softmax(two_class_logits, name='probabilities') class_ids = array_ops.reshape( math_ops.argmax(two_class_logits, axis=1), (-1, 1), name='classes') classes = string_ops.as_string(class_ids, name='str_classes') predictions = { 'logits': logits, 'logistic': logistic, 'probabilities': scores, 'class_ids': class_ids, 'classes': classes, } if self.mode == model_fn.ModeKeys.PREDICT: #batch_size = array_ops.shape(logistic)[0] #export_class_list = string_ops.as_string([0, 1]) #export_output_classes = array_ops.tile( # input=array_ops.expand_dims(input=export_class_list, axis=0), # multiples=[batch_size, 1]) classifier_output = RankClassifierExportOutput(prob=scores) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.PREDICT, predictions=predictions, export_outputs={ 'serving_default': classifier_output, 'classification': classifier_output, 'regression': export_output.RegressionOutput(logistic), 'predict': export_output.PredictOutput(predictions) }) # calculate loss unweighted_loss, processed_labels, training_loss, weights = self.calc_loss(logits) # Eval. if self.mode == model_fn.ModeKeys.EVAL: eval_metric_ops = self.eval_metric_ops( labels=processed_labels, logits=logits, logistic=logistic, scores=scores, class_ids=class_ids, unweighted_loss=unweighted_loss, weights=weights) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, predictions=predictions, loss=training_loss, eval_metric_ops=eval_metric_ops) # Train return self.train_fn(training_loss, unweighted_loss, weights, predictions)
def softmax_classifier(tensor_in, labels, weights, biases, class_weight=None, name=None): """Returns prediction and loss for softmax classifier. This function returns "probabilities" and a cross entropy loss. To obtain predictions, use `tf.argmax` on the returned probabilities. This function requires labels to be passed in one-hot encoding. Args: tensor_in: Input tensor, [batch_size, feature_size], features. labels: Tensor, [batch_size, n_classes], one-hot labels of the output classes. weights: Tensor, [batch_size, feature_size], linear transformation matrix. biases: Tensor, [batch_size], biases. class_weight: Tensor, optional, [n_classes], weight for each class. If not given, all classes are supposed to have weight one. name: Operation name. Returns: `tuple` of softmax predictions and loss `Tensor`s. """ with ops.name_scope(name, 'softmax_classifier', [tensor_in, labels]): logits = nn.xw_plus_b(tensor_in, weights, biases) if class_weight is not None: logits = math_ops.multiply(logits, class_weight) return nn.softmax(logits), losses.softmax_cross_entropy(labels, logits)
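# A hedged usage sketch of the same computation with public TF 2.x ops:
# a linear transform followed by softmax probabilities and a one-hot
# cross-entropy loss. `features`, `weights`, `biases`, and `labels` are
# made-up example tensors, not values produced by the function above.
import tensorflow as tf

features = tf.random.normal((16, 20))              # [batch_size, feature_size]
weights = tf.Variable(tf.random.normal((20, 3)))   # [feature_size, n_classes]
biases = tf.Variable(tf.zeros((3,)))               # [n_classes]
labels = tf.one_hot(tf.random.uniform((16,), maxval=3, dtype=tf.int32), depth=3)

logits = tf.linalg.matmul(features, weights) + biases
probs = tf.nn.softmax(logits)
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))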
def softmax_classifier(tensor_in, labels, weights, biases, class_weight=None, name=None): """Returns prediction and loss for softmax classifier. Args: tensor_in: Input tensor, [batch_size, feature_size], features. labels: Tensor, [batch_size, n_classes], labels of the output classes. weights: Tensor, [batch_size, feature_size], linear transformation matrix. biases: Tensor, [batch_size], biases. class_weight: Tensor, optional, [n_classes], weight for each class. If not given, all classes are supposed to have weight one. name: Operation name. Returns: Prediction and loss tensors. """ with ops.op_scope([tensor_in, labels], name, "softmax_classifier"): logits = nn.xw_plus_b(tensor_in, weights, biases) if class_weight is not None: logits = math_ops.mul(logits, class_weight) return nn.softmax(logits), loss_ops.softmax_cross_entropy( logits, labels)
def _logits_to_predictions(self, logits): """Returns a dict of predictions. Args: logits: logits `Output` after applying possible centered bias. Returns: Dict of prediction `Output` keyed by `PredictionKey`. """ with ops.name_scope(None, "predictions", (logits, )): two_class_logits = _one_class_to_two_class_logits(logits) return { prediction_key.PredictionKey.LOGITS: logits, prediction_key.PredictionKey.LOGISTIC: math_ops.sigmoid(logits, name=prediction_key.PredictionKey.LOGISTIC), prediction_key.PredictionKey.PROBABILITIES: nn.softmax(two_class_logits, name=prediction_key.PredictionKey.PROBABILITIES), prediction_key.PredictionKey.CLASSES: math_ops.argmax(two_class_logits, 1, name=prediction_key.PredictionKey.CLASSES) }
def softmax(x, axis=-1): """Softmax converts a real vector to a vector of categorical probabilities. The elements of the output vector are in range (0, 1) and sum to 1. Each vector is handled independently. The `axis` argument sets which axis of the input the function is applied along. Softmax is often used as the activation for the last layer of a classification network because the result could be interpreted as a probability distribution. The softmax of each vector x is computed as `exp(x) / tf.reduce_sum(exp(x))`. The input values in are the log-odds of the resulting probability. Args: x : Input tensor. axis: Integer, axis along which the softmax normalization is applied. Returns: Tensor, output of softmax transformation (all values are non-negative and sum to 1). """ output = nn.softmax(x, axis=axis) # Cache the logits to use for crossentropy loss. output._keras_logits = x # pylint: disable=protected-access return output
def _convert_to_estimator_model_result(self, logits_fn_result): logits, loss, train_op = logits_fn_result return { Classifier.CLASS_OUTPUT: math_ops.argmax(logits, len(logits.get_shape()) - 1), Classifier.PROBABILITY_OUTPUT: nn.softmax(logits) }, loss, train_op
def softmax_classifier(tensor_in, labels, weights, biases, class_weight=None, name=None): """Returns prediction and loss for softmax classifier. This function returns "probabilities" and a cross entropy loss. To obtain predictions, use `tf.argmax` on the returned probabilities. This function requires labels to be passed in one-hot encoding. Args: tensor_in: Input tensor, [batch_size, feature_size], features. labels: Tensor, [batch_size, n_classes], one-hot labels of the output classes. weights: Tensor, [batch_size, feature_size], linear transformation matrix. biases: Tensor, [batch_size], biases. class_weight: Tensor, optional, [n_classes], weight for each class. If not given, all classes are supposed to have weight one. name: Operation name. Returns: `tuple` of softmax predictions and loss `Tensor`s. """ with ops.name_scope(name, 'softmax_classifier', [tensor_in, labels]): logits = nn.xw_plus_b(tensor_in, weights, biases) if class_weight is not None: logits = math_ops.multiply(logits, class_weight) return nn.softmax(logits), loss_ops.softmax_cross_entropy( logits, labels)
def softmax(x, axis=-1): """The softmax activation function transforms the outputs so that all values are in range (0, 1) and sum to 1. It is often used as the activation for the last layer of a classification network because the result could be interpreted as a probability distribution. The softmax of x is calculated by exp(x)/tf.reduce_sum(exp(x)). Arguments: x : Input tensor. axis: Integer, axis along which the softmax normalization is applied. Returns: Tensor, output of softmax transformation (all values are non-negative and sum to 1). Raises: ValueError: In case `dim(x) == 1`. """ ndim = K.ndim(x) if ndim == 2: return nn.softmax(x) elif ndim > 2: e = math_ops.exp(x - math_ops.reduce_max(x, axis=axis, keepdims=True)) s = math_ops.reduce_sum(e, axis=axis, keepdims=True) return e / s else: raise ValueError('Cannot apply softmax to a tensor that is 1D. ' 'Received input: %s' % (x, ))
def _convert_to_estimator_model_result(self, logits_fn_result): logits, loss, train_op = logits_fn_result return { 'classes': math_ops.argmax(logits, len(logits.get_shape()) - 1), 'probabilities': nn.softmax(logits) }, loss, train_op
def softmax(x, axis=-1): """The softmax activation function transforms the outputs so that all values are in range (0, 1) and sum to 1. It is often used as the activation for the last layer of a classification network because the result could be interpreted as a probability distribution. The softmax of x is calculated by exp(x)/tf.reduce_sum(exp(x)). Arguments: x : Input tensor. axis: Integer, axis along which the softmax normalization is applied. Returns: Tensor, output of softmax transformation (all values are non-negative and sum to 1). Raises: ValueError: In case `dim(x) == 1`. """ ndim = K.ndim(x) if ndim == 2: return nn.softmax(x) elif ndim > 2: e = math_ops.exp(x - math_ops.reduce_max(x, axis=axis, keepdims=True)) s = math_ops.reduce_sum(e, axis=axis, keepdims=True) return e / s else: raise ValueError('Cannot apply softmax to a tensor that is 1D. ' 'Received input: %s' % (x,))
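# Sketch of applying softmax along a chosen axis of a rank-3 tensor, which is
# what the `ndim > 2` branch above computes by hand; with public TF this is
# just tf.nn.softmax with an explicit `axis`. The example tensor is made up.
import tensorflow as tf

x = tf.random.normal((2, 5, 7))            # e.g. [batch, time, classes]
over_time = tf.nn.softmax(x, axis=1)       # distributions over the 5 time steps
over_classes = tf.nn.softmax(x, axis=-1)   # distributions over the 7 classes
# tf.reduce_sum(over_time, axis=1) and tf.reduce_sum(over_classes, axis=-1)
# are both (numerically close to) tensors of ones.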
def _apply_scores(self, scores, value, scores_mask=None): """Applies attention scores to the given value tensor. To use this method in your attention layer, follow the steps: * Use `query` tensor of shape `[batch_size, Tq]` and `key` tensor of shape `[batch_size, Tv]` to calculate the attention `scores`. * Pass `scores` and `value` tensors to this method. The method applies `scores_mask`, calculates `attention_distribution = softmax(scores)`, then returns `matmul(attention_distribution, value)`. * Apply `query_mask` and return the result. Args: scores: Scores float tensor of shape `[batch_size, Tq, Tv]`. value: Value tensor of shape `[batch_size, Tv, dim]`. scores_mask: A boolean mask `Tensor` of shape `[batch_size, 1, Tv]` or `[batch_size, Tq, Tv]`. If given, scores at positions where `scores_mask==False` do not contribute to the result. It must contain at least one `True` value in each line along the last dimension. Returns: Tensor of shape `[batch_size, Tq, dim]`. """ if scores_mask is not None: padding_mask = math_ops.logical_not(scores_mask) # Bias so padding positions do not contribute to attention distribution. scores -= 1.e9 * math_ops.cast(padding_mask, dtype=K.floatx()) attention_distribution = nn.softmax(scores) return math_ops.matmul(attention_distribution, value)
def get_attention_scores(self, inputs, mask=None): self._validate_call_args(inputs=inputs, mask=mask) q = inputs[0] v = inputs[1] k = inputs[2] if len(inputs) > 2 else v q_mask = mask[0] if mask else None v_mask = mask[1] if mask else None scores = self._calculate_scores(query=q, key=k) if v_mask is not None: # Mask of shape [batch_size, 1, Tv]. v_mask = array_ops.expand_dims(v_mask, axis=-2) if self.causal: # Creates a lower triangular mask, so position i cannot attend to # positions j>i. This prevents the flow of information from the future # into the past. scores_shape = array_ops.shape(scores) # causal_mask_shape = [1, Tq, Tv]. causal_mask_shape = array_ops.concat( [array_ops.ones_like(scores_shape[:-2]), scores_shape[-2:]], axis=0) causal_mask = _lower_triangular_mask(causal_mask_shape) else: causal_mask = None scores_mask = _merge_masks(v_mask, causal_mask) if scores_mask is not None: padding_mask = math_ops.logical_not(scores_mask) scores -= 1.e9 * math_ops.cast(padding_mask, dtype=K.floatx()) attention_distribution = nn.softmax(scores) return attention_distribution
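# Hedged sketch of the causal-mask idea above using public TF ops: build a
# lower-triangular [Tq, Tv] mask so position i can only attend to j <= i,
# then bias the masked-out scores before the softmax. Shapes are example
# values, not taken from the method above.
import tensorflow as tf

batch, Tq, Tv = 2, 4, 4
scores = tf.random.normal((batch, Tq, Tv))
causal_mask = tf.cast(
    tf.linalg.band_part(tf.ones((Tq, Tv)), -1, 0), tf.bool)  # lower triangle incl. diagonal
scores -= 1.e9 * tf.cast(tf.logical_not(causal_mask), scores.dtype)
attention_distribution = tf.nn.softmax(scores)  # no weight on future positions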
def logits_to_predictions(self, logits, proba=False): if self.num_label_columns == 1: logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) if proba: return nn.softmax(logits) else: return math_ops.argmax(logits, 1)
def logits_to_predictions(self, logits, proba=False): if self.num_label_columns == 1: logits = array_ops.concat([array_ops.zeros_like(logits), logits], 1) if proba: return nn.softmax(logits) else: return math_ops.argmax(logits, 1)
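# Why concatenating a column of zeros works in the binary case: for a single
# logit z, softmax([0, z]) = [1 - sigmoid(z), sigmoid(z)], so the two-column
# softmax reproduces the binary sigmoid probabilities. Small self-contained
# check with made-up logits:
import tensorflow as tf

logits = tf.constant([[-2.0], [0.3], [1.7]])                     # [batch, 1]
two_class_logits = tf.concat([tf.zeros_like(logits), logits], axis=1)
probs = tf.nn.softmax(two_class_logits)                          # [batch, 2]
# probs[:, 1] matches tf.sigmoid(logits)[:, 0] up to float error.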
def _logits_to_predictions(self, logits, proba=False): if self._n_classes == 2: logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) if proba: return nn.softmax(logits) else: return math_ops.argmax(logits, 1)
def model_fn(X, Y_one_hot): with tf.variable_scope("logistic_regression"): weights = tf.get_variable('weights', [n_features, n_classes]) bias = tf.get_variable('bias', [n_classes]) logits = nn.xw_plus_b(X, weights, bias) y_probs = nn.softmax(logits) loss = loss_ops.softmax_cross_entropy(logits, Y_one_hot) return y_probs, loss
def predictions(self, logits, keys=None): """Return predictions based on keys. See `base_head.Head` for details. Args: logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`. For many applications, the shape is `[batch_size, logits_dimension]`. keys: a list or tuple of prediction keys. Each key can be either the class variable of prediction_keys.PredictionKeys or its string value, such as: prediction_keys.PredictionKeys.CLASSES or 'classes'. If not specified, it will return the predictions for all valid keys. Returns: A dict of predictions. """ pred_keys = prediction_keys.PredictionKeys valid_keys = [ pred_keys.LOGITS, pred_keys.PROBABILITIES, pred_keys.CLASS_IDS, pred_keys.CLASSES, pred_keys.ALL_CLASS_IDS, pred_keys.ALL_CLASSES ] if keys: base_head.check_prediction_keys(keys, valid_keys) else: keys = valid_keys logits = base_head.check_logits_final_dim(logits, self.logits_dimension) predictions = {} with ops.name_scope('predictions', values=(logits, )): if pred_keys.LOGITS in keys: predictions[pred_keys.LOGITS] = logits if pred_keys.PROBABILITIES in keys: probabilities = nn.softmax(logits, name=pred_keys.PROBABILITIES) predictions[pred_keys.PROBABILITIES] = probabilities if pred_keys.CLASS_IDS in keys or pred_keys.CLASSES in keys: # class_ids's shape is [D0, D1, ... DN]. class_ids = math_ops.argmax(logits, axis=-1, name=pred_keys.CLASS_IDS) # Expand to [batch_size, 1]. class_ids = array_ops.expand_dims(class_ids, axis=-1) if pred_keys.CLASS_IDS in keys: predictions[pred_keys.CLASS_IDS] = class_ids if pred_keys.CLASSES in keys: if self._label_vocabulary: classes = self._class_string_table.lookup(class_ids) else: classes = string_ops.as_string(class_ids, name='str_classes') predictions[pred_keys.CLASSES] = classes if pred_keys.ALL_CLASS_IDS in keys: predictions[pred_keys.ALL_CLASS_IDS] = base_head.all_class_ids( logits, n_classes=self._n_classes) if pred_keys.ALL_CLASSES in keys: predictions[pred_keys.ALL_CLASSES] = base_head.all_classes( logits, n_classes=self._n_classes, label_vocabulary=self._label_vocabulary) return predictions
def _logits_to_prediction(self, logits=None): predictions = {PredictionKey.LOGITS: logits} if self.logits_dimension == 1: predictions[PredictionKey.LOGISTIC] = math_ops.sigmoid(logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[PredictionKey.PROBABILITIES] = nn.softmax(logits) predictions[PredictionKey.CLASSES] = math_ops.argmax(logits, 1) return predictions
def testGradient(self): x_shape = [5, 10] x_np = np.random.randn(*x_shape).astype(np.float64) with self.test_session(): x_tf = constant_op.constant(x_np) y_tf = nn.softmax(x_tf) err = gc.ComputeGradientError(x_tf, x_shape, y_tf, x_shape) eps = 1e-8 self.assertLess(err, eps)
def _predictions(logits, n_classes): """Returns predictions for the given logits and n_classes.""" predictions = {} if n_classes == 2: predictions[_LOGISTIC] = math_ops.sigmoid(logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = array_ops.reshape(math_ops.argmax(logits, 1), shape=(-1, 1)) return predictions
def _logits_to_prediction(self, logits=None): predictions = {PedictionKey.LOGITS: logits} if self.logits_dimension == 1: predictions[PedictionKey.LOGISTIC] = math_ops.sigmoid(logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[PedictionKey.PROBABILITIES] = nn.softmax(logits) # Workaround for argmax dropping the second dimension. predictions[PedictionKey.CLASSES] = array_ops.expand_dims( math_ops.argmax(logits, 1), 1) return predictions
def testSoftmax(self): x_shape = [5, 10] x_np = np.random.randn(*x_shape).astype(np.float32) y_np = self._softmax(x_np) with self.test_session(): x_tf = constant_op.constant(x_np) y_tf = nn.softmax(x_tf) y_tf_np = y_tf.eval() eps = 1e-3 self.assertAllClose(y_tf_np, y_np, eps)
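# The `_softmax` reference helper used by the tests above is not shown here;
# a common numerically stable NumPy version subtracts the row max before
# exponentiating, which leaves the result unchanged but avoids overflow.
import numpy as np

def softmax_reference(x):
  """Row-wise softmax of a 2-D array along the last axis."""
  shifted = x - np.max(x, axis=-1, keepdims=True)
  e = np.exp(shifted)
  return e / np.sum(e, axis=-1, keepdims=True)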
def softmax(x, axis=-1): """Softmax converts a vector of values to a probability distribution. The elements of the output vector are in range (0, 1) and sum to 1. Each vector is handled independently. The `axis` argument sets which axis of the input the function is applied along. Softmax is often used as the activation for the last layer of a classification network because the result could be interpreted as a probability distribution. The softmax of each vector x is computed as `exp(x) / tf.reduce_sum(exp(x))`. The input values in are the log-odds of the resulting probability. Args: x : Input tensor. axis: Integer, axis along which the softmax normalization is applied. Returns: Tensor, output of softmax transformation (all values are non-negative and sum to 1). Examples: **Example 1: standalone usage** >>> inputs = tf.random.normal(shape=(32, 10)) >>> outputs = tf.keras.activations.softmax(inputs) >>> tf.reduce_sum(outputs[0, :]) # Each sample in the batch now sums to 1 <tf.Tensor: shape=(), dtype=float32, numpy=1.0000001> **Example 2: usage in a `Dense` layer** >>> layer = tf.keras.layers.Dense(32, activation=tf.keras.activations.softmax) """ if x.shape.rank > 1: if isinstance(axis, int): output = nn.softmax(x, axis=axis) else: # nn.softmax does not support tuple axis. e = math_ops.exp(x - math_ops.reduce_max(x, axis=axis, keepdims=True)) s = math_ops.reduce_sum(e, axis=axis, keepdims=True) output = e / s else: raise ValueError('Cannot apply softmax to a tensor that is 1D. ' 'Received input: %s' % (x, )) # Cache the logits to use for crossentropy loss. output._keras_logits = x # pylint: disable=protected-access return output
def softmax(x): """Softmax activation function. Applies the softmax function along the last dimension of the input tensor. Args: x: A 2D Tensor or variable. Returns: A 2D tensor whose (i, j)th element is obtained by applying the softmax function to row i of the input. """ return nn.softmax(x)
def create_estimator_spec(self): # Predict. with ops.name_scope('head'): with ops.name_scope(None, 'predictions', (self.logits_combine, )): dnn_logits = head_lib._check_logits(self.logits_combine, self.logits_dimension) item_id_net = self.item_id_net if (self.mode == model_fn.ModeKeys.EVAL): logits = tf.reduce_sum(tf.multiply(dnn_logits, item_id_net), reduction_indices=1, keep_dims=True) #logits = tf.reduce_sum(tf.multiply(dnn_logits, item_id_net), reduction_indices=1, keep_dims = True) + tf.reduce_sum(self.bias_net, axis = 1, keep_dims = True) else: logits = tf.reduce_sum( tf.multiply(dnn_logits, item_id_net), reduction_indices=1, keep_dims=True) + tf.reduce_sum( self.bias_net, axis=1, keep_dims=True) logistic = math_ops.sigmoid(logits, name='logistic') two_class_logits = array_ops.concat( (array_ops.zeros_like(logits), logits), 1, name='two_class_logits') scores = nn.softmax(two_class_logits, name='probabilities') class_ids = array_ops.reshape(math_ops.argmax(two_class_logits, axis=1), (-1, 1), name='classes') classes = string_ops.as_string(class_ids, name='str_classes') predictions = { 'logits': logits, 'logistic': logistic, 'probabilities': scores, 'class_ids': class_ids, 'classes': classes, } # calculate loss unweighted_loss, processed_labels, training_loss, weights = self.calc_loss( logits) # Eval. if self.mode == model_fn.ModeKeys.EVAL: return model_fn.EstimatorSpec(mode=model_fn.ModeKeys.EVAL, predictions=predictions, loss=training_loss) # Train return self.train_fn(training_loss, unweighted_loss, weights, predictions)
def predictions(self, logits, keys=None): """Return the predictions based on keys. Args: logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`. For many applications, the shape is `[batch_size, logits_dimension]`. keys: a list or tuple of prediction keys. Key can be either the class variable of prediction_keys.PredictionKeys or its string value, such as: prediction_keys.PredictionKeys.CLASSES or 'classes'. If not specified, it will return the predictions for all valid keys. Returns: A dict of predictions. """ pred_keys = prediction_keys.PredictionKeys valid_keys = [pred_keys.LOGITS, pred_keys.LOGISTIC, pred_keys.PROBABILITIES, pred_keys.CLASS_IDS, pred_keys.CLASSES] if keys: base_head.check_prediction_keys(keys, valid_keys) else: keys = valid_keys logits = base_head.check_logits_final_dim(logits, self.logits_dimension) predictions = {} with ops.name_scope('predictions', values=(logits,)): if pred_keys.LOGITS in keys: predictions[pred_keys.LOGITS] = logits if pred_keys.LOGISTIC in keys: logistic = math_ops.sigmoid(logits, name=pred_keys.LOGISTIC) predictions[pred_keys.LOGISTIC] = logistic two_class_logits = array_ops.concat( (array_ops.zeros_like(logits), logits), axis=-1, name='two_class_logits') if pred_keys.PROBABILITIES in keys: probabilities = nn.softmax( two_class_logits, name=pred_keys.PROBABILITIES) predictions[pred_keys.PROBABILITIES] = probabilities if pred_keys.CLASS_IDS in keys or pred_keys.CLASSES in keys: class_ids = math_ops.argmax( two_class_logits, axis=-1, name=pred_keys.CLASS_IDS) class_ids = array_ops.expand_dims(class_ids, axis=-1) if pred_keys.CLASS_IDS in keys: predictions[pred_keys.CLASS_IDS] = class_ids if pred_keys.CLASSES in keys: if self._label_vocabulary is not None: classes = self._class_string_table.lookup(class_ids) else: classes = string_ops.as_string(class_ids, name='str_classes') predictions[pred_keys.CLASSES] = classes return predictions
def _logits_to_predictions(self, logits): """Returns a dict of predictions. Args: logits: logits `Tensor` after applying possible centered bias. Returns: Dict of prediction `Tensor` keyed by `PredictionKey`. """ predictions = {prediction_key.PredictionKey.LOGITS: logits} predictions[prediction_key.PredictionKey.PROBABILITIES] = nn.softmax( logits) predictions[prediction_key.PredictionKey.CLASSES] = math_ops.argmax( logits, 1) return predictions
def sdca_classifier_model_fn(features, targets, mode, params): """Estimator's linear model_fn.""" feature_columns = params["feature_columns"] optimizer = params["optimizer"] weight_column_name = params["weight_column_name"] loss_type = params["loss_type"] enable_centered_bias = params.get("enable_centered_bias", True) if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer): raise ValueError("Optimizer must be of type SDCAOptimizer") loss_fn = { "logistic_loss": _log_loss_with_two_classes, "hinge_loss": _hinge_loss, }[loss_type] logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=1)) train_feature_columns = _maybe_add_bias_column(feature_columns, features, bias, targets, enable_centered_bias, columns_to_variables) loss = None if mode != estimator.ModeKeys.INFER: loss = math_ops.reduce_mean(loss_fn(logits, targets), name="loss") train_op = None if mode == estimator.ModeKeys.TRAIN: global_step = contrib_variables.get_global_step() # TODO(zoy): Combine linear_feature_columns and columns_to_variables. train_op = optimizer.get_train_step(train_feature_columns, weight_column_name, loss_type, features, targets, columns_to_variables, global_step) predictions = {} predictions[_LOGISTIC] = math_ops.sigmoid(logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) return predictions, loss, train_op
def _logits_to_predictions(self, logits): """Returns a dict of predictions. Args: logits: logits `Tensor` after applying possible centered bias. Returns: Dict of prediction `Tensor` keyed by `PredictionKey`. """ with ops.name_scope(None, "predictions", (logits,)): return { prediction_key.PredictionKey.LOGITS: logits, prediction_key.PredictionKey.PROBABILITIES: nn.softmax( logits, name=prediction_key.PredictionKey.PROBABILITIES), prediction_key.PredictionKey.CLASSES: math_ops.argmax( logits, 1, name=prediction_key.PredictionKey.CLASSES) }
def sdca_classifier_model_fn(features, targets, mode, params): """Estimator's linear model_fn.""" feature_columns = params["feature_columns"] optimizer = params["optimizer"] weight_column_name = params["weight_column_name"] loss_type = params["loss_type"] enable_centered_bias = params.get("enable_centered_bias", True) if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer): raise ValueError("Optimizer must be of type SDCAOptimizer") loss_fn = { "logistic_loss": _log_loss_with_two_classes, "hinge_loss": _hinge_loss, }[loss_type] logits, columns_to_variables, bias = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=1)) if enable_centered_bias: _add_bias_column(feature_columns, features, bias, targets, columns_to_variables) loss = None if mode != estimator.ModeKeys.INFER: loss = math_ops.reduce_mean(loss_fn(logits, targets), name="loss") logging_ops.scalar_summary("loss", loss) train_op = None if mode == estimator.ModeKeys.TRAIN: global_step = contrib_variables.get_global_step() train_op = optimizer.get_train_step( columns_to_variables, weight_column_name, loss_type, features, targets, global_step) predictions = {} predictions[_LOGISTIC] = math_ops.sigmoid(logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) return predictions, loss, train_op
def _logits_to_predictions(self, logits): """Returns a dict of predictions. Args: logits: logits `Output` after applying possible centered bias. Returns: Dict of prediction `Output` keyed by `PredictionKey`. """ predictions = {prediction_key.PredictionKey.LOGITS: logits} predictions[prediction_key.PredictionKey.LOGISTIC] = math_ops.sigmoid( logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[prediction_key.PredictionKey.PROBABILITIES] = nn.softmax( logits) predictions[prediction_key.PredictionKey.CLASSES] = math_ops.argmax( logits, 1) return predictions
def model_fn(features, labels, mode, params): """The model_fn argument for creating an Estimator.""" model = Model(params["data_format"]) image = features if isinstance(image, dict): image = features["image"] if mode == estimator.ModeKeys.PREDICT: logits = model(image, training=False) predictions = { "classes": math_ops.argmax(logits, axis=1), "probabilities": nn.softmax(logits), } return estimator.EstimatorSpec( mode=estimator.ModeKeys.PREDICT, predictions=predictions, export_outputs={ "classify": estimator.export.PredictOutput(predictions) }) elif mode == estimator.ModeKeys.TRAIN: optimizer = train.AdamOptimizer(learning_rate=1e-4) logits = model(image, training=True) loss = losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) return estimator.EstimatorSpec( mode=estimator.ModeKeys.TRAIN, loss=loss, train_op=optimizer.minimize(loss, train.get_or_create_global_step())) elif mode == estimator.ModeKeys.EVAL: logits = model(image, training=False) loss = losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) return estimator.EstimatorSpec( mode=estimator.ModeKeys.EVAL, loss=loss, eval_metric_ops={ "accuracy": ops.metrics.accuracy( labels=labels, predictions=math_ops.argmax(logits, axis=1)), })
def _logits_to_predictions(self, logits): """Returns a dict of predictions. Args: logits: logits `Output` after applying possible centered bias. Returns: Dict of prediction `Output` keyed by `PredictionKey`. """ with ops.name_scope(None, "predictions", (logits,)): two_class_logits = _one_class_to_two_class_logits(logits) return { prediction_key.PredictionKey.LOGITS: logits, prediction_key.PredictionKey.LOGISTIC: math_ops.sigmoid( logits, name=prediction_key.PredictionKey.LOGISTIC), prediction_key.PredictionKey.PROBABILITIES: nn.softmax( two_class_logits, name=prediction_key.PredictionKey.PROBABILITIES), prediction_key.PredictionKey.CLASSES: math_ops.argmax( two_class_logits, 1, name=prediction_key.PredictionKey.CLASSES) }
def softmax(x, axis=-1): """Softmax activation function. Arguments: x : Tensor. axis: Integer, axis along which the softmax normalization is applied. Returns: Tensor, output of softmax transformation. Raises: ValueError: In case `dim(x) == 1`. """ ndim = K.ndim(x) if ndim == 2: return nn.softmax(x) elif ndim > 2: e = math_ops.exp(x - math_ops.reduce_max(x, axis=axis, keepdims=True)) s = math_ops.reduce_sum(e, axis=axis, keepdims=True) return e / s else: raise ValueError('Cannot apply softmax to a tensor that is 1D')
def create_estimator_spec( self, features, mode, logits, labels=None, train_op_fn=None): """See `Head`.""" with ops.name_scope('head'): logits = _check_logits(logits, self.logits_dimension) # Predict. pred_keys = prediction_keys.PredictionKeys with ops.name_scope(None, 'predictions', (logits,)): # class_ids's shape is [batch_size] class_ids = math_ops.argmax(logits, 1, name=pred_keys.CLASS_IDS) class_ids = array_ops.expand_dims(class_ids, axis=(1,)) if self._label_vocabulary: table = lookup_ops.index_to_string_table_from_tensor( vocabulary_list=self._label_vocabulary, name='class_string_lookup') classes = table.lookup(class_ids) else: classes = string_ops.as_string(class_ids, name='str_classes') probabilities = nn.softmax(logits, name=pred_keys.PROBABILITIES) predictions = { pred_keys.LOGITS: logits, pred_keys.PROBABILITIES: probabilities, # Expand to [batch_size, 1] pred_keys.CLASS_IDS: class_ids, pred_keys.CLASSES: classes, } if mode == model_fn.ModeKeys.PREDICT: batch_size = array_ops.shape(probabilities)[0] export_class_list = self._label_vocabulary if not export_class_list: export_class_list = string_ops.as_string( math_ops.range(self._n_classes)) export_output_classes = array_ops.tile( input=array_ops.expand_dims(input=export_class_list, axis=0), multiples=[batch_size, 1]) classifier_output = export_output.ClassificationOutput( scores=probabilities, # `ClassificationOutput` requires string classes. classes=export_output_classes) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.PREDICT, predictions=predictions, export_outputs={ _DEFAULT_SERVING_KEY: classifier_output, _CLASSIFY_SERVING_KEY: classifier_output, _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions) }) # Eval. unweighted_loss, label_ids = self.create_loss( features=features, mode=mode, logits=logits, labels=labels) weights = _weights(features, self._weight_column) training_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) if mode == model_fn.ModeKeys.EVAL: return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, predictions=predictions, loss=training_loss, eval_metric_ops=self._eval_metric_ops( labels=label_ids, probabilities=probabilities, logits=logits, class_ids=class_ids, unweighted_loss=unweighted_loss, weights=weights)) # Train. if train_op_fn is None: raise ValueError('train_op_fn can not be None.') with ops.name_scope(''): summary.scalar( _summary_key(self._name, metric_keys.MetricKeys.LOSS), training_loss) summary.scalar( _summary_key(self._name, metric_keys.MetricKeys.LOSS_MEAN), losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.MEAN)) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.TRAIN, predictions=predictions, loss=training_loss, train_op=train_op_fn(training_loss))
def _linear_classifier_model_fn(features, targets, mode, params): """Estimator's linear model_fn.""" n_classes = params["n_classes"] weight_column_name = params["weight_column_name"] feature_columns = params["feature_columns"] optimizer = params["optimizer"] gradient_clip_norm = params.get("gradient_clip_norm", None) enable_centered_bias = params.get("enable_centered_bias", True) num_ps_replicas = params.get("num_ps_replicas", 0) joint_weights = params.get("joint_weights", False) if not isinstance(features, dict): features = {"": features} num_label_columns = 1 if n_classes == 2 else n_classes loss_fn = _softmax_cross_entropy_loss if n_classes == 2: loss_fn = _log_loss_with_two_classes feat_values = (features.values() if isinstance(features, dict) else [features]) partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_op_scope( feat_values, "linear", partitioner=partitioner) as scope: if joint_weights: logits, _, _ = ( layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=num_label_columns, weight_collections=["linear"], scope=scope)) else: logits, _, _ = ( layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=num_label_columns, weight_collections=["linear"], scope=scope)) if enable_centered_bias: logits = nn.bias_add(logits, _centered_bias(num_label_columns)) loss = None if mode != estimator.ModeKeys.INFER: loss = loss_fn(logits, targets) if weight_column_name: weight_tensor = array_ops.reshape( math_ops.to_float(features[weight_column_name]), shape=(-1,)) loss = _weighted_loss(loss, weight_tensor) else: loss = math_ops.reduce_mean(loss, name="loss") logging_ops.scalar_summary("loss", loss) train_ops = [] if mode == estimator.ModeKeys.TRAIN: global_step = contrib_variables.get_global_step() my_vars = ops.get_collection("linear") grads = gradients.gradients(loss, my_vars) if gradient_clip_norm: grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm) train_ops.append(optimizer.apply_gradients( zip(grads, my_vars), global_step=global_step)) if enable_centered_bias: train_ops.append( _centered_bias_step(targets, loss_fn, num_label_columns)) predictions = {} if n_classes == 2: predictions[_LOGISTIC] = math_ops.sigmoid(logits) logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) return predictions, loss, control_flow_ops.group(*train_ops)
def get_logits_and_prob( logits=None, p=None, multidimensional=False, validate_args=False, name="GetLogitsAndProb"): """Converts logits to probabilities and vice-versa, and returns both. Args: logits: Numeric `Tensor` representing log-odds. p: Numeric `Tensor` representing probabilities. multidimensional: `Boolean`, default `False`. If `True`, represents whether the last dimension of `logits` or `p`, a [N1, N2, ... k] dimensional tensor, represent the logits / probability between k classes. For `p`, this will additionally assert that the values in the last dimension sum to one. If `False`, this will instead assert that each value of `p` is in `[0, 1]`, and will do nothing to `logits`. validate_args: `Boolean`, default `False`. Whether to assert `0 <= p <= 1` if multidimensional is `False`, otherwise that the last dimension of `p` sums to one. name: A name for this operation (optional). Returns: Tuple with `logits` and `p`. If `p` has an entry that is `0` or `1`, then the corresponding entry in the returned logits will be `-Inf` and `Inf` respectively. Raises: ValueError: if neither `p` nor `logits` were passed in, or both were. """ with ops.name_scope(name, values=[p, logits]): if p is None and logits is None: raise ValueError("Must pass p or logits.") elif p is not None and logits is not None: raise ValueError("Must pass either p or logits, not both.") elif p is None: logits = array_ops.identity(logits, name="logits") with ops.name_scope("p"): if multidimensional: p = nn.softmax(logits) else: p = math_ops.sigmoid(logits) elif logits is None: with ops.name_scope("p"): p = array_ops.identity(p) if validate_args: one = constant_op.constant(1., p.dtype) dependencies = [check_ops.assert_non_negative(p)] if multidimensional: dependencies += [assert_close( math_ops.reduce_sum(p, reduction_indices=[-1]), one, message="p does not sum to 1.")] else: dependencies += [check_ops.assert_less_equal( p, one, message="p has components greater than 1.")] p = control_flow_ops.with_dependencies(dependencies, p) with ops.name_scope("logits"): if multidimensional: # Here we don't compute the multidimensional case, in a manner # consistent with respect to the unidimensional case. We do so # following the TF convention. Typically, you might expect to see # logits = log(p) - log(gather(p, pivot)). A side-effect of being # consistent with the TF approach is that the unidimensional case # implicitly handles the second dimension but the multidimensional # case explicitly keeps the pivot dimension. logits = math_ops.log(p) else: logits = math_ops.log(p) - math_ops.log(1. - p) return (logits, p)
def create_estimator_spec( self, features, mode, logits, labels=None, train_op_fn=None): """See `Head`.""" # Predict. with ops.name_scope('head'): with ops.name_scope(None, 'predictions', (logits,)): pred_keys = prediction_keys.PredictionKeys logits = _check_logits(logits, self.logits_dimension) logistic = math_ops.sigmoid(logits, name=pred_keys.LOGISTIC) two_class_logits = array_ops.concat( (array_ops.zeros_like(logits), logits), 1, name='two_class_logits') scores = nn.softmax(two_class_logits, name=pred_keys.PROBABILITIES) class_ids = array_ops.reshape( math_ops.argmax(two_class_logits, axis=1), (-1, 1), name='classes') if self._label_vocabulary: table = lookup_ops.index_to_string_table_from_tensor( vocabulary_list=self._label_vocabulary, name='class_string_lookup') classes = table.lookup(class_ids) else: classes = string_ops.as_string(class_ids, name='str_classes') predictions = { pred_keys.LOGITS: logits, pred_keys.LOGISTIC: logistic, pred_keys.PROBABILITIES: scores, pred_keys.CLASS_IDS: class_ids, pred_keys.CLASSES: classes, } if mode == model_fn.ModeKeys.PREDICT: batch_size = array_ops.shape(logistic)[0] export_class_list = self._label_vocabulary if not export_class_list: export_class_list = string_ops.as_string([0, 1]) export_output_classes = array_ops.tile( input=array_ops.expand_dims(input=export_class_list, axis=0), multiples=[batch_size, 1]) classifier_output = export_output.ClassificationOutput( scores=scores, # `ClassificationOutput` requires string classes. classes=export_output_classes) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.PREDICT, predictions=predictions, export_outputs={ '': classifier_output, # to be same as other heads. 'classification': classifier_output, # to be called by name. _DEFAULT_SERVING_KEY: classifier_output, # default 'regression': export_output.RegressionOutput(value=logistic) }) # Eval. labels = _check_labels(_maybe_expand_dim(labels), self.logits_dimension) if self._label_vocabulary is not None: labels = lookup_ops.index_table_from_tensor( vocabulary_list=tuple(self._label_vocabulary), name='class_id_lookup').lookup(labels) labels = math_ops.to_float(labels) labels = _assert_range(labels, 2) unweighted_loss = nn.sigmoid_cross_entropy_with_logits( labels=labels, logits=logits, name='loss') weights = _weights(features, self._weight_column) training_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) if mode == model_fn.ModeKeys.EVAL: return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, predictions=predictions, loss=training_loss, eval_metric_ops=self._eval_metric_ops( labels=labels, logits=logits, logistic=logistic, scores=scores, class_ids=class_ids, unweighted_loss=unweighted_loss, weights=weights)) # Train. if train_op_fn is None: raise ValueError('train_op_fn can not be None.') with ops.name_scope(''): summary.scalar(metric_keys.MetricKeys.LOSS, training_loss) summary.scalar(metric_keys.MetricKeys.LOSS_MEAN, losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.MEAN)) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.TRAIN, predictions=predictions, loss=training_loss, train_op=train_op_fn(training_loss))
def create_estimator_spec( self, features, mode, logits, labels=None, train_op_fn=None): """See `Head`.""" # Predict. with ops.name_scope(self._name, 'head'): with ops.name_scope(None, 'predictions', (logits,)): pred_keys = prediction_keys.PredictionKeys logits = _check_logits_final_dim(logits, self.logits_dimension) logistic = math_ops.sigmoid(logits, name=pred_keys.LOGISTIC) two_class_logits = array_ops.concat( (array_ops.zeros_like(logits), logits), axis=-1, name='two_class_logits') probabilities = nn.softmax( two_class_logits, name=pred_keys.PROBABILITIES) class_ids = math_ops.argmax( two_class_logits, axis=-1, name=pred_keys.CLASS_IDS) class_ids = array_ops.expand_dims(class_ids, axis=-1) if self._label_vocabulary: table = lookup_ops.index_to_string_table_from_tensor( vocabulary_list=self._label_vocabulary, name='class_string_lookup') classes = table.lookup(class_ids) else: classes = string_ops.as_string(class_ids, name='str_classes') predictions = { pred_keys.LOGITS: logits, pred_keys.LOGISTIC: logistic, pred_keys.PROBABILITIES: probabilities, pred_keys.CLASS_IDS: class_ids, pred_keys.CLASSES: classes, } if mode == model_fn.ModeKeys.PREDICT: classifier_output = _classification_output( scores=probabilities, n_classes=2, label_vocabulary=self._label_vocabulary) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.PREDICT, predictions=predictions, export_outputs={ _DEFAULT_SERVING_KEY: classifier_output, _CLASSIFY_SERVING_KEY: classifier_output, _REGRESS_SERVING_KEY: export_output.RegressionOutput( value=logistic), _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions) }) (weighted_sum_loss, example_weight_sum, processed_labels) = self.create_loss( features=features, mode=mode, logits=logits, labels=labels) # Eval. if mode == model_fn.ModeKeys.EVAL: weights = _get_weights_and_check_match_logits( features=features, weight_column=self._weight_column, logits=logits) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, predictions=predictions, loss=weighted_sum_loss, eval_metric_ops=self._eval_metric_ops( labels=processed_labels, logits=logits, logistic=logistic, class_ids=class_ids, weights=weights, weighted_sum_loss=weighted_sum_loss, example_weight_sum=example_weight_sum)) # Train. if train_op_fn is None: raise ValueError('train_op_fn can not be None.') with ops.name_scope(''): summary.scalar( _summary_key(self._name, metric_keys.MetricKeys.LOSS), weighted_sum_loss) summary.scalar( _summary_key(self._name, metric_keys.MetricKeys.LOSS_MEAN), weighted_sum_loss / example_weight_sum) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.TRAIN, predictions=predictions, loss=weighted_sum_loss, train_op=train_op_fn(weighted_sum_loss))
def dnn_sampled_softmax_classifier_model_fn(features, target_indices, mode, params): """model_fn that uses candidate sampling. Args: features: Single Tensor or dict of Tensor (depends on data passed to `fit`) target_indices: A single Tensor of shape [batch_size, n_labels] containing the target indices. mode: Represents if this training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters that are listed below. hidden_units- List of hidden units per layer. All layers are fully connected. Ex. `[64, 32]` means first layer has 64 nodes and second one has 32. feature_columns- An iterable containing all the feature columns used by the model. All items in the set should be instances of classes derived from `FeatureColumn`. n_classes- number of target classes. It must be greater than 2. n_samples- number of sample target classes. Needs to be tuned - A good starting point could be 2% of n_classes. n_labels- number of labels in each example. top_k- The number of classes to predict. optimizer- An instance of `tf.Optimizer` used to train the model. If `None`, will use an Adagrad optimizer. dropout- When not `None`, the probability we will drop out a given coordinate. gradient_clip_norm- A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. See tf.clip_by_global_norm for more details. num_ps_replicas- The number of parameter server replicas. Returns: predictions: A single Tensor or a dict of Tensors. loss: A scalar containing the loss of the step. train_op: The op for training. """ hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] n_classes = params["n_classes"] n_samples = params["n_samples"] n_labels = params["n_labels"] top_k = params["top_k"] optimizer = params["optimizer"] dropout = params["dropout"] gradient_clip_norm = params["gradient_clip_norm"] num_ps_replicas = params["num_ps_replicas"] parent_scope = "dnn_ss" # Setup the input layer partitioner. input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) # Create the input layer. with variable_scope.variable_scope( parent_scope + "/input_from_feature_columns", features.values(), partitioner=input_layer_partitioner) as scope: net = layers.input_from_feature_columns( features, feature_columns, weight_collections=[parent_scope], scope=scope) # Setup the hidden layer partitioner. hidden_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) final_hidden_layer_dim = None # Create hidden layers using fully_connected. for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( parent_scope + "/hiddenlayer_%d" % layer_id, [net], partitioner=hidden_layer_partitioner) as scope: net = layers.fully_connected(net, num_hidden_units, variables_collections=[parent_scope], scope=scope) final_hidden_layer_dim = num_hidden_units # Add dropout if it is enabled. if dropout is not None and mode == estimator.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dropout)) # Create the weights and biases for the logit layer. 
with variable_scope.variable_scope( parent_scope + "/logits", [net], partitioner=hidden_layer_partitioner) as scope: dtype = net.dtype.base_dtype weights_shape = [n_classes, final_hidden_layer_dim] weights = variables.model_variable( "weights", shape=weights_shape, dtype=dtype, initializer=initializers.xavier_initializer(), trainable=True, collections=[parent_scope]) biases = variables.model_variable( "biases", shape=[n_classes,], dtype=dtype, initializer=init_ops.zeros_initializer, trainable=True, collections=[parent_scope]) if mode == estimator.ModeKeys.TRAIN: # Call the candidate sampling APIs and calculate the loss. sampled_values = nn.learned_unigram_candidate_sampler( true_classes=math_ops.to_int64(target_indices), num_true=n_labels, num_sampled=n_samples, unique=True, range_max=n_classes) sampled_softmax_loss = nn.sampled_softmax_loss( weights=weights, biases=biases, inputs=net, labels=math_ops.to_int64(target_indices), num_sampled=n_samples, num_classes=n_classes, num_true=n_labels, sampled_values=sampled_values) loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss") train_op = optimizers.optimize_loss( loss=loss, global_step=contrib_framework.get_global_step(), learning_rate=_DEFAULT_LEARNING_RATE, optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm, name=parent_scope) return None, loss, train_op elif mode == estimator.ModeKeys.EVAL: logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)), biases) predictions = {} predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) _, predictions[_TOP_K] = nn.top_k(logits, top_k) # Since the targets have multiple labels, setup the target probabilities # as 1.0/n_labels for each of the labels. target_one_hot = array_ops.one_hot( indices=target_indices, depth=n_classes, on_value=1.0 / n_labels) target_one_hot = math_ops.reduce_sum( input_tensor=target_one_hot, reduction_indices=[1]) loss = math_ops.reduce_mean( nn.softmax_cross_entropy_with_logits(logits, target_one_hot)) return predictions, loss, None elif mode == estimator.ModeKeys.INFER: logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)), biases) predictions = {} predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) _, predictions[_TOP_K] = nn.top_k(logits, top_k) return predictions, None, None
def create_estimator_spec( self, features, mode, logits, labels=None, train_op_fn=None): """See `Head`.""" with variable_scope.variable_scope( None, default_name='multi_class_head', values=(tuple(six.itervalues(features)) + (labels, logits))): logits = _check_logits(logits, self.logits_dimension) # Predict. pred_keys = prediction_keys.PredictionKeys with ops.name_scope(None, 'predictions', (logits,)): # class_ids's shape is [batch_size] class_ids = math_ops.argmax(logits, 1, name=pred_keys.CLASS_IDS) class_ids = array_ops.expand_dims(class_ids, axis=(1,)) if self._label_vocabulary: table = lookup_ops.index_to_string_table_from_tensor( vocabulary_list=self._label_vocabulary, name='class_string_lookup') classes = table.lookup(class_ids) else: classes = string_ops.as_string(class_ids, name='str_classes') probabilities = nn.softmax(logits, name=pred_keys.PROBABILITIES) predictions = { pred_keys.LOGITS: logits, pred_keys.PROBABILITIES: probabilities, # Expand to [batch_size, 1] pred_keys.CLASS_IDS: class_ids, pred_keys.CLASSES: classes, } if mode == model_fn.ModeKeys.PREDICT: batch_size = array_ops.shape(probabilities)[0] export_class_list = self._label_vocabulary if not export_class_list: export_class_list = string_ops.as_string( math_ops.range(self._n_classes)) export_output_classes = array_ops.tile( input=array_ops.expand_dims(input=export_class_list, axis=0), multiples=[batch_size, 1]) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.PREDICT, predictions=predictions, export_outputs={ '': export_output.ClassificationOutput( scores=probabilities, # `ClassificationOutput` requires string classes. classes=export_output_classes) }) # Eval. label_ids = self._label_ids(_check_labels(labels, 1)) unweighted_loss = losses.sparse_softmax_cross_entropy( labels=label_ids, logits=logits, reduction=losses.Reduction.NONE) # Restore the squeezed dim, so unweighted_loss matches the weights shape. unweighted_loss = array_ops.expand_dims(unweighted_loss, axis=(1,)) weights = ( 1. if (self._weight_feature_key is None) else features[self._weight_feature_key]) weights = math_ops.to_float(weights, name='weights') training_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) if mode == model_fn.ModeKeys.EVAL: return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, predictions=predictions, loss=training_loss, eval_metric_ops=self._eval_metric_ops( labels=label_ids, probabilities=probabilities, logits=logits, class_ids=class_ids, unweighted_loss=unweighted_loss, weights=weights)) # Train. if train_op_fn is None: raise ValueError('train_op_fn can not be None.') logging_ops.scalar_summary(metric_keys.MetricKeys.LOSS, training_loss) logging_ops.scalar_summary( metric_keys.MetricKeys.LOSS_MEAN, losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.MEAN)) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.TRAIN, predictions=predictions, loss=training_loss, train_op=train_op_fn(training_loss))
def get_logits_and_probs(logits=None, probs=None, multidimensional=False, validate_args=False, name="get_logits_and_probs"): """Converts logit to probabilities (or vice-versa), and returns both. Args: logits: Numeric `Tensor` representing log-odds. probs: Numeric `Tensor` representing probabilities. multidimensional: `Boolean`, default `False`. If `True`, represents whether the last dimension of `logits` or `probs`, a `[N1, N2, ... k]` dimensional tensor, representing the logit or probability of `shape[-1]` classes. validate_args: `Boolean`, default `False`. When `True`, either assert `0 <= probs <= 1` (if not `multidimensional`) or that the last dimension of `probs` sums to one. name: A name for this operation (optional). Returns: logits, probs: Tuple of `Tensor`s. If `probs` has an entry that is `0` or `1`, then the corresponding entry in the returned logit will be `-Inf` and `Inf` respectively. Raises: ValueError: if neither `probs` nor `logits` were passed in, or both were. """ with ops.name_scope(name, values=[probs, logits]): if (probs is None) == (logits is None): raise ValueError("Must pass probs or logits, but not both.") if probs is None: logits = ops.convert_to_tensor(logits, name="logits") if multidimensional: return logits, nn.softmax(logits, name="probs") return logits, math_ops.sigmoid(logits, name="probs") probs = ops.convert_to_tensor(probs, name="probs") if validate_args: with ops.name_scope("validate_probs"): one = constant_op.constant(1., probs.dtype) dependencies = [check_ops.assert_non_negative(probs)] if multidimensional: dependencies += [assert_close(math_ops.reduce_sum(probs, -1), one, message="probs does not sum to 1.")] else: dependencies += [check_ops.assert_less_equal( probs, one, message="probs has components greater than 1.")] probs = control_flow_ops.with_dependencies(dependencies, probs) with ops.name_scope("logits"): if multidimensional: # Here we don't compute the multidimensional case, in a manner # consistent with respect to the unidimensional case. We do so # following the TF convention. Typically, you might expect to see # logits = log(probs) - log(gather(probs, pivot)). A side-effect of # being consistent with the TF approach is that the unidimensional case # implicitly handles the second dimension but the multidimensional case # explicitly keeps the pivot dimension. return math_ops.log(probs), probs return math_ops.log(probs) - math_ops.log1p(-1. * probs), probs
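# Hedged sketch of the logit <-> probability relationship the helper above
# implements, using public TF ops. In the binary case the mapping is exact;
# in the multidimensional case log(probs) recovers the logits only up to an
# additive constant per row, because softmax is shift-invariant. Example
# values only.
import tensorflow as tf

# Binary: probs = sigmoid(logits), logits = log(p) - log(1 - p).
logits = tf.constant([-1.5, 0.0, 2.0])
probs = tf.math.sigmoid(logits)
recovered = tf.math.log(probs) - tf.math.log1p(-probs)   # ~ equal to `logits`

# Multidimensional: probs = softmax(logits); log(probs) differs from the
# original logits by the per-row log-normalizer (logsumexp).
multi_logits = tf.constant([[1.0, 2.0, 3.0]])
multi_probs = tf.nn.softmax(multi_logits)
shifted_logits = tf.math.log(multi_probs)                 # logits - logsumexp(logits)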
def create_estimator_spec( self, features, mode, logits, labels=None, train_op_fn=None): """See `Head`.""" with variable_scope.variable_scope( None, default_name='binary_logistic_head', values=(tuple(six.itervalues(features)) + (labels, logits))): # Predict. pred_keys = prediction_keys.PredictionKeys logits = _check_logits(logits, self.logits_dimension) logistic = math_ops.sigmoid(logits, name=pred_keys.LOGISTIC) two_class_logits = array_ops.concat( (array_ops.zeros_like(logits), logits), 1, name='two_class_logits') scores = nn.softmax(two_class_logits, name=pred_keys.PROBABILITIES) classes = array_ops.reshape( math_ops.argmax(two_class_logits, axis=1), (-1, 1), name='classes') predictions = { pred_keys.LOGITS: logits, pred_keys.LOGISTIC: logistic, pred_keys.PROBABILITIES: scores, pred_keys.CLASS_IDS: classes } if mode == model_fn.ModeKeys.PREDICT: return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.PREDICT, predictions=predictions, export_outputs={'': export_output.ClassificationOutput( scores=scores, # `ClassificationOutput` requires string classes. # TODO(ptucker): Support label_keys. classes=string_ops.as_string(classes, name='str_classes'))}) # Eval. labels = _check_labels(math_ops.to_float(labels), self.logits_dimension) unweighted_loss = nn.sigmoid_cross_entropy_with_logits( labels=labels, logits=logits, name='loss') weights = ( 1. if (self._weight_feature_key is None) else features[self._weight_feature_key]) weights = math_ops.to_float(weights, name='weights') training_loss = losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.SUM) if mode == model_fn.ModeKeys.EVAL: return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.EVAL, predictions=predictions, loss=training_loss, eval_metric_ops=self._eval_metric_ops( labels=labels, logits=logits, logistic=logistic, scores=scores, classes=classes, unweighted_loss=unweighted_loss, weights=weights)) # Train. if train_op_fn is None: raise ValueError('train_op_fn can not be None.') logging_ops.scalar_summary(metric_keys.MetricKeys.LOSS, training_loss) logging_ops.scalar_summary( metric_keys.MetricKeys.LOSS_MEAN, losses.compute_weighted_loss( unweighted_loss, weights=weights, reduction=losses.Reduction.MEAN)) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.TRAIN, predictions=predictions, loss=training_loss, train_op=train_op_fn(training_loss))