def padded_sequence_accuracy(predictions,
                             labels,
                             weights_fn=common_layers.weights_nonzero):
    """Score whole sequences: full credit only if no weighted position is wrong.

    Args:
      predictions: tensor whose last axis is scored with argmax. If that last
        (static) dimension is 1, the call is forwarded to
        `rounding_sequence_accuracy` instead.
      labels: tensor of target ids, compared against the argmax ids.
      weights_fn: callable applied to the padded labels to obtain
        per-position weights.

    Returns:
      A pair `(correct_seq, tf.constant(1.0))`. `correct_seq` holds, per batch
      entry, `1.0 - min(1.0, weighted number of mismatches)`.
    """
    if common_layers.shape_list(predictions)[-1] == 1:
        # A trailing dimension of 1 means an L1/L2 loss is in use.
        return rounding_sequence_accuracy(
            predictions, labels, weights_fn=weights_fn)

    with tf.variable_scope(
            "padded_sequence_accuracy", values=[predictions, labels]):
        logits, targets = common_layers.pad_with_zeros(predictions, labels)
        mask = weights_fn(targets)

        # Collapse everything between the batch axis and the class axis:
        # TPU argmax can only deal with a limited number of dimensions.
        logits_dims = common_layers.shape_list(logits)
        n_batch, n_classes = logits_dims[0], logits_dims[-1]
        positions = common_layers.list_product(
            common_layers.shape_list(targets)[1:])
        logits = tf.reshape(
            logits,
            [n_batch, common_layers.list_product(logits_dims[1:-1]), n_classes])
        targets = tf.reshape(targets, [n_batch, positions])
        mask = tf.reshape(mask, [n_batch, positions])

        predicted_ids = tf.to_int32(tf.argmax(logits, axis=-1))
        targets = tf.to_int32(targets)
        mismatches = tf.to_float(tf.not_equal(predicted_ids, targets)) * mask

        # Sum the weighted mismatches over every non-batch axis.
        non_batch_axes = list(range(1, len(predicted_ids.get_shape())))
        weighted_errors = tf.reduce_sum(mismatches, axis=non_batch_axes)
        sequence_ok = 1.0 - tf.minimum(1.0, weighted_errors)
        return sequence_ok, tf.constant(1.0)
def padded_rmse(predictions, labels, weights_fn=common_layers.weights_all):
    """Root of the mean weighted squared error between predictions and labels.

    Both inputs are cast to float and passed through
    `common_layers.pad_with_zeros` before being compared.

    Args:
      predictions: tensor of predicted values.
      labels: tensor of target values.
      weights_fn: callable applied to the padded labels to obtain weights.

    Returns:
      A pair `(rmse, weight_sum)`: the square root of
      `reduce_mean(squared_error * weights)`, and the sum of the weights.
    """
    preds_f = tf.to_float(predictions)
    labels_f = tf.to_float(labels)
    preds_f, labels_f = common_layers.pad_with_zeros(preds_f, labels_f)
    mask = weights_fn(labels_f)
    weighted_sq_err = tf.pow(preds_f - labels_f, 2) * mask
    rmse = tf.sqrt(tf.reduce_mean(weighted_sq_err))
    return rmse, tf.reduce_sum(mask)
def padded_log_poisson(predictions,
                       labels,
                       weights_fn=common_layers.weights_all):
    """Weighted sum of the full log-Poisson loss.

    Args:
      predictions: tensor of predictions, expected to already be in log space.
      labels: tensor of targets.
      weights_fn: callable applied to the padded labels to obtain weights.

    Returns:
      A pair `(weighted_loss_sum, weight_sum)`.
    """
    log_rates, counts = common_layers.pad_with_zeros(predictions, labels)
    mask = weights_fn(counts)
    per_element = tf.nn.log_poisson_loss(
        counts, log_rates, compute_full_loss=True)
    weighted_total = tf.reduce_sum(per_element * mask)
    return weighted_total, tf.reduce_sum(mask)
def padded_accuracy(predictions,
                    labels,
                    weights_fn=common_layers.weights_nonzero):
    """Per-position indicator of whether the argmax prediction hits the label.

    Args:
      predictions: tensor scored with argmax over its last axis.
      labels: tensor of target ids.
      weights_fn: callable applied to the padded labels to obtain weights.

    Returns:
      A pair `(hits, weights)`: `hits` is 1.0 where the argmax id equals the
      label and 0.0 elsewhere; `weights` is the output of `weights_fn`.
    """
    with tf.variable_scope("padded_accuracy", values=[predictions, labels]):
        logits, targets = common_layers.pad_with_zeros(predictions, labels)
        mask = weights_fn(targets)
        predicted_ids = tf.to_int32(tf.argmax(logits, axis=-1))
        target_ids = tf.to_int32(targets)
        hits = tf.to_float(tf.equal(predicted_ids, target_ids))
        return hits, mask
def padded_variance_explained(predictions,
                              labels,
                              weights_fn=common_layers.weights_all):
    """Explained variance (R^2) of predictions with respect to labels.

    Args:
      predictions: tensor of predicted values.
      labels: tensor of target values.
      weights_fn: callable applied to the padded labels to obtain weights.

    Returns:
      A pair `(r2, weight_sum)` with
      `r2 = 1 - residual_sum_of_squares / total_sum_of_squares`, both sums
      weighted, and the target mean taken as `reduce_mean(weights * targets)`.
    """
    preds, targets = common_layers.pad_with_zeros(predictions, labels)
    mask = weights_fn(targets)
    target_mean = tf.reduce_mean(mask * targets)
    total_ss = tf.reduce_sum(mask * tf.pow(targets - target_mean, 2))
    residual_ss = tf.reduce_sum(mask * tf.pow(targets - preds, 2))
    explained = 1. - residual_ss / total_ss
    return explained, tf.reduce_sum(mask)
def padded_cross_entropy_seqls(logits,
                               labels,
                               label_smoothing,
                               weights_fn=weights_nonzero,
                               reduce_sum=True,
                               cutoff=0.0,
                               gaussian=False):
    """Label-smoothed cross-entropy that treats 0 labels as padding.

    Produces a loss numerator (the weighted losses) and a loss denominator
    (the weights), either summed to scalars or left per position.

    Args:
      logits: a `Tensor` with shape `[batch, timesteps, vocab_size]`.
      labels: an integer `Tensor` with shape `[batch, timesteps]`.
      label_smoothing: a floating point `Scalar`; the confidence passed on is
        `1.0 - label_smoothing`.
      weights_fn: a function from labels to weights.
      reduce_sum: a Boolean; when false the unreduced weighted losses and
        weights are returned.
      cutoff: a float; when positive, losses are shifted down by it and
        clipped at zero with a ReLU.
      gaussian: must be false; Gaussian smoothing is rejected.

    Returns:
      loss_numerator: the weighted losses (a `Scalar` sum when `reduce_sum`).
      loss_denominator: the weights (a `Scalar` sum when `reduce_sum`).

    Raises:
      ValueError: if `logits` is a `FactoredTensor` or `gaussian` is true.
    """
    unsupported = isinstance(logits, FactoredTensor) or gaussian
    if unsupported:
        raise ValueError("Gaussian smoothing not implemented because it's BS. "
                         "Factored loss not implemented yet.")

    confidence = 1.0 - label_smoothing
    vocab_size = shape_list(logits)[-1]

    with tf.name_scope("padded_cross_entropy", values=[logits, labels]):
        logits, labels = pad_with_zeros(logits, labels)
        # Reshaping to labels' shape + [vocab_size] doubles as a size check.
        target_shape = shape_list(labels) + [vocab_size]
        logits = tf.reshape(
            logits, target_shape, name="padded_cross_entropy_size_check")
        logits = tf.cast(logits, tf.float32)
        weights = weights_fn(labels)
        xent = smoothing_cross_entropy_seqls(
            logits, labels, vocab_size, confidence, weights=weights)
        if cutoff > 0.0:
            xent = tf.nn.relu(xent - cutoff)
        if reduce_sum:
            return tf.reduce_sum(xent * weights), tf.reduce_sum(weights)
        return xent * weights, weights
def padded_accuracy(predictions,
                    labels,
                    weights_fn=common_layers.weights_nonzero):
    """Per-position indicator of whether the argmax prediction hits the label.

    Args:
      predictions: tensor scored with argmax over its last axis. If that last
        (static) dimension is 1, the call is forwarded to `rounding_accuracy`.
      labels: tensor of target ids.
      weights_fn: callable applied to the padded labels to obtain weights.

    Returns:
      A pair `(hits, weights)`: `hits` is 1.0 where the argmax id equals the
      label and 0.0 elsewhere; `weights` is the output of `weights_fn`.
    """
    last_dim = common_layers.shape_list(predictions)[-1]
    if last_dim == 1:
        # A trailing dimension of 1 means an L1/L2 loss is in use.
        return rounding_accuracy(predictions, labels, weights_fn=weights_fn)

    with tf.variable_scope("padded_accuracy", values=[predictions, labels]):
        logits, targets = common_layers.pad_with_zeros(predictions, labels)
        mask = weights_fn(targets)
        predicted_ids = tf.to_int32(tf.argmax(logits, axis=-1))
        target_ids = tf.to_int32(targets)
        hits = tf.to_float(tf.equal(predicted_ids, target_ids))
        return hits, mask
def padded_sequence_accuracy(predictions,
                             labels,
                             weights_fn=common_layers.weights_nonzero):
    """Sequence-level accuracy computed separately for the top-2 ranks.

    For each of the two highest-scoring classes per position (rank 1 and
    rank 2), a sequence counts as correct when that rank's ids match the
    labels at every weighted position. The two per-rank results are then
    summed per batch entry.

    Note: when every weight of a sequence is zero, both ranks count as
    correct, so the returned value for that sequence is 2.0.

    Args:
      predictions: tensor scored over its last axis. If that last (static)
        dimension is 1, the call is forwarded to
        `rounding_sequence_accuracy`.
      labels: tensor of target ids; cast to int32 before comparison.
      weights_fn: callable applied to the padded labels to obtain
        per-position weights.

    Returns:
      A pair `(correct_seq, tf.constant(1.0))`, where `correct_seq` has one
      entry per batch element.
    """
    if common_layers.shape_list(predictions)[-1] == 1:
        return rounding_sequence_accuracy(
            predictions, labels, weights_fn=weights_fn)

    with tf.variable_scope(
            "padded_sequence_accuracy", values=[predictions, labels]):
        padded_predictions, padded_labels = common_layers.pad_with_zeros(
            predictions, labels)
        weights = weights_fn(padded_labels)

        # Flatten to [batch, positions, classes] / [batch, positions].
        predictions_shape = common_layers.shape_list(padded_predictions)
        batch_size = predictions_shape[0]
        num_classes = predictions_shape[-1]
        flat_size = common_layers.list_product(
            common_layers.shape_list(padded_labels)[1:])
        padded_predictions = tf.reshape(padded_predictions, [
            batch_size,
            common_layers.list_product(predictions_shape[1:-1]),
            num_classes,
        ])
        padded_labels = tf.reshape(padded_labels, [batch_size, flat_size])
        weights = tf.reshape(weights, [batch_size, flat_size])

        # Ids of the two best classes per position: [batch, positions, 2].
        _, outputs = tf.nn.top_k(padded_predictions, k=2)
        outputs = tf.to_int32(outputs)

        # Broadcast the weights across the rank axis.
        weights = tf.expand_dims(weights, axis=-1)
        weights += tf.zeros_like(tf.to_float(outputs))

        # Cast BEFORE broadcasting: adding int32 zeros to labels of another
        # integer dtype (e.g. int64) would fail with a dtype mismatch.
        padded_labels = tf.to_int32(padded_labels)
        padded_labels = tf.expand_dims(padded_labels, axis=-1)
        padded_labels += tf.zeros_like(outputs)

        not_correct = (
            tf.to_float(tf.not_equal(outputs, padded_labels)) * weights)
        # Reduce over positions -> [batch, 2]: per-rank sequence correctness.
        correct_seq_single = 1.0 - tf.minimum(
            1.0, tf.reduce_sum(not_correct, axis=1))
        # Sum the two per-rank indicators -> [batch].
        correct_seq = tf.reduce_sum(correct_seq_single, axis=1)
        return correct_seq, tf.constant(1.0)
def padded_sequence_accuracy(predictions,
                             labels,
                             weights_fn=common_layers.weights_nonzero):
    """Score whole sequences: full credit only if no weighted position is wrong.

    Args:
      predictions: tensor scored with argmax over its last axis.
      labels: tensor of target ids.
      weights_fn: callable applied to the padded labels to obtain
        per-position weights.

    Returns:
      A pair `(correct_seq, tf.constant(1.0))`. `correct_seq` holds, per batch
      entry, `1.0 - min(1.0, weighted number of mismatches)`.
    """
    with tf.variable_scope(
            "padded_sequence_accuracy", values=[predictions, labels]):
        logits, targets = common_layers.pad_with_zeros(predictions, labels)
        mask = weights_fn(targets)
        predicted_ids = tf.to_int32(tf.argmax(logits, axis=-1))
        target_ids = tf.to_int32(targets)
        mismatches = tf.to_float(
            tf.not_equal(predicted_ids, target_ids)) * mask
        # Sum the weighted mismatches over every non-batch axis.
        non_batch_axes = list(range(1, len(predicted_ids.get_shape())))
        weighted_errors = tf.reduce_sum(mismatches, axis=non_batch_axes)
        sequence_ok = 1.0 - tf.minimum(1.0, weighted_errors)
        return sequence_ok, tf.constant(1.0)
def loss(self, logits, features):
    """Sum the cross-entropy over all positions and divide by the batch size.

    Args:
      logits: tensor of logits; its last dimension is used as the vocabulary
        size and its first dimension as the batch size.
      features: mapping that provides the labels under the key "targets".

    Returns:
      The summed cross-entropy divided by the batch size (as float32).
    """
    targets = features["targets"]
    logits_dims = common_layers.shape_list(logits)
    num_classes = logits_dims[-1]
    with tf.name_scope("padded_cross_entropy", values=[logits, targets]):
        logits, targets = common_layers.pad_with_zeros(logits, targets)
        # Reshaping to targets' shape + [num_classes] doubles as a size check.
        wanted_shape = common_layers.shape_list(targets) + [num_classes]
        logits = tf.reshape(
            logits, wanted_shape, name="padded_cross_entropy_size_check")
        logits = tf.cast(logits, tf.float32)
        # confidence=1.0: the smoothing helper is called with no smoothing.
        xent = common_layers.smoothing_cross_entropy(
            logits, targets, num_classes, confidence=1.0, gaussian=False)
        total = tf.reduce_sum(xent)
        batch_size = tf.cast(logits_dims[0], tf.float32)
        return total / batch_size
def padded_accuracy_topk(predictions,
                         labels,
                         k,
                         weights_fn=common_layers.weights_nonzero):
    """Per-position indicator of whether the label is among the top-k classes.

    Args:
      predictions: tensor scored over its last axis.
      labels: tensor of target ids.
      k: number of top classes to consider; capped at the dynamic size of the
        last axis of the padded predictions.
      weights_fn: callable applied to the padded labels to obtain weights.

    Returns:
      A pair `(same_topk, weights)`: `same_topk` counts, per position, how
      many of the top-k ids equal the label; `weights` is the output of
      `weights_fn`.
    """
    with tf.variable_scope(
            "padded_accuracy_topk", values=[predictions, labels]):
        logits, targets = common_layers.pad_with_zeros(predictions, labels)
        mask = weights_fn(targets)
        num_candidates = tf.minimum(k, tf.shape(logits)[-1])
        _, topk_ids = tf.nn.top_k(logits, k=num_candidates)
        topk_ids = tf.to_int32(topk_ids)
        target_ids = tf.expand_dims(tf.to_int32(targets), axis=-1)
        # Adding zeros broadcasts the labels to the shape of the top-k ids.
        target_ids += tf.zeros_like(topk_ids)
        hits = tf.to_float(tf.equal(topk_ids, target_ids))
        return tf.reduce_sum(hits, axis=-1), mask
def padded_accuracy_topk(predictions,
                         labels,
                         k,
                         weights_fn=common_layers.weights_nonzero):
    """Per-position indicator of whether the label is among the top-k classes.

    Args:
      predictions: tensor scored over its last axis.
      labels: tensor of target ids.
      k: number of top classes to consider; capped at the last dimension of
        the padded predictions as reported by `common_layers.shape_list`.
      weights_fn: callable applied to the padded labels to obtain weights.

    Returns:
      A pair `(same_topk, weights)`: `same_topk` counts, per position, how
      many of the top-k ids equal the label; `weights` is the output of
      `weights_fn`.
    """
    with tf.variable_scope(
            "padded_accuracy_topk", values=[predictions, labels]):
        logits, targets = common_layers.pad_with_zeros(predictions, labels)
        mask = weights_fn(targets)
        class_dim = common_layers.shape_list(logits)[-1]
        num_candidates = tf.minimum(k, class_dim)
        _, topk_ids = tf.nn.top_k(logits, k=num_candidates)
        topk_ids = tf.to_int32(topk_ids)
        target_ids = tf.expand_dims(tf.to_int32(targets), axis=-1)
        # Adding zeros broadcasts the labels to the shape of the top-k ids.
        target_ids += tf.zeros_like(topk_ids)
        hits = tf.to_float(tf.equal(topk_ids, target_ids))
        return tf.reduce_sum(hits, axis=-1), mask
def padded_rmse(predictions, labels, weights_fn=common_layers.weights_all):
    """Root mean squared error between predictions and labels.

    The square root is taken AFTER averaging the weighted squared errors.
    Taking `sqrt(pow(error, 2))` element-wise would only give the absolute
    error, and summing that would not be an RMSE.

    Inputs are cast to float so integer tensors can be scored as well.

    Args:
      predictions: tensor of predicted values.
      labels: tensor of target values.
      weights_fn: callable applied to the padded labels to obtain weights.

    Returns:
      A pair `(rmse, weight_sum)`: the square root of
      `reduce_mean(squared_error * weights)` (the mean runs over all
      elements of the padded tensors), and the sum of the weights.
    """
    predictions = tf.to_float(predictions)
    labels = tf.to_float(labels)
    predictions, labels = common_layers.pad_with_zeros(predictions, labels)
    weights = weights_fn(labels)
    squared_error = tf.pow(predictions - labels, 2)
    rmse = tf.sqrt(tf.reduce_mean(squared_error * weights))
    return rmse, tf.reduce_sum(weights)
def accuracy(predictions, labels, features):
    """Exact-match accuracy: 1.0 per example when all of axis 1 matches.

    Args:
      predictions: tensor of predicted values; its first dimension is used
        as the batch size. Presumably rank 2 (`[batch, length]`), since the
        match is reduced over axis 1 only -- confirm against callers.
      labels: tensor of target values, compared element-wise with the
        padded predictions.
      features: unused; kept so the signature matches what callers pass.

    Returns:
      A pair `(ok, weights)`: `ok` is 1.0 where every element along axis 1
      matches and 0.0 otherwise; `weights` is a float32 vector of ones with
      one entry per batch element.
    """
    batch_size = tf.shape(predictions)[0]
    predictions, labels = common_layers.pad_with_zeros(predictions, labels)
    weights = tf.ones((batch_size,), dtype=tf.float32)
    # `tf.equal` is element-wise and takes no `axis`; the reduction over
    # axis 1 belongs to `tf.reduce_all` alone.
    matches = tf.equal(predictions, labels)
    ok = tf.to_float(tf.reduce_all(matches, axis=1))
    return ok, weights