def call(self, labels, predictions, weights=None):
  """Update the running categorical accuracy with one batch.

  Both inputs are reduced with an argmax over their last axis before being
  compared, so `labels` and `predictions` must have the same shape but may
  have different dtypes.

  Args:
    labels: One-hot Tensor.
    predictions: Tensor with the logits or probabilities for each example.
    weights: Optional weighting of each example. Defaults to 1.

  Returns:
    The arguments, for easy chaining: `(labels, predictions)` when `weights`
    is None, otherwise `(labels, predictions, weights)`. The returned
    `labels` and `predictions` are the argmax-reduced tensors.
  """
  label_shape = array_ops.shape(labels)
  prediction_shape = array_ops.shape(predictions)
  check_ops.assert_equal(
      label_shape,
      prediction_shape,
      message="Shapes of labels and predictions are unequal")

  labels = math_ops.argmax(labels, axis=-1)
  predictions = math_ops.argmax(predictions, axis=-1)
  is_correct = math_ops.cast(math_ops.equal(labels, predictions), self.dtype)
  super(CategoricalAccuracy, self).call(is_correct, weights=weights)

  chained = (labels, predictions)
  if weights is not None:
    chained += (weights,)
  return chained
def testArgMinMax(self): # Complex numbers do not support argmin/argmax. minmax_types = set(self.numeric_types) - set(self.complex_types) for dtype in minmax_types: self._assertOpOutputMatchesExpected( lambda x: math_ops.argmax(x, axis=0, output_type=dtypes.int32), np.array([1, 10, 27, 3, 3, 4], dtype=dtype), expected=np.int32(2)) self._assertOpOutputMatchesExpected( lambda x: math_ops.argmax(x, axis=0, output_type=dtypes.int32), np.array([[4, 1, 7], [3, 2, 4]], dtype=dtype), expected=np.array([0, 1, 0], dtype=np.int32)) self._assertOpOutputMatchesExpected( lambda x: math_ops.argmax(x, axis=1, output_type=dtypes.int32), np.array([[4, 1], [3, 2]], dtype=dtype), expected=np.array([0, 0], dtype=np.int32)) self._assertOpOutputMatchesExpected( lambda x: math_ops.argmin(x, axis=0, output_type=dtypes.int32), np.array([3, 10, 27, 3, 2, 4], dtype=dtype), expected=np.int32(4)) self._assertOpOutputMatchesExpected( lambda x: math_ops.argmin(x, axis=0, output_type=dtypes.int32), np.array([[4, 1, 7], [3, 2, 4]], dtype=dtype), expected=np.array([1, 0, 1], dtype=np.int32)) self._assertOpOutputMatchesExpected( lambda x: math_ops.argmin(x, axis=1, output_type=dtypes.int32), np.array([[4, 1], [3, 2]], dtype=dtype), expected=np.array([1, 1], dtype=np.int32))
def testArgMinMax(self): for dtype in self.numeric_types: self._assertOpOutputMatchesExpected( lambda x: math_ops.argmax(x, axis=0, output_type=dtypes.int32), np.array([1, 10, 27, 3, 3, 4], dtype=dtype), expected=np.int32(2)) self._assertOpOutputMatchesExpected( lambda x: math_ops.argmax(x, axis=0, output_type=dtypes.int32), np.array([[4, 1, 7], [3, 2, 4]], dtype=dtype), expected=np.array([0, 1, 0], dtype=np.int32)) self._assertOpOutputMatchesExpected( lambda x: math_ops.argmax(x, axis=1, output_type=dtypes.int32), np.array([[4, 1], [3, 2]], dtype=dtype), expected=np.array([0, 0], dtype=np.int32)) self._assertOpOutputMatchesExpected( lambda x: math_ops.argmin(x, axis=0, output_type=dtypes.int32), np.array([3, 10, 27, 3, 2, 4], dtype=dtype), expected=np.int32(4)) self._assertOpOutputMatchesExpected( lambda x: math_ops.argmin(x, axis=0, output_type=dtypes.int32), np.array([[4, 1, 7], [3, 2, 4]], dtype=dtype), expected=np.array([1, 0, 1], dtype=np.int32)) self._assertOpOutputMatchesExpected( lambda x: math_ops.argmin(x, axis=1, output_type=dtypes.int32), np.array([[4, 1], [3, 2]], dtype=dtype), expected=np.array([1, 1], dtype=np.int32))
def _logits_to_prediction(self, logits=None):
  """Builds the prediction dict (logits and class ids) from `logits`.

  The class id is the argmax over `[zeros, logits]` concatenated along
  axis 1, i.e. class 1 is chosen where the logit is positive.

  Args:
    logits: Logits tensor. Assumed to be a single column of shape
      `[batch_size, 1]` -- TODO confirm against the caller.

  Returns:
    A dict with `PedictionKey.LOGITS` mapped to the unmodified `logits` and
    `PedictionKey.CLASSES` mapped to the predicted class ids, with a size-1
    axis inserted at position 1.
  """
  predictions = {}
  # Expose the raw logits. This entry used to hold
  # `expand_dims(argmax(logits, 1), 1)`, which is a class index computed on
  # the un-padded logits rather than the logits themselves.
  predictions[PedictionKey.LOGITS] = logits
  logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
  # Workaround for argmax dropping the second dimension.
  predictions[PedictionKey.CLASSES] = array_ops.expand_dims(
      math_ops.argmax(logits, 1), 1)
  return predictions
def call(self, labels, predictions, weights=None):
  """Update the running sparse accuracy with one batch.

  `predictions` carries one extra trailing dimension compared to `labels`,
  whose size is the number of classes; it is reduced with an argmax over
  that axis. `labels` are cast to int64 before the comparison, so the two
  inputs may have different dtypes.

  Args:
    labels: Tensor of shape (batch_size, ) containing integers.
    predictions: Tensor with the logits or probabilities for each example.
    weights: Optional weighting of each example. Defaults to 1.

  Returns:
    The arguments, for easy chaining: `(labels, predictions)` when `weights`
    is None, otherwise `(labels, predictions, weights)`. The returned
    `labels` are the int64-cast labels and `predictions` the argmax result.
  """
  label_shape = array_ops.shape(labels)
  batch_dim = array_ops.shape(predictions)[0]
  check_ops.assert_equal(
      label_shape,
      batch_dim,
      message="First axis of labels and predictions is unequal")

  predictions = math_ops.argmax(predictions, axis=-1)
  labels = math_ops.cast(labels, dtypes.int64)
  is_correct = math_ops.cast(math_ops.equal(labels, predictions), self.dtype)
  super(SparseAccuracy, self).call(is_correct, weights=weights)

  chained = (labels, predictions)
  if weights is not None:
    chained += (weights,)
  return chained
def _logits_to_prediction(self, logits=None):
  """Returns a dict holding the raw logits and the predicted class ids.

  The class ids are the argmax over `[zeros, logits]` concatenated along
  axis 1.
  """
  raw_logits = logits
  two_class_logits = array_ops.concat(
      1, [array_ops.zeros_like(raw_logits), raw_logits])
  return {
      PredictionKey.LOGITS: raw_logits,
      PredictionKey.CLASSES: math_ops.argmax(two_class_logits, 1),
  }
def calculate_sequence_by_mask(mask, time_major):
  """Derive per-example sequence lengths (1-D) from a boolean mask.

  The mask is a 2D boolean tensor in which masked-out timesteps are False.
  The length of a sequence is the index of its last True value plus one, so
  True values need not be contiguous. For example

  a = [[True, True, False, False],
       [True, False, True, False]]

  is a (2, 4) mask whose result is [2, 3]: in the second row the last True
  sits at index 2, hence length 3.

  Args:
    mask: Boolean tensor with shape [batch, timestep] or [timestep, batch] if
      time_major=True.
    time_major: Boolean, which indicates whether the mask is time major or
      batch major.

  Returns:
    sequence_length: 1D int32 tensor.
  """
  time_axis = 0 if time_major else 1
  num_timesteps = array_ops.shape(mask)[time_axis]
  flipped = array_ops.reverse(mask, axis=[time_axis])
  flipped = math_ops.cast(flipped, dtypes.int32)
  # The first 1 in the flipped mask corresponds to the last True in the
  # original mask, so its index counts the trailing masked-out timesteps.
  trailing_masked = math_ops.argmax(
      flipped, axis=time_axis, output_type=dtypes.int32)
  return num_timesteps - trailing_masked
def logits_to_predictions(self, logits, proba=False):
  """Maps logits to class ids; probabilities are not available.

  Args:
    logits: Logits tensor; a zero column is prepended along axis 1 before
      the argmax.
    proba: Must be falsy.

  Returns:
    The argmax over `[zeros, logits]` along axis 1.

  Raises:
    ValueError: if `proba` is truthy.
  """
  if proba:
    raise ValueError(
        "logits to probabilities is not supported for _BinarySvmTargetColumn")

  zero_column = array_ops.zeros_like(logits)
  two_class_logits = array_ops.concat([zero_column, logits], 1)
  return math_ops.argmax(two_class_logits, 1)
def _multi_value_predictions(
    activations, target_column, problem_type, predict_probabilities):
  """Maps `activations` from the RNN to predictions for multi value models.

  The returned `dict` always contains one predictions entry, keyed by
  `prediction_key.PredictionKey.CLASSES` when `problem_type` is
  `ProblemType.CLASSIFICATION` and by `prediction_key.PredictionKey.SCORES`
  otherwise. When `predict_probabilities` is `True` it additionally contains
  `prediction_key.PredictionKey.PROBABILITIES`, a `Tensor` of probabilities
  with shape `[batch_size, padded_length, num_classes]`.

  Note that variable length inputs will yield some predictions that don't have
  meaning. For example, if `sequence_length = [3, 2]`, then prediction `[1, 2]`
  has no meaningful interpretation.

  Args:
    activations: Output from an RNN. Should have dtype `float32` and shape
      `[batch_size, padded_length, ?]`.
    target_column: An initialized `TargetColumn`, calculate predictions.
    problem_type: Either `ProblemType.CLASSIFICATION` or
      `ProblemType.LINEAR_REGRESSION`.
    predict_probabilities: A Python boolean, indicating whether probabilities
      should be returned. Should only be set to `True` for
      classification/logistic regression problems.

  Returns:
    A `dict` mapping strings to `Tensors`.
  """
  with ops.name_scope('MultiValuePrediction'):
    full_shape = array_ops.shape(activations)
    # Collapse batch and time so the target column sees a 2-D tensor.
    flat_activations = array_ops.reshape(activations, [-1, full_shape[2]])
    result = {}

    if not predict_probabilities:
      flat_predictions = target_column.logits_to_predictions(
          flat_activations, proba=False)
    else:
      flat_probabilities = target_column.logits_to_predictions(
          flat_activations, proba=True)
      flat_predictions = math_ops.argmax(flat_probabilities, 1)
      if target_column.num_label_columns == 1:
        # A single label column yields two-class probabilities.
        probability_shape = array_ops.concat([full_shape[:2], [2]], 0)
      else:
        probability_shape = full_shape
      result[prediction_key.PredictionKey.PROBABILITIES] = array_ops.reshape(
          flat_probabilities,
          probability_shape,
          name=prediction_key.PredictionKey.PROBABILITIES)

    if problem_type == constants.ProblemType.CLASSIFICATION:
      predictions_name = prediction_key.PredictionKey.CLASSES
    else:
      predictions_name = prediction_key.PredictionKey.SCORES
    result[predictions_name] = array_ops.reshape(
        flat_predictions,
        [full_shape[0], full_shape[1]],
        name=predictions_name)
    return result
def _convert_to_estimator_model_result(self, logits_fn_result):
  """Turns `(logits, loss, train_op)` into `(predictions, loss, train_op)`.

  The predictions dict maps `Classifier.CLASS_OUTPUT` to the argmax over the
  last axis of the logits and `Classifier.PROBABILITY_OUTPUT` to their
  softmax.
  """
  logits, loss, train_op = logits_fn_result
  last_axis = len(logits.get_shape()) - 1
  outputs = {
      Classifier.CLASS_OUTPUT: math_ops.argmax(logits, last_axis),
      Classifier.PROBABILITY_OUTPUT: nn.softmax(logits),
  }
  return outputs, loss, train_op
def _ModelFn(features, labels, mode):
  """Model function returning an `EstimatorSpec` for EVAL or TRAIN.

  Uses `is_training`, `use_trt`, `batch_size`, `model_dir` and `self` from
  the enclosing scope. When not training, the logits come from an imported
  graph def instead of a freshly built graph.

  Returns:
    An `EstimatorSpec` for `ModeKeys.EVAL` or `ModeKeys.TRAIN`; `None` for
    any other mode.
  """
  if not is_training:
    graph_def = self._GetGraphDef(use_trt, batch_size, model_dir)
    imported = importer.import_graph_def(
        graph_def,
        input_map={INPUT_NODE_NAME: features},
        return_elements=[OUTPUT_NODE_NAME + ':0'],
        name='')
    logits_out = imported[0]
  else:
    logits_out = self._BuildGraph(features)

  loss = losses.sparse_softmax_cross_entropy(labels=labels, logits=logits_out)
  summary.scalar('loss', loss)

  classes_out = math_ops.argmax(logits_out, axis=1, name='classes_out')
  accuracy = metrics.accuracy(
      labels=labels, predictions=classes_out, name='acc_op')
  # accuracy[1] is the second element of the value returned by
  # `metrics.accuracy`.
  summary.scalar('accuracy', accuracy[1])

  if mode == ModeKeys.EVAL:
    return EstimatorSpec(
        mode, loss=loss, eval_metric_ops={'accuracy': accuracy})
  if mode == ModeKeys.TRAIN:
    optimizer = AdamOptimizer(learning_rate=1e-2)
    train_op = optimizer.minimize(loss, global_step=get_global_step())
    return EstimatorSpec(mode, loss=loss, train_op=train_op)
  # Any other mode has no spec here.
  return None
def _single_value_predictions(activations,
                              sequence_length,
                              target_column,
                              predict_probabilities):
  """Maps `activations` from the RNN to predictions for single value models.

  Only the last activation of each sequence (as selected by
  `select_last_activations`) is used. The result always contains
  `PREDICTIONS_KEY`; when `predict_probabilities` is `True` it also contains
  `PROBABILITIES_KEY`, a `Tensor` of probabilities with shape
  `[batch_size, num_classes]`, and the prediction is the argmax of those
  probabilities.

  Args:
    activations: Output from an RNN. Should have dtype `float32` and shape
      `[batch_size, padded_length, ?]`.
    sequence_length: A `Tensor` with shape `[batch_size]` and dtype `int32`
      containing the length of each sequence in the batch. If `None`, sequences
      are assumed to be unpadded.
    target_column: An initialized `TargetColumn`, calculate predictions.
    predict_probabilities: A Python boolean, indicating whether probabilities
      should be returned. Should only be set to `True` for
      classification/logistic regression problems.

  Returns:
    A `dict` mapping strings to `Tensors`.
  """
  with ops.name_scope("SingleValuePrediction"):
    last_activations = select_last_activations(activations, sequence_length)

    if not predict_probabilities:
      return {
          RNNKeys.PREDICTIONS_KEY: target_column.logits_to_predictions(
              last_activations, proba=False),
      }

    probabilities = target_column.logits_to_predictions(
        last_activations, proba=True)
    return {
        RNNKeys.PROBABILITIES_KEY: probabilities,
        RNNKeys.PREDICTIONS_KEY: math_ops.argmax(probabilities, 1),
    }
def __call__(self, inputs, state, scope=None):
  """Run one forward (Viterbi) step of CRF decoding.

  Shape comments use 'B' for batch_size and 'O' for num_tags.

  Args:
    inputs: A [batch_size, num_tags] matrix of unary potentials.
    state: A [batch_size, num_tags] matrix containing the previous step's
      score values.
    scope: Unused variable scope of this cell.

  Returns:
    backpointers: A [batch_size, num_tags] int32 matrix of backpointers.
    new_state: A [batch_size, num_tags] matrix of new score values.
  """
  prev_scores = array_ops.expand_dims(state, 2)  # [B, O, 1]

  # Broadcasting: self._transition_params is expanded along the batch axis
  # and prev_scores along the last axis.
  # [B, O, 1] + [1, O, O] -> [B, O, O]
  transition_scores = prev_scores + self._transition_params

  # Best achievable score for each current tag, over all previous tags.
  best_prev_score = math_ops.reduce_max(transition_scores, [1])  # [B, O]
  new_state = inputs + best_prev_score  # [B, O]

  # Previous tag that achieved that best score.
  best_prev_tag = math_ops.argmax(transition_scores, 1)
  backpointers = math_ops.cast(best_prev_tag, dtype=dtypes.int32)  # [B, O]
  return backpointers, new_state
def extract_argmax_and_embed(prev, _):
  """Loop function: embeds the argmax symbol of the previous output.

  `output_projection` and `embedding` come from the enclosing scope. The
  symbol id is wrapped in `stop_gradient`.
  """
  if output_projection is not None:
    projection_weights = output_projection[0]
    projection_biases = output_projection[1]
    prev = nn_ops.xw_plus_b(prev, projection_weights, projection_biases)
  best_symbol = math_ops.argmax(prev, 1)
  prev_symbol = array_ops.stop_gradient(best_symbol)
  return embedding_ops.embedding_lookup(embedding, prev_symbol)
def mode(self, name="mode"):
  """Returns the argmax of the logits along axis `self._batch_rank`.

  The result is cast to `self._dtype` and its static shape is set to the
  batch shape.
  """
  with ops.name_scope(self.name), ops.op_scope([], name):
    most_likely = math_ops.argmax(self.logits, dimension=self._batch_rank)
    result = math_ops.cast(most_likely, self._dtype)
    result.set_shape(self.get_batch_shape())
    return result
def _one_hot_to_embedding(one_hot, embedding_size):
  """Get a dense embedding vector from a one-hot encoding.

  The token id is the argmax of `one_hot` along axis 1; it indexes an
  'embedding' variable of shape `[one_hot.shape[1], embedding_size]`.
  """
  vocab_size = one_hot.shape[1]
  token_ids = math_ops.argmax(one_hot, axis=1)
  table = variable_scope.get_variable(
      'embedding', [vocab_size, embedding_size])
  return embedding_ops.embedding_lookup(
      table, token_ids, name='token_to_embedding')
def _logits_to_predictions(self, logits, proba=False):
  """Maps logits to class ids, or to softmax probabilities if `proba`.

  For the two-class case a zero column is prepended along axis 1 first.
  """
  if self._n_classes == 2:
    zero_column = array_ops.zeros_like(logits)
    logits = array_ops.concat(1, [zero_column, logits])
  return nn.softmax(logits) if proba else math_ops.argmax(logits, 1)
def _multiclass_metrics(predictions, labels, weights):
  """Prepares eval metrics for multiclass eval.

  Returns a dict whose only entry, "accuracy", is the streaming accuracy of
  the argmax (axis 1) of `predictions["scores"]` against `labels`.
  """
  predicted_classes = math_ops.argmax(predictions["scores"], 1)
  return {
      "accuracy": metrics_lib.streaming_accuracy(
          predicted_classes, labels, weights),
  }
def logits_to_predictions(self, logits, proba=False):
  """Maps logits to class ids, or to softmax probabilities if `proba`.

  With a single label column a zero column is prepended along axis 1 first.
  """
  if self.num_label_columns == 1:
    zero_column = array_ops.zeros_like(logits)
    logits = array_ops.concat([zero_column, logits], 1)
  return nn.softmax(logits) if proba else math_ops.argmax(logits, 1)
def _logits_to_predictions(self, logits):
  """See `_MultiClassHead`.

  Returns the raw logits together with the argmax (axis 1) of the logits
  after conversion by `_one_class_to_two_class_logits`.
  """
  with ops.name_scope(None, "predictions", (logits,)):
    two_class_logits = _one_class_to_two_class_logits(logits)
    predicted_classes = math_ops.argmax(
        two_class_logits, 1, name=prediction_key.PredictionKey.CLASSES)
    return {
        prediction_key.PredictionKey.LOGITS: logits,
        prediction_key.PredictionKey.CLASSES: predicted_classes,
    }
def sample(self, time, outputs, state, name=None):
  """sample for GreedyEmbeddingHelper.

  Treats `outputs` as logits and returns the int32 argmax over the last
  axis as the sampled ids.

  Raises:
    TypeError: if `outputs` is not a single `Tensor`.
  """
  del time, state  # unused by sample_fn
  if isinstance(outputs, ops.Tensor):
    return math_ops.argmax(outputs, axis=-1, output_type=dtypes.int32)
  raise TypeError("Expected outputs to be a single Tensor, got: %s" %
                  type(outputs))
def _predictions(logits, n_classes):
  """Returns predictions for the given logits and n_classes.

  The dict always holds `_PROBABILITIES` (softmax) and `_CLASSES` (argmax
  along axis 1, reshaped to `(-1, 1)`). For `n_classes == 2` it also holds
  `_LOGISTIC` (sigmoid of the original logits), and a zero column is
  prepended to the logits before the softmax/argmax.
  """
  result = {}
  if n_classes == 2:
    result[_LOGISTIC] = math_ops.sigmoid(logits)
    zero_column = array_ops.zeros_like(logits)
    logits = array_ops.concat(1, [zero_column, logits])
  result[_PROBABILITIES] = nn.softmax(logits)
  class_ids = math_ops.argmax(logits, 1)
  result[_CLASSES] = array_ops.reshape(class_ids, shape=(-1, 1))
  return result
def _logits_to_predictions(self, logits):
  """See `_MultiClassHead`.

  Returns the raw logits together with the argmax over `[zeros, logits]`
  concatenated along axis 1.
  """
  raw_logits = logits
  two_class_logits = array_ops.concat(
      1, [array_ops.zeros_like(raw_logits), raw_logits])
  return {
      prediction_key.PredictionKey.LOGITS: raw_logits,
      prediction_key.PredictionKey.CLASSES: math_ops.argmax(
          two_class_logits, 1),
  }
def _logits_to_prediction(self, logits=None):
  """Builds the prediction dict from `logits`.

  Always contains LOGITS, PROBABILITIES (softmax) and CLASSES (argmax along
  axis 1). When `self.logits_dimension == 1` it also contains LOGISTIC
  (sigmoid of the raw logits), and a zero column is prepended to the logits
  before the softmax/argmax.
  """
  result = {}
  result[PredictionKey.LOGITS] = logits
  if self.logits_dimension == 1:
    result[PredictionKey.LOGISTIC] = math_ops.sigmoid(logits)
    zero_column = array_ops.zeros_like(logits)
    logits = array_ops.concat(1, [zero_column, logits])
  result[PredictionKey.PROBABILITIES] = nn.softmax(logits)
  result[PredictionKey.CLASSES] = math_ops.argmax(logits, 1)
  return result
def body(i, prev_c, prev_h, actions, log_probs):
  """Runs one decoding step: chooses an action per child and records it.

  Relies on names from the enclosing scope (`self`, `lstm`, `w_lstm`,
  `forget_bias`, `attn_w_2`, `attn_mem`, `attn_v`, `device_softmax`,
  `device_go_embedding`, `device_embeddings`, `mode` and `y`).

  Args:
    i: Current step index. Step 0 feeds the tiled "go" embedding; later
      steps feed the embedding of the action written at step `i - 1`.
    prev_c: State from the previous step, passed to `lstm` as its second
      argument.
    prev_h: State from the previous step, passed to `lstm` as its third
      argument.
    actions: Object supporting `read(index)` and `write(index, value)`
      (presumably a TensorArray -- confirm against the caller).
    log_probs: Running total to which this step's sparse softmax
      cross-entropy (of the chosen action under `logits`) is added.

  Returns:
    The tuple `(i + 1, next_c, next_h, actions, log_probs)`.

  Raises:
    NotImplementedError: if `mode` is not "sample", "greedy" or "target".
  """
  # pylint: disable=g-long-lambda
  # Input signal: the "go" embedding on the first step, otherwise the
  # embedding of the previously chosen action.
  signal = control_flow_ops.cond(
      math_ops.equal(i, 0),
      lambda: array_ops.tile(device_go_embedding,
                             [self.hparams.num_children, 1]),
      lambda: embedding_ops.embedding_lookup(device_embeddings,
                                             actions.read(i - 1))
  )
  if self.hparams.keep_prob is not None:
    signal = nn_ops.dropout(signal, self.hparams.keep_prob)
  next_c, next_h = lstm(signal, prev_c, prev_h, w_lstm, forget_bias)

  # Attention over `attn_mem`: score each group with
  # tanh(next_h * attn_w_2 + attn_mem) * attn_v, softmax over groups, then
  # take the weighted sum of `attn_mem`.
  query = math_ops.matmul(next_h, attn_w_2)
  query = array_ops.reshape(
      query, [self.hparams.num_children, 1, self.hparams.hidden_size])
  query = math_ops.tanh(query + attn_mem)
  query = array_ops.reshape(query, [
      self.hparams.num_children * self.num_groups, self.hparams.hidden_size
  ])
  query = math_ops.matmul(query, attn_v)
  query = array_ops.reshape(query,
                            [self.hparams.num_children, self.num_groups])
  query = nn_ops.softmax(query)
  query = array_ops.reshape(query,
                            [self.hparams.num_children, self.num_groups, 1])
  query = math_ops.reduce_sum(attn_mem * query, axis=1)
  # Concatenate the LSTM output with the attention context.
  query = array_ops.concat([next_h, query], axis=1)

  logits = math_ops.matmul(query, device_softmax)
  logits /= self.hparams.temperature
  if self.hparams.tanh_constant > 0:
    # Squash the logits into (-tanh_constant, tanh_constant).
    logits = math_ops.tanh(logits) * self.hparams.tanh_constant
  if self.hparams.logits_std_noise > 0:
    # Gaussian noise whose stddev is scaled by norm(logits) / sqrt(size),
    # applied only until `global_step` exceeds `stop_noise_step`.
    num_in_logits = math_ops.cast(
        array_ops.size(logits), dtype=dtypes.float32)
    avg_norm = math_ops.divide(
        linalg_ops.norm(logits), math_ops.sqrt(num_in_logits))
    logits_noise = random_ops.random_normal(
        array_ops.shape(logits),
        stddev=self.hparams.logits_std_noise * avg_norm)
    logits = control_flow_ops.cond(
        self.global_step > self.hparams.stop_noise_step, lambda: logits,
        lambda: logits + logits_noise)

  # Choose the action: sample it, take the argmax, or read column `i` of
  # the supplied targets `y`.
  if mode == "sample":
    next_y = random_ops.multinomial(logits, 1, seed=self.hparams.seed)
  elif mode == "greedy":
    next_y = math_ops.argmax(logits, 1)
  elif mode == "target":
    next_y = array_ops.slice(y, [0, i], [-1, 1])
  else:
    raise NotImplementedError
  next_y = math_ops.to_int32(next_y)
  next_y = array_ops.reshape(next_y, [self.hparams.num_children])
  actions = actions.write(i, next_y)
  log_probs += nn_ops.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=next_y)
  return i + 1, next_c, next_h, actions, log_probs
def _logits_to_prediction(self, logits=None):
  """Builds the prediction dict from `logits`.

  Always contains LOGITS, PROBABILITIES (softmax) and CLASSES (argmax along
  axis 1 with a size-1 axis re-inserted at position 1). When
  `self.logits_dimension == 1` it also contains LOGISTIC (sigmoid of the raw
  logits), and a zero column is prepended to the logits before the
  softmax/argmax.
  """
  result = {}
  result[PedictionKey.LOGITS] = logits
  if self.logits_dimension == 1:
    result[PedictionKey.LOGISTIC] = math_ops.sigmoid(logits)
    zero_column = array_ops.zeros_like(logits)
    logits = array_ops.concat(1, [zero_column, logits])
  result[PedictionKey.PROBABILITIES] = nn.softmax(logits)
  # Workaround for argmax dropping the second dimension.
  class_ids = math_ops.argmax(logits, 1)
  result[PedictionKey.CLASSES] = array_ops.expand_dims(class_ids, 1)
  return result
def sparse_categorical_accuracy(y_true, y_pred):
  """Returns per-example 0/1 matches between labels and argmax predictions.

  `y_true` is reduced with a max over its last axis and `y_pred` with an
  argmax over its last axis; the equality result is cast to `K.floatx()`.
  """
  true_ids = math_ops.reduce_max(y_true, axis=-1)
  pred_ids = math_ops.argmax(y_pred, axis=-1)
  # argmax yields integers; cast them when the labels are floats so the two
  # sides can be compared.
  labels_are_float = K.dtype(true_ids) == K.floatx()
  if labels_are_float:
    pred_ids = math_ops.cast(pred_ids, K.floatx())
  matches = math_ops.equal(true_ids, pred_ids)
  return math_ops.cast(matches, K.floatx())
def loop_function(prev, _):
  """Embeds the argmax symbol of the previous output.

  `output_projection`, `embedding` and `update_embedding` come from the
  enclosing scope. When `update_embedding` is falsy the returned embedding
  is wrapped in `stop_gradient`.
  """
  if output_projection is not None:
    projection_weights = output_projection[0]
    projection_biases = output_projection[1]
    prev = nn_ops.xw_plus_b(prev, projection_weights, projection_biases)
  prev_symbol = math_ops.argmax(prev, 1)
  # Note that gradients will not propagate through the second parameter of
  # embedding_lookup.
  emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
  if update_embedding:
    return emb_prev
  return array_ops.stop_gradient(emb_prev)
def composed_sampler(logits, num_samples):
  """Draws `num_samples` class ids per row by adding noise and taking argmax.

  The noise is `-log(-log(U))` for uniform `U`, added to the logits before
  an argmax over the class axis.
  """
  # [batch size, num classes, num samples]
  noise_shape = logits.get_shape().concatenate(
      tensor_shape.TensorShape([num_samples]))
  uniform = random_ops.random_uniform(noise_shape)
  noise = -math_ops.log(-math_ops.log(uniform))
  # [batch size, num classes, 1]
  expanded_logits = array_ops.expand_dims(logits, -1)
  # [batch size, num samples]
  return math_ops.argmax(expanded_logits + noise, axis=1)
def crf_decode(potentials, transition_params, sequence_length):
  """Decode the highest scoring sequence of tags in TensorFlow.

  This is a function for tensor. A forward RNN pass accumulates scores and
  backpointers; a backward RNN pass then follows the backpointers from the
  best final tag.

  Args:
    potentials: A [batch_size, max_seq_len, num_tags] tensor of
              unary potentials.
    transition_params: A [num_tags, num_tags] matrix of
              binary potentials.
    sequence_length: A [batch_size] vector of true sequence lengths.

  Returns:
    decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`.
                Contains the highest scoring tag indices.
    best_score: A [batch_size] vector, containing the score of `decode_tags`.
  """
  # Shape comments use 'B' for batch_size, 'T' for max_seq_len and 'O' for
  # num_tags.
  num_tags = potentials.get_shape()[2].value

  # Forward pass: the first timestep seeds the state, the remaining ones are
  # fed through the forward cell to get the last scores and backpointers.
  forward_cell = CrfDecodeForwardRnnCell(transition_params)
  first_scores = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
  first_scores = array_ops.squeeze(first_scores, axis=[1])  # [B, O]
  later_potentials = array_ops.slice(
      potentials, [0, 1, 0], [-1, -1, -1])  # [B, T-1, O]
  backpointers, last_score = rnn.dynamic_rnn(
      forward_cell,
      inputs=later_potentials,
      sequence_length=sequence_length - 1,
      initial_state=first_scores,
      time_major=False,
      dtype=dtypes.int32)  # [B, T - 1, O], [B, O]
  backpointers = gen_array_ops.reverse_sequence(
      backpointers, sequence_length - 1, seq_dim=1)  # [B, T-1, O]

  # Backward pass: start from the best final tag and walk the (reversed)
  # backpointers to recover the tag indices.
  backward_cell = CrfDecodeBackwardRnnCell(num_tags)
  last_tag = math_ops.argmax(last_score, axis=1)
  last_tag = math_ops.cast(last_tag, dtype=dtypes.int32)  # [B]
  last_tag = array_ops.expand_dims(last_tag, axis=-1)  # [B, 1]
  decode_tags, _ = rnn.dynamic_rnn(
      backward_cell,
      inputs=backpointers,
      sequence_length=sequence_length - 1,
      initial_state=last_tag,
      time_major=False,
      dtype=dtypes.int32)  # [B, T - 1, 1]
  decode_tags = array_ops.squeeze(decode_tags, axis=[2])  # [B, T - 1]
  decode_tags = array_ops.concat([last_tag, decode_tags], axis=1)  # [B, T]
  # Tags were produced last-to-first; flip each sequence back into order.
  decode_tags = gen_array_ops.reverse_sequence(
      decode_tags, sequence_length, seq_dim=1)  # [B, T]

  best_score = math_ops.reduce_max(last_score, axis=1)  # [B]
  return decode_tags, best_score
def _mode(self):
  """Returns the argmax of the logits along axis `self._batch_rank`.

  The result is cast to `self.dtype` and its static shape is set to
  `self.batch_shape`.
  """
  most_likely = math_ops.argmax(self.logits, axis=self._batch_rank)
  result = math_ops.cast(most_likely, self.dtype)
  result.set_shape(self.batch_shape)
  return result
def argmax(x):
  """Returns `math_ops.argmax(x)` with that op's default arguments."""
  indices = math_ops.argmax(x)
  return indices
def _single_seq_fn():
  """Decodes `potentials` (from the enclosing scope) with axis 1 squeezed.

  Returns:
    A pair `(decode_tags, best_score)`: the int32 argmax along axis 1 of the
    squeezed potentials with a size-1 axis re-inserted at position 1, and
    the max along that same axis.
  """
  unary = array_ops.squeeze(potentials, [1])
  best_tag = math_ops.argmax(unary, axis=1)
  decode_tags = array_ops.expand_dims(best_tag, 1)
  best_score = math_ops.reduce_max(unary, axis=1)
  decode_tags = math_ops.cast(decode_tags, dtype=dtypes.int32)
  return decode_tags, best_score
def TestModel(inputs):
  """Returns the argmax (axis 1) of the scaled inputs and the scale variable.

  The scale is a non-trainable variable initialised to 1.0.
  """
  scale = variables.Variable(1.0, trainable=False)
  # Scaling the outputs won't change the result...
  scaled = math_ops.multiply(inputs, scale)
  predicted = math_ops.argmax(scaled, 1)
  return predicted, scale
def _mode(self):
  """Returns the argmax of the logits along axis `self._batch_rank`.

  The result is cast to `self.dtype` and its static shape is set to
  `self.get_batch_shape()`.
  """
  most_likely = math_ops.argmax(self.logits, dimension=self._batch_rank)
  result = math_ops.cast(most_likely, self.dtype)
  result.set_shape(self.get_batch_shape())
  return result
def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
  """Decoder function used in the `dynamic_rnn_decoder` for inference.

  The main difference between this decoder function and the `decoder_fn` in
  `simple_decoder_fn_train` is how `next_cell_input` is calculated. Here the
  next input id is chosen from the (projected) decoder output, either by a
  plain argmax (`decode_type == 'greedy'`) or by an argmax over the output
  perturbed with random noise (`decode_type == 'sample'`).
  (Bahdanau et al., 2014) & (Sutskever et al., 2014) use beam-search instead.

  The chosen ids are also recorded in `context_state`: each new id is
  prepended as column 0 and the oldest column is dropped, so the state keeps
  `maxium_length_int` columns with the most recent id first.

  Relies on names from the enclosing scope (`name`, `batch_size`, `dtype`,
  `start_of_sequence_id`, `end_of_sequence_id`, `encoder_state`,
  `num_decoder_symbols`, `maxium_length_int`, `maximum_length`, `output_fn`,
  `decode_type`, `embeddings`, `context_vector`).

  Args:
    time: positive integer constant reflecting the current timestep.
    cell_state: state of RNNCell.
    cell_input: input provided by `dynamic_rnn_decoder`. Must be None.
    cell_output: output of RNNCell. None on the first call (time == 0).
    context_state: context state provided by `dynamic_rnn_decoder`.

  Returns:
    A tuple (done, next state, next input, emit output, next context state)
    where:

    done: A boolean vector to indicate which sentences has reached a
    `end_of_sequence_id`. This is used for early stopping by the
    `dynamic_rnn_decoder`. When `time > maximum_length` a boolean vector with
    all elements as `true` is returned.

    next state: `cell_state`; replaced by `encoder_state` on the first call.

    next input: The embedding of the chosen id, concatenated with
    `context_vector` along axis 1 when `context_vector` is not None.

    emit output: `output_fn(cell_output)`; on the first call a zero vector
    of length `num_decoder_symbols`.

    next context state: the int32 record of chosen ids described above.

  Raises:
    ValueError: if `cell_input` is not None, or if `decode_type` is neither
      'sample' nor 'greedy'.
  """
  with ops.name_scope(name, "simple_decoder_fn_inference",
                      [time, cell_state, cell_input, cell_output,
                       context_state]):
    if cell_input is not None:
      raise ValueError("Expected cell_input to be None, but saw: %s" %
                       cell_input)
    if cell_output is None:
      # invariant that this is time == 0
      next_input_id = array_ops.ones([batch_size, ], dtype=dtype) * (
          start_of_sequence_id)
      done = array_ops.zeros([batch_size, ], dtype=dtypes.bool)
      cell_state = encoder_state
      cell_output = array_ops.zeros([num_decoder_symbols],
                                    dtype=dtypes.float32)
      # Start with an all-zero record of chosen ids.
      context_state = tf.zeros((batch_size, maxium_length_int),
                               dtype=tf.int32)
    else:
      cell_output = output_fn(cell_output)

      if decode_type == 'sample':
        # Gumbel-style noise: -log(-log(U)) with U uniform on [0, 1).
        # NOTE(review): the noise is subtracted from the output below,
        # whereas the usual Gumbel-max trick adds it -- confirm this is
        # intended.
        matrix_U = -1.0 * tf.log(
            -1.0 * tf.log(tf.random_uniform(tf.shape(cell_output),
                                            minval=0.0, maxval=1.0)))
        next_input_id = math_ops.cast(
            tf.argmax(tf.subtract(cell_output, matrix_U), dimension=1),
            dtype=dtype)
      elif decode_type == 'greedy':
        next_input_id = math_ops.cast(
            math_ops.argmax(cell_output, 1), dtype=dtype)
      else:
        raise ValueError("unknown decode type")

      done = math_ops.equal(next_input_id, end_of_sequence_id)
      # save the results into context state: prepend the new id and drop the
      # last column so the width stays at maxium_length_int.
      expanded_next_input = tf.expand_dims(next_input_id, axis=1)
      sliced_context_state = tf.slice(context_state, [0, 0],
                                      [-1, maxium_length_int - 1])
      context_state = tf.concat([expanded_next_input, sliced_context_state],
                                axis=1)
      context_state = tf.reshape(context_state,
                                 [batch_size, maxium_length_int])

    next_input = array_ops.gather(embeddings, next_input_id)
    if context_vector is not None:
      next_input = tf.concat([next_input, context_vector], axis=1)
    # if time > maxlen, return all true vector
    done = control_flow_ops.cond(math_ops.greater(time, maximum_length),
                                 lambda: array_ops.ones([batch_size, ],
                                                        dtype=dtypes.bool),
                                 lambda: done)
    return (done, cell_state, next_input, cell_output, context_state)
def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
  """Decoder function used in the `dynamic_rnn_decoder` for inference.

  The main difference between this decoder function and the `decoder_fn` in
  `attention_decoder_fn_train` is how `next_cell_input` is calculated.
  In decoder function we calculate the next input by applying an argmax across
  the feature dimension of the output from the decoder. This is a
  greedy-search approach. (Bahdanau et al., 2014) & (Sutskever et al., 2014)
  use beam-search instead.

  Relies on names from the enclosing scope (`name`, `batch_size`, `dtype`,
  `start_of_sequence_id`, `end_of_sequence_id`, `encoder_state`,
  `num_decoder_symbols`, `embeddings`, `attention_keys`, `attention_values`,
  `attention_construct_fn`, `_init_attention`, `output_fn`,
  `maximum_length`).

  Args:
    time: positive integer constant reflecting the current timestep.
    cell_state: state of RNNCell.
    cell_input: input provided by `dynamic_rnn_decoder`. Must be None.
    cell_output: output of RNNCell. None on the first call (time == 0).
    context_state: context state provided by `dynamic_rnn_decoder`.

  Returns:
    A tuple (done, next state, next input, emit output, next context state)
    where:

    done: A boolean vector to indicate which sentences has reached a
    `end_of_sequence_id`. This is used for early stopping by the
    `dynamic_rnn_decoder`. When `time > maximum_length` a boolean vector with
    all elements as `true` is returned.

    next state: `cell_state`; replaced by `encoder_state` on the first call.

    next input: The embedding of the argmax id of the emitted output,
    concatenated with the attention along axis 1.

    emit output: `output_fn` applied to the attention built from
    `cell_output`; on the first call a zero vector of length
    `num_decoder_symbols`.

    next context state: `context_state`, this decoder function does not
    modify the given context state. The context state could be modified when
    applying e.g. beam search.

  Raises:
    ValueError: if cell_input is not None.
  """
  with ops.name_scope(
      name, "attention_decoder_fn_inference",
      [time, cell_state, cell_input, cell_output, context_state]):
    if cell_input is not None:
      raise ValueError(
          "Expected cell_input to be None, but saw: %s" % cell_input)
    if cell_output is None:
      # invariant that this is time == 0
      next_input_id = array_ops.ones([
          batch_size,
      ], dtype=dtype) * (start_of_sequence_id)
      done = array_ops.zeros([
          batch_size,
      ], dtype=dtypes.bool)
      cell_state = encoder_state
      cell_output = array_ops.zeros([num_decoder_symbols],
                                    dtype=dtypes.float32)
      cell_input = array_ops.gather(embeddings, next_input_id)
      # NOTE(review): `cell_type` is not read anywhere in this function.
      cell_type = array_ops.zeros([3], dtype=dtypes.float32)

      # init attention
      attention = _init_attention(encoder_state)
    else:
      # construct attention
      attention = attention_construct_fn(cell_output, attention_keys,
                                         attention_values)
      cell_output = attention  # batch * 2 num_units (per the original note)

      # Project the attention output; the original note describes the
      # result as a probability over the vocabulary list.
      cell_output = output_fn(cell_output)

      # Disabled alternative: perturb the log of the output with Gumbel
      # noise before taking the argmax, instead of the plain argmax below.
      #u_sample = tf.random_uniform([batch_size, num_decoder_symbols])
      #g_sample = -tf.log(-tf.log(u_sample + 1e-18) + 1e-18)
      #cell_output_samplemax = tf.log(cell_output+1e-18) + g_sample
      next_input_id = math_ops.cast(math_ops.argmax(cell_output, 1),
                                    dtype=dtype)
      #next_input_id = math_ops.cast(
      #math_ops.argmax(cell_output_samplemax, 1), dtype=dtype)
      done = math_ops.equal(next_input_id, end_of_sequence_id)
      cell_input = array_ops.gather(embeddings, next_input_id)

    # combine cell_input and attention
    next_input = array_ops.concat([cell_input, attention], 1)

    # if time > maxlen, return all true vector
    done = control_flow_ops.cond(
        math_ops.greater(time, maximum_length),
        lambda: array_ops.ones([
            batch_size,
        ], dtype=dtypes.bool), lambda: done)
    return (done, cell_state, next_input, cell_output, context_state)
def sample(self, time, outputs, name=None, **unused_kwargs):
  """Gets a sample for one step.

  The sample ids are the argmax of `outputs` over the last axis, cast to
  int32.
  """
  with ops.name_scope(name, "TrainingHelperSample", [time, outputs]):
    best_ids = math_ops.argmax(outputs, axis=-1)
    return math_ops.cast(best_ids, dtypes.int32)
def argmax(x):
  """Returns `math_ops.argmax(x)` wrapped in `stop_gradient`."""
  return array_ops.stop_gradient(math_ops.argmax(x))
def create_estimator_spec(
        self, features, mode, logits, labels=None, train_op_fn=None):
    """Build the `EstimatorSpec` for the binary logistic head. See `Head`.

    Args:
      features: Dict of feature tensors; the weight feature (if configured)
        is looked up here.
      mode: One of `model_fn.ModeKeys`.
      logits: Logits tensor, validated with `_check_logits`.
      labels: Labels tensor; only used outside PREDICT mode.
      train_op_fn: Callable taking the training loss and returning the train
        op; required once the TRAIN branch is reached.

    Returns:
      An `EstimatorSpec` for the requested mode.

    Raises:
      ValueError: if the TRAIN branch is reached and `train_op_fn` is None.
    """
    mode_keys = model_fn.ModeKeys
    scope_values = tuple(six.itervalues(features)) + (labels, logits)
    with variable_scope.variable_scope(
            None, default_name='binary_logistic_head', values=scope_values):
        # ---- Predict ----
        keys = prediction_keys.PredictionKeys
        logits = _check_logits(logits, self.logits_dimension)
        logistic = math_ops.sigmoid(logits, name=keys.LOGISTIC)
        # A zero column next to the logits gives a two-class logit pair.
        zero_and_logits = (array_ops.zeros_like(logits), logits)
        two_class_logits = array_ops.concat(
            zero_and_logits, 1, name='two_class_logits')
        scores = nn.softmax(two_class_logits, name=keys.PROBABILITIES)
        winning_class = math_ops.argmax(two_class_logits, axis=1)
        class_ids = array_ops.reshape(winning_class, (-1, 1), name='classes')
        predictions = {
            keys.LOGITS: logits,
            keys.LOGISTIC: logistic,
            keys.PROBABILITIES: scores,
            keys.CLASS_IDS: class_ids,
        }
        if mode == mode_keys.PREDICT:
            # `ClassificationOutput` requires string classes.
            # TODO(ptucker): Support label_keys.
            string_classes = string_ops.as_string(
                class_ids, name='str_classes')
            classification = export_output.ClassificationOutput(
                scores=scores, classes=string_classes)
            return model_fn.EstimatorSpec(
                mode=mode_keys.PREDICT,
                predictions=predictions,
                export_outputs={'': classification})

        # ---- Eval ----
        float_labels = _maybe_expand_dim(math_ops.to_float(labels))
        labels = _check_labels(float_labels, self.logits_dimension)
        unweighted_loss = nn.sigmoid_cross_entropy_with_logits(
            labels=labels, logits=logits, name='loss')
        if self._weight_feature_key is None:
            weights = 1.
        else:
            weights = features[self._weight_feature_key]
        weights = _maybe_expand_dim(
            math_ops.to_float(weights, name='weights'))
        training_loss = losses.compute_weighted_loss(
            unweighted_loss, weights=weights,
            reduction=losses.Reduction.SUM)
        if mode == mode_keys.EVAL:
            metric_ops = self._eval_metric_ops(
                labels=labels,
                logits=logits,
                logistic=logistic,
                scores=scores,
                classes=class_ids,
                unweighted_loss=unweighted_loss,
                weights=weights)
            return model_fn.EstimatorSpec(
                mode=mode_keys.EVAL,
                predictions=predictions,
                loss=training_loss,
                eval_metric_ops=metric_ops)

        # ---- Train ----
        if train_op_fn is None:
            raise ValueError('train_op_fn can not be None.')
        logging_ops.scalar_summary(
            metric_keys.MetricKeys.LOSS, training_loss)
        mean_loss = losses.compute_weighted_loss(
            unweighted_loss, weights=weights,
            reduction=losses.Reduction.MEAN)
        logging_ops.scalar_summary(
            metric_keys.MetricKeys.LOSS_MEAN, mean_loss)
        train_op = train_op_fn(training_loss)
        return model_fn.EstimatorSpec(
            mode=mode_keys.TRAIN,
            predictions=predictions,
            loss=training_loss,
            train_op=train_op)
def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
    """Greedy inference decoder function for `dynamic_rnn_decoder`.

    The next input is chosen by an argmax over axis 1 of the (transformed)
    decoder output, i.e. a greedy search. (Bahdanau et al., 2014) &
    (Sutskever et al., 2014) use beam-search instead.

    This is a closure: `name`, `batch_size`, `encoder_state`,
    `num_decoder_symbols`, `dtype`, `maximum_length`, `output_fn`,
    `embeddings`, `end_of_sequence_id` and `first_input` come from the
    enclosing scope, which is not visible here.

    Args:
      time: current timestep.
      cell_state: state of RNNCell.
      cell_input: must be None; anything else raises.
      cell_output: output of RNNCell, or None on the first call (time == 0).
      context_state: on calls after the first, the `TensorArray` created on
        the first call.

    Returns:
      A tuple (done, next state, next input, emit output, next context state)
      where:

      done: boolean vector, true where the chosen id equals
        `end_of_sequence_id`. When `time == maximum_length` a vector of all
        `true` with `batch_size` entries is returned instead.

      next state: `encoder_state` on the first call, otherwise the
        `cell_state` passed in, unchanged.

      next input: `first_input` on the first call; afterwards the rows of
        `embeddings` gathered at the argmax ids.

      emit output: zeros of shape `[num_decoder_symbols]` on the first call,
        otherwise `output_fn(cell_output)`.

      next context state: a `TensorArray` named "greedy_path" created on the
        first call; on later calls the argmax ids are written at index
        `time - 1`.

    Raises:
      ValueError: if cell_input is not None.
    """
    with ops.name_scope(
            name, "greedy_decoder_fn_inference",
            [time, cell_state, cell_input, cell_output, context_state]):
        if cell_input is not None:
            raise ValueError(
                "Expected cell_input to be None, but saw: %s" % cell_input)
        if cell_output is None:
            # invariant that this is time == 0
            # No id has been predicted yet; `first_input` is used below.
            next_input_id = None
            done = array_ops.zeros([
                batch_size,
            ], dtype=dtypes.bool)
            cell_state = encoder_state
            cell_output = array_ops.zeros([num_decoder_symbols],
                                          dtype=dtypes.float32)
            # Dynamically sized array that records the greedy path.
            context_state = tensor_array_ops.TensorArray(
                #dtype=dtype, tensor_array_name="greedy_path", size=maximum_length + 1, infer_shape=False)
                dtype=dtype, tensor_array_name="greedy_path", size=0,
                dynamic_size=True, infer_shape=False)
        else:
            cell_output = output_fn(cell_output)
            next_input_id = math_ops.cast(math_ops.argmax(cell_output, 1),
                                          dtype=dtype)
            done = math_ops.equal(next_input_id, end_of_sequence_id)
            #done = tf.zeros_like(next_input_id, dtype=tf.bool)
            # Step t's choice is stored at slot t - 1 (slot 0 is time 1).
            context_state = context_state.write(time - 1, next_input_id)
        next_input = array_ops.gather(
            embeddings,
            next_input_id) if next_input_id is not None else first_input
        # if time == maxlen, return all true vector
        done = control_flow_ops.cond(
            math_ops.equal(time, maximum_length),
            lambda: array_ops.ones([
                batch_size,
            ], dtype=dtypes.bool), lambda: done)
        return (done, cell_state, next_input, cell_output, context_state)
def create_estimator_spec(
        self, features, mode, logits, labels=None, train_op_fn=None):
    """Build the `EstimatorSpec` for the multi-class head. See `Head`.

    Args:
      features: Dict of feature tensors; the weight feature (if configured)
        is looked up here.
      mode: One of `model_fn.ModeKeys`.
      logits: Logits tensor, validated with `_check_logits`.
      labels: Labels tensor; only used outside PREDICT mode.
      train_op_fn: Callable taking the training loss and returning the train
        op; required once the TRAIN branch is reached.

    Returns:
      An `EstimatorSpec` for the requested mode.

    Raises:
      ValueError: if the TRAIN branch is reached and `train_op_fn` is None.
    """
    mode_keys = model_fn.ModeKeys
    scope_values = tuple(six.itervalues(features)) + (labels, logits)
    with variable_scope.variable_scope(
            None, default_name='multi_class_head', values=scope_values):
        logits = _check_logits(logits, self.logits_dimension)

        # ---- Predict ----
        keys = prediction_keys.PredictionKeys
        with ops.name_scope(None, 'predictions', (logits,)):
            # The argmax result has shape [batch_size] ...
            flat_class_ids = math_ops.argmax(logits, 1, name=keys.CLASS_IDS)
            # ... and is expanded to [batch_size, 1].
            class_ids = array_ops.expand_dims(flat_class_ids, axis=(1,))
            if self._label_vocabulary:
                lookup_table = lookup_ops.index_to_string_table_from_tensor(
                    vocabulary_list=self._label_vocabulary,
                    name='class_string_lookup')
                classes = lookup_table.lookup(class_ids)
            else:
                classes = string_ops.as_string(class_ids, name='str_classes')

            probabilities = nn.softmax(logits, name=keys.PROBABILITIES)
            predictions = {
                keys.LOGITS: logits,
                keys.PROBABILITIES: probabilities,
                keys.CLASS_IDS: class_ids,
                keys.CLASSES: classes,
            }
        if mode == mode_keys.PREDICT:
            batch_size = array_ops.shape(probabilities)[0]
            export_class_list = self._label_vocabulary
            if not export_class_list:
                all_ids = math_ops.range(self._n_classes)
                export_class_list = string_ops.as_string(all_ids)
            # One copy of the class list per example in the batch.
            class_row = array_ops.expand_dims(input=export_class_list, axis=0)
            export_output_classes = array_ops.tile(
                input=class_row, multiples=[batch_size, 1])
            # `ClassificationOutput` requires string classes.
            classification = export_output.ClassificationOutput(
                scores=probabilities, classes=export_output_classes)
            return model_fn.EstimatorSpec(
                mode=mode_keys.PREDICT,
                predictions=predictions,
                export_outputs={'': classification})

        # ---- Eval ----
        checked_labels = _check_labels(_maybe_expand_dim(labels), 1)
        label_ids = self._label_ids(checked_labels)
        per_example_loss = losses.sparse_softmax_cross_entropy(
            labels=label_ids, logits=logits,
            reduction=losses.Reduction.NONE)
        # Restore the squeezed dim, so the loss matches the weights shape.
        unweighted_loss = array_ops.expand_dims(per_example_loss, axis=(1,))
        if self._weight_feature_key is None:
            weights = 1.
        else:
            weights = features[self._weight_feature_key]
        weights = _maybe_expand_dim(
            math_ops.to_float(weights, name='weights'))
        training_loss = losses.compute_weighted_loss(
            unweighted_loss, weights=weights,
            reduction=losses.Reduction.SUM)
        if mode == mode_keys.EVAL:
            metric_ops = self._eval_metric_ops(
                labels=label_ids,
                probabilities=probabilities,
                logits=logits,
                class_ids=class_ids,
                unweighted_loss=unweighted_loss,
                weights=weights)
            return model_fn.EstimatorSpec(
                mode=mode_keys.EVAL,
                predictions=predictions,
                loss=training_loss,
                eval_metric_ops=metric_ops)

        # ---- Train ----
        if train_op_fn is None:
            raise ValueError('train_op_fn can not be None.')
        logging_ops.scalar_summary(
            metric_keys.MetricKeys.LOSS, training_loss)
        mean_loss = losses.compute_weighted_loss(
            unweighted_loss, weights=weights,
            reduction=losses.Reduction.MEAN)
        logging_ops.scalar_summary(
            metric_keys.MetricKeys.LOSS_MEAN, mean_loss)
        train_op = train_op_fn(training_loss)
        return model_fn.EstimatorSpec(
            mode=mode_keys.TRAIN,
            predictions=predictions,
            loss=training_loss,
            train_op=train_op)
def create_estimator_spec(
        self, features, mode, logits, labels=None, train_op_fn=None):
    """See `Head`.

    Builds the `EstimatorSpec` of a binary head: logistic and two-class
    softmax predictions, a weighted loss obtained from `self.create_loss`,
    and, in TRAIN mode, loss summaries plus the train op.

    Args:
      features: Features passed to `self.create_loss` and `_weights`.
      mode: One of `model_fn.ModeKeys`.
      logits: Logits tensor, validated with `_check_logits`.
      labels: Labels passed to `self.create_loss`; unused in PREDICT mode.
      train_op_fn: Callable taking the training loss and returning the train
        op; required once the TRAIN branch is reached.

    Returns:
      An `EstimatorSpec` for the requested mode.

    Raises:
      ValueError: if the TRAIN branch is reached and `train_op_fn` is None.
    """
    # NOTE(review): the extent of the 'head' name scope was reconstructed
    # from whitespace-collapsed source; confirm that the summaries and the
    # final EstimatorSpec are meant to sit outside of it.
    # Predict.
    with ops.name_scope('head'):
        with ops.name_scope(None, 'predictions', (logits,)):
            pred_keys = prediction_keys.PredictionKeys
            logits = _check_logits(logits, self.logits_dimension)
            logistic = math_ops.sigmoid(logits, name=pred_keys.LOGISTIC)
            # A zero column next to the logits gives a two-class logit pair.
            two_class_logits = array_ops.concat(
                (array_ops.zeros_like(logits), logits), 1,
                name='two_class_logits')
            scores = nn.softmax(
                two_class_logits, name=pred_keys.PROBABILITIES)
            class_ids = array_ops.reshape(
                math_ops.argmax(two_class_logits, axis=1), (-1, 1),
                name='classes')
            # String classes come from the vocabulary when one is set,
            # otherwise from the stringified ids.
            if self._label_vocabulary:
                table = lookup_ops.index_to_string_table_from_tensor(
                    vocabulary_list=self._label_vocabulary,
                    name='class_string_lookup')
                classes = table.lookup(class_ids)
            else:
                classes = string_ops.as_string(class_ids, name='str_classes')
            predictions = {
                pred_keys.LOGITS: logits,
                pred_keys.LOGISTIC: logistic,
                pred_keys.PROBABILITIES: scores,
                pred_keys.CLASS_IDS: class_ids,
                pred_keys.CLASSES: classes,
            }
        if mode == model_fn.ModeKeys.PREDICT:
            batch_size = array_ops.shape(logistic)[0]
            export_class_list = self._label_vocabulary
            if not export_class_list:
                export_class_list = string_ops.as_string([0, 1])
            # One copy of the class list per example in the batch.
            export_output_classes = array_ops.tile(
                input=array_ops.expand_dims(input=export_class_list, axis=0),
                multiples=[batch_size, 1])
            classifier_output = export_output.ClassificationOutput(
                scores=scores,
                # `ClassificationOutput` requires string classes.
                classes=export_output_classes)
            return model_fn.EstimatorSpec(
                mode=model_fn.ModeKeys.PREDICT,
                predictions=predictions,
                export_outputs={
                    _DEFAULT_SERVING_KEY: classifier_output,
                    _CLASSIFY_SERVING_KEY: classifier_output,
                    _REGRESS_SERVING_KEY: export_output.RegressionOutput(
                        value=logistic),
                    _PREDICT_SERVING_KEY: export_output.PredictOutput(
                        predictions)
                })

        # Eval.
        unweighted_loss, processed_labels = self.create_loss(
            features=features, mode=mode, logits=logits, labels=labels)
        weights = _weights(features, self._weight_column)
        training_loss = losses.compute_weighted_loss(
            unweighted_loss, weights=weights, reduction=losses.Reduction.SUM)
        if mode == model_fn.ModeKeys.EVAL:
            return model_fn.EstimatorSpec(
                mode=model_fn.ModeKeys.EVAL,
                predictions=predictions,
                loss=training_loss,
                eval_metric_ops=self._eval_metric_ops(
                    labels=processed_labels,
                    logits=logits,
                    logistic=logistic,
                    scores=scores,
                    class_ids=class_ids,
                    unweighted_loss=unweighted_loss,
                    weights=weights))

        # Train.
        if train_op_fn is None:
            raise ValueError('train_op_fn can not be None.')
    # The empty name scope places the summary ops at the top level.
    with ops.name_scope(''):
        summary.scalar(
            _summary_key(self._name, metric_keys.MetricKeys.LOSS),
            training_loss)
        summary.scalar(
            _summary_key(self._name, metric_keys.MetricKeys.LOSS_MEAN),
            losses.compute_weighted_loss(
                unweighted_loss, weights=weights,
                reduction=losses.Reduction.MEAN))
    return model_fn.EstimatorSpec(
        mode=model_fn.ModeKeys.TRAIN,
        predictions=predictions,
        loss=training_loss,
        train_op=train_op_fn(training_loss))
def _find_loss_augmented_facility_idx(pairwise_distances, labels, chosen_ids,
                                      candidate_ids, margin_multiplier,
                                      margin_type):
    """Pick the candidate that maximizes the loss-augmented facility score.

    Each candidate gets a score made of two parts: the negated sum of
    column-wise minimum distances once the candidate is added to the chosen
    set, plus `margin_multiplier` times one minus its clustering score.

    Args:
      pairwise_distances: 2-D Tensor of pairwise distances.
      labels: 1-D Tensor of ground truth cluster assignment.
      chosen_ids: 1-D Tensor of current centroid indices.
      candidate_ids: 1-D Tensor of candidate indices.
      margin_multiplier: multiplication constant for the margin term.
      margin_type: Type of structured margin, forwarded to
        `compute_clustering_score`.

    Returns:
      The entry of `candidate_ids` with the highest combined score.
    """
    num_candidates = array_ops.shape(candidate_ids)[0]

    distances_to_chosen = array_ops.gather(pairwise_distances, chosen_ids)
    distances_to_candidates = array_ops.gather(
        pairwise_distances, candidate_ids)
    tiled_chosen = array_ops.tile(distances_to_chosen, [1, num_candidates])

    # Put all candidate rows side by side under the tiled chosen rows, take
    # the column-wise minimum, then fold back to one row per candidate.
    candidate_row = array_ops.reshape(distances_to_candidates, [1, -1])
    stacked = array_ops.concat([tiled_chosen, candidate_row], 0)
    nearest = math_ops.reduce_min(stacked, axis=0, keepdims=True)
    per_candidate = array_ops.reshape(nearest, [num_candidates, -1])
    facility_scores = -1.0 * math_ops.reduce_sum(per_candidate, axis=1)

    margin_scores = array_ops.zeros([num_candidates])
    step = array_ops.constant(0)

    def _keep_going(step, unused_margin_scores):
        return step < num_candidates

    def _score_candidate(step, margin_scores):
        trial_ids = array_ops.concat(
            [chosen_ids, [candidate_ids[step]]], 0)
        predictions = get_cluster_assignment(pairwise_distances, trial_ids)
        score = compute_clustering_score(labels, predictions, margin_type)
        leading_zeros = array_ops.zeros([step])
        trailing_zeros = array_ops.zeros([num_candidates - 1 - step])
        # One minus the clustering score is used as the structured loss,
        # since a higher score is better.
        next_step = step + 1
        loss_at_step = array_ops.concat(
            [leading_zeros, [1.0 - score], trailing_zeros], 0)
        return next_step, margin_scores + loss_at_step

    _, margin_scores = control_flow_ops.while_loop(
        _keep_going, _score_candidate, [step, margin_scores])

    scaled_margin = margin_multiplier * margin_scores
    total_scores = math_ops.add(facility_scores, scaled_margin)

    best_position = math_ops.argmax(total_scores, axis=0)
    best_position = math_ops.cast(best_position, dtypes.int32)

    return candidate_ids[best_position]
def _mode(self):
    """Return the one-hot encoding of the argmax of `self.logits`.

    The argmax is taken along axis `self._batch_rank`; the one-hot result has
    depth `self.event_size`, dtype `self.dtype` and the static shape of
    `self.logits`.
    """
    top_index = math_ops.argmax(self.logits, axis=self._batch_rank)
    one_hot_mode = array_ops.one_hot(
        top_index, self.event_size, dtype=self.dtype)
    one_hot_mode.set_shape(self.logits.get_shape())
    return one_hot_mode
def multi_value_predictions(activations, target_column, problem_type,
                            predict_probabilities):
    """Maps `activations` from the RNN to predictions for multi value models.

    The returned `dict` always holds one prediction entry, keyed by
    `prediction_key.PredictionKey.CLASSES` when `problem_type` is
    `ProblemType.CLASSIFICATION` and by `prediction_key.PredictionKey.SCORES`
    otherwise. When `predict_probabilities` is `True` it additionally holds a
    `prediction_key.PredictionKey.PROBABILITIES` entry, and the prediction
    entry is the argmax of those probabilities.

    Note that variable length inputs will yield some predictions that don't
    have meaning. For example, if `sequence_length = [3, 2]`, then prediction
    `[1, 2]` has no meaningful interpretation.

    Args:
      activations: Output from an RNN. It is indexed as a rank-3 tensor
        (`[batch_size, padded_length, ?]` per the original documentation).
      target_column: An initialized `TargetColumn`, used to turn logits into
        predictions.
      problem_type: Either `ProblemType.CLASSIFICATION` or
        `ProblemType.LINEAR_REGRESSION`.
      predict_probabilities: A Python boolean, indicating whether
        probabilities should be returned.

    Returns:
      A `dict` mapping strings to `Tensors`.
    """
    keys = prediction_key.PredictionKey
    with ops.name_scope('MultiValuePrediction'):
        shape = array_ops.shape(activations)
        # Collapse the two leading dims so every step is one row.
        flat_activations = array_ops.reshape(activations, [-1, shape[2]])
        result = {}
        if not predict_probabilities:
            flat_predictions = target_column.logits_to_predictions(
                flat_activations, proba=False)
        else:
            flat_probabilities = target_column.logits_to_predictions(
                flat_activations, proba=True)
            flat_predictions = math_ops.argmax(flat_probabilities, 1)
            if target_column.num_label_columns == 1:
                # Single label column: the probabilities get a last dim of 2.
                probability_shape = array_ops.concat([shape[:2], [2]], 0)
            else:
                probability_shape = shape
            result[keys.PROBABILITIES] = array_ops.reshape(
                flat_probabilities, probability_shape,
                name=keys.PROBABILITIES)
        if problem_type == constants.ProblemType.CLASSIFICATION:
            predictions_name = keys.CLASSES
        else:
            predictions_name = keys.SCORES
        result[predictions_name] = array_ops.reshape(
            flat_predictions, [shape[0], shape[1]], name=predictions_name)
        return result
def _my_metric_op(predictions, targets):
    """Multiply the argmax class ids of `predictions` by flattened `targets`.

    The argmax is taken along axis 1 and `targets` is reshaped to 1-D before
    the element-wise product.
    """
    predicted_ids = math_ops.argmax(predictions, 1)
    flat_targets = tf.reshape(targets, [-1])
    return tf.mul(predicted_ids, flat_targets)
def calc_sample_id(self, time, logits):
    """Choose int32 sample ids: multinomial draw early on, argmax afterwards.

    While `time < conf.pick_multinomial_max_len` one id per row is drawn with
    `tf.multinomial`; otherwise the argmax over the last axis is used.
    """
    def _draw_from_multinomial():
        drawn = tf.cast(tf.multinomial(logits, 1), tf.int32)
        return drawn[:, 0]

    def _take_argmax():
        best = math_ops.argmax(logits, axis=-1)
        return math_ops.cast(best, dtypes.int32)

    still_sampling = time < conf.pick_multinomial_max_len
    return tf.cond(still_sampling, _draw_from_multinomial, _take_argmax)
def testStopGradient(self):
    """The gradient through `stop_gradient(argmax(x))` is reported as None."""
    gradient_fn = backprop.gradients_function(
        lambda x: array_ops.stop_gradient(math_ops.argmax(x)))
    first_gradient = gradient_fn([0.0])[0]
    self.assertAllEqual(first_gradient, None)
def sampler(time, outputs, state):
    """Return the argmax of `outputs` over its last axis as int32 ids.

    `time` and `state` are accepted for the sampler interface and not used.
    """
    # Not strictly necessary, but the cast gets the types right.
    best_ids = math_ops.argmax(outputs, axis=-1)
    return math_ops.cast(best_ids, tf.int32)
def create_estimator_spec(self,
                          features,
                          mode,
                          logits,
                          labels=None,
                          train_op_fn=None):
    """Build the `EstimatorSpec` for the multi-class head. See `Head`.

    Args:
      features: Dict of feature tensors; the weight feature (if configured)
        is looked up here.
      mode: One of `model_fn.ModeKeys`.
      logits: Logits tensor, validated with `_check_logits`.
      labels: Integer label ids; only used outside PREDICT mode.
      train_op_fn: Callable taking the training loss and returning the train
        op; required once the TRAIN branch is reached.

    Returns:
      An `EstimatorSpec` for the requested mode.

    Raises:
      ValueError: if the labels dtype is not integer, or if the TRAIN branch
        is reached and `train_op_fn` is None.
    """
    mode_keys = model_fn.ModeKeys
    scope_values = tuple(six.itervalues(features)) + (labels, logits)
    with variable_scope.variable_scope(
            None, default_name='multi_class_head', values=scope_values):
        logits = _check_logits(logits, self.logits_dimension)

        # ---- Predict ----
        keys = prediction_keys.PredictionKeys
        with ops.name_scope(None, 'predictions', (logits, )):
            # class_ids's shape is [batch_size]
            class_ids = math_ops.argmax(logits, 1, name=keys.CLASSES)
            probabilities = nn.softmax(logits, name=keys.PROBABILITIES)
            # Expand to [batch_size, 1]
            expanded_class_ids = array_ops.expand_dims(class_ids, axis=(1, ))
            predictions = {
                keys.LOGITS: logits,
                keys.PROBABILITIES: probabilities,
                keys.CLASSES: expanded_class_ids,
            }
        if mode == mode_keys.PREDICT:
            batch_size = array_ops.shape(probabilities)[0]
            # One row of all class ids per example in the batch.
            class_row = array_ops.expand_dims(
                input=math_ops.range(self._n_classes), axis=0)
            output_classes = array_ops.tile(
                input=class_row, multiples=[batch_size, 1])
            # `ClassificationOutput` requires string classes.
            # TODO(xiejw): Support label_keys or label_column
            string_classes = string_ops.as_string(
                output_classes, name='str_classes')
            classification = export_output.ClassificationOutput(
                scores=probabilities, classes=string_classes)
            return model_fn.EstimatorSpec(
                mode=mode_keys.PREDICT,
                predictions=predictions,
                export_outputs={'': classification})

        # ---- Eval ----
        labels = _check_labels(labels, 1)
        # Check that we got integer for classification.
        if not labels.dtype.is_integer:
            raise ValueError('Labels dtype should be integer '
                             'Instead got %s.' % labels.dtype)
        n_classes = ops.convert_to_tensor(
            self._n_classes, dtype=labels.dtype)
        below_n_classes = check_ops.assert_less(
            labels, n_classes, message='Label IDs must < n_classes')
        non_negative = check_ops.assert_non_negative(
            labels, message='Label Ids must >= 0')
        # Tie both range checks to every later use of the labels.
        with ops.control_dependencies((below_n_classes, non_negative)):
            labels = array_ops.identity(labels)
        per_example_loss = losses.sparse_softmax_cross_entropy(
            labels=labels, logits=logits, reduction=losses.Reduction.NONE)
        # Restore the squeezed dim, so the loss matches the weights shape.
        unweighted_loss = array_ops.expand_dims(per_example_loss, axis=(1, ))
        if self._weight_feature_key is None:
            weights = 1.
        else:
            weights = features[self._weight_feature_key]
        weights = math_ops.to_float(weights, name='weights')
        training_loss = losses.compute_weighted_loss(
            unweighted_loss, weights=weights,
            reduction=losses.Reduction.SUM)
        if mode == mode_keys.EVAL:
            metric_ops = self._eval_metric_ops(
                labels=labels,
                probabilities=probabilities,
                logits=logits,
                class_ids=class_ids,
                unweighted_loss=unweighted_loss,
                weights=weights)
            return model_fn.EstimatorSpec(
                mode=mode_keys.EVAL,
                predictions=predictions,
                loss=training_loss,
                eval_metric_ops=metric_ops)

        # ---- Train ----
        if train_op_fn is None:
            raise ValueError('train_op_fn can not be None.')
        logging_ops.scalar_summary(
            metric_keys.MetricKeys.LOSS, training_loss)
        mean_loss = losses.compute_weighted_loss(
            unweighted_loss, weights=weights,
            reduction=losses.Reduction.MEAN)
        logging_ops.scalar_summary(
            metric_keys.MetricKeys.LOSS_MEAN, mean_loss)
        train_op = train_op_fn(training_loss)
        return model_fn.EstimatorSpec(
            mode=mode_keys.TRAIN,
            predictions=predictions,
            loss=training_loss,
            train_op=train_op)
def __init__(self,
             name,
             unfreeze_time=30000,
             autoencode=False,
             action_lr=1e-4,
             deconv_lr=1e-3,
             fwd_consist=False,
             baseline_reg=False,
             softmaxBackprop=True,
             gtAction=False):
    """Build the action-prediction graph and start a session.

    Creates placeholders, two weight-sharing image encoders, three chained
    action heads (location, then theta, then length), optionally a
    forward-consistency deconvolution branch, the optimizer ops, accuracy
    and summary ops, and finally a `tf.Session` with initialized variables.
    It also creates the model directory under `./results/<name>/models/`.

    Args:
      name: Prefix of the run name; the other settings are appended to it.
      unfreeze_time: Stored on the instance and encoded (in thousands) in
        the run name.
      autoencode: Stored on the instance and encoded in the run name.
      action_lr: Learning rate of the Adam optimizer for the action losses.
      deconv_lr: Learning rate of the Adam optimizer for the
        forward-consistency loss.
      fwd_consist: If true, build the forward-consistency branch.
      baseline_reg: If true, the forward branch is fed the ground-truth
        action placeholders only.
      softmaxBackprop: If true, the action logits are passed through a
        softmax before being used in the forward branch.
      gtAction: Stored on the instance and encoded in the run name.
    """
    # NOTE(review): this block was reconstructed from whitespace-collapsed
    # source; the extents of the `with`/`if` bodies below are a best guess
    # and should be confirmed against the original file.
    self.unfreeze_time = unfreeze_time
    self.autoencode = autoencode
    self.gtAction = gtAction
    self.name = '{0}_{1}_{2}_{3}_{4}_{5}K_{6}_{7}'.format(
        name, 'fwdconsist' + str(fwd_consist),
        'baselinereg' + str(baseline_reg), 'deconv_lr' + str(deconv_lr),
        'autoencode' + str(autoencode),
        'unfreeze' + str(int(unfreeze_time / 1000.)),
        'softmax' + str(softmaxBackprop), 'gtAction' + str(gtAction))
    self.fwd_consist = fwd_consist
    self.start = 0
    self.batch_loader = rope_data

    # Inputs: current/goal images, one row per action component, a
    # per-example flag that scales the length loss, and mode switches.
    self.image_ph = tf.placeholder(tf.float32, [None, 200, 200, 3],
                                   name='image_ph')
    self.goal_image_ph = tf.placeholder(tf.float32, [None, 200, 200, 3],
                                        name='goal_image_ph')
    self.location_ph = tf.placeholder(tf.float32, [None, LOCATION_BINS],
                                      name='location_ph')
    self.theta_ph = tf.placeholder(tf.float32, [None, THETA_BINS],
                                   name='theta_ph')
    self.length_ph = tf.placeholder(tf.float32, [None, LENGTH_BINS],
                                    name='length_ph')
    self.ignore_flag_ph = tf.placeholder(tf.float32, [None],
                                         name='ignore_flag_ph')
    self.is_training_ph = tf.placeholder(tf.bool, name='is_training_ph')
    self.autoencode_ph = tf.placeholder(tf.bool)
    self.gtAction_ph = tf.placeholder(tf.bool)

    # get latent representations for both the images
    # (the second call passes reuse=True)
    latent_image, latent_conv5_image = alexnet_geurzhoy.network(
        self.image_ph, trainable=True, num_outputs=ENCODING_SIZE)
    latent_goal_image, latent_conv5_goal_image = alexnet_geurzhoy.network(
        self.goal_image_ph,
        trainable=True,
        num_outputs=ENCODING_SIZE,
        reuse=True)

    # concatenate the latent representations and share information
    features = tf.concat(1, [latent_image, latent_goal_image])

    with tf.variable_scope("concat_fc"):
        x = tf.nn.relu(features)
        # NOTE(review): FEAT_SIZE here vs FEATURE_SIZE in the layer lists
        # below -- confirm these are meant to be two constants.
        x = slim.fully_connected(x, FEAT_SIZE, scope="concat_fc")

    #################################
    # ACTION PREDICTION
    #################################
    location_embedding = init_weights(
        'location_embedding', [LOCATION_BINS, LOCATION_EMBEDDING_SIZE])
    theta_embedding = init_weights('theta_embedding',
                                   [THETA_BINS, THETA_EMBEDDING_SIZE])

    # layer for predicting X, Y
    with tf.variable_scope('location_pred'):
        loc_network_layers = [FEATURE_SIZE, 200, 200, LOCATION_BINS]
        location_pred = make_network(x, loc_network_layers)
        location_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                location_pred, self.location_ph))
        # Ground-truth location while training, predicted one otherwise.
        location_sample = math_ops.argmax(
            tf.cond(self.is_training_ph, lambda: self.location_ph,
                    lambda: location_pred), 1)
        location_embed = embedding_ops.embedding_lookup(
            location_embedding, location_sample)

    # layer for predicting theta
    with tf.variable_scope('theta_pred'):
        x_with_loc = tf.concat(1, [x, location_embed])
        theta_network_layers = [
            FEATURE_SIZE + LOCATION_EMBEDDING_SIZE, 200, 200, THETA_BINS
        ]
        theta_pred = make_network(x_with_loc, theta_network_layers)
        theta_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                theta_pred, self.theta_ph))
        # Ground-truth theta while training, predicted one otherwise.
        theta_sample = math_ops.argmax(
            tf.cond(self.is_training_ph, lambda: self.theta_ph,
                    lambda: theta_pred), 1)
        theta_embed = embedding_ops.embedding_lookup(
            theta_embedding, theta_sample)

    # layer for predicting length of movement
    with tf.variable_scope('length_pred'):
        x_with_loc_theta = tf.concat(1, [x_with_loc, theta_embed])
        length_network_layers = [
            FEATURE_SIZE + LOCATION_EMBEDDING_SIZE + THETA_EMBEDDING_SIZE,
            200, 200, LENGTH_BINS
        ]
        length_pred = make_network(x_with_loc_theta, length_network_layers)
        length_softmax = tf.nn.softmax_cross_entropy_with_logits(
            length_pred, self.length_ph)
        # The per-example flag scales each example's length loss.
        length_loss = tf.reduce_mean(length_softmax * self.ignore_flag_ph)

    # add to collections for retrieval
    tf.add_to_collection('location_logit', location_pred)
    tf.add_to_collection('theta_logit', theta_pred)
    tf.add_to_collection('len_logit', length_pred)

    # variables of only inverse model without features
    inv_vars_no_alex = [
        v for v in tf.trainable_variables() if 'alexnet' not in v.name
    ]
    print('Action prediction tensors consist {0} out of {1}'.format(
        len(inv_vars_no_alex), len(tf.trainable_variables())))

    total_loss = location_loss + theta_loss + length_loss
    action_optimizer = tf.train.AdamOptimizer(action_lr)

    # Clipped gradients for the non-alexnet variables only ...
    # NOTE(review): the zip() results below are indexed and reused later,
    # which relies on zip returning a list (Python 2) -- confirm.
    action_grads, _ = zip(
        *action_optimizer.compute_gradients(total_loss, inv_vars_no_alex))
    action_grads, _ = tf.clip_by_global_norm(action_grads, GRAD_CLIP_NORM)
    action_grads = zip(action_grads, inv_vars_no_alex)

    # ... and for every trainable variable.
    action_grads_full, _ = zip(*action_optimizer.compute_gradients(
        total_loss, tf.trainable_variables()))
    action_grads_full, _ = tf.clip_by_global_norm(action_grads_full,
                                                  GRAD_CLIP_NORM)
    action_grads_full = zip(action_grads_full, tf.trainable_variables())

    #################################
    # FORWARD CONSISTENCY
    #################################
    if self.fwd_consist:
        with tf.variable_scope('fwd_consist'):
            if softmaxBackprop:
                # From here on the *_pred names hold probabilities.
                location_pred = tf.nn.softmax(location_pred)
                theta_pred = tf.nn.softmax(theta_pred)
                length_pred = tf.nn.softmax(length_pred)

            # baseline regularization => gradients flow only to alexnet, not action pred
            if baseline_reg:
                print('baseline')
                action_embed = tf.concat(
                    1, [self.location_ph, self.theta_ph, self.length_ph])
            else:
                # fwd_consist => gradients flow through action prediction
                latent_conv5_image = tf.stop_gradient(latent_conv5_image)
                action_embed = tf.cond(
                    self.gtAction_ph, lambda: tf.concat(1, [
                        self.location_ph, self.theta_ph, self.length_ph
                    ]), lambda: tf.concat(
                        1, [location_pred, theta_pred, length_pred]))

            # 363 = 11 * 11 * 3, matching the reshape below.
            action_embed = slim.fully_connected(action_embed, 363)
            action_embed = tf.reshape(action_embed, [-1, 11, 11, 3])

            # concat along depth
            fwd_features = tf.concat(3, [latent_conv5_image, action_embed])

            # deconvolution
            batch_size = tf.shape(fwd_features)[0]
            wt1 = tf.Variable(
                tf.truncated_normal([5, 5, 64, 259], stddev=0.1))
            deconv1 = tf.nn.conv2d_transpose(fwd_features, wt1,
                                             [batch_size, 22, 22, 64],
                                             [1, 2, 2, 1])
            deconv1 = leaky_relu(deconv1, 0.2)

            wt2 = tf.Variable(
                tf.truncated_normal([5, 5, 32, 64], stddev=0.1))
            deconv2 = tf.nn.conv2d_transpose(deconv1, wt2,
                                             [batch_size, 44, 44, 32],
                                             [1, 2, 2, 1])
            deconv2 = leaky_relu(deconv2, 0.2)

            wt3 = tf.Variable(
                tf.truncated_normal([5, 5, 3, 32], stddev=0.1))
            deconv3 = tf.nn.conv2d_transpose(deconv2, wt3,
                                             [batch_size, 88, 88, 3],
                                             [1, 2, 2, 1])
            deconv3 = tf.nn.tanh(deconv3)

            # loss from upsampled deconvolution and goal image
            upsampled_deconv_img = tf.image.resize_images(
                deconv3, [200, 200])
            tf.add_to_collection('upsampled_deconv_img',
                                 upsampled_deconv_img)

            # image inputs are -255 to 255 ??? for some reason
            # whether to autoencode or not
            normalized_goal_img = tf.cond(
                self.autoencode_ph, lambda: self.image_ph / 255.0,
                lambda: self.goal_image_ph / 255.0)

            # just to visualize
            deconv_log_img = (upsampled_deconv_img + 1.0) * 127.5

            # variables of only forward model
            fwd_vars = [
                v for v in tf.trainable_variables()
                if 'fwd_consist' in v.name
            ]
            print('Forward consistency tensors consist {0} out of {1}'.
                  format(len(fwd_vars), len(tf.trainable_variables())))

            # Mean absolute error between the reconstruction and the target.
            fwd_consist_loss = tf.reduce_mean(
                tf.abs(upsampled_deconv_img - normalized_goal_img))
            deconv_optimizer = tf.train.AdamOptimizer(deconv_lr)

            fwd_consist_grads, _ = zip(*deconv_optimizer.compute_gradients(
                fwd_consist_loss, fwd_vars))
            fwd_consist_grads, _ = tf.clip_by_global_norm(
                fwd_consist_grads, GRAD_CLIP_NORM)
            fwd_consist_grads = zip(fwd_consist_grads, fwd_vars)

            fwd_consist_grads_full, _ = zip(
                *deconv_optimizer.compute_gradients(
                    fwd_consist_loss, tf.trainable_variables()))
            fwd_consist_grads_full, _ = tf.clip_by_global_norm(
                fwd_consist_grads_full, GRAD_CLIP_NORM)
            fwd_consist_grads_full = zip(fwd_consist_grads_full,
                                         tf.trainable_variables())

            self.optimize_fwd_freeze = deconv_optimizer.apply_gradients(
                fwd_consist_grads)

            # Both full updates wait on one gradient of each loss.
            with tf.control_dependencies([
                    fwd_consist_grads_full[0][0][0],
                    action_grads_full[0][0][0]
            ]):
                self.optimize_fwd_full = deconv_optimizer.apply_gradients(
                    fwd_consist_grads_full)
                self.optimize_action_full = action_optimizer.apply_gradients(
                    action_grads_full)

    self.optimize_action_no_alex = action_optimizer.apply_gradients(
        action_grads)
    self.optimize_action_alex = action_optimizer.apply_gradients(
        action_grads_full)

    #################################
    # LOGGING AND SAVING OPERATIONS
    #################################
    # Accuracy = fraction of rows whose argmax matches the target's argmax.
    loc_correct_pred = tf.equal(tf.argmax(location_pred, 1),
                                tf.argmax(self.location_ph, 1))
    self.loc_accuracy = tf.reduce_mean(
        tf.cast(loc_correct_pred, tf.float32))

    theta_correct_pred = tf.equal(tf.argmax(theta_pred, 1),
                                  tf.argmax(self.theta_ph, 1))
    self.theta_accuracy = tf.reduce_mean(
        tf.cast(theta_correct_pred, tf.float32))

    length_correct_pred = tf.equal(tf.argmax(length_pred, 1),
                                   tf.argmax(self.length_ph, 1))
    self.length_accuracy = tf.reduce_mean(
        tf.cast(length_correct_pred, tf.float32))

    # logging
    tf.summary.scalar('model/location_loss', location_loss,
                      collections=['train'])
    tf.summary.scalar('model/theta_loss', theta_loss,
                      collections=['train'])
    tf.summary.scalar('model/length_loss', length_loss,
                      collections=['train'])

    if self.fwd_consist:
        tf.summary.scalar('model/fwd_consist_loss', fwd_consist_loss,
                          collections=['train'])
        tf.summary.image('upsampled_deconv_image', deconv_log_img,
                         max_outputs=5, collections=['train'])
        tf.summary.image('before', (self.image_ph + 255.0) / 2.0,
                         max_outputs=5, collections=['train'])
        tf.summary.image('after', (self.goal_image_ph + 255.0) / 2.0,
                         max_outputs=5, collections=['train'])

    self.train_summaries = tf.summary.merge_all('train')
    self.writer = tf.summary.FileWriter('./results/{0}/logs/{1}'.format(
        self.name, time.time()))

    self.saver = tf.train.Saver(max_to_keep=None)
    self.sess = tf.Session(config=CONFIG)
    self.sess.run(tf.global_variables_initializer())

    self.model_directory = './results/{0}/models/'.format(self.name)
    if not os.path.exists(self.model_directory):
        os.makedirs(self.model_directory)
def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
    """Decoder function used in the `dynamic_rnn_decoder` for inference.

    The next input is found greedily with an argmax over the decoder output.
    When the attention function returns a tuple, each step additionally
    chooses between a vocabulary word and an attended memory entry,
    whichever has the larger maximum probability. (Bahdanau et al., 2014) &
    (Sutskever et al., 2014) use beam-search instead.

    This is a closure: `name`, `batch_size`, `dtype`, `encoder_state`,
    `num_decoder_symbols`, `embeddings`, `imem`, `maximum_length`,
    `start_of_sequence_id`, `end_of_sequence_id`, `output_fn`, `selector_fn`,
    `attention_construct_fn`, `attention_keys`, `attention_values` and
    `_init_attention` come from the enclosing scope, which is not visible
    here.

    Args:
      time: current timestep.
      cell_state: state of RNNCell.
      cell_input: must be None; anything else raises.
      cell_output: output of RNNCell, or None on the first call (time == 0).
      context_state: context state provided by `dynamic_rnn_decoder`.

    Returns:
      A tuple (done, next state, next input, emit output, next context state)
      where:

      done: boolean vector, true where the chosen id equals
        `end_of_sequence_id`. When `time > maximum_length` a vector of all
        `true` with `batch_size` entries is returned instead.

      next state: `encoder_state` on the first call, otherwise the
        `cell_state` passed in, unchanged.

      next input: the chosen input embedding concatenated with the attention
        vector along axis 1.

      emit output: zeros of shape `[num_decoder_symbols]` on the first call,
        otherwise `output_fn` applied to the attention output.

      next context state: when `imem` is not None, a `TensorArray` named
        "output_ids_ta" is created on the first call; in the tuple-attention
        branch the chosen ids are written to it at index `time - 1`.
        Otherwise `context_state` is returned as given.

    Raises:
      ValueError: if cell_input is not None.
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # source; confirm the branch extents against the original file.
    with ops.name_scope(
            name, "attention_decoder_fn_inference",
            [time, cell_state, cell_input, cell_output, context_state]):
        if cell_input is not None:
            raise ValueError(
                "Expected cell_input to be None, but saw: %s" % cell_input)
        if cell_output is None:
            # invariant that this is time == 0
            next_input_id = array_ops.ones([
                batch_size,
            ], dtype=dtype) * (start_of_sequence_id)
            done = array_ops.zeros([
                batch_size,
            ], dtype=dtypes.bool)
            cell_state = encoder_state
            cell_output = array_ops.zeros([num_decoder_symbols],
                                          dtype=dtypes.float32)
            word_input = array_ops.gather(embeddings, next_input_id)
            # All-zero indices: every row reads entry [0, 0] of imem[1].
            naf_triple_id = array_ops.zeros([batch_size, 2], dtype=dtype)
            # NOTE(review): imem[1] is read here before the
            # `imem is not None` check below -- confirm imem can never be
            # None on this path.
            triple_input = array_ops.gather_nd(imem[1], naf_triple_id)
            cell_input = array_ops.concat([word_input, triple_input], axis=1)
            # init attention
            attention = _init_attention(encoder_state)
            if imem is not None:  # added by our model
                context_state = tensor_array_ops.TensorArray(
                    dtype=dtypes.int32,
                    tensor_array_name="output_ids_ta",
                    size=maximum_length,
                    dynamic_size=True,
                    infer_shape=False)
        else:
            # construct attention
            attention = attention_construct_fn(cell_output, attention_keys,
                                               attention_values)
            if type(attention) is tuple:  # added by our model
                attention, alignment = attention[0], attention[1]
                cell_output = attention
                alignment = tf.reshape(alignment, [batch_size, -1])
                # The sigmoid of the selector splits the probability mass
                # between vocabulary words and memory entries.
                selector = selector_fn(cell_output)
                selector = tf.sigmoid(selector)
                logit = output_fn(cell_output)
                # cell output
                cell_output = logit
                word_prob = nn_ops.softmax(logit) * (1 - selector)
                entity_prob = alignment * selector
                # mask is 1.0 where the best word beats the best entity.
                mask = array_ops.reshape(
                    math_ops.cast(math_ops.greater(
                        tf.reduce_max(word_prob, 1),
                        tf.reduce_max(entity_prob, 1)),
                        dtype=dtypes.float32), [-1, 1])
                # Word embedding where mask is 1, imem[0] entry otherwise.
                word_input = mask * array_ops.gather(
                    embeddings,
                    math_ops.cast(math_ops.argmax(word_prob, 1), dtype=dtype)
                ) + (1 - mask) * array_ops.gather_nd(
                    imem[0],
                    array_ops.concat([
                        array_ops.reshape(
                            math_ops.range(batch_size, dtype=dtype), [-1, 1]),
                        array_ops.reshape(
                            math_ops.cast(math_ops.argmax(entity_prob, 1),
                                          dtype=dtype), [-1, 1])
                    ], axis=1))
                # Rows that chose a word point at column 0 of imem[1].
                indices = array_ops.concat([
                    array_ops.reshape(
                        math_ops.range(batch_size, dtype=dtype), [-1, 1]),
                    math_ops.cast(1 - mask, dtype=dtype) * tf.reshape(
                        math_ops.cast(math_ops.argmax(alignment, 1),
                                      dtype=dtype), [-1, 1])
                ], axis=1)
                triple_input = array_ops.gather_nd(imem[1], indices)
                # cell input
                cell_input = array_ops.concat([word_input, triple_input],
                                              axis=1)
                # done mask
                mask = array_ops.reshape(math_ops.cast(mask, dtype=dtype),
                                         [-1])
                # Word rows keep the word id; entity rows store the negated
                # entity index, since (mask - 1) is -1 there.
                input_id = mask * math_ops.cast(
                    math_ops.argmax(word_prob, 1), dtype=dtype) +\
                    (mask - 1) * math_ops.cast(
                        math_ops.argmax(entity_prob, 1), dtype=dtype)
                context_state = context_state.write(time - 1, input_id)
                done = array_ops.reshape(
                    math_ops.equal(input_id, end_of_sequence_id), [-1])
            else:
                cell_output = attention

                # argmax decoder
                cell_output = output_fn(cell_output)  # logits
                next_input_id = math_ops.cast(math_ops.argmax(
                    cell_output, 1), dtype=dtype)
                done = math_ops.equal(next_input_id, end_of_sequence_id)
                cell_input = array_ops.gather(embeddings, next_input_id)

        # combine cell_input and attention
        next_input = array_ops.concat([cell_input, attention], 1)

        # if time > maxlen, return all true vector
        done = control_flow_ops.cond(
            math_ops.greater(time, maximum_length),
            lambda: array_ops.ones([
                batch_size,
            ], dtype=dtypes.bool), lambda: done)
        return (done, cell_state, next_input, cell_output, context_state)
def sample(self, time, outputs, name=None, **unused_kwargs):
    """Pick the index of the largest entry along the last axis of `outputs`.

    Args:
      time: Only used as an input value for the name scope.
      outputs: Tensor of scores; the argmax is taken over its last axis.
      name: Optional name for the enclosing name scope
        (default name "TrainingHelperSample").
      **unused_kwargs: Accepted and ignored.

    Returns:
      An int32 Tensor of argmax indices.
    """
    scope_values = [time, outputs]
    with ops.name_scope(name, "TrainingHelperSample", scope_values):
        return math_ops.argmax(outputs, axis=-1, output_type=tf.int32)
def sparse_categorical_accuracy(y_true, y_pred):
    """Element-wise match between integer targets and argmax predictions.

    Args:
      y_true: Tensor of targets; its maximum over the last axis is used as
        the class id (presumably the last axis has size 1 -- confirm against
        callers).
      y_pred: Tensor of per-class scores; the argmax over the last axis is
        the predicted class id.

    Returns:
      A Tensor of dtype `K.floatx()` that is 1 where the ids agree and 0
      elsewhere.
    """
    true_ids = math_ops.reduce_max(y_true, axis=-1)
    float_type = K.floatx()
    # Cast the predicted ids to the float type so they can be compared with
    # the reduced targets.
    predicted_ids = math_ops.cast(math_ops.argmax(y_pred, axis=-1), float_type)
    is_match = math_ops.equal(true_ids, predicted_ids)
    return math_ops.cast(is_match, float_type)
def mode(self, name="mode"):
    """Return the argmax of `self.logits` along dimension `self._batch_rank`.

    Args:
      name: Name passed to the inner op scope.

    Returns:
      The argmax Tensor, with its static shape set to
      `self.get_batch_shape()`.
    """
    with ops.name_scope(self.name), ops.op_scope([], name):
        most_likely = math_ops.argmax(self.logits, dimension=self._batch_rank)
        most_likely.set_shape(self.get_batch_shape())
        return most_likely
def top_k_categorical_accuracy(y_true, y_pred, k=5):
    """Mean of the "target is among the top `k` predictions" indicator.

    Args:
      y_true: Tensor of targets; the argmax over its last axis is taken as
        the target class id.
      y_pred: Tensor of per-class scores passed to `nn.in_top_k`.
      k: Number of top-scoring classes to consider.

    Returns:
      `K.mean` over the last axis of the `nn.in_top_k` result.
    """
    target_ids = math_ops.argmax(y_true, axis=-1)
    in_top_k = nn.in_top_k(y_pred, target_ids, k)
    return K.mean(in_top_k, axis=-1)
def sample(self, time, outputs, state):
    """Return the int32 argmax of `outputs` over its last axis.

    Args:
      time: Not used by this method.
      outputs: Tensor of scores.
      state: Not used; discarded immediately.

    Returns:
      An int32 Tensor of argmax indices.
    """
    del state  # Part of the signature only; never read.
    best_ids = math_ops.argmax(outputs, axis=-1)
    return math_ops.cast(best_ids, dtypes.int32)
def random_forest_model_fn(features, labels, mode, params, config):
    """Function that returns predictions, training loss, and training op.

    Args:
      features: Mutable mapping of feature tensors. The entries named by
        `params["weights_name"]` and `params["keys_name"]` are popped while
        the graphs are built; the weights entry is put back before returning.
      labels: Label tensor, a single-entry dict wrapping it, or None.
      mode: One of `model_fn_lib.ModeKeys`.
      params: Dict read for the keys "weights_name", "keys_name",
        "num_classes", "num_features", "num_trees", "max_nodes",
        "regression", "split_after_samples" and "early_stopping_rounds".
      config: Not used by this function.

    Returns:
      A `model_fn_lib.ModelFnOps` holding the inference dict, the training
      loss, the train op, training hooks, scaffold, eval metrics and output
      alternatives.
    """
    labels_tensor = labels
    if isinstance(labels, dict) and len(labels) == 1:
        # dict.values() is a non-subscriptable view on Python 3, so take the
        # single element through an iterator instead of indexing with [0].
        labels_tensor = next(iter(labels.values()))

    weights_name = params["weights_name"]
    keys_name = params["keys_name"]
    num_classes = tf.identity(params['num_classes'], name='num_classes')

    params_toGraphs = tensor_forest.ForestHParams(
        num_classes=params['num_classes'],
        num_features=params['num_features'],
        num_trees=params['num_trees'],
        max_nodes=params['max_nodes'],
        regression=params['regression'],
        split_after_samples=params['split_after_samples'])
    # Note fill() at line 90 of
    # https://github.com/tensorflow/tensorflow/blob/r1.2/tensorflow/contrib
    # /tensor_forest/python/tensor_forest.py
    params_toGraphs = params_toGraphs.fill()

    graph_builder_class = tensor_forest.RandomForestGraphs
    early_stopping_rounds = params["early_stopping_rounds"]
    # Fixed settings for this model function.
    num_trainers = 1
    trainer_id = 0
    report_feature_importances = False
    model_dir = None
    local_eval = False
    device_assigner = None

    weights = None
    if weights_name and weights_name in features:
        weights = features.pop(weights_name)

    keys = None
    if keys_name and keys_name in features:
        keys = features.pop(keys_name)

    # If we're doing eval, optionally ignore device_assigner.
    # Also ignore device assigner if we're exporting (mode == INFER)
    dev_assn = device_assigner
    if (mode == model_fn_lib.ModeKeys.INFER or
            (local_eval and mode == model_fn_lib.ModeKeys.EVAL)):
        dev_assn = None

    graph_builder = graph_builder_class(params_toGraphs,
                                        device_assigner=dev_assn)
    inference = {}
    predictions = {}
    output_alternatives = None

    # The inference graph is built in every mode (the former EVAL/INFER-only
    # guard is disabled): the class predictions are consumed further down
    # regardless of mode.
    inference[eval_metrics.INFERENCE_PROB_NAME] = (
        graph_builder.inference_graph(features))

    if params_toGraphs.regression:
        predictions = {None: inference[eval_metrics.INFERENCE_PROB_NAME]}
        output_alternatives = {
            None: (constants.ProblemType.LINEAR_REGRESSION, predictions)
        }
    else:
        inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax(
            inference[eval_metrics.INFERENCE_PROB_NAME], 1)
        predictions = {
            prediction_key.PredictionKey.PROBABILITIES:
                inference[eval_metrics.INFERENCE_PROB_NAME],
            prediction_key.PredictionKey.CLASSES:
                inference[eval_metrics.INFERENCE_PRED_NAME]
        }
        output_alternatives = {
            None: (constants.ProblemType.CLASSIFICATION, predictions)
        }

    if report_feature_importances:
        inference[eval_metrics.FEATURE_IMPORTANCE_NAME] = (
            graph_builder.feature_importances())

    if keys is not None:
        inference[keys_name] = keys

    # labels might be None if we're doing prediction (which brings up the
    # question of why we force everything to adhere to a single model_fn).
    loss_deps = []
    training_graph = None
    training_hooks = []
    scaffold = None
    if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
        training_graph = control_flow_ops.group(
            graph_builder.training_graph(features,
                                         labels,
                                         input_weights=weights,
                                         num_trainers=num_trainers,
                                         trainer_id=trainer_id),
            state_ops.assign_add(contrib_framework.get_global_step(), 1))
        loss_deps.append(training_graph)
        if hasattr(graph_builder, 'finalize_training'):
            finalize_listener = EveryCheckpointPreSaveListener(
                graph_builder.finalize_training())
            scaffold = monitored_session.Scaffold()
            training_hooks.append(
                basic_session_run_hooks.CheckpointSaverHook(
                    model_dir,
                    save_secs=600,
                    save_steps=None,
                    scaffold=scaffold,
                    listeners=[finalize_listener]))

    training_loss = None
    if (mode == model_fn_lib.ModeKeys.EVAL or
            mode == model_fn_lib.ModeKeys.TRAIN):
        with ops.control_dependencies(loss_deps):
            training_loss = graph_builder.training_loss(
                features, labels, name='rf_training_loss')

    # Named so that it can be passed to the hook.
    # NOTE(review): labels_tensor is None when labels is None (prediction);
    # confirm that confusion_matrix tolerates that.
    if not params['regression']:
        confusion_matrix_print = confusion_matrix(
            labels=labels_tensor,
            predictions=predictions['classes'],
            num_classes=num_classes,
        )
        confusion_matrix_print = tf.identity(confusion_matrix_print,
                                             name='confusion_matrix_print')
    else:
        confusion_matrix_print = tf.identity(0, name='confusion_matrix_print')
    regression_ornot = tf.identity(params['regression'],
                                   name='regression_ornot')

    # Put weights back in
    if weights is not None:
        features[weights_name] = weights

    if early_stopping_rounds:
        training_hooks.append(TensorForestLossHook(early_stopping_rounds))

    metrics = {}
    if not params_toGraphs.regression:
        metrics['eval_confusion_matrix'] = confusion_matrix(
            labels=labels_tensor,
            predictions=predictions['classes'],
            num_classes=params['num_classes'],
        )

    return model_fn_lib.ModelFnOps(mode=mode,
                                   predictions=inference,
                                   loss=training_loss,
                                   train_op=training_graph,
                                   training_hooks=training_hooks,
                                   scaffold=scaffold,
                                   eval_metric_ops=metrics,
                                   output_alternatives=output_alternatives)
def assignments(self):
    """Return one argmax Tensor (taken along axis 1) per entry of `self._w`.

    Returns:
      A list with `math_ops.argmax(w, 1)` for every `w` in `self._w`, in the
      same order.
    """
    return [math_ops.argmax(shard_weights, 1) for shard_weights in self._w]