def call(self, labels, predictions, weights=None):
    """Accumulate accuracy statistics.

    `labels` and `predictions` should have the same shape.
    Since argmax is applied to both, the dtypes of `labels` and
    `predictions` may differ.

    Args:
      labels: One-hot Tensor.
      predictions: Tensor with the logits or probabilities for each example.
      weights: Optional weighting of each example. Defaults to 1.

    Returns:
      The arguments, for easy chaining.
    """
    check_ops.assert_equal(
        array_ops.shape(labels), array_ops.shape(predictions),
        message="Shapes of labels and predictions are unequal")
    labels = math_ops.argmax(labels, axis=-1)
    predictions = math_ops.argmax(predictions, axis=-1)
    matches = math_ops.equal(labels, predictions)
    matches = math_ops.cast(matches, self.dtype)
    super(CategoricalAccuracy, self).call(matches, weights=weights)
    if weights is None:
      return labels, predictions
    return labels, predictions, weights
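A minimal NumPy sketch (added for illustration, not part of the original example) of what the matches computation above boils down to: argmax reduces both the one-hot labels and the logits to class indices along the last axis, and the accuracy is the (optionally weighted) mean of the resulting equality mask.

import numpy as np

labels = np.array([[0, 1, 0], [1, 0, 0]])               # one-hot labels
logits = np.array([[0.1, 2.0, 0.3], [0.2, 0.9, 0.1]])   # per-class scores
matches = np.argmax(labels, axis=-1) == np.argmax(logits, axis=-1)
accuracy = matches.astype(np.float32).mean()            # -> 0.5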
  def testArgMinMax(self):
    # Complex numbers do not support argmin/argmax.
    minmax_types = set(self.numeric_types) - set(self.complex_types)
    for dtype in minmax_types:
      self._assertOpOutputMatchesExpected(
          lambda x: math_ops.argmax(x, axis=0, output_type=dtypes.int32),
          np.array([1, 10, 27, 3, 3, 4], dtype=dtype),
          expected=np.int32(2))
      self._assertOpOutputMatchesExpected(
          lambda x: math_ops.argmax(x, axis=0, output_type=dtypes.int32),
          np.array([[4, 1, 7], [3, 2, 4]], dtype=dtype),
          expected=np.array([0, 1, 0], dtype=np.int32))
      self._assertOpOutputMatchesExpected(
          lambda x: math_ops.argmax(x, axis=1, output_type=dtypes.int32),
          np.array([[4, 1], [3, 2]], dtype=dtype),
          expected=np.array([0, 0], dtype=np.int32))

      self._assertOpOutputMatchesExpected(
          lambda x: math_ops.argmin(x, axis=0, output_type=dtypes.int32),
          np.array([3, 10, 27, 3, 2, 4], dtype=dtype),
          expected=np.int32(4))
      self._assertOpOutputMatchesExpected(
          lambda x: math_ops.argmin(x, axis=0, output_type=dtypes.int32),
          np.array([[4, 1, 7], [3, 2, 4]], dtype=dtype),
          expected=np.array([1, 0, 1], dtype=np.int32))
      self._assertOpOutputMatchesExpected(
          lambda x: math_ops.argmin(x, axis=1, output_type=dtypes.int32),
          np.array([[4, 1], [3, 2]], dtype=dtype),
          expected=np.array([1, 1], dtype=np.int32))
Example #3
  def testArgMinMax(self):
    for dtype in self.numeric_types:
      self._assertOpOutputMatchesExpected(
          lambda x: math_ops.argmax(x, axis=0, output_type=dtypes.int32),
          np.array([1, 10, 27, 3, 3, 4], dtype=dtype),
          expected=np.int32(2))
      self._assertOpOutputMatchesExpected(
          lambda x: math_ops.argmax(x, axis=0, output_type=dtypes.int32),
          np.array([[4, 1, 7], [3, 2, 4]], dtype=dtype),
          expected=np.array([0, 1, 0], dtype=np.int32))
      self._assertOpOutputMatchesExpected(
          lambda x: math_ops.argmax(x, axis=1, output_type=dtypes.int32),
          np.array([[4, 1], [3, 2]], dtype=dtype),
          expected=np.array([0, 0], dtype=np.int32))

      self._assertOpOutputMatchesExpected(
          lambda x: math_ops.argmin(x, axis=0, output_type=dtypes.int32),
          np.array([3, 10, 27, 3, 2, 4], dtype=dtype),
          expected=np.int32(4))
      self._assertOpOutputMatchesExpected(
          lambda x: math_ops.argmin(x, axis=0, output_type=dtypes.int32),
          np.array([[4, 1, 7], [3, 2, 4]], dtype=dtype),
          expected=np.array([1, 0, 1], dtype=np.int32))
      self._assertOpOutputMatchesExpected(
          lambda x: math_ops.argmin(x, axis=1, output_type=dtypes.int32),
          np.array([[4, 1], [3, 2]], dtype=dtype),
          expected=np.array([1, 1], dtype=np.int32))
Example #4
  def _logits_to_prediction(self, logits=None):
    predictions = {}
    # Workaround for argmax dropping the second dimension.
    predictions[PedictionKey.LOGITS] = array_ops.expand_dims(
        math_ops.argmax(logits, 1), 1)
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[PedictionKey.CLASSES] = array_ops.expand_dims(
        math_ops.argmax(logits, 1), 1)

    return predictions
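A small hedged sketch (illustrative shapes only, not from the original code) of the workaround noted above: argmax removes the axis it reduces over, so a [batch, num_classes] input yields a rank-1 [batch] result, and expand_dims restores the trailing dimension expected downstream.

import numpy as np

logits = np.array([[0.0, 0.3], [0.0, -1.2]])   # [batch, 2]
classes = np.argmax(logits, axis=1)            # shape (2,): the second axis is dropped
classes = np.expand_dims(classes, axis=1)      # shape (2, 1) again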
  def call(self, labels, predictions, weights=None):
    """Accumulate accuracy statistics.

    `labels` and `predictions` should have the same shape, except that
    `predictions` must have one additional trailing dimension equal to the
    number of classes to predict.

    The dtypes of `labels` and `predictions` may differ.

    Args:
      labels: Tensor of shape `(batch_size,)` containing integer class ids.
      predictions: Tensor with the logits or probabilities for each example.
      weights: Optional weighting of each example. Defaults to 1.

    Returns:
      The arguments, for easy chaining.
    """
    check_ops.assert_equal(
        array_ops.shape(labels), array_ops.shape(predictions)[0],
        message="First axis of labels and predictions is unequal")
    predictions = math_ops.argmax(predictions, axis=-1)
    labels = math_ops.cast(labels, dtypes.int64)
    matches = math_ops.equal(labels, predictions)
    matches = math_ops.cast(matches, self.dtype)
    super(SparseAccuracy, self).call(matches, weights=weights)
    if weights is None:
      return labels, predictions
    return labels, predictions, weights
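For contrast with the one-hot case in the earlier example, a hedged NumPy sketch (illustrative, not from the original code) of the sparse variant: the labels are already class indices of shape [batch], so only the predictions are reduced with argmax.

import numpy as np

labels = np.array([2, 0])                               # sparse integer labels, [batch]
logits = np.array([[0.1, 0.2, 3.0], [0.5, 0.1, 0.2]])   # [batch, num_classes]
matches = labels == np.argmax(logits, axis=-1)
accuracy = matches.astype(np.float32).mean()            # -> 1.0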
Example #6
  def _logits_to_prediction(self, logits=None):
    predictions = {}
    predictions[PredictionKey.LOGITS] = logits
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[PredictionKey.CLASSES] = math_ops.argmax(logits, 1)

    return predictions
Example #7
def calculate_sequence_by_mask(mask, time_major):
  """Calculate the sequence length tensor (1-D) based on the masking tensor.

  The masking tensor is a 2D boolean tensor with shape [batch, timestep]. For
  any timestep that should be masked, the corresponding field will be False.
  Consider the following example:
    a = [[True, True, False, False],
         [True, False, True, False]]
  It is a (2, 4) tensor, and the corresponding sequence lengths form a 1-D
  tensor with value [2, 3]. Note that for the second row we need to find the
  index of the last True value, which is 2, so the sequence length is 3.

  Args:
    mask: Boolean tensor with shape [batch, timestep] or [timestep, batch] if
      time_major=True.
    time_major: Boolean, which indicates whether the mask is time major or batch
      major.
  Returns:
    sequence_length: 1D int32 tensor.
  """
  timestep_index = 0 if time_major else 1
  max_seq_length = array_ops.shape(mask)[timestep_index]
  reversed_mask = math_ops.cast(array_ops.reverse(mask, axis=[timestep_index]),
                                dtypes.int32)
  # Use the argmax to find the index of leading 1 in the reversed mask, which is
  # the index of the last True value in the original mask.
  reversed_index = math_ops.argmax(reversed_mask, axis=timestep_index,
                                   output_type=dtypes.int32)
  return max_seq_length - reversed_index
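The reverse-then-argmax trick above can be checked with a short NumPy sketch (added for illustration) using the mask from the docstring: argmax returns the index of the first 1 in the reversed mask, which is the distance of the last True value from the end of the sequence.

import numpy as np

mask = np.array([[True, True, False, False],
                 [True, False, True, False]])      # [batch, timestep]
max_seq_length = mask.shape[1]
reversed_index = np.argmax(mask[:, ::-1].astype(np.int32), axis=1)
sequence_length = max_seq_length - reversed_index  # -> array([2, 3])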
Example #8
  def logits_to_predictions(self, logits, proba=False):
    if proba:
      raise ValueError(
          "logits to probabilities is not supported for _BinarySvmTargetColumn")

    logits = array_ops.concat([array_ops.zeros_like(logits), logits], 1)
    return math_ops.argmax(logits, 1)
def _multi_value_predictions(
    activations, target_column, problem_type, predict_probabilities):
  """Maps `activations` from the RNN to predictions for multi value models.

  If `predict_probabilities` is `False`, this function returns a `dict`
  containing a single entry with key `prediction_key.PredictionKey.CLASSES` for
  `problem_type` `ProblemType.CLASSIFICATION` or
  `prediction_key.PredictionKey.SCORE` for `problem_type`
  `ProblemType.LINEAR_REGRESSION`.

  If `predict_probabilities` is `True`, it will contain a second entry with key
  `prediction_key.PredictionKey.PROBABILITIES`. The
  value of this entry is a `Tensor` of probabilities with shape
  `[batch_size, padded_length, num_classes]`.

  Note that variable length inputs will yield some predictions that don't have
  meaning. For example, if `sequence_length = [3, 2]`, then prediction `[1, 2]`
  has no meaningful interpretation.

  Args:
    activations: Output from an RNN. Should have dtype `float32` and shape
      `[batch_size, padded_length, ?]`.
    target_column: An initialized `TargetColumn`, used to calculate predictions.
    problem_type: Either `ProblemType.CLASSIFICATION` or
      `ProblemType.LINEAR_REGRESSION`.
    predict_probabilities: A Python boolean, indicating whether probabilities
      should be returned. Should only be set to `True` for
      classification/logistic regression problems.
  Returns:
    A `dict` mapping strings to `Tensors`.
  """
  with ops.name_scope('MultiValuePrediction'):
    activations_shape = array_ops.shape(activations)
    flattened_activations = array_ops.reshape(activations,
                                              [-1, activations_shape[2]])
    prediction_dict = {}
    if predict_probabilities:
      flat_probabilities = target_column.logits_to_predictions(
          flattened_activations, proba=True)
      flat_predictions = math_ops.argmax(flat_probabilities, 1)
      if target_column.num_label_columns == 1:
        probability_shape = array_ops.concat([activations_shape[:2], [2]], 0)
      else:
        probability_shape = activations_shape
      probabilities = array_ops.reshape(
          flat_probabilities, probability_shape,
          name=prediction_key.PredictionKey.PROBABILITIES)
      prediction_dict[
          prediction_key.PredictionKey.PROBABILITIES] = probabilities
    else:
      flat_predictions = target_column.logits_to_predictions(
          flattened_activations, proba=False)
    predictions_name = (prediction_key.PredictionKey.CLASSES
                        if problem_type == constants.ProblemType.CLASSIFICATION
                        else prediction_key.PredictionKey.SCORES)
    predictions = array_ops.reshape(
        flat_predictions, [activations_shape[0], activations_shape[1]],
        name=predictions_name)
    prediction_dict[predictions_name] = predictions
    return prediction_dict
Example #10
 def _convert_to_estimator_model_result(self, logits_fn_result):
   logits, loss, train_op = logits_fn_result
   return {
       Classifier.CLASS_OUTPUT:
           math_ops.argmax(logits, len(logits.get_shape()) - 1),
       Classifier.PROBABILITY_OUTPUT: nn.softmax(logits)
   }, loss, train_op
    def _ModelFn(features, labels, mode):
      if is_training:
        logits_out = self._BuildGraph(features)
      else:
        graph_def = self._GetGraphDef(use_trt, batch_size, model_dir)
        logits_out = importer.import_graph_def(
            graph_def,
            input_map={INPUT_NODE_NAME: features},
            return_elements=[OUTPUT_NODE_NAME + ':0'],
            name='')[0]

      loss = losses.sparse_softmax_cross_entropy(
          labels=labels, logits=logits_out)
      summary.scalar('loss', loss)

      classes_out = math_ops.argmax(logits_out, axis=1, name='classes_out')
      accuracy = metrics.accuracy(
          labels=labels, predictions=classes_out, name='acc_op')
      summary.scalar('accuracy', accuracy[1])

      if mode == ModeKeys.EVAL:
        return EstimatorSpec(
            mode, loss=loss, eval_metric_ops={'accuracy': accuracy})
      elif mode == ModeKeys.TRAIN:
        optimizer = AdamOptimizer(learning_rate=1e-2)
        train_op = optimizer.minimize(loss, global_step=get_global_step())
        return EstimatorSpec(mode, loss=loss, train_op=train_op)
def _single_value_predictions(activations, sequence_length, target_column, predict_probabilities):
    """Maps `activations` from the RNN to predictions for single value models.

  If `predict_probabilities` is `False`, this function returns a `dict`
  containing a single entry with key `PREDICTIONS_KEY`. If `predict_probabilities`
  is `True`, it will contain a second entry with key `PROBABILITIES_KEY`. The
  value of this entry is a `Tensor` of probabilities with shape
  `[batch_size, num_classes]`.

  Args:
    activations: Output from an RNN. Should have dtype `float32` and shape
      `[batch_size, padded_length, ?]`.
    sequence_length: A `Tensor` with shape `[batch_size]` and dtype `int32`
      containing the length of each sequence in the batch. If `None`, sequences
      are assumed to be unpadded.
    target_column: An initialized `TargetColumn`, used to calculate predictions.
    predict_probabilities: A Python boolean, indicating whether probabilities
      should be returned. Should only be set to `True` for
      classification/logistic regression problems.
  Returns:
    A `dict` mapping strings to `Tensors`.
  """
    with ops.name_scope("SingleValuePrediction"):
        last_activations = select_last_activations(activations, sequence_length)
        if predict_probabilities:
            probabilities = target_column.logits_to_predictions(last_activations, proba=True)
            prediction_dict = {
                RNNKeys.PROBABILITIES_KEY: probabilities,
                RNNKeys.PREDICTIONS_KEY: math_ops.argmax(probabilities, 1),
            }
        else:
            predictions = target_column.logits_to_predictions(last_activations, proba=False)
            prediction_dict = {RNNKeys.PREDICTIONS_KEY: predictions}
        return prediction_dict
Example #13
  def __call__(self, inputs, state, scope=None):
    """Build the CrfDecodeForwardRnnCell.

    Args:
      inputs: A [batch_size, num_tags] matrix of unary potentials.
      state: A [batch_size, num_tags] matrix containing the previous step's
            score values.
      scope: Unused variable scope of this cell.

    Returns:
      backpointers: A [batch_size, num_tags] matrix of backpointers.
      new_state: A [batch_size, num_tags] matrix of new score values.
    """
    # For simplicity, in shape comments, denote:
    # 'batch_size' by 'B', 'max_seq_len' by 'T', 'num_tags' by 'O' (output).
    state = array_ops.expand_dims(state, 2)                         # [B, O, 1]

    # This addition op broadcasts self._transitions_params along the zeroth
    # dimension and state along the second dimension.
    # [B, O, 1] + [1, O, O] -> [B, O, O]
    transition_scores = state + self._transition_params             # [B, O, O]
    new_state = inputs + math_ops.reduce_max(transition_scores, [1])  # [B, O]
    backpointers = math_ops.argmax(transition_scores, 1)
    backpointers = math_ops.cast(backpointers, dtype=dtypes.int32)    # [B, O]
    return backpointers, new_state
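A NumPy analogue of the single forward step above (a sketch under assumed shapes, not the library implementation): the broadcasted sum scores every previous-tag/current-tag pair, the max over the previous-tag axis updates the running scores, and the argmax over the same axis records the backpointers.

import numpy as np

B, O = 2, 3                                      # batch size, num tags
state = np.random.rand(B, O)                     # previous step's scores
inputs = np.random.rand(B, O)                    # unary potentials for this step
transition_params = np.random.rand(O, O)         # [previous tag, current tag]

# [B, O, 1] + [O, O] broadcasts to [B, O, O].
transition_scores = state[:, :, None] + transition_params
new_state = inputs + transition_scores.max(axis=1)    # [B, O]
backpointers = transition_scores.argmax(axis=1)       # [B, O]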
Example #14
 def extract_argmax_and_embed(prev, _):
   """Loop_function that extracts the symbol from prev and embeds it."""
   if output_projection is not None:
     prev = nn_ops.xw_plus_b(
         prev, output_projection[0], output_projection[1])
   prev_symbol = array_ops.stop_gradient(math_ops.argmax(prev, 1))
   return embedding_ops.embedding_lookup(embedding, prev_symbol)
Example #15
 def mode(self, name="mode"):
   with ops.name_scope(self.name):
     with ops.op_scope([], name):
       ret = math_ops.argmax(self.logits, dimension=self._batch_rank)
       ret = math_ops.cast(ret, self._dtype)
       ret.set_shape(self.get_batch_shape())
       return ret
def _one_hot_to_embedding(one_hot, embedding_size):
  """Get a dense embedding vector from a one-hot encoding."""
  num_tokens = one_hot.shape[1]
  label_id = math_ops.argmax(one_hot, axis=1)
  embedding = variable_scope.get_variable(
      'embedding', [num_tokens, embedding_size])
  return embedding_ops.embedding_lookup(
      embedding, label_id, name='token_to_embedding')
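A hedged NumPy sketch (illustrative only) of the argmax-then-lookup pattern above: argmax recovers the token id from each one-hot row, and indexing the embedding matrix plays the role of embedding_lookup.

import numpy as np

one_hot = np.array([[0, 0, 1, 0], [1, 0, 0, 0]])   # [batch, num_tokens]
embedding = np.random.rand(4, 8)                   # [num_tokens, embedding_size]
label_id = np.argmax(one_hot, axis=1)              # -> array([2, 0])
dense = embedding[label_id]                        # [batch, embedding_size]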
Example #17
  def _logits_to_predictions(self, logits, proba=False):
    if self._n_classes == 2:
      logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])

    if proba:
      return nn.softmax(logits)
    else:
      return math_ops.argmax(logits, 1)
Example #18
 def _multiclass_metrics(predictions, labels, weights):
   """Prepares eval metrics for multiclass eval."""
   metrics = dict()
   logits = predictions["scores"]
   classes = math_ops.argmax(logits, 1)
   metrics["accuracy"] = metrics_lib.streaming_accuracy(
       classes, labels, weights)
   return metrics
Example #19
  def logits_to_predictions(self, logits, proba=False):
    if self.num_label_columns == 1:
      logits = array_ops.concat([array_ops.zeros_like(logits), logits], 1)

    if proba:
      return nn.softmax(logits)
    else:
      return math_ops.argmax(logits, 1)
Example #20
 def _logits_to_predictions(self, logits):
   """See `_MultiClassHead`."""
   with ops.name_scope(None, "predictions", (logits,)):
     return {
         prediction_key.PredictionKey.LOGITS: logits,
         prediction_key.PredictionKey.CLASSES: math_ops.argmax(
             _one_class_to_two_class_logits(logits), 1,
             name=prediction_key.PredictionKey.CLASSES)
     }
Example #21
 def sample(self, time, outputs, state, name=None):
   """sample for GreedyEmbeddingHelper."""
   del time, state  # unused by sample_fn
   # Outputs are logits, use argmax to get the most probable id
   if not isinstance(outputs, ops.Tensor):
     raise TypeError("Expected outputs to be a single Tensor, got: %s" %
                     type(outputs))
   sample_ids = math_ops.argmax(outputs, axis=-1, output_type=dtypes.int32)
   return sample_ids
Example #22
def _predictions(logits, n_classes):
    """Returns predictions for the given logits and n_classes."""
    predictions = {}
    if n_classes == 2:
        predictions[_LOGISTIC] = math_ops.sigmoid(logits)
        logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = array_ops.reshape(math_ops.argmax(logits, 1), shape=(-1, 1))
    return predictions
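Why concatenating a zero column works for the binary case above can be seen in a short NumPy sketch (added for illustration): softmax over [0, z] yields [1 - sigmoid(z), sigmoid(z)], so the argmax over the two-class logits is 1 exactly when the raw logit is positive.

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

z = np.array([[0.7], [-1.3]])                          # binary logits, [batch, 1]
two_class = np.concatenate([np.zeros_like(z), z], axis=1)
probs = softmax(two_class, axis=1)                     # column 1 equals sigmoid(z)
classes = np.argmax(two_class, axis=1)                 # -> array([1, 0])
assert np.allclose(probs[:, 1], 1.0 / (1.0 + np.exp(-z[:, 0])))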
Example #23
  def _logits_to_predictions(self, logits):
    """See `_MultiClassHead`."""
    predictions = {}
    predictions[prediction_key.PredictionKey.LOGITS] = logits
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[prediction_key.PredictionKey.CLASSES] = math_ops.argmax(
        logits, 1)

    return predictions
Example #24
  def _logits_to_prediction(self, logits=None):
    predictions = {PredictionKey.LOGITS: logits}
    if self.logits_dimension == 1:
      predictions[PredictionKey.LOGISTIC] = math_ops.sigmoid(logits)
      logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[PredictionKey.PROBABILITIES] = nn.softmax(logits)
    predictions[PredictionKey.CLASSES] = math_ops.argmax(logits, 1)

    return predictions
    def body(i, prev_c, prev_h, actions, log_probs):
      # pylint: disable=g-long-lambda
      signal = control_flow_ops.cond(
          math_ops.equal(i, 0),
          lambda: array_ops.tile(device_go_embedding,
                                 [self.hparams.num_children, 1]),
          lambda: embedding_ops.embedding_lookup(device_embeddings,
                                                 actions.read(i - 1))
      )
      if self.hparams.keep_prob is not None:
        signal = nn_ops.dropout(signal, self.hparams.keep_prob)
      next_c, next_h = lstm(signal, prev_c, prev_h, w_lstm, forget_bias)
      query = math_ops.matmul(next_h, attn_w_2)
      query = array_ops.reshape(
          query, [self.hparams.num_children, 1, self.hparams.hidden_size])
      query = math_ops.tanh(query + attn_mem)
      query = array_ops.reshape(query, [
          self.hparams.num_children * self.num_groups, self.hparams.hidden_size
      ])
      query = math_ops.matmul(query, attn_v)
      query = array_ops.reshape(query,
                                [self.hparams.num_children, self.num_groups])
      query = nn_ops.softmax(query)
      query = array_ops.reshape(query,
                                [self.hparams.num_children, self.num_groups, 1])
      query = math_ops.reduce_sum(attn_mem * query, axis=1)
      query = array_ops.concat([next_h, query], axis=1)
      logits = math_ops.matmul(query, device_softmax)
      logits /= self.hparams.temperature
      if self.hparams.tanh_constant > 0:
        logits = math_ops.tanh(logits) * self.hparams.tanh_constant
      if self.hparams.logits_std_noise > 0:
        num_in_logits = math_ops.cast(
            array_ops.size(logits), dtype=dtypes.float32)
        avg_norm = math_ops.divide(
            linalg_ops.norm(logits), math_ops.sqrt(num_in_logits))
        logits_noise = random_ops.random_normal(
            array_ops.shape(logits),
            stddev=self.hparams.logits_std_noise * avg_norm)
        logits = control_flow_ops.cond(
            self.global_step > self.hparams.stop_noise_step, lambda: logits,
            lambda: logits + logits_noise)

      if mode == "sample":
        next_y = random_ops.multinomial(logits, 1, seed=self.hparams.seed)
      elif mode == "greedy":
        next_y = math_ops.argmax(logits, 1)
      elif mode == "target":
        next_y = array_ops.slice(y, [0, i], [-1, 1])
      else:
        raise NotImplementedError
      next_y = math_ops.to_int32(next_y)
      next_y = array_ops.reshape(next_y, [self.hparams.num_children])
      actions = actions.write(i, next_y)
      log_probs += nn_ops.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=next_y)
      return i + 1, next_c, next_h, actions, log_probs
Example #26
 def _logits_to_prediction(self, logits=None):
   predictions = {PedictionKey.LOGITS: logits}
   if self.logits_dimension == 1:
     predictions[PedictionKey.LOGISTIC] = math_ops.sigmoid(logits)
     logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
   predictions[PedictionKey.PROBABILITIES] = nn.softmax(logits)
   # Workaround for argmax dropping the second dimension.
   predictions[PedictionKey.CLASSES] = array_ops.expand_dims(
       math_ops.argmax(logits, 1), 1)
   return predictions
Example #27
def sparse_categorical_accuracy(y_true, y_pred):
  y_true = math_ops.reduce_max(y_true, axis=-1)
  y_pred = math_ops.argmax(y_pred, axis=-1)

  # If the expected labels are float, we need to cast the int returned by
  # argmax to compare.
  if K.dtype(y_true) == K.floatx():
    y_pred = math_ops.cast(y_pred, K.floatx())

  return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
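A minimal NumPy walk-through of the function above (illustrative values, not from the source): reduce_max squeezes the trailing label dimension, argmax picks the predicted class, and the cast makes the integer predictions comparable with the float labels.

import numpy as np

y_true = np.array([[2.], [0.]])                          # sparse float labels, [batch, 1]
y_pred = np.array([[0.1, 0.2, 0.7], [0.6, 0.3, 0.1]])    # class probabilities
labels = y_true.max(axis=-1)                             # -> array([2., 0.])
preds = np.argmax(y_pred, axis=-1).astype(labels.dtype)  # int -> float
accuracy = (labels == preds).astype(np.float32)          # -> array([1., 1.])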
Example #28
 def loop_function(prev, _):
     if output_projection is not None:
         prev = nn_ops.xw_plus_b(prev, output_projection[0], output_projection[1])
     prev_symbol = math_ops.argmax(prev, 1)
     # Note that gradients will not propagate through the second parameter of
     # embedding_lookup.
     emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
     if not update_embedding:
         emb_prev = array_ops.stop_gradient(emb_prev)
     return emb_prev
def composed_sampler(logits, num_samples):
  # [batch size, num classes, num samples]
  unif = random_ops.random_uniform(logits.get_shape().concatenate(
      tensor_shape.TensorShape([num_samples])))
  noise = -math_ops.log(-math_ops.log(unif))
  # [batch size, num classes, 1]
  logits = array_ops.expand_dims(logits, -1)

  # [batch size, num samples]
  return math_ops.argmax(logits + noise, axis=1)
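The noise added above is Gumbel(0, 1) noise, so the argmax implements the Gumbel-max trick: it draws samples with probability proportional to softmax(logits). A stand-alone NumPy sketch (hypothetical helper name, not part of the original code):

import numpy as np

def gumbel_max_sample(logits, num_samples, rng=np.random.default_rng(0)):
    # -log(-log(U)) with U ~ Uniform(0, 1) is a Gumbel(0, 1) sample.
    unif = rng.uniform(size=logits.shape + (num_samples,))
    noise = -np.log(-np.log(unif))
    # argmax over the class axis samples in proportion to softmax(logits).
    return np.argmax(logits[..., None] + noise, axis=-2)

samples = gumbel_max_sample(np.log(np.array([0.1, 0.6, 0.3])), num_samples=5)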
Example #30
def crf_decode(potentials, transition_params, sequence_length):
  """Decode the highest scoring sequence of tags in TensorFlow.

  This is the tensor (graph) version of Viterbi decoding.

  Args:
    potentials: A [batch_size, max_seq_len, num_tags] tensor of
              unary potentials.
    transition_params: A [num_tags, num_tags] matrix of
              binary potentials.
    sequence_length: A [batch_size] vector of true sequence lengths.

  Returns:
    decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`.
                Contains the highest scoring tag indices.
    best_score: A [batch_size] vector, containing the score of `decode_tags`.
  """
  # For simplicity, in shape comments, denote:
  # 'batch_size' by 'B', 'max_seq_len' by 'T', 'num_tags' by 'O' (output).
  num_tags = potentials.get_shape()[2].value

  # Computes forward decoding. Get last score and backpointers.
  crf_fwd_cell = CrfDecodeForwardRnnCell(transition_params)
  initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
  initial_state = array_ops.squeeze(initial_state, axis=[1])      # [B, O]
  inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1])   # [B, T-1, O]
  backpointers, last_score = rnn.dynamic_rnn(
      crf_fwd_cell,
      inputs=inputs,
      sequence_length=sequence_length - 1,
      initial_state=initial_state,
      time_major=False,
      dtype=dtypes.int32)             # [B, T - 1, O], [B, O]
  backpointers = gen_array_ops.reverse_sequence(
      backpointers, sequence_length - 1, seq_dim=1)               # [B, T-1, O]

  # Computes backward decoding. Extract tag indices from backpointers.
  crf_bwd_cell = CrfDecodeBackwardRnnCell(num_tags)
  initial_state = math_ops.cast(math_ops.argmax(last_score, axis=1),
                                dtype=dtypes.int32)               # [B]
  initial_state = array_ops.expand_dims(initial_state, axis=-1)   # [B, 1]
  decode_tags, _ = rnn.dynamic_rnn(
      crf_bwd_cell,
      inputs=backpointers,
      sequence_length=sequence_length - 1,
      initial_state=initial_state,
      time_major=False,
      dtype=dtypes.int32)           # [B, T - 1, 1]
  decode_tags = array_ops.squeeze(decode_tags, axis=[2])           # [B, T - 1]
  decode_tags = array_ops.concat([initial_state, decode_tags], axis=1)  # [B, T]
  decode_tags = gen_array_ops.reverse_sequence(
      decode_tags, sequence_length, seq_dim=1)                     # [B, T]

  best_score = math_ops.reduce_max(last_score, axis=1)             # [B]
  return decode_tags, best_score
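As a sanity reference for the batched graph code above, a single-sequence NumPy Viterbi decode (a sketch, not the library implementation) that mirrors the same forward max/argmax and backward backpointer-following logic:

import numpy as np

def viterbi_decode_np(potentials, transition_params):
    """potentials: [seq_len, num_tags]; transition_params: [num_tags, num_tags]."""
    seq_len, num_tags = potentials.shape
    score = potentials[0]                                  # [num_tags]
    backpointers = np.zeros((seq_len, num_tags), dtype=np.int32)
    for t in range(1, seq_len):
        # [num_tags, 1] + [num_tags, num_tags]: previous tag (rows) to current tag (cols).
        transition_scores = score[:, None] + transition_params
        backpointers[t] = transition_scores.argmax(axis=0)
        score = potentials[t] + transition_scores.max(axis=0)
    best_last_tag = int(score.argmax())
    tags = [best_last_tag]
    for t in range(seq_len - 1, 0, -1):
        tags.append(int(backpointers[t, tags[-1]]))
    return tags[::-1], float(score.max())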
Example #31
 def _mode(self):
     ret = math_ops.argmax(self.logits, axis=self._batch_rank)
     ret = math_ops.cast(ret, self.dtype)
     ret.set_shape(self.batch_shape)
     return ret
Example #32
 def argmax(x):
     return math_ops.argmax(x)
Example #33
 def _single_seq_fn():
     squeezed_potentials = array_ops.squeeze(potentials, [1])
     decode_tags = array_ops.expand_dims(
         math_ops.argmax(squeezed_potentials, axis=1), 1)
     best_score = math_ops.reduce_max(squeezed_potentials, axis=1)
     return math_ops.cast(decode_tags, dtype=dtypes.int32), best_score
Example #34
def TestModel(inputs):
    scale = variables.Variable(1.0, trainable=False)

    # Scaling the outputs won't change the result...
    outputs = math_ops.multiply(inputs, scale)
    return math_ops.argmax(outputs, 1), scale
Example #35
 def _mode(self):
     ret = math_ops.argmax(self.logits, dimension=self._batch_rank)
     ret = math_ops.cast(ret, self.dtype)
     ret.set_shape(self.get_batch_shape())
     return ret
Example #36
    def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
        """ Decoder function used in the `dynamic_rnn_decoder` with the purpose of
            inference.

            The main difference between this decoder function and the `decoder_fn` in
            `simple_decoder_fn_train` is how `next_cell_input` is calculated. In this
            decoder function we calculate the next input by applying an argmax across
            the feature dimension of the output from the decoder. This is a
            greedy-search approach. (Bahdanau et al., 2014) & (Sutskever et al., 2014)
            use beam-search instead.

            Args:
              time: positive integer constant reflecting the current timestep.
              cell_state: state of RNNCell.
              cell_input: input provided by `dynamic_rnn_decoder`.
              cell_output: output of RNNCell.
              context_state: context state provided by `dynamic_rnn_decoder`.

            Returns:
              A tuple (done, next state, next input, emit output, next context state)
              where:

              done: A boolean vector to indicate which sentences have reached an
              `end_of_sequence_id`. This is used for early stopping by the
              `dynamic_rnn_decoder`. When `time>=maximum_length` a boolean vector with
              all elements as `true` is returned.

              next state: `cell_state`, this decoder function does not modify the
              given state.

              next input: The embedding from argmax of the `cell_output` is used as
              `next_input`.

              emit output: If `output_fn is None` the supplied `cell_output` is
              returned, else the `output_fn` is used to update the `cell_output`
              before calculating `next_input` and returning `cell_output`.

              next context state: `context_state`, this decoder function does not
              modify the given context state. The context state could be modified when
              applying e.g. beam search.
        """
        with ops.name_scope(name, "simple_decoder_fn_inference",
                            [time, cell_state, cell_input, cell_output,
                             context_state]):
            if cell_input is not None:
                raise ValueError("Expected cell_input to be None, but saw: %s" %
                                 cell_input)
            if cell_output is None:
                # invariant that this is time == 0
                next_input_id = array_ops.ones([batch_size, ], dtype=dtype) * (
                    start_of_sequence_id)
                done = array_ops.zeros([batch_size, ], dtype=dtypes.bool)
                cell_state = encoder_state
                cell_output = array_ops.zeros([num_decoder_symbols],
                                              dtype=dtypes.float32)
                context_state = tf.zeros((batch_size, maxium_length_int), dtype=tf.int32)
            else:
                cell_output = output_fn(cell_output)

                if decode_type == 'sample':
                    matrix_U = -1.0 * tf.log(
                        -1.0 * tf.log(tf.random_uniform(tf.shape(cell_output), minval=0.0, maxval=1.0)))
                    next_input_id = math_ops.cast(
                        tf.argmax(tf.subtract(cell_output, matrix_U), dimension=1), dtype=dtype)
                elif decode_type == 'greedy':
                    next_input_id = math_ops.cast(
                        math_ops.argmax(cell_output, 1), dtype=dtype)
                else:
                    raise ValueError("unknown decode type")

                done = math_ops.equal(next_input_id, end_of_sequence_id)
                # save the results into context state
                expanded_next_input = tf.expand_dims(next_input_id, axis=1)
                sliced_context_state = tf.slice(context_state, [0, 0], [-1, maxium_length_int - 1])
                context_state = tf.concat([expanded_next_input, sliced_context_state], axis=1)
                context_state = tf.reshape(context_state, [batch_size, maxium_length_int])

            next_input = array_ops.gather(embeddings, next_input_id)
            if context_vector is not None:
                next_input = tf.concat([next_input, context_vector], axis=1)
            # if time > maxlen, return all true vector
            done = control_flow_ops.cond(math_ops.greater(time, maximum_length),
                                         lambda: array_ops.ones([batch_size, ], dtype=dtypes.bool),
                                         lambda: done)
            return (done, cell_state, next_input, cell_output, context_state)
Example #37
    def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
        """Decoder function used in the `dynamic_rnn_decoder` for inference.

    The main difference between this decoder function and the `decoder_fn` in
    `attention_decoder_fn_train` is how `next_cell_input` is calculated. In
    decoder function we calculate the next input by applying an argmax across
    the feature dimension of the output from the decoder. This is a
    greedy-search approach. (Bahdanau et al., 2014) & (Sutskever et al., 2014)
    use beam-search instead.

    Args:
      time: positive integer constant reflecting the current timestep.
      cell_state: state of RNNCell.
      cell_input: input provided by `dynamic_rnn_decoder`.
      cell_output: output of RNNCell.
      context_state: context state provided by `dynamic_rnn_decoder`.

    Returns:
      A tuple (done, next state, next input, emit output, next context state)
      where:

      done: A boolean vector to indicate which sentences have reached an
      `end_of_sequence_id`. This is used for early stopping by the
      `dynamic_rnn_decoder`. When `time>=maximum_length` a boolean vector with
      all elements as `true` is returned.

      next state: `cell_state`, this decoder function does not modify the
      given state.

      next input: The embedding from argmax of the `cell_output` is used as
      `next_input`.

      emit output: If `output_fn is None` the supplied `cell_output` is
      returned, else the `output_fn` is used to update the `cell_output`
      before calculating `next_input` and returning `cell_output`.

      next context state: `context_state`, this decoder function does not
      modify the given context state. The context state could be modified when
      applying e.g. beam search.

    Raises:
      ValueError: if cell_input is not None.

    """
        with ops.name_scope(
                name, "attention_decoder_fn_inference",
            [time, cell_state, cell_input, cell_output, context_state]):
            if cell_input is not None:
                raise ValueError(
                    "Expected cell_input to be None, but saw: %s" % cell_input)
            if cell_output is None:
                # invariant that this is time == 0
                next_input_id = array_ops.ones([
                    batch_size,
                ], dtype=dtype) * (start_of_sequence_id)
                done = array_ops.zeros([
                    batch_size,
                ], dtype=dtypes.bool)
                cell_state = encoder_state
                cell_output = array_ops.zeros([num_decoder_symbols],
                                              dtype=dtypes.float32)
                cell_input = array_ops.gather(embeddings, next_input_id)
                cell_type = array_ops.zeros([3], dtype=dtypes.float32)

                # init attention
                attention = _init_attention(encoder_state)
            else:
                # construct attention
                attention = attention_construct_fn(cell_output, attention_keys,
                                                   attention_values)
                cell_output = attention  #batch*2num_units

                cell_output = output_fn(
                    cell_output)  # probability on vocabulary list
                #u_sample = tf.random_uniform([batch_size, num_decoder_symbols])
                #g_sample = -tf.log(-tf.log(u_sample + 1e-18) + 1e-18)
                #cell_output_samplemax = tf.log(cell_output+1e-18) + g_sample

                next_input_id = math_ops.cast(math_ops.argmax(cell_output, 1),
                                              dtype=dtype)
                #next_input_id = math_ops.cast(
                #math_ops.argmax(cell_output_samplemax, 1), dtype=dtype)
                done = math_ops.equal(next_input_id, end_of_sequence_id)
                cell_input = array_ops.gather(embeddings, next_input_id)

            # combine cell_input and attention
            next_input = array_ops.concat([cell_input, attention], 1)

            # if time > maxlen, return all true vector
            done = control_flow_ops.cond(
                math_ops.greater(time, maximum_length),
                lambda: array_ops.ones([
                    batch_size,
                ], dtype=dtypes.bool), lambda: done)
            return (done, cell_state, next_input, cell_output, context_state)
Example #38
 def sample(self, time, outputs, name=None, **unused_kwargs):
     """Gets a sample for one step."""
     with ops.name_scope(name, "TrainingHelperSample", [time, outputs]):
         sample_ids = math_ops.cast(math_ops.argmax(outputs, axis=-1),
                                    dtypes.int32)
         return sample_ids
Example #39
 def argmax(x):
     i = math_ops.argmax(x)
     return array_ops.stop_gradient(i)
Example #40
  def create_estimator_spec(
      self, features, mode, logits, labels=None, train_op_fn=None):
    """See `Head`."""
    with variable_scope.variable_scope(
        None, default_name='binary_logistic_head',
        values=(tuple(six.itervalues(features)) + (labels, logits))):

      # Predict.
      pred_keys = prediction_keys.PredictionKeys
      logits = _check_logits(logits, self.logits_dimension)
      logistic = math_ops.sigmoid(logits, name=pred_keys.LOGISTIC)
      two_class_logits = array_ops.concat(
          (array_ops.zeros_like(logits), logits), 1, name='two_class_logits')
      scores = nn.softmax(two_class_logits, name=pred_keys.PROBABILITIES)
      classes = array_ops.reshape(
          math_ops.argmax(two_class_logits, axis=1), (-1, 1), name='classes')
      predictions = {
          pred_keys.LOGITS: logits,
          pred_keys.LOGISTIC: logistic,
          pred_keys.PROBABILITIES: scores,
          pred_keys.CLASS_IDS: classes
      }
      if mode == model_fn.ModeKeys.PREDICT:
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs={'': export_output.ClassificationOutput(
                scores=scores,
                # `ClassificationOutput` requires string classes.
                # TODO(ptucker): Support label_keys.
                classes=string_ops.as_string(classes, name='str_classes'))})

      # Eval.
      labels = _check_labels(_maybe_expand_dim(math_ops.to_float(labels)),
                             self.logits_dimension)
      unweighted_loss = nn.sigmoid_cross_entropy_with_logits(
          labels=labels, logits=logits, name='loss')
      weights = (
          1. if (self._weight_feature_key is None) else
          features[self._weight_feature_key])
      weights = _maybe_expand_dim(math_ops.to_float(weights, name='weights'))
      training_loss = losses.compute_weighted_loss(
          unweighted_loss, weights=weights, reduction=losses.Reduction.SUM)
      if mode == model_fn.ModeKeys.EVAL:
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.EVAL,
            predictions=predictions,
            loss=training_loss,
            eval_metric_ops=self._eval_metric_ops(
                labels=labels,
                logits=logits,
                logistic=logistic,
                scores=scores,
                classes=classes,
                unweighted_loss=unweighted_loss,
                weights=weights))

      # Train.
      if train_op_fn is None:
        raise ValueError('train_op_fn can not be None.')
      logging_ops.scalar_summary(metric_keys.MetricKeys.LOSS, training_loss)
      logging_ops.scalar_summary(
          metric_keys.MetricKeys.LOSS_MEAN,
          losses.compute_weighted_loss(
              unweighted_loss, weights=weights,
              reduction=losses.Reduction.MEAN))
      return model_fn.EstimatorSpec(
          mode=model_fn.ModeKeys.TRAIN,
          predictions=predictions,
          loss=training_loss,
          train_op=train_op_fn(training_loss))
Example #41
    def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
        """ Decoder function used in the `dynamic_rnn_decoder` with the purpose of
    inference.

    The main difference between this decoder function and the `decoder_fn` in
    `greedy_decoder_fn_train` is how `next_cell_input` is calculated. In this
    decoder function we calculate the next input by applying an argmax across
    the feature dimension of the output from the decoder. This is a
    greedy-search approach. (Bahdanau et al., 2014) & (Sutskever et al., 2014)
    use beam-search instead.

    Args:
      time: positive integer constant reflecting the current timestep.
      cell_state: state of RNNCell.
      cell_input: input provided by `dynamic_rnn_decoder`.
      cell_output: output of RNNCell.
      context_state: context state provided by `dynamic_rnn_decoder`.

    Returns:
      A tuple (done, next state, next input, emit output, next context state)
      where:

      done: A boolean vector to indicate which sentences have reached an
      `end_of_sequence_id`. This is used for early stopping by the
      `dynamic_rnn_decoder`. When `time>=maximum_length` a boolean vector with
      all elements as `true` is returned.

      next state: `cell_state`, this decoder function does not modify the
      given state.

      next input: The embedding from argmax of the `cell_output` is used as
      `next_input`.

      emit output: If `output_fn is None` the supplied `cell_output` is
      returned, else the `output_fn` is used to update the `cell_output`
      before calculating `next_input` and returning `cell_output`.

      next context state: `context_state`, this decoder function does not
      modify the given context state. The context state could be modified when
      applying e.g. beam search.
  """
        with ops.name_scope(
                name, "greedy_decoder_fn_inference",
            [time, cell_state, cell_input, cell_output, context_state]):
            if cell_input is not None:
                raise ValueError(
                    "Expected cell_input to be None, but saw: %s" % cell_input)
            if cell_output is None:
                # invariant that this is time == 0
                next_input_id = None
                done = array_ops.zeros([
                    batch_size,
                ], dtype=dtypes.bool)
                cell_state = encoder_state
                cell_output = array_ops.zeros([num_decoder_symbols],
                                              dtype=dtypes.float32)
                context_state = tensor_array_ops.TensorArray(
                    #dtype=dtype, tensor_array_name="greedy_path", size=maximum_length + 1, infer_shape=False)
                    dtype=dtype,
                    tensor_array_name="greedy_path",
                    size=0,
                    dynamic_size=True,
                    infer_shape=False)
            else:
                cell_output = output_fn(cell_output)
                next_input_id = math_ops.cast(math_ops.argmax(cell_output, 1),
                                              dtype=dtype)

                done = math_ops.equal(next_input_id, end_of_sequence_id)
                #done = tf.zeros_like(next_input_id, dtype=tf.bool)
                context_state = context_state.write(time - 1, next_input_id)

            next_input = array_ops.gather(
                embeddings,
                next_input_id) if next_input_id is not None else first_input
            # if time == maxlen, return all true vector
            done = control_flow_ops.cond(
                math_ops.equal(time, maximum_length),
                lambda: array_ops.ones([
                    batch_size,
                ], dtype=dtypes.bool), lambda: done)
            return (done, cell_state, next_input, cell_output, context_state)
Example #42
  def create_estimator_spec(
      self, features, mode, logits, labels=None, train_op_fn=None):
    """See `Head`."""
    with variable_scope.variable_scope(
        None,
        default_name='multi_class_head',
        values=(tuple(six.itervalues(features)) + (labels, logits))):
      logits = _check_logits(logits, self.logits_dimension)

      # Predict.
      pred_keys = prediction_keys.PredictionKeys
      with ops.name_scope(None, 'predictions', (logits,)):
        # class_ids's shape is [batch_size]
        class_ids = math_ops.argmax(logits, 1, name=pred_keys.CLASS_IDS)
        class_ids = array_ops.expand_dims(class_ids, axis=(1,))
        if self._label_vocabulary:
          table = lookup_ops.index_to_string_table_from_tensor(
              vocabulary_list=self._label_vocabulary,
              name='class_string_lookup')
          classes = table.lookup(class_ids)
        else:
          classes = string_ops.as_string(class_ids, name='str_classes')

        probabilities = nn.softmax(logits, name=pred_keys.PROBABILITIES)
        predictions = {
            pred_keys.LOGITS: logits,
            pred_keys.PROBABILITIES: probabilities,
            # Expand to [batch_size, 1]
            pred_keys.CLASS_IDS: class_ids,
            pred_keys.CLASSES: classes,
        }
      if mode == model_fn.ModeKeys.PREDICT:
        batch_size = array_ops.shape(probabilities)[0]
        export_class_list = self._label_vocabulary
        if not export_class_list:
          export_class_list = string_ops.as_string(
              math_ops.range(self._n_classes))
        export_output_classes = array_ops.tile(
            input=array_ops.expand_dims(input=export_class_list, axis=0),
            multiples=[batch_size, 1])
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs={
                '':
                    export_output.ClassificationOutput(
                        scores=probabilities,
                        # `ClassificationOutput` requires string classes.
                        classes=export_output_classes)
            })

      # Eval.
      label_ids = self._label_ids(_check_labels(_maybe_expand_dim(labels), 1))

      unweighted_loss = losses.sparse_softmax_cross_entropy(
          labels=label_ids, logits=logits, reduction=losses.Reduction.NONE)
      # Restore the squeezed dim, so unweighted_loss matches the weights shape.
      unweighted_loss = array_ops.expand_dims(unweighted_loss, axis=(1,))
      weights = (
          1. if (self._weight_feature_key is None) else
          features[self._weight_feature_key])
      weights = _maybe_expand_dim(math_ops.to_float(weights, name='weights'))
      training_loss = losses.compute_weighted_loss(
          unweighted_loss, weights=weights, reduction=losses.Reduction.SUM)
      if mode == model_fn.ModeKeys.EVAL:
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.EVAL,
            predictions=predictions,
            loss=training_loss,
            eval_metric_ops=self._eval_metric_ops(
                labels=label_ids,
                probabilities=probabilities,
                logits=logits,
                class_ids=class_ids,
                unweighted_loss=unweighted_loss,
                weights=weights))

      # Train.
      if train_op_fn is None:
        raise ValueError('train_op_fn can not be None.')
      logging_ops.scalar_summary(metric_keys.MetricKeys.LOSS, training_loss)
      logging_ops.scalar_summary(
          metric_keys.MetricKeys.LOSS_MEAN,
          losses.compute_weighted_loss(
              unweighted_loss, weights=weights,
              reduction=losses.Reduction.MEAN))
      return model_fn.EstimatorSpec(
          mode=model_fn.ModeKeys.TRAIN,
          predictions=predictions,
          loss=training_loss,
          train_op=train_op_fn(training_loss))
Example #43
  def create_estimator_spec(
      self, features, mode, logits, labels=None, train_op_fn=None):
    """See `Head`."""
    # Predict.
    with ops.name_scope('head'):
      with ops.name_scope(None, 'predictions', (logits,)):
        pred_keys = prediction_keys.PredictionKeys
        logits = _check_logits(logits, self.logits_dimension)
        logistic = math_ops.sigmoid(logits, name=pred_keys.LOGISTIC)
        two_class_logits = array_ops.concat(
            (array_ops.zeros_like(logits), logits), 1, name='two_class_logits')
        scores = nn.softmax(two_class_logits, name=pred_keys.PROBABILITIES)
        class_ids = array_ops.reshape(
            math_ops.argmax(two_class_logits, axis=1), (-1, 1), name='classes')
        if self._label_vocabulary:
          table = lookup_ops.index_to_string_table_from_tensor(
              vocabulary_list=self._label_vocabulary,
              name='class_string_lookup')
          classes = table.lookup(class_ids)
        else:
          classes = string_ops.as_string(class_ids, name='str_classes')
        predictions = {
            pred_keys.LOGITS: logits,
            pred_keys.LOGISTIC: logistic,
            pred_keys.PROBABILITIES: scores,
            pred_keys.CLASS_IDS: class_ids,
            pred_keys.CLASSES: classes,
        }
      if mode == model_fn.ModeKeys.PREDICT:
        batch_size = array_ops.shape(logistic)[0]
        export_class_list = self._label_vocabulary
        if not export_class_list:
          export_class_list = string_ops.as_string([0, 1])
        export_output_classes = array_ops.tile(
            input=array_ops.expand_dims(input=export_class_list, axis=0),
            multiples=[batch_size, 1])
        classifier_output = export_output.ClassificationOutput(
            scores=scores,
            # `ClassificationOutput` requires string classes.
            classes=export_output_classes)
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs={
                _DEFAULT_SERVING_KEY: classifier_output,
                _CLASSIFY_SERVING_KEY: classifier_output,
                _REGRESS_SERVING_KEY: export_output.RegressionOutput(
                    value=logistic),
                _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions)
            })

      # Eval.
      unweighted_loss, processed_labels = self.create_loss(
          features=features, mode=mode, logits=logits, labels=labels)
      weights = _weights(features, self._weight_column)
      training_loss = losses.compute_weighted_loss(
          unweighted_loss, weights=weights, reduction=losses.Reduction.SUM)
      if mode == model_fn.ModeKeys.EVAL:
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.EVAL,
            predictions=predictions,
            loss=training_loss,
            eval_metric_ops=self._eval_metric_ops(
                labels=processed_labels,
                logits=logits,
                logistic=logistic,
                scores=scores,
                class_ids=class_ids,
                unweighted_loss=unweighted_loss,
                weights=weights))

      # Train.
      if train_op_fn is None:
        raise ValueError('train_op_fn can not be None.')
    with ops.name_scope(''):
      summary.scalar(
          _summary_key(self._name, metric_keys.MetricKeys.LOSS),
          training_loss)
      summary.scalar(
          _summary_key(self._name, metric_keys.MetricKeys.LOSS_MEAN),
          losses.compute_weighted_loss(
              unweighted_loss, weights=weights,
              reduction=losses.Reduction.MEAN))
    return model_fn.EstimatorSpec(
        mode=model_fn.ModeKeys.TRAIN,
        predictions=predictions,
        loss=training_loss,
        train_op=train_op_fn(training_loss))
Example #44
def _find_loss_augmented_facility_idx(pairwise_distances, labels, chosen_ids,
                                      candidate_ids, margin_multiplier,
                                      margin_type):
    """Find the next centroid that maximizes the loss augmented inference.

    This function is a subroutine called from compute_augmented_facility_locations

    Args:
      pairwise_distances: 2-D Tensor of pairwise distances.
      labels: 1-D Tensor of ground truth cluster assignment.
      chosen_ids: 1-D Tensor of current centroid indices.
      candidate_ids: 1-D Tensor of candidate indices.
      margin_multiplier: multiplication constant.
      margin_type: Type of structured margin to use. Default is nmi.

    Returns:
      integer index.
    """
    num_candidates = array_ops.shape(candidate_ids)[0]

    pairwise_distances_chosen = array_ops.gather(pairwise_distances,
                                                 chosen_ids)
    pairwise_distances_candidate = array_ops.gather(pairwise_distances,
                                                    candidate_ids)
    pairwise_distances_chosen_tile = array_ops.tile(pairwise_distances_chosen,
                                                    [1, num_candidates])

    candidate_scores = -1.0 * math_ops.reduce_sum(array_ops.reshape(
        math_ops.reduce_min(array_ops.concat([
            pairwise_distances_chosen_tile,
            array_ops.reshape(pairwise_distances_candidate, [1, -1])
        ], 0),
                            axis=0,
                            keepdims=True), [num_candidates, -1]),
                                                  axis=1)

    nmi_scores = array_ops.zeros([num_candidates])
    iteration = array_ops.constant(0)

    def func_cond(iteration, nmi_scores):
        del nmi_scores  # Unused in func_cond()
        return iteration < num_candidates

    def func_body(iteration, nmi_scores):
        predictions = get_cluster_assignment(
            pairwise_distances,
            array_ops.concat([chosen_ids, [candidate_ids[iteration]]], 0))
        nmi_score_i = compute_clustering_score(labels, predictions,
                                               margin_type)
        pad_before = array_ops.zeros([iteration])
        pad_after = array_ops.zeros([num_candidates - 1 - iteration])
        # Return 1 - NMI score as the structured loss,
        # because a higher NMI (range [0, 1]) is better.
        return iteration + 1, nmi_scores + array_ops.concat(
            [pad_before, [1.0 - nmi_score_i], pad_after], 0)

    _, nmi_scores = control_flow_ops.while_loop(func_cond, func_body,
                                                [iteration, nmi_scores])

    candidate_scores = math_ops.add(candidate_scores,
                                    margin_multiplier * nmi_scores)

    argmax_index = math_ops.cast(math_ops.argmax(candidate_scores, axis=0),
                                 dtypes.int32)

    return candidate_ids[argmax_index]
Example #45
 def _mode(self):
     ret = math_ops.argmax(self.logits, axis=self._batch_rank)
     ret = array_ops.one_hot(ret, self.event_size, dtype=self.dtype)
     ret.set_shape(self.logits.get_shape())
     return ret
Example #46
def multi_value_predictions(activations, target_column, problem_type,
                            predict_probabilities):
    """Maps `activations` from the RNN to predictions for multi value models.

  If `predict_probabilities` is `False`, this function returns a `dict`
  containing a single entry with key `prediction_key.PredictionKey.CLASSES` for
  `problem_type` `ProblemType.CLASSIFICATION` or
  `prediction_key.PredictionKey.SCORE` for `problem_type`
  `ProblemType.LINEAR_REGRESSION`.

  If `predict_probabilities` is `True`, it will contain a second entry with key
  `prediction_key.PredictionKey.PROBABILITIES`. The
  value of this entry is a `Tensor` of probabilities with shape
  `[batch_size, padded_length, num_classes]`.

  Note that variable length inputs will yield some predictions that don't have
  meaning. For example, if `sequence_length = [3, 2]`, then prediction `[1, 2]`
  has no meaningful interpretation.

  Args:
    activations: Output from an RNN. Should have dtype `float32` and shape
      `[batch_size, padded_length, ?]`.
    target_column: An initialized `TargetColumn`, used to calculate predictions.
    problem_type: Either `ProblemType.CLASSIFICATION` or
      `ProblemType.LINEAR_REGRESSION`.
    predict_probabilities: A Python boolean, indicating whether probabilities
      should be returned. Should only be set to `True` for
      classification/logistic regression problems.
  Returns:
    A `dict` mapping strings to `Tensors`.
  """
    with ops.name_scope('MultiValuePrediction'):
        activations_shape = array_ops.shape(activations)
        flattened_activations = array_ops.reshape(activations,
                                                  [-1, activations_shape[2]])
        prediction_dict = {}
        if predict_probabilities:
            flat_probabilities = target_column.logits_to_predictions(
                flattened_activations, proba=True)
            flat_predictions = math_ops.argmax(flat_probabilities, 1)
            if target_column.num_label_columns == 1:
                probability_shape = array_ops.concat(
                    [activations_shape[:2], [2]], 0)
            else:
                probability_shape = activations_shape
            probabilities = array_ops.reshape(
                flat_probabilities,
                probability_shape,
                name=prediction_key.PredictionKey.PROBABILITIES)
            prediction_dict[
                prediction_key.PredictionKey.PROBABILITIES] = probabilities
        else:
            flat_predictions = target_column.logits_to_predictions(
                flattened_activations, proba=False)
        predictions_name = (prediction_key.PredictionKey.CLASSES
                            if problem_type
                            == constants.ProblemType.CLASSIFICATION else
                            prediction_key.PredictionKey.SCORES)
        predictions = array_ops.reshape(
            flat_predictions, [activations_shape[0], activations_shape[1]],
            name=predictions_name)
        prediction_dict[predictions_name] = predictions
        return prediction_dict
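
The classification branch above flattens the time dimension, takes an argmax per step, and reshapes back to `[batch_size, padded_length]`. A hedged numpy sketch of the same flatten/argmax/reshape pattern (the activations here are random placeholders):

import numpy as np

activations = np.random.rand(2, 3, 4)              # [batch_size, padded_length, num_classes]
batch, padded_len, num_classes = activations.shape
flat = activations.reshape(-1, num_classes)        # [batch_size * padded_length, num_classes]
flat_classes = np.argmax(flat, axis=1)             # one class id per (example, step)
classes = flat_classes.reshape(batch, padded_len)  # [batch_size, padded_length]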
Example #47
 def _my_metric_op(predictions, targets):
     """Simply multiplies predictions and targets to return [1, 0 , 0]."""
     prediction_classes = math_ops.argmax(predictions, 1)
     return tf.mul(prediction_classes, tf.reshape(targets, [-1]))
Example #48
 def calc_sample_id(self, time, logits):
   return tf.cond(time < conf.pick_multinomial_max_len,
           lambda: tf.cast(tf.multinomial(logits, 1), tf.int32)[:, 0],
           lambda: math_ops.cast(math_ops.argmax(logits, axis=-1), dtypes.int32)
           )
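
The helper above samples from the logits for early timesteps and falls back to greedy argmax afterwards; `conf.pick_multinomial_max_len` is project-specific. A rough numpy sketch of the same decision (the threshold and seed below are assumptions):

import numpy as np

def calc_sample_id_np(time, logits, multinomial_max_len=5, seed=0):
    rng = np.random.default_rng(seed)
    if time < multinomial_max_len:
        # sample stochastically from the softmax of the logits
        probs = np.exp(logits - logits.max(axis=-1, keepdims=True))
        probs /= probs.sum(axis=-1, keepdims=True)
        return np.array([rng.choice(len(p), p=p) for p in probs], dtype=np.int32)
    # otherwise pick greedily
    return np.argmax(logits, axis=-1).astype(np.int32)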
Example #49
 def testStopGradient(self):
     grad = backprop.gradients_function(
         lambda x: array_ops.stop_gradient(math_ops.argmax(x)))
     self.assertAllEqual(grad([0.0])[0], None)
Example #50
 def sampler(time, outputs, state):
     # this isn't necessary, but just do it to get the types right
     sample_ids = math_ops.cast(math_ops.argmax(outputs, axis=-1),
                                tf.int32)
     return sample_ids
Example #51
    def create_estimator_spec(self,
                              features,
                              mode,
                              logits,
                              labels=None,
                              train_op_fn=None):
        """See `Head`."""
        with variable_scope.variable_scope(
                None,
                default_name='multi_class_head',
                values=(tuple(six.itervalues(features)) + (labels, logits))):
            logits = _check_logits(logits, self.logits_dimension)

            # Predict.
            pred_keys = prediction_keys.PredictionKeys
            with ops.name_scope(None, 'predictions', (logits, )):
                # class_ids has shape [batch_size]
                class_ids = math_ops.argmax(logits, 1, name=pred_keys.CLASSES)
                probabilities = nn.softmax(logits,
                                           name=pred_keys.PROBABILITIES)
                predictions = {
                    pred_keys.LOGITS:
                    logits,
                    pred_keys.PROBABILITIES:
                    probabilities,
                    # Expand to [batch_size, 1]
                    pred_keys.CLASSES:
                    array_ops.expand_dims(class_ids, axis=(1, ))
                }
            if mode == model_fn.ModeKeys.PREDICT:
                batch_size = array_ops.shape(probabilities)[0]
                output_classes = array_ops.tile(input=array_ops.expand_dims(
                    input=math_ops.range(self._n_classes), axis=0),
                                                multiples=[batch_size, 1])
                return model_fn.EstimatorSpec(
                    mode=model_fn.ModeKeys.PREDICT,
                    predictions=predictions,
                    export_outputs={
                        '':
                        export_output.ClassificationOutput(
                            scores=probabilities,
                            # `ClassificationOutput` requires string classes.
                            # TODO(xiejw): Support label_keys or label_column
                            classes=string_ops.as_string(output_classes,
                                                         name='str_classes'))
                    })

            # Eval.
            labels = _check_labels(labels, 1)
            # Check that we got integer for classification.
            if not labels.dtype.is_integer:
                raise ValueError('Labels dtype should be integer. '
                                 'Instead got %s.' % labels.dtype)
            assert_less = check_ops.assert_less(
                labels,
                ops.convert_to_tensor(self._n_classes, dtype=labels.dtype),
                message='Label IDs must be < n_classes')
            assert_greater = check_ops.assert_non_negative(
                labels, message='Label IDs must be >= 0')
            with ops.control_dependencies((assert_less, assert_greater)):
                labels = array_ops.identity(labels)

            unweighted_loss = losses.sparse_softmax_cross_entropy(
                labels=labels, logits=logits, reduction=losses.Reduction.NONE)
            # Restore the squeezed dim, so unweighted_loss matches the weights shape.
            unweighted_loss = array_ops.expand_dims(unweighted_loss,
                                                    axis=(1, ))
            weights = (1. if (self._weight_feature_key is None) else
                       features[self._weight_feature_key])
            weights = math_ops.to_float(weights, name='weights')
            training_loss = losses.compute_weighted_loss(
                unweighted_loss,
                weights=weights,
                reduction=losses.Reduction.SUM)
            if mode == model_fn.ModeKeys.EVAL:
                return model_fn.EstimatorSpec(
                    mode=model_fn.ModeKeys.EVAL,
                    predictions=predictions,
                    loss=training_loss,
                    eval_metric_ops=self._eval_metric_ops(
                        labels=labels,
                        probabilities=probabilities,
                        logits=logits,
                        class_ids=class_ids,
                        unweighted_loss=unweighted_loss,
                        weights=weights))

            # Train.
            if train_op_fn is None:
                raise ValueError('train_op_fn cannot be None.')
            logging_ops.scalar_summary(metric_keys.MetricKeys.LOSS,
                                       training_loss)
            logging_ops.scalar_summary(
                metric_keys.MetricKeys.LOSS_MEAN,
                losses.compute_weighted_loss(unweighted_loss,
                                             weights=weights,
                                             reduction=losses.Reduction.MEAN))
            return model_fn.EstimatorSpec(mode=model_fn.ModeKeys.TRAIN,
                                          predictions=predictions,
                                          loss=training_loss,
                                          train_op=train_op_fn(training_loss))
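
The Predict branch of this head boils down to an argmax over the logits plus a softmax, with the class ids expanded to `[batch_size, 1]`. A hedged numpy rendering of that prediction dict (key names abbreviated here, not the library's constants):

import numpy as np

logits = np.array([[1.0, 2.0, 0.5],
                   [0.2, 0.1, 3.0]])
class_ids = np.argmax(logits, axis=1)                    # shape [batch_size]
exp = np.exp(logits - logits.max(axis=1, keepdims=True))
probabilities = exp / exp.sum(axis=1, keepdims=True)
predictions = {
    'logits': logits,
    'probabilities': probabilities,
    'classes': class_ids[:, np.newaxis],                 # shape [batch_size, 1]
}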
Example #52
    def __init__(self,
                 name,
                 unfreeze_time=30000,
                 autoencode=False,
                 action_lr=1e-4,
                 deconv_lr=1e-3,
                 fwd_consist=False,
                 baseline_reg=False,
                 softmaxBackprop=True,
                 gtAction=False):
        self.unfreeze_time = unfreeze_time
        self.autoencode = autoencode
        self.gtAction = gtAction
        self.name = '{0}_{1}_{2}_{3}_{4}_{5}K_{6}_{7}'.format(
            name, 'fwdconsist' + str(fwd_consist),
            'baselinereg' + str(baseline_reg), 'deconv_lr' + str(deconv_lr),
            'autoencode' + str(autoencode),
            'unfreeze' + str(int(unfreeze_time / 1000.)),
            'softmax' + str(softmaxBackprop), 'gtAction' + str(gtAction))
        self.fwd_consist = fwd_consist
        self.start = 0

        self.batch_loader = rope_data

        self.image_ph = tf.placeholder(tf.float32, [None, 200, 200, 3],
                                       name='image_ph')
        self.goal_image_ph = tf.placeholder(tf.float32, [None, 200, 200, 3],
                                            name='goal_image_ph')
        self.location_ph = tf.placeholder(tf.float32, [None, LOCATION_BINS],
                                          name='location_ph')
        self.theta_ph = tf.placeholder(tf.float32, [None, THETA_BINS],
                                       name='theta_ph')
        self.length_ph = tf.placeholder(tf.float32, [None, LENGTH_BINS],
                                        name='length_ph')
        self.ignore_flag_ph = tf.placeholder(tf.float32, [None],
                                             name='ignore_flag_ph')
        self.is_training_ph = tf.placeholder(tf.bool, name='is_training_ph')
        self.autoencode_ph = tf.placeholder(tf.bool)
        self.gtAction_ph = tf.placeholder(tf.bool)

        # get latent representations for both images
        latent_image, latent_conv5_image = alexnet_geurzhoy.network(
            self.image_ph, trainable=True, num_outputs=ENCODING_SIZE)
        latent_goal_image, latent_conv5_goal_image = alexnet_geurzhoy.network(
            self.goal_image_ph,
            trainable=True,
            num_outputs=ENCODING_SIZE,
            reuse=True)

        # concatenate the latent representations and share information
        features = tf.concat(1, [latent_image, latent_goal_image])

        with tf.variable_scope("concat_fc"):
            x = tf.nn.relu(features)
            x = slim.fully_connected(x, FEAT_SIZE, scope="concat_fc")

        #################################
        # ACTION PREDICTION
        #################################
        location_embedding = init_weights(
            'location_embedding', [LOCATION_BINS, LOCATION_EMBEDDING_SIZE])
        theta_embedding = init_weights('theta_embedding',
                                       [THETA_BINS, THETA_EMBEDDING_SIZE])

        # layer for predicting X, Y
        with tf.variable_scope('location_pred'):
            loc_network_layers = [FEATURE_SIZE, 200, 200, LOCATION_BINS]
            location_pred = make_network(x, loc_network_layers)
            location_loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    location_pred, self.location_ph))

            location_sample = math_ops.argmax(
                tf.cond(self.is_training_ph, lambda: self.location_ph,
                        lambda: location_pred), 1)
            location_embed = embedding_ops.embedding_lookup(
                location_embedding, location_sample)

        # layer for predicting theta
        with tf.variable_scope('theta_pred'):
            x_with_loc = tf.concat(1, [x, location_embed])
            theta_network_layers = [
                FEATURE_SIZE + LOCATION_EMBEDDING_SIZE, 200, 200, THETA_BINS
            ]
            theta_pred = make_network(x_with_loc, theta_network_layers)
            theta_loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    theta_pred, self.theta_ph))

            theta_sample = math_ops.argmax(
                tf.cond(self.is_training_ph, lambda: self.theta_ph,
                        lambda: theta_pred), 1)
            theta_embed = embedding_ops.embedding_lookup(
                theta_embedding, theta_sample)

        # layer for predicting length of movement
        with tf.variable_scope('length_pred'):
            x_with_loc_theta = tf.concat(1, [x_with_loc, theta_embed])
            length_network_layers = [
                FEATURE_SIZE + LOCATION_EMBEDDING_SIZE + THETA_EMBEDDING_SIZE,
                200, 200, LENGTH_BINS
            ]
            length_pred = make_network(x_with_loc_theta, length_network_layers)
            length_softmax = tf.nn.softmax_cross_entropy_with_logits(
                length_pred, self.length_ph)
            length_loss = tf.reduce_mean(length_softmax * self.ignore_flag_ph)

        # add to collections for retrieval
        tf.add_to_collection('location_logit', location_pred)
        tf.add_to_collection('theta_logit', theta_pred)
        tf.add_to_collection('len_logit', length_pred)

        # variables of the inverse (action) model only, excluding the AlexNet feature encoder
        inv_vars_no_alex = [
            v for v in tf.trainable_variables() if 'alexnet' not in v.name
        ]
        print('Action prediction variables: {0} of {1} trainable variables'.format(
            len(inv_vars_no_alex), len(tf.trainable_variables())))

        total_loss = location_loss + theta_loss + length_loss

        action_optimizer = tf.train.AdamOptimizer(action_lr)

        action_grads, _ = zip(
            *action_optimizer.compute_gradients(total_loss, inv_vars_no_alex))
        action_grads, _ = tf.clip_by_global_norm(action_grads, GRAD_CLIP_NORM)
        action_grads = zip(action_grads, inv_vars_no_alex)

        action_grads_full, _ = zip(*action_optimizer.compute_gradients(
            total_loss, tf.trainable_variables()))
        action_grads_full, _ = tf.clip_by_global_norm(action_grads_full,
                                                      GRAD_CLIP_NORM)
        action_grads_full = zip(action_grads_full, tf.trainable_variables())

        #################################
        # FORWARD CONSISTENCY
        #################################
        if self.fwd_consist:
            with tf.variable_scope('fwd_consist'):
                if softmaxBackprop:
                    location_pred = tf.nn.softmax(location_pred)
                    theta_pred = tf.nn.softmax(theta_pred)
                    length_pred = tf.nn.softmax(length_pred)

                # baseline regularization => gradients flow only to alexnet, not action pred
                if baseline_reg:
                    print('baseline')
                    action_embed = tf.concat(
                        1, [self.location_ph, self.theta_ph, self.length_ph])
                else:
                    # fwd_consist => gradients flow through action prediction
                    latent_conv5_image = tf.stop_gradient(latent_conv5_image)
                    action_embed = tf.cond(
                        self.gtAction_ph,
                        lambda: tf.concat(1, [
                            self.location_ph, self.theta_ph, self.length_ph
                        ]), lambda: tf.concat(
                            1, [location_pred, theta_pred, length_pred]))

                action_embed = slim.fully_connected(action_embed, 363)
                action_embed = tf.reshape(action_embed, [-1, 11, 11, 3])
                # concat along depth
                fwd_features = tf.concat(3, [latent_conv5_image, action_embed])
                # deconvolution
                batch_size = tf.shape(fwd_features)[0]

                wt1 = tf.Variable(
                    tf.truncated_normal([5, 5, 64, 259], stddev=0.1))
                deconv1 = tf.nn.conv2d_transpose(fwd_features, wt1,
                                                 [batch_size, 22, 22, 64],
                                                 [1, 2, 2, 1])
                deconv1 = leaky_relu(deconv1, 0.2)
                wt2 = tf.Variable(
                    tf.truncated_normal([5, 5, 32, 64], stddev=0.1))
                deconv2 = tf.nn.conv2d_transpose(deconv1, wt2,
                                                 [batch_size, 44, 44, 32],
                                                 [1, 2, 2, 1])
                deconv2 = leaky_relu(deconv2, 0.2)
                wt3 = tf.Variable(
                    tf.truncated_normal([5, 5, 3, 32], stddev=0.1))
                deconv3 = tf.nn.conv2d_transpose(deconv2, wt3,
                                                 [batch_size, 88, 88, 3],
                                                 [1, 2, 2, 1])
                deconv3 = tf.nn.tanh(deconv3)
                # loss from upsampled deconvolution and goal image
                upsampled_deconv_img = tf.image.resize_images(
                    deconv3, [200, 200])
                tf.add_to_collection('upsampled_deconv_img',
                                     upsampled_deconv_img)

                # image inputs appear to range from -255 to 255, so dividing by
                # 255 maps them to [-1, 1], matching the tanh output.
                # Choose whether to autoencode (reconstruct the input image) or
                # predict the goal image.

                normalized_goal_img = tf.cond(
                    self.autoencode_ph, lambda: self.image_ph / 255.0,
                    lambda: self.goal_image_ph / 255.0)

                # just to visualize
                deconv_log_img = (upsampled_deconv_img + 1.0) * 127.5

                # variables of the forward-consistency model only
                fwd_vars = [
                    v for v in tf.trainable_variables()
                    if 'fwd_consist' in v.name
                ]
                print('Forward consistency variables: {0} of {1} trainable variables'.
                      format(len(fwd_vars), len(tf.trainable_variables())))

                fwd_consist_loss = tf.reduce_mean(
                    tf.abs(upsampled_deconv_img - normalized_goal_img))
                deconv_optimizer = tf.train.AdamOptimizer(deconv_lr)

                fwd_consist_grads, _ = zip(*deconv_optimizer.compute_gradients(
                    fwd_consist_loss, fwd_vars))
                fwd_consist_grads, _ = tf.clip_by_global_norm(
                    fwd_consist_grads, GRAD_CLIP_NORM)
                fwd_consist_grads = zip(fwd_consist_grads, fwd_vars)

                fwd_consist_grads_full, _ = zip(
                    *deconv_optimizer.compute_gradients(
                        fwd_consist_loss, tf.trainable_variables()))
                fwd_consist_grads_full, _ = tf.clip_by_global_norm(
                    fwd_consist_grads_full, GRAD_CLIP_NORM)
                fwd_consist_grads_full = zip(fwd_consist_grads_full,
                                             tf.trainable_variables())

                self.optimize_fwd_freeze = deconv_optimizer.apply_gradients(
                    fwd_consist_grads)

                with tf.control_dependencies([
                        fwd_consist_grads_full[0][0][0],
                        action_grads_full[0][0][0]
                ]):
                    self.optimize_fwd_full = deconv_optimizer.apply_gradients(
                        fwd_consist_grads_full)
                    self.optimize_action_full = action_optimizer.apply_gradients(
                        action_grads_full)

        self.optimize_action_no_alex = action_optimizer.apply_gradients(
            action_grads)
        self.optimize_action_alex = action_optimizer.apply_gradients(
            action_grads_full)

        #################################
        # LOGGING AND SAVING OPERATIONS
        #################################
        loc_correct_pred = tf.equal(tf.argmax(location_pred, 1),
                                    tf.argmax(self.location_ph, 1))
        self.loc_accuracy = tf.reduce_mean(
            tf.cast(loc_correct_pred, tf.float32))

        theta_correct_pred = tf.equal(tf.argmax(theta_pred, 1),
                                      tf.argmax(self.theta_ph, 1))
        self.theta_accuracy = tf.reduce_mean(
            tf.cast(theta_correct_pred, tf.float32))

        length_correct_pred = tf.equal(tf.argmax(length_pred, 1),
                                       tf.argmax(self.length_ph, 1))
        self.length_accuracy = tf.reduce_mean(
            tf.cast(length_correct_pred, tf.float32))

        # logging
        tf.summary.scalar('model/location_loss',
                          location_loss,
                          collections=['train'])
        tf.summary.scalar('model/theta_loss',
                          theta_loss,
                          collections=['train'])
        tf.summary.scalar('model/length_loss',
                          length_loss,
                          collections=['train'])
        if self.fwd_consist:
            tf.summary.scalar('model/fwd_consist_loss',
                              fwd_consist_loss,
                              collections=['train'])
            tf.summary.image('upsampled_deconv_image',
                             deconv_log_img,
                             max_outputs=5,
                             collections=['train'])

        tf.summary.image('before', (self.image_ph + 255.0) / 2.0,
                         max_outputs=5,
                         collections=['train'])
        tf.summary.image('after', (self.goal_image_ph + 255.0) / 2.0,
                         max_outputs=5,
                         collections=['train'])

        self.train_summaries = tf.summary.merge_all('train')

        self.writer = tf.summary.FileWriter('./results/{0}/logs/{1}'.format(
            self.name, time.time()))

        self.saver = tf.train.Saver(max_to_keep=None)

        self.sess = tf.Session(config=CONFIG)
        self.sess.run(tf.global_variables_initializer())

        self.model_directory = './results/{0}/models/'.format(self.name)
        if not os.path.exists(self.model_directory):
            os.makedirs(self.model_directory)
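
The accuracy bookkeeping at the end of this constructor follows the usual pattern: a prediction counts as correct when the argmax of the predicted logits matches the argmax of the one-hot label. A small numpy sketch with made-up values:

import numpy as np

location_pred = np.array([[0.1, 0.7, 0.2],
                          [0.6, 0.3, 0.1]])
location_labels = np.array([[0., 1., 0.],
                            [0., 0., 1.]])
correct = np.argmax(location_pred, axis=1) == np.argmax(location_labels, axis=1)
accuracy = correct.astype(np.float32).mean()   # 0.5 for these values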
Example #53
    def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
        """Decoder function used in the `dynamic_rnn_decoder` for inference.

        The main difference between this decoder function and the `decoder_fn` in
        `attention_decoder_fn_train` is how `next_cell_input` is calculated. In
        this decoder function we calculate the next input by applying an argmax
        across the feature dimension of the output from the decoder. This is a
        greedy-search approach. (Bahdanau et al., 2014) & (Sutskever et al., 2014)
        use beam-search instead.

        Args:
            time: positive integer constant reflecting the current timestep.
            cell_state: state of RNNCell.
            cell_input: input provided by `dynamic_rnn_decoder`.
            cell_output: output of RNNCell.
            context_state: context state provided by `dynamic_rnn_decoder`.

        Returns:
            A tuple (done, next state, next input, emit output, next context state)
            where:

            done: A boolean vector indicating which sentences have reached an
            `end_of_sequence_id`. This is used for early stopping by the
            `dynamic_rnn_decoder`. When `time>=maximum_length` a boolean vector with
            all elements as `true` is returned.

            next state: `cell_state`, this decoder function does not modify the
            given state.

            next input: The embedding from argmax of the `cell_output` is used as
            `next_input`.

            emit output: If `output_fn is None` the supplied `cell_output` is
            returned, else the `output_fn` is used to update the `cell_output`
            before calculating `next_input` and returning `cell_output`.

            next context state: `context_state`, this decoder function does not
            modify the given context state. The context state could be modified when
            applying e.g. beam search.

        Raises:
            ValueError: if cell_input is not None.

        """
        with ops.name_scope(
                name, "attention_decoder_fn_inference",
            [time, cell_state, cell_input, cell_output, context_state]):
            if cell_input is not None:
                raise ValueError(
                    "Expected cell_input to be None, but saw: %s" % cell_input)

            if cell_output is None:
                # invariant that this is time == 0
                next_input_id = array_ops.ones([
                    batch_size,
                ], dtype=dtype) * (start_of_sequence_id)
                done = array_ops.zeros([
                    batch_size,
                ], dtype=dtypes.bool)
                cell_state = encoder_state
                cell_output = array_ops.zeros([num_decoder_symbols],
                                              dtype=dtypes.float32)
                word_input = array_ops.gather(embeddings, next_input_id)
                naf_triple_id = array_ops.zeros([batch_size, 2], dtype=dtype)
                triple_input = array_ops.gather_nd(imem[1], naf_triple_id)
                cell_input = array_ops.concat([word_input, triple_input],
                                              axis=1)
                # init attention
                attention = _init_attention(encoder_state)
                if imem is not None:  # added by our model
                    context_state = tensor_array_ops.TensorArray(
                        dtype=dtypes.int32,
                        tensor_array_name="output_ids_ta",
                        size=maximum_length,
                        dynamic_size=True,
                        infer_shape=False)
            else:
                # construct attention
                attention = attention_construct_fn(cell_output, attention_keys,
                                                   attention_values)

                if type(attention) is tuple:  # added by our model
                    attention, alignment = attention[0], attention[1]
                    cell_output = attention
                    alignment = tf.reshape(alignment, [batch_size, -1])
                    selector = selector_fn(cell_output)
                    selector = tf.sigmoid(selector)
                    logit = output_fn(cell_output)
                    # cell output
                    cell_output = logit

                    word_prob = nn_ops.softmax(logit) * (1 - selector)
                    entity_prob = alignment * selector
                    mask = array_ops.reshape(
                        math_ops.cast(math_ops.greater(
                            tf.reduce_max(word_prob, 1),
                            tf.reduce_max(entity_prob, 1)),
                                      dtype=dtypes.float32), [-1, 1])
                    word_input = mask * array_ops.gather(
                        embeddings,
                        math_ops.cast(math_ops.argmax(word_prob, 1),
                                      dtype=dtype)
                    ) + (1 - mask) * array_ops.gather_nd(
                        imem[0],
                        array_ops.concat([
                            array_ops.reshape(
                                math_ops.range(batch_size, dtype=dtype),
                                [-1, 1]),
                            array_ops.reshape(
                                math_ops.cast(math_ops.argmax(entity_prob, 1),
                                              dtype=dtype), [-1, 1])
                        ],
                                         axis=1))
                    indices = array_ops.concat([
                        array_ops.reshape(
                            math_ops.range(batch_size, dtype=dtype), [-1, 1]),
                        math_ops.cast(1 - mask, dtype=dtype) * tf.reshape(
                            math_ops.cast(math_ops.argmax(alignment, 1),
                                          dtype=dtype), [-1, 1])
                    ],
                                               axis=1)
                    triple_input = array_ops.gather_nd(imem[1], indices)

                    # cell input
                    cell_input = array_ops.concat([word_input, triple_input],
                                                  axis=1)

                    # done mask
                    mask = array_ops.reshape(math_ops.cast(mask, dtype=dtype),
                                             [-1])
                    input_id = mask * math_ops.cast(
                        math_ops.argmax(word_prob, 1), dtype=dtype) +\
                               (mask - 1) * math_ops.cast(math_ops.argmax(entity_prob, 1), dtype=dtype)
                    context_state = context_state.write(time - 1, input_id)
                    done = array_ops.reshape(
                        math_ops.equal(input_id, end_of_sequence_id), [-1])
                else:
                    cell_output = attention
                    # argmax decoder
                    cell_output = output_fn(cell_output)  # logits
                    next_input_id = math_ops.cast(math_ops.argmax(
                        cell_output, 1),
                                                  dtype=dtype)
                    done = math_ops.equal(next_input_id, end_of_sequence_id)
                    cell_input = array_ops.gather(embeddings, next_input_id)

            # combine cell_input and attention
            next_input = array_ops.concat([cell_input, attention], 1)

            # if time > maximum_length, return an all-True vector
            done = control_flow_ops.cond(
                math_ops.greater(time, maximum_length),
                lambda: array_ops.ones([
                    batch_size,
                ], dtype=dtypes.bool), lambda: done)
            return (done, cell_state, next_input, cell_output, context_state)
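
Stripped of the knowledge-memory branch, the greedy step in this decoder function is simply: take the argmax of the output logits as the next input id, and mark a sequence as done once that id equals `end_of_sequence_id`. A minimal numpy sketch:

import numpy as np

end_of_sequence_id = 2
cell_output = np.array([[0.1, 0.3, 2.5],     # logits for two sequences
                        [1.8, 0.2, 0.1]])
next_input_id = np.argmax(cell_output, axis=1)   # [2, 0]
done = next_input_id == end_of_sequence_id       # [True, False]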
Example #54
 def sample(self, time, outputs, name=None, **unused_kwargs):
     with ops.name_scope(name, "TrainingHelperSample", [time, outputs]):
         sample_ids = math_ops.argmax(outputs,
                                      axis=-1,
                                      output_type=tf.int32)
     return sample_ids
Example #55
def sparse_categorical_accuracy(y_true, y_pred):
    return math_ops.cast(
        math_ops.equal(
            math_ops.reduce_max(y_true, axis=-1),
            math_ops.cast(math_ops.argmax(y_pred, axis=-1), K.floatx())),
        K.floatx())
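
In numpy terms, the comparison above reduces the (single-column) integer labels with a max and checks them against the argmax of the predictions; a sketch with toy values:

import numpy as np

y_true = np.array([[1.], [2.]])              # integer labels, shape [batch, 1]
y_pred = np.array([[0.1, 0.8, 0.1],
                   [0.3, 0.6, 0.1]])
acc = (np.max(y_true, axis=-1) == np.argmax(y_pred, axis=-1)).astype(np.float32)
# -> [1., 0.]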
Example #56
 def mode(self, name="mode"):
   with ops.name_scope(self.name):
     with ops.op_scope([], name):
       ret = math_ops.argmax(self.logits, dimension=self._batch_rank)
       ret.set_shape(self.get_batch_shape())
       return ret
Example #57
def top_k_categorical_accuracy(y_true, y_pred, k=5):
    return K.mean(nn.in_top_k(y_pred, math_ops.argmax(y_true, axis=-1), k),
                  axis=-1)
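
A rough numpy equivalent of the top-k check above, ignoring ties: the true class (the argmax of the one-hot `y_true`) must land among the k highest-scoring predictions.

import numpy as np

def top_k_accuracy_np(y_true, y_pred, k=5):
    true_ids = np.argmax(y_true, axis=-1)
    top_k = np.argsort(y_pred, axis=-1)[:, -k:]
    hits = np.array([t in row for t, row in zip(true_ids, top_k)])
    return hits.astype(np.float32).mean()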
Example #58
 def sample(self, time, outputs, state):
     del state
     sample_ids = math_ops.cast(math_ops.argmax(outputs, axis=-1),
                                dtypes.int32)
     return sample_ids
Example #59
def random_forest_model_fn(features, labels, mode, params, config):
    """Function that returns predictions, training loss, and training op."""
    labels_tensor = labels
    if isinstance(labels, dict) and len(labels) == 1:
        labels_tensor = list(labels.values())[0]

    weights_name = params["weights_name"]
    keys_name = params["keys_name"]
    num_classes = tf.identity(params['num_classes'], name='num_classes')
    params_toGraphs = tensor_forest.ForestHParams(
        num_classes=params['num_classes'],
        num_features=params['num_features'],
        num_trees=params['num_trees'],
        max_nodes=params['max_nodes'],
        regression=params['regression'],
        split_after_samples=params['split_after_samples'])
    # Note the fill() call (around line 90 of tensor_forest.py):
    # https://github.com/tensorflow/tensorflow/blob/r1.2/tensorflow/contrib
    # /tensor_forest/python/tensor_forest.py
    params_toGraphs = params_toGraphs.fill()
    graph_builder_class = tensor_forest.RandomForestGraphs

    early_stopping_rounds = params["early_stopping_rounds"]
    num_trainers = 1
    trainer_id = 0
    report_feature_importances = False
    model_dir = None
    local_eval = False
    device_assigner = None
    weights = None
    if weights_name and weights_name in features:
        weights = features.pop(weights_name)

    keys = None
    if keys_name and keys_name in features:
        keys = features.pop(keys_name)

    # If we're doing eval, optionally ignore device_assigner.
    # Also ignore device assigner if we're exporting (mode == INFER)
    dev_assn = device_assigner
    if (mode == model_fn_lib.ModeKeys.INFER
            or (local_eval and mode == model_fn_lib.ModeKeys.EVAL)):
        dev_assn = None

    graph_builder = graph_builder_class(params_toGraphs,
                                        device_assigner=dev_assn)
    inference = {}
    predictions = {}
    output_alternatives = None
    # if (mode == model_fn_lib.ModeKeys.EVAL or
    #             mode == model_fn_lib.ModeKeys.INFER):
    if True:
        inference[eval_metrics.INFERENCE_PROB_NAME] = (
            graph_builder.inference_graph(features))

        if params_toGraphs.regression:
            predictions = {None: inference[eval_metrics.INFERENCE_PROB_NAME]}
            output_alternatives = {
                None: (constants.ProblemType.LINEAR_REGRESSION, predictions)
            }
        else:
            inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax(
                inference[eval_metrics.INFERENCE_PROB_NAME], 1)

            predictions = {
                prediction_key.PredictionKey.PROBABILITIES:
                inference[eval_metrics.INFERENCE_PROB_NAME],
                prediction_key.PredictionKey.CLASSES:
                inference[eval_metrics.INFERENCE_PRED_NAME]
            }
            output_alternatives = {
                None: (constants.ProblemType.CLASSIFICATION, predictions)
            }

        if report_feature_importances:
            inference[eval_metrics.FEATURE_IMPORTANCE_NAME] = (
                graph_builder.feature_importances())

        if keys is not None:
            inference[keys_name] = keys

    # labels might be None if we're doing prediction (which brings up the
    # question of why we force everything to adhere to a single model_fn).
    loss_deps = []
    training_graph = None
    training_hooks = []
    scaffold = None
    if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
        training_graph = control_flow_ops.group(
            graph_builder.training_graph(features,
                                         labels,
                                         input_weights=weights,
                                         num_trainers=num_trainers,
                                         trainer_id=trainer_id),
            state_ops.assign_add(contrib_framework.get_global_step(), 1))
        loss_deps.append(training_graph)
        if hasattr(graph_builder, 'finalize_training'):
            finalize_listener = EveryCheckpointPreSaveListener(
                graph_builder.finalize_training())
            scaffold = monitored_session.Scaffold()
            training_hooks.append(
                basic_session_run_hooks.CheckpointSaverHook(
                    model_dir,
                    save_secs=600,
                    save_steps=None,
                    scaffold=scaffold,
                    listeners=[finalize_listener]))

    training_loss = None
    if (mode == model_fn_lib.ModeKeys.EVAL
            or mode == model_fn_lib.ModeKeys.TRAIN):
        with ops.control_dependencies(loss_deps):
            training_loss = graph_builder.training_loss(
                features, labels, name='rf_training_loss')

    # Name the tensor so it can be fetched by the hook
    if not params['regression']:
        confusion_matrix_print = confusion_matrix(
            labels=labels_tensor,
            predictions=predictions['classes'],
            num_classes=num_classes,
        )

        confusion_matrix_print = tf.identity(confusion_matrix_print,
                                             name='confusion_matrix_print')
    else:
        confusion_matrix_print = tf.identity(0, name='confusion_matrix_print')

    regression_ornot = tf.identity(params['regression'],
                                   name='regression_ornot')
    # Put weights back in
    if weights is not None:
        features[weights_name] = weights

    if early_stopping_rounds:
        training_hooks.append(TensorForestLossHook(early_stopping_rounds))

    metrics = {}
    # metrics[metric_key.MetricKey.AUC] = metrics_lib.streaming_auc(
    #     labels=labels_tensor,
    #     predictions=inference[eval_metrics.INFERENCE_PRED_NAME]
    # )
    if not params_toGraphs.regression:
        metrics['eval_confusion_matrix'] = confusion_matrix(
            labels=labels_tensor,
            predictions=predictions['classes'],
            num_classes=params['num_classes'],
        )

    return model_fn_lib.ModelFnOps(mode=mode,
                                   predictions=inference,
                                   loss=training_loss,
                                   train_op=training_graph,
                                   training_hooks=training_hooks,
                                   scaffold=scaffold,
                                   eval_metric_ops=metrics,
                                   output_alternatives=output_alternatives)
Example #60
 def assignments(self):
   """Returns a list of Tensors with the matrix of assignments per shard."""
   ret = []
   for w in self._w:
     ret.append(math_ops.argmax(w, 1))
   return ret
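
For reference, the per-shard assignment is simply an argmax over the columns of each shard's (points x clusters) score matrix; a numpy sketch with an invented matrix:

import numpy as np

w_shard = np.array([[0.9, 0.1, 0.0],
                    [0.2, 0.3, 0.5]])
assignments = np.argmax(w_shard, axis=1)   # [0, 2]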