Example #1
 def linear_decay_fn(global_step):
   if global_step is None:
     raise ValueError("global_step is required for linear_decay.")
   global_step = math_ops.minimum(global_step, decay_steps)
   remaining_steps = math_ops.to_int32(decay_steps) - math_ops.to_int32(
       global_step)
   decayed = math_ops.to_float(remaining_steps) / math_ops.to_float(
       decay_steps)
   return math_ops.maximum(0.0, decayed)
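math_ops.to_int32 and math_ops.to_float are thin wrappers around a dtype cast, so the decay above can be reproduced with the public tf.cast API. A minimal TF 2.x sketch, assuming eager execution and a hypothetical decay_steps of 1000:

import tensorflow as tf

def linear_decay(global_step, decay_steps=1000):
  # Clamp the step, then return the remaining fraction of decay_steps.
  step = tf.minimum(global_step, decay_steps)
  remaining = tf.cast(decay_steps, tf.int32) - tf.cast(step, tf.int32)
  decayed = tf.cast(remaining, tf.float32) / tf.cast(decay_steps, tf.float32)
  return tf.maximum(0.0, decayed)

print(linear_decay(tf.constant(250)).numpy())  # 0.75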
Example #2
 def _compute_accuracy(logits, targets, weights=None):
   if self._n_classes > 2:
     _, predictions = nn.top_k(logits, 1)
   else:
     predictions = array_ops.reshape(logits, [-1])
     predictions = math_ops.greater(predictions,
                                    array_ops.zeros_like(predictions))
     targets = array_ops.reshape(targets, [-1])
   return metrics_lib.streaming_accuracy(
       math_ops.to_int32(predictions), math_ops.to_int32(targets), weights)
def update_medoid_per_cluster(pairwise_distances, pairwise_distances_subset,
                              labels, chosen_ids, cluster_member_ids,
                              cluster_idx, margin_multiplier, margin_type):
  """Updates the cluster medoid per cluster.

  Args:
    pairwise_distances: 2-D Tensor of pairwise distances.
    pairwise_distances_subset: 2-D Tensor of pairwise distances for one cluster.
    labels: 1-D Tensor of ground truth cluster assignment.
    chosen_ids: 1-D Tensor of cluster centroid indices.
    cluster_member_ids: 1-D Tensor of cluster member indices for one cluster.
    cluster_idx: Index of this one cluster.
    margin_multiplier: multiplication constant.
    margin_type: Type of structured margin to use. Default is nmi.

  Returns:
    chosen_ids: Updated 1-D Tensor of cluster centroid indices.
  """

  def func_cond(iteration, scores_margin):
    del scores_margin  # Unused variable scores_margin.
    return iteration < num_candidates

  def func_body(iteration, scores_margin):
    # swap the current medoid with the candidate cluster member
    candidate_medoid = math_ops.to_int32(cluster_member_ids[iteration])
    tmp_chosen_ids = update_1d_tensor(chosen_ids, cluster_idx, candidate_medoid)
    predictions = get_cluster_assignment(pairwise_distances, tmp_chosen_ids)
    metric_score = compute_clustering_score(labels, predictions, margin_type)
    pad_before = array_ops.zeros([iteration])
    pad_after = array_ops.zeros([num_candidates - 1 - iteration])
    return iteration + 1, scores_margin + array_ops.concat(
        [pad_before, [1.0 - metric_score], pad_after], 0)

  # pairwise_distances_subset is of size [p, 1, 1, p];
  #   the intermediate dummy dimensions at [1, 2] make this code work in the
  #   edge case where p=1, i.e. when the cluster size is one.
  scores_fac = -1.0 * math_ops.reduce_sum(
      array_ops.squeeze(pairwise_distances_subset, [1, 2]), axis=0)

  iteration = array_ops.constant(0)
  num_candidates = array_ops.size(cluster_member_ids)
  scores_margin = array_ops.zeros([num_candidates])

  _, scores_margin = control_flow_ops.while_loop(func_cond, func_body,
                                                 [iteration, scores_margin])
  candidate_scores = math_ops.add(scores_fac, margin_multiplier * scores_margin)

  argmax_index = math_ops.to_int32(
      math_ops.argmax(candidate_scores, dimension=0))

  best_medoid = math_ops.to_int32(cluster_member_ids[argmax_index])
  chosen_ids = update_1d_tensor(chosen_ids, cluster_idx, best_medoid)
  return chosen_ids
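The while_loop above fills one slot of the margin-score vector per iteration by padding the new scalar with zeros and adding it to the accumulator. A self-contained sketch of that accumulation pattern with the public tf.while_loop API (score_fn and the candidate count are hypothetical stand-ins):

import tensorflow as tf

def accumulate_scores(num_candidates, score_fn):
  # Builds a [num_candidates] vector, writing one entry per loop iteration.
  def cond(i, acc):
    return i < num_candidates

  def body(i, acc):
    pad_before = tf.zeros([i])
    pad_after = tf.zeros([num_candidates - 1 - i])
    update = tf.concat([pad_before, [score_fn(i)], pad_after], 0)
    return i + 1, acc + update

  _, scores = tf.while_loop(
      cond, body, [tf.constant(0), tf.zeros([num_candidates])],
      shape_invariants=[tf.TensorShape([]), tf.TensorShape([None])])
  return scores

print(accumulate_scores(4, lambda i: tf.cast(i, tf.float32)).numpy())  # [0. 1. 2. 3.]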
Example #4
  def testListOfScalarTensors(self):
    a = math_ops.to_int32(5)
    b = math_ops.to_int32(6)

    value = np.random.rand(11, 11)

    with self.test_session(use_gpu=False) as sess:
      result = sess.run(array_ops.split(value, [a, b]))

    self.assertAllEqual(result[0], value[0:5, :])
    self.assertAllEqual(result[1], value[5:, :])
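The test passes the split sizes as scalar int32 tensors rather than Python ints. A minimal TF 2.x sketch of the same behaviour with the public API, assuming eager execution:

import numpy as np
import tensorflow as tf

value = np.random.rand(11, 11)
a = tf.cast(5, tf.int32)  # public-API equivalent of math_ops.to_int32(5)
b = tf.cast(6, tf.int32)

first, second = tf.split(value, [a, b])  # splits along axis 0
assert first.shape == (5, 11) and second.shape == (6, 11)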
Example #5
 def _class_predictions_streaming_mean(
     predictions, labels, weights=None, class_id=None):
   del labels
   return metrics_lib.streaming_mean(
       array_ops.where(
           math_ops.equal(
               math_ops.to_int32(class_id),
               math_ops.to_int32(predictions)),
           array_ops.ones_like(predictions),
           array_ops.zeros_like(predictions)),
       weights=weights)
  def testListOfScalarTensors(self):
    a = math_ops.to_int32(5)
    b = math_ops.to_int32(6)

    value = np.random.rand(11, 11)

    with test_util.device(use_gpu=True):
      result = self.evaluate(array_ops.split(value, [a, b]))

    self.assertAllEqual(result[0], value[0:5, :])
    self.assertAllEqual(result[1], value[5:, :])
def _randomize(coeffs, radixes, seed=None):
  """Applies the Owen randomization to the coefficients."""
  given_dtype = coeffs.dtype
  coeffs = math_ops.to_int32(coeffs)
  num_coeffs = array_ops.shape(coeffs)[-1]
  radixes = array_ops.reshape(math_ops.to_int32(radixes), [-1])
  perms = _get_permutations(num_coeffs, radixes, seed=seed)
  perms = array_ops.reshape(perms, [-1])
  radix_sum = math_ops.reduce_sum(radixes)
  radix_offsets = array_ops.reshape(math_ops.cumsum(radixes, exclusive=True),
                                    [-1, 1])
  offsets = radix_offsets + math_ops.range(num_coeffs) * radix_sum
  permuted_coeffs = array_ops.gather(perms, coeffs + offsets)
  return math_ops.cast(permuted_coeffs, dtype=given_dtype)
 def _loss(probs, targets):
     one_hot_labels = array_ops.one_hot(math_ops.to_int32(targets),
                                        num_classes,
                                        on_value=1.,
                                        off_value=0.,
                                        dtype=dtypes.float32)
     return loss_fn(probs, one_hot_labels)
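The helper only casts the integer class ids and expands them to a one-hot matrix before calling loss_fn. A small sketch of that step with the public tf.one_hot (the targets and num_classes below are made up for illustration):

import tensorflow as tf

targets = tf.constant([0.0, 2.0, 1.0])  # float class ids, as in the snippet above
num_classes = 3
one_hot_labels = tf.one_hot(tf.cast(targets, tf.int32), num_classes,
                            on_value=1., off_value=0., dtype=tf.float32)
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]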
Example #9
    def _compute_power_svd(self, var, mat_g, mat_g_size, alpha,
                           mat_h_slot_name):
        """Computes mat_h = mat_g^alpha using svd. mat_g is a symmetric PSD matrix.

    Args:
      var: the variable we are updating.
      mat_g: the symmetric PSD matrix whose power is to be computed
      mat_g_size: size of mat_g
      alpha: a real number
      mat_h_slot_name: name of slot to store the power, if needed.

    Returns:
      mat_h = mat_g^alpha

    Stores mat_h in the appropriate slot, if it exists.
    Note that mat_g is PSD. So we could use linalg_ops.self_adjoint_eig.
    """
        if mat_g_size == 1:
            mat_h = math_ops.pow(mat_g + self._epsilon, alpha)
        else:
            damping = self._epsilon * linalg_ops.eye(
                math_ops.to_int32(mat_g_size))
            diag_d, mat_u, mat_v = linalg_ops.svd(mat_g + damping,
                                                  full_matrices=True)
            mat_h = math_ops.matmul(
                mat_v *
                math_ops.pow(math_ops.maximum(diag_d, self._epsilon), alpha),
                array_ops.transpose(mat_u))
        if mat_h_slot_name is not None:
            return state_ops.assign(self.get_slot(var, mat_h_slot_name), mat_h)
        return mat_h
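The else branch computes mat_g**alpha as V * diag(s**alpha) * U^T from the SVD of the damped matrix. A stand-alone sketch of that matrix power with the public tf.linalg.svd, assuming TF 2.x eager execution:

import tensorflow as tf

def matrix_power_psd(mat_g, alpha, epsilon=1e-6):
  # Computes mat_g**alpha for a symmetric PSD matrix via SVD, with damping.
  damping = epsilon * tf.eye(tf.shape(mat_g)[0])
  s, u, v = tf.linalg.svd(mat_g + damping, full_matrices=True)
  return tf.matmul(v * tf.pow(tf.maximum(s, epsilon), alpha),
                   tf.transpose(u))

g = tf.constant([[2.0, 0.0], [0.0, 8.0]])
print(matrix_power_psd(g, 0.5).numpy())  # approximately [[1.414 0.] [0. 2.828]]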
Example #10
 def has_zero():
     # Insert a zero in the consecutive ids where zero appears in unique_ids.
     # id_is_zero has length 1.
     zero_id_ind = math_ops.to_int32(id_is_zero[0])
     ids_before = nonzero_consecutive_ids[:zero_id_ind]
     ids_after = nonzero_consecutive_ids[zero_id_ind:]
     return array_ops.concat([ids_before, [0], ids_after], axis=0)
    def finalize(self, outputs, final_state, sequence_lengths):
        """Finalize and return the predicted_ids.

    Args:
      outputs: An instance of BeamSearchDecoderOutput.
      final_state: An instance of BeamSearchDecoderState. Passed through to the
        output.
      sequence_lengths: An `int64` tensor shaped `[batch_size, beam_width]`.
        The sequence lengths determined for each beam during decode.
        **NOTE** These are ignored; the updated sequence lengths are stored in
        `final_state.lengths`.

    Returns:
      outputs: An instance of `FinalBeamSearchDecoderOutput` where the
        predicted_ids are the result of calling _gather_tree.
      final_state: The same input instance of `BeamSearchDecoderState`.
    """
        del sequence_lengths
        # Get max_sequence_length across all beams for each batch.
        max_sequence_lengths = math_ops.to_int32(
            math_ops.reduce_max(final_state.lengths, axis=1))
        predicted_ids = beam_search_ops.gather_tree(
            outputs.predicted_ids,
            outputs.parent_ids,
            max_sequence_lengths=max_sequence_lengths,
            end_token=self._end_token)
        outputs = FinalBeamSearchDecoderOutput(
            beam_search_decoder_output=outputs, predicted_ids=predicted_ids)
        return outputs, final_state
Example #12
def _compute_zeroone_score(labels, predictions):
    zeroone_score = math_ops.to_float(
        math_ops.equal(
            math_ops.reduce_sum(
                math_ops.to_int32(math_ops.equal(labels, predictions))),
            array_ops.shape(labels)[0]))
    return zeroone_score
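The zero-one score is 1.0 only when every prediction matches its label and 0.0 otherwise. The same computation with public ops, as a quick sketch:

import tensorflow as tf

def zeroone_score(labels, predictions):
  matches = tf.reduce_sum(tf.cast(tf.equal(labels, predictions), tf.int32))
  return tf.cast(tf.equal(matches, tf.shape(labels)[0]), tf.float32)

print(zeroone_score(tf.constant([1, 2, 3]), tf.constant([1, 2, 3])).numpy())  # 1.0
print(zeroone_score(tf.constant([1, 2, 3]), tf.constant([1, 2, 4])).numpy())  # 0.0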
 def testGradientWithIntegerPath(self):
     x = constant_op.constant([3.9, 4.1])
     k = math_ops.to_float(math_ops.to_int32(x))
     y = x * k
     dy_dx, = gradients_impl.gradients(y, x)
     with self.test_session() as sess:
         self.assertAllClose([3., 4.], sess.run(dy_dx))
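Casting to int32 and back is non-differentiable, so k acts as a constant during backprop and dy/dx simply equals k. The same check written against TF 2.x GradientTape, as a sketch:

import tensorflow as tf

x = tf.constant([3.9, 4.1])
with tf.GradientTape() as tape:
  tape.watch(x)
  k = tf.cast(tf.cast(x, tf.int32), tf.float32)  # no gradient flows through the casts
  y = x * k
dy_dx = tape.gradient(y, x)
print(dy_dx.numpy())  # [3. 4.]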
Example #14
    def average_impurity(self):
        """Constructs a TF graph for evaluating the average leaf impurity of a tree.

    If in regression mode, this is the leaf variance. If in classification mode,
    this is the gini impurity.

    Returns:
      The last op in the graph.
    """
        children = array_ops.squeeze(array_ops.slice(self.variables.tree,
                                                     [0, 0], [-1, 1]),
                                     squeeze_dims=[1])
        is_leaf = math_ops.equal(constants.LEAF_NODE, children)
        leaves = math_ops.to_int32(
            array_ops.squeeze(array_ops.where(is_leaf), squeeze_dims=[1]))
        counts = array_ops.gather(self.variables.node_sums, leaves)
        gini = self._weighted_gini(counts)

        # Guard against step 1, when there often are no leaves yet.
        def impurity():
            return gini

        # Since average impurity can be used for loss, when there's no data just
        # return a big number so that loss always decreases.
        def big():
            return array_ops.ones_like(gini, dtype=dtypes.float32) * 10000000.

        return control_flow_ops.cond(
            math_ops.greater(array_ops.shape(leaves)[0], 0), impurity, big)
def gather_tree_from_array(t, parent_ids, sequence_length):
    """Calculates the full beams for `TensorArray`s.

  Args:
    t: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of
      shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]`
      where `s` is the depth shape.
    parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`.
    sequence_length: The sequence length of shape `[batch_size, beam_width]`.

  Returns:
    A `Tensor` which is a stacked `TensorArray` of the same size and type as
    `t` and where beams are sorted in each `Tensor` according to `parent_ids`.
  """
    max_time = parent_ids.shape.dims[0].value or array_ops.shape(parent_ids)[0]
    batch_size = parent_ids.shape.dims[1].value or array_ops.shape(
        parent_ids)[1]
    beam_width = parent_ids.shape.dims[2].value or array_ops.shape(
        parent_ids)[2]

    # Generate beam ids that will be reordered by gather_tree.
    beam_ids = array_ops.expand_dims(
        array_ops.expand_dims(math_ops.range(beam_width), 0), 0)
    beam_ids = array_ops.tile(beam_ids, [max_time, batch_size, 1])

    max_sequence_lengths = math_ops.to_int32(
        math_ops.reduce_max(sequence_length, axis=1))
    sorted_beam_ids = beam_search_ops.gather_tree(
        step_ids=beam_ids,
        parent_ids=parent_ids,
        max_sequence_lengths=max_sequence_lengths,
        end_token=beam_width + 1)

    # For out of range steps, simply copy the same beam.
    in_bound_steps = array_ops.transpose(array_ops.sequence_mask(
        sequence_length, maxlen=max_time),
                                         perm=[2, 0, 1])
    sorted_beam_ids = array_ops.where(in_bound_steps,
                                      x=sorted_beam_ids,
                                      y=beam_ids)

    # Generate indices for gather_nd.
    time_ind = array_ops.tile(
        array_ops.reshape(math_ops.range(max_time), [-1, 1, 1]),
        [1, batch_size, beam_width])
    batch_ind = array_ops.tile(
        array_ops.reshape(math_ops.range(batch_size), [-1, 1, 1]),
        [1, max_time, beam_width])
    batch_ind = array_ops.transpose(batch_ind, perm=[1, 0, 2])
    indices = array_ops.stack([time_ind, batch_ind, sorted_beam_ids], -1)

    # Gather from a tensor with collapsed additional dimensions.
    gather_from = t
    final_shape = array_ops.shape(gather_from)
    gather_from = array_ops.reshape(gather_from,
                                    [max_time, batch_size, beam_width, -1])
    ordered = array_ops.gather_nd(gather_from, indices)
    ordered = array_ops.reshape(ordered, final_shape)

    return ordered
Example #16
def one_hot_mask(labels, num_classes, scope=None):
  """Compute 1-hot encodings for masks.

  Given a label image, this computes the one hot encoding at
  each pixel.

  Args:
    labels: (batch_size, width, height, 1) tensor containing labels.
    num_classes: number of classes
    scope: optional scope name

  Returns:
    Tensor of shape (batch_size, width, height, num_classes) with
    a 1-hot encoding.
  """
  with ops.name_scope(scope, "OneHotMask", [labels]):
    height, width, depth = _shape(labels)
    assert depth == 1
    sparse_labels = math_ops.to_int32(array_ops.reshape(labels, [-1, 1]))
    sparse_size, _ = _shape(sparse_labels)
    indices = array_ops.reshape(math_ops.range(0, sparse_size, 1), [-1, 1])
    concated = array_ops.concat([indices, sparse_labels], 1)
    dense_result = sparse_ops.sparse_to_dense(concated,
                                              [sparse_size, num_classes], 1.0,
                                              0.0)
    result = array_ops.reshape(dense_result, [height, width, num_classes])
    return result
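The function builds the per-pixel encoding by hand through sparse_to_dense; with current TensorFlow the same result can be sketched with the public tf.one_hot (the mask tensor below is a made-up 2x2 example):

import tensorflow as tf

def one_hot_mask_v2(labels, num_classes):
  # Per-pixel one-hot encoding; drops the trailing size-1 depth axis first.
  labels = tf.cast(tf.squeeze(labels, axis=-1), tf.int32)
  return tf.one_hot(labels, num_classes, dtype=tf.float32)

masks = tf.constant([[[0], [2]], [[1], [1]]])  # shape (2, 2, 1)
print(one_hot_mask_v2(masks, 3).shape)         # (2, 2, 3)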
def confusion_matrix(predictions, labels, num_classes=None, name=None):
    """Computes the confusion matrix from predictions and labels

  Calculate the Confusion Matrix for a pair of prediction and
  label 1-D int arrays.

  Considering a prediction array such as: `[1, 2, 3]`
  And a label array such as: `[2, 2, 3]`

  The confusion matrix returned would be the following one:
      [[0, 0, 0, 0]
       [0, 0, 1, 0]
       [0, 0, 1, 0]
       [0, 0, 0, 1]]

  Where the matrix rows represent the prediction labels and the columns
  represent the real labels. The confusion matrix is always a 2-D array
  of shape [n, n], where n is the number of valid labels for a given
  classification task. Both prediction and labels must be 1-D arrays of
  the same shape in order for this function to work.

  Args:
    predictions: A 1-D array representing the predictions for a given
                 classification.
    labels: A 1-D array representing the real labels for the classification
            task.
    num_classes: The possible number of labels the classification task can
                 have. If this value is not provided, it will be calculated
                 using both predictions and labels array.
    name: Scope name.

  Returns:
    An l x l matrix representing the confusion matrix, where l is the number of
    possible labels in the classification task.

  Raises:
    ValueError: If predictions and labels are not 1-D vectors of the same
                size.
  """
    with ops.op_scope([predictions, labels, num_classes], name,
                      'confusion_matrix') as name:
        predictions = ops.convert_to_tensor(predictions,
                                            name='predictions',
                                            dtype=dtypes.int64)
        labels = ops.convert_to_tensor(labels,
                                       name='labels',
                                       dtype=dtypes.int64)

        if num_classes is None:
            num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                           math_ops.reduce_max(labels)) + 1

        shape = array_ops.pack([num_classes, num_classes])
        indices = array_ops.transpose(array_ops.pack([predictions, labels]))
        values = array_ops.ones_like(predictions, dtype=dtypes.int32)
        cm_sparse = ops.SparseTensor(indices=indices,
                                     values=values,
                                     shape=shape)
        zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtypes.int32)

        return sparse_ops.sparse_add(zero_matrix, cm_sparse)
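Current TensorFlow ships this as tf.math.confusion_matrix, with one caveat: the public op puts the true labels on the rows, i.e. the transpose of the orientation described above. A quick sketch of the docstring example:

import tensorflow as tf

cm = tf.math.confusion_matrix(labels=[2, 2, 3], predictions=[1, 2, 3])
print(cm.numpy())
# [[0 0 0 0]
#  [0 0 0 0]
#  [0 1 1 0]
#  [0 0 0 1]]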
Example #18
def one_hot_mask(labels, num_classes, scope=None):
    """Compute 1-hot encodings for masks.

  Given a label image, this computes the one hot encoding at
  each pixel.

  Args:
    labels: (batch_size, width, height, 1) tensor containing labels.
    num_classes: number of classes
    scope: optional scope name

  Returns:
    Tensor of shape (batch_size, width, height, num_classes) with
    a 1-hot encoding.
  """
    with ops.name_scope(scope, "OneHotMask", [labels]):
        height, width, depth = _shape(labels)
        assert depth == 1
        sparse_labels = math_ops.to_int32(array_ops.reshape(labels, [-1, 1]))
        sparse_size, _ = _shape(sparse_labels)
        indices = array_ops.reshape(math_ops.range(0, sparse_size, 1), [-1, 1])
        concated = array_ops.concat_v2([indices, sparse_labels], 1)
        dense_result = sparse_ops.sparse_to_dense(concated,
                                                  [sparse_size, num_classes],
                                                  1.0, 0.0)
        result = array_ops.reshape(dense_result, [height, width, num_classes])
        return result
Example #19
 def body(i, newe, score):
     a = memories_iter.read(i)
     olde = e_iter.read(i)
     b = tf.tile(tf.expand_dims(olde, 0),
                 [self.max_sentence_len, 1])
     c = aspect_inputs_iter.read(i)
     l = math_ops.to_int32(sentence_lens_iter.read(i))
     g = tf.matmul(
         weights['attention'][h],
         tf.transpose(tf.concat([a, b, c], 1),
                      perm=[1, 0])) + biases['attention'][h]
     score_temp = tf.concat([
         tf.nn.softmax(tf.slice(g, [0, 0], [1, l])),
         tf.zeros([1, self.max_sentence_len - l])
     ], 1)
     score = score.write(i, score_temp)
     i_AL = tf.reshape(tf.matmul(score_temp, a), [-1, 1])
     olde = tf.reshape(olde, [-1, 1])
     r = tf.nn.sigmoid(
         tf.matmul(weights['gru_r'], i_AL) +
         tf.matmul(updates['gru_r'], olde))
     z = tf.nn.sigmoid(
         tf.matmul(weights['gru_z'], i_AL) +
         tf.matmul(updates['gru_z'], olde))
     e0 = tf.nn.tanh(
         tf.matmul(weights['gru_x'], i_AL) +
         tf.matmul(weights['gru_g'], tf.multiply(r, olde)))
     newe_temp = tf.multiply(1 - z, olde) + tf.multiply(z, e0)
     newe = newe.write(i, newe_temp)
     return (i + 1, newe, score)
Example #20
def _inplace_helper(x, i, v, op):
  """Applies an inplace op on (x, i, v).

  op is one of gen_array_ops.alias_inplace_update,
  gen_array_ops.alias_inplace_add, or gen_array_ops.alias_inplace_sub.

  If i is None, x and v must be the same shape. Computes
    x op v;
  If i is a scalar, x has a rank 1 higher than v's. Computes
    x[i, :] op v;
  Otherwise, x and v must have the same rank. Computes
    x[i, :] op v;

  Args:
    x: A Tensor.
    i: None, a scalar or a vector.
    v: A Tensor.
    op: alias_inplace_update, alias_inplace_add, or alias_inplace_sub.

  Returns:
    Returns x.

  """
  x = ops.convert_to_tensor(x)
  v = ops.convert_to_tensor(v, x.dtype)
  if i is None:
    # Full tensor.
    return array_ops.reshape(
        op(array_ops.reshape(x, [1, -1]), [0], array_ops.reshape(v, [1, -1])),
        array_ops.shape(x))
  i = math_ops.to_int32(i)
  if i.get_shape().ndims == 0:
    # Single 0-dim update.
    return op(x, array_ops.reshape(i, [1]), array_ops.expand_dims(v, 0))
  return op(x, i, v)
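The alias_inplace_* ops are private kernels; with the public API a comparable single-row update can be sketched (out of place, not aliasing) with tf.tensor_scatter_nd_update:

import tensorflow as tf

x = tf.zeros([4, 3])
i = tf.cast(2, tf.int32)         # row index, analogous to `i` in the helper above
v = tf.constant([1.0, 2.0, 3.0])
updated = tf.tensor_scatter_nd_update(x, tf.reshape(i, [1, 1]),
                                      tf.expand_dims(v, 0))
# row 2 of `updated` is [1. 2. 3.]; all other rows remain zero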
Example #21
  def get_best(self, n):
    """Return the indices and values of the n highest scores in the TopN."""

    def refresh_shortlist():
      """Update the shortlist with the highest scores in id_to_score."""
      new_scores, new_ids = nn_ops.top_k(self.id_to_score, self.shortlist_size)
      smallest_new_score = math_ops.reduce_min(new_scores)
      new_length = math_ops.reduce_sum(
          math_ops.to_int32(math_ops.greater(new_scores, dtypes.float32.min)))
      u1 = self.sl_ids.assign(
          math_ops.to_int64(array_ops.concat([[new_length], new_ids], 0)))
      u2 = self.sl_scores.assign(
          array_ops.concat([[smallest_new_score], new_scores], 0))
      self.last_ops = [u1, u2]
      return control_flow_ops.group(u1, u2)

    # We only need to refresh the shortlist if n is greater than the
    # current shortlist size (which is stored in sl_ids[0]).
    with ops.control_dependencies(self.last_ops):
      cond_op = control_flow_ops.cond(n > self.sl_ids[0], refresh_shortlist,
                                      control_flow_ops.no_op)
      with ops.control_dependencies([cond_op]):
        topk_values, topk_indices = nn_ops.top_k(
            self.sl_scores,
            math_ops.minimum(n, math_ops.to_int32(self.sl_ids[0])))
        # topk_indices are the indices into the shortlist, we want to return
        # the indices into id_to_score
        gathered_indices = array_ops.gather(self.sl_ids, topk_indices)
        return gathered_indices, topk_values
Example #22
            def defun_fn(x):
                @function.Defun(dtypes.int32)
                def defun_fn_deep(x):
                    return constant_op.constant(1000) + math_ops.to_int32(x)

                return constant_op.constant(11000) + defun_fn_deep(
                    math_ops.to_int32(x))
Example #23
  def _compute_power_svd(self, var, mat_g, mat_g_size, alpha, mat_h_slot_name):
    """Computes mat_h = mat_g^alpha using svd. mat_g is a symmetric PSD matrix.

    Args:
      var: the variable we are updating.
      mat_g: the symmetric PSD matrix whose power is to be computed
      mat_g_size: size of mat_g
      alpha: a real number
      mat_h_slot_name: name of slot to store the power, if needed.

    Returns:
      mat_h = mat_g^alpha

    Stores mat_h in the appropriate slot, if it exists.
    Note that mat_g is PSD. So we could use linalg_ops.self_adjoint_eig.
    """
    if mat_g_size == 1:
      mat_h = math_ops.pow(mat_g + self._epsilon, alpha)
    else:
      damping = self._epsilon * linalg_ops.eye(math_ops.to_int32(mat_g_size))
      diag_d, mat_u, mat_v = linalg_ops.svd(mat_g + damping, full_matrices=True)
      mat_h = math_ops.matmul(
          mat_v * math_ops.pow(math_ops.maximum(diag_d, self._epsilon), alpha),
          array_ops.transpose(mat_u))
    if mat_h_slot_name is not None:
      return state_ops.assign(self.get_slot(var, mat_h_slot_name), mat_h)
    return mat_h
Example #24
 def has_zero():
   # Insert a zero in the consecutive ids where zero appears in unique_ids.
   # id_is_zero has length 1.
   zero_id_ind = math_ops.to_int32(id_is_zero[0])
   ids_before = nonzero_consecutive_ids[:zero_id_ind]
   ids_after = nonzero_consecutive_ids[zero_id_ind:]
   return array_ops.concat([ids_before, [0], ids_after], axis=0)
Example #25
    def Forward(*args):
      """Forward pass of the recurrent net."""
      theta, state0, inputs, max_input_length, extras = _Pack(args, forward_sig)

      slen_dim = _SeqLenDim(inputs)

      # Creates accumulators for state0 and extras.
      acc_state = _EmptyAcc(slen_dim, state0)
      acc_extras = _EmptyAcc(slen_dim, extras)

      t = slen_dim - max_input_length if self._aligned_end else 0
      dev_t = math_ops.to_int32(t) if use_tpu else math_ops.to_int64(t)
      run = functional_ops.For(
          start=t,
          limit=slen_dim if self._aligned_end else max_input_length,
          delta=1,
          inputs=[dev_t] + _Flatten(
              [theta, state0, inputs, acc_state, acc_extras]),
          body=ForwardLoopBody,
          rewrite_with_while=compiled)
      _, state1, _, acc_state, acc_extras = _Pack(
          run[1:],
          [self._theta, self._state, self._inputs, self._state, self._extras])

      return _Flatten([acc_state, state1, acc_extras])
  def finalize(self, outputs, final_state, sequence_lengths):
    """Finalize and return the predicted_ids.

    Args:
      outputs: An instance of BeamSearchDecoderOutput.
      final_state: An instance of BeamSearchDecoderState. Passed through to the
        output.
      sequence_lengths: An `int64` tensor shaped `[batch_size, beam_width]`.
        The sequence lengths determined for each beam during decode.
        **NOTE** These are ignored; the updated sequence lengths are stored in
        `final_state.lengths`.

    Returns:
      outputs: An instance of `FinalBeamSearchDecoderOutput` where the
        predicted_ids are the result of calling _gather_tree.
      final_state: The same input instance of `BeamSearchDecoderState`.
    """
    del sequence_lengths
    # Get max_sequence_length across all beams for each batch.
    max_sequence_lengths = math_ops.to_int32(
        math_ops.reduce_max(final_state.lengths, axis=1))
    predicted_ids = beam_search_ops.gather_tree(
        outputs.predicted_ids,
        outputs.parent_ids,
        max_sequence_lengths=max_sequence_lengths,
        end_token=self._end_token)
    outputs = FinalBeamSearchDecoderOutput(
        beam_search_decoder_output=outputs, predicted_ids=predicted_ids)
    return outputs, final_state
 def testGradientWithIntegerPath(self):
   x = constant_op.constant([3.9, 4.1])
   k = math_ops.to_float(math_ops.to_int32(x))
   y = x * k
   dy_dx, = gradients_impl.gradients(y, x)
   with self.cached_session() as sess:
     self.assertAllClose([3., 4.], sess.run(dy_dx))
Example #28
def _compute_zeroone_score(labels, predictions):
  zeroone_score = math_ops.to_float(
      math_ops.equal(
          math_ops.reduce_sum(
              math_ops.to_int32(math_ops.equal(labels, predictions))),
          array_ops.shape(labels)[0]))
  return zeroone_score
Example #29
    def Backward(*args):
      """Backward pass for the recurrent net."""
      # theta, state0, inputs are Forward's inputs.
      # acc_state is the accumulated 1st output of Forward.
      # acc_extras is the accumulated 2nd output of Forward.
      # d_acc_state is the gradient for acc_state.
      # d_state1 is the gradient for the final state computed by Forward.
      (theta, state0, inputs, max_input_length, acc_state, acc_extras,
       d_acc_state, d_state1) = _Pack(args, backward_sig)

      # Accumulators for gradients.
      d_theta = _EmptyLike(theta)
      d_inputs = _EmptyLike(inputs)

      # Loop backwards. Note the loop's limit is open-ended, so goes through
      # t=0.
      t = max_input_length - 1
      dev_t = math_ops.to_int32(t) if use_tpu else math_ops.to_int64(t)
      run = functional_ops.For(
          start=t,
          limit=-1,
          delta=-1,
          inputs=[dev_t] + _Flatten([
              theta, state0, inputs, acc_state, acc_extras, d_theta, d_state1,
              d_inputs, d_acc_state
          ]),
          body=BackwardLoopBody,
          rewrite_with_while=compiled)

      (theta, state0, inputs, acc_state, acc_extras, d_theta, d_state0,
       d_inputs, d_acc_state) = _Pack(run[1:], bakloop_sig)

      d_max_input_length = array_ops.constant(0, dtype=max_input_length.dtype)
      return _Flatten(
          [d_theta, d_state0, d_inputs, d_max_input_length, acc_extras])
Example #30
  def average_impurity(self):
    """Constructs a TF graph for evaluating the average leaf impurity of a tree.

    If in regression mode, this is the leaf variance. If in classification mode,
    this is the gini impurity.

    Returns:
      The last op in the graph.
    """
    children = array_ops.squeeze(array_ops.slice(
        self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1])
    is_leaf = math_ops.equal(constants.LEAF_NODE, children)
    leaves = math_ops.to_int32(array_ops.squeeze(array_ops.where(is_leaf),
                                                 squeeze_dims=[1]))
    counts = array_ops.gather(self.variables.node_sums, leaves)
    gini = self._weighted_gini(counts)
    # Guard against step 1, when there often are no leaves yet.
    def impurity():
      return gini
    # Since average impurity can be used for loss, when there's no data just
    # return a big number so that loss always decreases.
    def big():
      return array_ops.ones_like(gini, dtype=dtypes.float32) * 10000000.
    return control_flow_ops.cond(math_ops.greater(
        array_ops.shape(leaves)[0], 0), impurity, big)
Example #31
def _Update(struct_acc, struct_x, t):
  """Updates t-th row in accumulators.

  Args:
    struct_acc: The accumulators. A structure of tensors.
    struct_x: The new values. A structure of tensors congruent to `struct_acc`.
    t: A scalar integer. Performance is better if `t` is on the device
      memory.

  Returns:
    A structure of tensors. Say, ret is a returned dictionary. Then, for
    each key, we have:
      ret[key] = struct_acc[key];
      ret[key][t, :] = struct_x[key]
  """
  to_skip_update = set()
  acc_lst = nest.flatten(struct_acc)
  x_lst = nest.flatten(struct_x)
  t = math_ops.to_int32([t])  # tf.to_int32 casts on-device tensors.
  lst = []
  for acc, x in zip(acc_lst, x_lst):
    if acc in to_skip_update:
      # Until b/62105730 is fixed, we need to avoid inplace update for tensors
      # of rank 1.  could reshape to handle it, but we don't really need the
      # values applied to these, so just skip their modification.
      lst += [acc]
    else:
      lst += [alias_inplace_update(acc, t, array_ops.expand_dims(x, 0))]
  return nest.pack_sequence_as(struct_acc, lst)
      def defun_fn(x):

        @function.Defun(dtypes.int32)
        def defun_fn_deep(x):
          return constant_op.constant(1000) + math_ops.to_int32(x)

        return constant_op.constant(11000) + defun_fn_deep(math_ops.to_int32(x))
Example #33
        def body(i, prev_c, prev_h, actions, log_probs):
            # pylint: disable=g-long-lambda
            signal = control_flow_ops.cond(
                math_ops.equal(i, 0), lambda: array_ops.tile(
                    device_go_embedding, [self.hparams.num_children, 1]),
                lambda: embedding_ops.embedding_lookup(device_embeddings,
                                                       actions.read(i - 1)))
            if self.hparams.keep_prob is not None:
                signal = nn_ops.dropout(signal, self.hparams.keep_prob)
            next_c, next_h = lstm(signal, prev_c, prev_h, w_lstm, forget_bias)
            query = math_ops.matmul(next_h, attn_w_2)
            query = array_ops.reshape(
                query,
                [self.hparams.num_children, 1, self.hparams.hidden_size])
            query = math_ops.tanh(query + attn_mem)
            query = array_ops.reshape(query, [
                self.hparams.num_children * self.num_groups,
                self.hparams.hidden_size
            ])
            query = math_ops.matmul(query, attn_v)
            query = array_ops.reshape(
                query, [self.hparams.num_children, self.num_groups])
            query = nn_ops.softmax(query)
            query = array_ops.reshape(
                query, [self.hparams.num_children, self.num_groups, 1])
            query = math_ops.reduce_sum(attn_mem * query, axis=1)
            query = array_ops.concat([next_h, query], axis=1)
            logits = math_ops.matmul(query, device_softmax)
            logits /= self.hparams.temperature
            if self.hparams.tanh_constant > 0:
                logits = math_ops.tanh(logits) * self.hparams.tanh_constant
            if self.hparams.logits_std_noise > 0:
                num_in_logits = math_ops.cast(array_ops.size(logits),
                                              dtype=dtypes.float32)
                avg_norm = math_ops.divide(linalg_ops.norm(logits),
                                           math_ops.sqrt(num_in_logits))
                logits_noise = random_ops.random_normal(
                    array_ops.shape(logits),
                    stddev=self.hparams.logits_std_noise * avg_norm)
                logits = control_flow_ops.cond(
                    self.global_step > self.hparams.stop_noise_step,
                    lambda: logits, lambda: logits + logits_noise)

            if mode == "sample":
                next_y = random_ops.multinomial(logits,
                                                1,
                                                seed=self.hparams.seed)
            elif mode == "greedy":
                next_y = math_ops.argmax(logits, 1)
            elif mode == "target":
                next_y = array_ops.slice(y, [0, i], [-1, 1])
            else:
                raise NotImplementedError
            next_y = math_ops.to_int32(next_y)
            next_y = array_ops.reshape(next_y, [self.hparams.num_children])
            actions = actions.write(i, next_y)
            log_probs += nn_ops.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=next_y)
            return i + 1, next_c, next_h, actions, log_probs
def confusion_matrix(predictions, labels, num_classes=None,
                     dtype=dtypes.int32, name=None):
  """Computes the confusion matrix from predictions and labels.

  Calculate the Confusion Matrix for a pair of prediction and
  label 1-D int arrays.

  Considering a prediction array such as: `[1, 2, 3]`
  And a label array such as: `[2, 2, 3]`

  The confusion matrix returned would be the following one:
      [[0, 0, 0, 0]
       [0, 0, 1, 0]
       [0, 0, 1, 0]
       [0, 0, 0, 1]]

  Where the matrix rows represent the prediction labels and the columns
  represent the real labels. The confusion matrix is always a 2-D array
  of shape [n, n], where n is the number of valid labels for a given
  classification task. Both prediction and labels must be 1-D arrays of
  the same shape in order for this function to work.

  Args:
    predictions: A 1-D array representing the predictions for a given
                 classification.
    labels: A 1-D array representing the real labels for the classification
            task.
    num_classes: The possible number of labels the classification task can
                 have. If this value is not provided, it will be calculated
                 using both predictions and labels array.
    dtype: Data type of the confusion matrix.
    name: Scope name.

  Returns:
    A k x k matrix representing the confusion matrix, where k is the number of
    possible labels in the classification task.

  Raises:
    ValueError: If predictions and labels are not 1-D vectors of the same
                size.
  """
  with ops.name_scope(name, 'confusion_matrix',
                      [predictions, labels, num_classes]) as name:
    predictions, labels = metric_ops_util.remove_squeezable_dimensions(
        ops.convert_to_tensor(
            predictions, name='predictions', dtype=dtypes.int64),
        ops.convert_to_tensor(labels, name='labels', dtype=dtypes.int64))

    if num_classes is None:
      num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                     math_ops.reduce_max(labels)) + 1

    shape = array_ops.pack([num_classes, num_classes])
    indices = array_ops.transpose(array_ops.pack([predictions, labels]))
    values = array_ops.ones_like(predictions, dtype)
    cm_sparse = ops.SparseTensor(
        indices=indices, values=values, shape=shape)
    zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype)

    return sparse_ops.sparse_add(zero_matrix, cm_sparse)
def gather_tree_from_array(t, parent_ids, sequence_length):
  """Calculates the full beams for `TensorArray`s.

  Args:
    t: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of
      shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]`
      where `s` is the depth shape.
    parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`.
    sequence_length: The sequence length of shape `[batch_size, beam_width]`.

  Returns:
    A `Tensor` which is a stacked `TensorArray` of the same size and type as
    `t` and where beams are sorted in each `Tensor` according to `parent_ids`.
  """
  max_time = parent_ids.shape[0].value or array_ops.shape(parent_ids)[0]
  batch_size = parent_ids.shape[1].value or array_ops.shape(parent_ids)[1]
  beam_width = parent_ids.shape[2].value or array_ops.shape(parent_ids)[2]

  # Generate beam ids that will be reordered by gather_tree.
  beam_ids = array_ops.expand_dims(
      array_ops.expand_dims(math_ops.range(beam_width), 0), 0)
  beam_ids = array_ops.tile(beam_ids, [max_time, batch_size, 1])

  mask = array_ops.sequence_mask(
      sequence_length, maxlen=max_time, dtype=dtypes.int32)
  mask = array_ops.transpose(mask, perm=[2, 0, 1])

  # Use beam_width + 1 to mark the end of beam.
  masked_beam_ids = (beam_ids * mask) + (1 - mask) * (beam_width + 1)

  max_sequence_lengths = math_ops.to_int32(
      math_ops.reduce_max(sequence_length, axis=1))
  sorted_beam_ids = beam_search_ops.gather_tree(
      step_ids=masked_beam_ids,
      parent_ids=parent_ids,
      max_sequence_lengths=max_sequence_lengths,
      end_token=beam_width + 1)

  # For out of range steps, simply copy the same beam.
  sorted_beam_ids = array_ops.where(
      math_ops.cast(mask, dtypes.bool), x=sorted_beam_ids, y=beam_ids)

  # Generate indices for gather_nd.
  time_ind = array_ops.tile(array_ops.reshape(
      math_ops.range(max_time), [-1, 1, 1]), [1, batch_size, beam_width])
  batch_ind = array_ops.tile(array_ops.reshape(
      math_ops.range(batch_size), [-1, 1, 1]), [1, max_time, beam_width])
  batch_ind = array_ops.transpose(batch_ind, perm=[1, 0, 2])
  indices = array_ops.stack([time_ind, batch_ind, sorted_beam_ids], -1)

  # Gather from a tensor with collapsed additional dimensions.
  gather_from = t
  final_shape = array_ops.shape(gather_from)
  gather_from = array_ops.reshape(
      gather_from, [max_time, batch_size, beam_width, -1])
  ordered = array_ops.gather_nd(gather_from, indices)
  ordered = array_ops.reshape(ordered, final_shape)

  return ordered
Example #36
    def __call__(self, inputs, state, scope=None):
        batch_size = array_ops.shape(inputs)[0]

        ### Unpack state
        # last_betas: bs x V
        # last_index : bs
        last_beta, last_index = array_ops.split(
            state, [self._transitions.num_tags, 1], axis=1)
        last_index = math_ops.cast(last_index, dtypes.int32)

        ### Unpack inputs
        # unary:   bs x V
        # last_beta bs x V
        shape = [
            self._transitions.num_tags, self._transitions.num_tags,
            self._transitions._total_nr_parameters
        ]
        unary, beta, pairwise_flat = array_ops.split(inputs, shape, axis=1)

        ### Construct logits of this timestep according to (6) in the paper
        batch_indices = array_ops.reshape(math_ops.range(batch_size), [-1, 1])

        pairwise = self._transitions.get_pairwise_given_start(
            pairwise_flat, last_index)
        # bs x V
        last_beta = array_ops.gather_nd(
            last_beta, array_ops.concat([batch_indices, last_index], axis=1))
        last_beta = array_ops.reshape(last_beta, [-1, 1])
        logits = pairwise + unary + beta - last_beta  # NOTE this is only valid for the index we are gathering from

        # bs x V
        log_probs = nn_ops.log_softmax(logits)
        # bs x 1
        entropy = -math_ops.reduce_sum(
            log_probs * math_ops.exp(log_probs), axis=1, keepdims=True)

        ### Sample the next symbol
        # bs x 1
        new_indices = random_ops.multinomial(logits, 1)
        new_indices = math_ops.to_int32(new_indices)

        ### Gather the logits of the new symbol to return the sequence probability
        gather_indices = array_ops.concat(
            [batch_indices,
             array_ops.reshape(new_indices, [-1, 1])], axis=1)

        # bs x 1
        output_logits = array_ops.gather_nd(logits, gather_indices)
        output_logits = array_ops.reshape(output_logits, [-1, 1])

        ### Pack the new state
        new_state = array_ops.concat(
            [beta, math_ops.to_float(new_indices)], axis=1)

        output = array_ops.concat(
            [math_ops.to_float(new_indices), output_logits, entropy, logits],
            axis=1)

        return output, new_state
Example #37
def _GatherV2Grad(op, grad):
    """Gradient for GatherV2 op."""
    # params can be large, so colocate the shape calculation with it.
    #
    # params can be very large for sparse model, array_ops.shape raises
    # exception on the Windows platform when any dimension is larger than
    # int32. params_shape is not used in optimizer apply_sparse gradients,
    # so it's fine to convert it back to int32 regardless of truncation.
    params = op.inputs[0]
    with ops.colocate_with(params):
        params_shape = array_ops.shape(params, out_type=ops.dtypes.int64)
        params_shape = math_ops.to_int32(params_shape)

    indices = op.inputs[1]
    indices_size = array_ops.expand_dims(array_ops.size(indices), 0)
    axis = op.inputs[2]
    axis_static = tensor_util.constant_value(axis)

    # For axis 0 gathers, build an appropriately shaped IndexedSlices.
    if axis_static == 0:
        values_shape = array_ops.concat([indices_size, params_shape[1:]], 0)
        values = array_ops.reshape(grad, values_shape)
        indices = array_ops.reshape(indices, indices_size)
        return [ops.IndexedSlices(values, indices, params_shape), None, None]

    outer_shape = params_shape[:axis]
    outer_dims = array_ops.size(outer_shape)
    inner_shape = params_shape[axis:][1:]
    inner_dims = array_ops.size(inner_shape)

    outer_axes_indices = math_ops.range(outer_dims)
    inner_axes_indices = math_ops.range(outer_dims + 1,
                                        outer_dims + 1 + inner_dims)

    values_shape = array_ops.concat([outer_shape, indices_size, inner_shape],
                                    0)
    values = array_ops.reshape(grad, values_shape)
    indices = array_ops.reshape(indices, indices_size)

    # We need to sum up every slice `values[..., i, ....]` corresponding to
    # `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not
    # support an axis parameter, we transpose the gather dimension to the front,
    # then use `unsorted_segment_sum` to build a
    # [gather_axis, outer_axes, inner_axes] tensor with all the gradients
    # affecting each index in `gather_axis` summed up.
    transpose_dims = array_ops.concat(
        [[outer_dims], outer_axes_indices, inner_axes_indices], 0)
    values_transpose = array_ops.transpose(values, transpose_dims)
    num_segments = params_shape[axis]

    params_grad = math_ops.unsorted_segment_sum(values_transpose, indices,
                                                num_segments)

    # Inverts the above transpose by moving dimension 0 back to its original
    # position.
    invert_transpose_dims = array_ops.concat(
        [outer_axes_indices + 1, [0], inner_axes_indices], 0)
    params_grad = array_ops.transpose(params_grad, invert_transpose_dims)
    return [params_grad, None, None]
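Because duplicate indices are routed through an IndexedSlices (axis 0) or unsorted_segment_sum (other axes), gradients for repeated indices are summed. A small TF 2.x GradientTape check of that behaviour, as a sketch:

import tensorflow as tf

params = tf.Variable([1.0, 2.0, 3.0])
with tf.GradientTape() as tape:
  gathered = tf.gather(params, [0, 0, 2])  # index 0 is gathered twice
  loss = tf.reduce_sum(gathered)
grad = tape.gradient(loss, params)         # returned as IndexedSlices
print(tf.convert_to_tensor(grad).numpy())  # [2. 0. 1.]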
Example #38
 def _loss(probs, targets):
     one_hot_labels = array_ops.one_hot(
         math_ops.to_int32(targets),
         num_classes,
         on_value=1.,
         off_value=0.,
         dtype=dtypes.float32)
     return loss_fn(probs, one_hot_labels)
Example #39
def _GatherV2Grad(op, grad):
  """Gradient for GatherV2 op."""
  # params can be large, so colocate the shape calculation with it.
  #
  # params can be very large for sparse model, array_ops.shape raises
  # exception on the Windows platform when any dimension is larger than
  # int32. params_shape is not used in optimizer apply_sparse gradients,
  # so it's fine to convert it back to int32 regardless of truncation.
  params = op.inputs[0]
  with ops.colocate_with(params):
    params_shape = array_ops.shape(params, out_type=ops.dtypes.int64)
    params_shape = math_ops.to_int32(params_shape)

  indices = op.inputs[1]
  indices_size = array_ops.expand_dims(array_ops.size(indices), 0)
  axis = op.inputs[2]
  axis_static = tensor_util.constant_value(axis)

  # For axis 0 gathers, build an appropriately shaped IndexedSlices.
  if axis_static == 0:
    values_shape = array_ops.concat([indices_size, params_shape[1:]], 0)
    values = array_ops.reshape(grad, values_shape)
    indices = array_ops.reshape(indices, indices_size)
    return [ops.IndexedSlices(values, indices, params_shape), None, None]

  outer_shape = params_shape[:axis]
  outer_dims = array_ops.size(outer_shape)
  inner_shape = params_shape[axis:][1:]
  inner_dims = array_ops.size(inner_shape)

  outer_axes_indices = math_ops.range(outer_dims)
  inner_axes_indices = math_ops.range(outer_dims + 1,
                                      outer_dims + 1 + inner_dims)

  values_shape = array_ops.concat([outer_shape, indices_size, inner_shape], 0)
  values = array_ops.reshape(grad, values_shape)
  indices = array_ops.reshape(indices, indices_size)

  # We need to sum up every slice `values[..., i, ....]` corresponding to
  # `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not
  # support an axis parameter, we transpose the gather dimension to the front,
  # then use `unsorted_segment_sum` to build a
  # [gather_axis, outer_axes, inner_axes] tensor with all the gradients
  # affecting each index in `gather_axis` summed up.
  transpose_dims = array_ops.concat(
      [[outer_dims], outer_axes_indices, inner_axes_indices], 0)
  values_transpose = array_ops.transpose(values, transpose_dims)
  num_segments = params_shape[axis]

  params_grad = math_ops.unsorted_segment_sum(
      values_transpose, indices, num_segments)

  # Inverts the above transpose by moving dimension 0 back to its original
  # position.
  invert_transpose_dims = array_ops.concat(
      [outer_axes_indices + 1, [0], inner_axes_indices], 0)
  params_grad = array_ops.transpose(params_grad, invert_transpose_dims)
  return [params_grad, None, None]
 def input_fn():
   random_sequence = random_ops.random_uniform(
       [batch_size, sequence_length], 0, 2, dtype=dtypes.int32, seed=seed)
   inputs = array_ops.expand_dims(math_ops.to_float(random_sequence), 2)
   labels = math_ops.to_int32(
       array_ops.squeeze(
           math_ops.reduce_sum(inputs, axis=[1]) > (
               sequence_length / 2.0)))
   return {'inputs': inputs}, labels
Example #41
 def _loss(probs, targets):
     if targets.get_shape().ndims > 1:
         targets = array_ops.squeeze(targets, squeeze_dims=[1])
     one_hot_labels = array_ops.one_hot(math_ops.to_int32(targets),
                                        num_classes,
                                        on_value=1.,
                                        off_value=0.,
                                        dtype=dtypes.float32)
     return loss_fn(probs, one_hot_labels)
    def body(i, prev_c, prev_h, actions, log_probs):
      # pylint: disable=g-long-lambda
      signal = control_flow_ops.cond(
          math_ops.equal(i, 0),
          lambda: array_ops.tile(device_go_embedding,
                                 [self.hparams.num_children, 1]),
          lambda: embedding_ops.embedding_lookup(device_embeddings,
                                                 actions.read(i - 1))
      )
      if self.hparams.keep_prob is not None:
        signal = nn_ops.dropout(signal, self.hparams.keep_prob)
      next_c, next_h = lstm(signal, prev_c, prev_h, w_lstm, forget_bias)
      query = math_ops.matmul(next_h, attn_w_2)
      query = array_ops.reshape(
          query, [self.hparams.num_children, 1, self.hparams.hidden_size])
      query = math_ops.tanh(query + attn_mem)
      query = array_ops.reshape(query, [
          self.hparams.num_children * self.num_groups, self.hparams.hidden_size
      ])
      query = math_ops.matmul(query, attn_v)
      query = array_ops.reshape(query,
                                [self.hparams.num_children, self.num_groups])
      query = nn_ops.softmax(query)
      query = array_ops.reshape(query,
                                [self.hparams.num_children, self.num_groups, 1])
      query = math_ops.reduce_sum(attn_mem * query, axis=1)
      query = array_ops.concat([next_h, query], axis=1)
      logits = math_ops.matmul(query, device_softmax)
      logits /= self.hparams.temperature
      if self.hparams.tanh_constant > 0:
        logits = math_ops.tanh(logits) * self.hparams.tanh_constant
      if self.hparams.logits_std_noise > 0:
        num_in_logits = math_ops.cast(
            array_ops.size(logits), dtype=dtypes.float32)
        avg_norm = math_ops.divide(
            linalg_ops.norm(logits), math_ops.sqrt(num_in_logits))
        logits_noise = random_ops.random_normal(
            array_ops.shape(logits),
            stddev=self.hparams.logits_std_noise * avg_norm)
        logits = control_flow_ops.cond(
            self.global_step > self.hparams.stop_noise_step, lambda: logits,
            lambda: logits + logits_noise)

      if mode == "sample":
        next_y = random_ops.multinomial(logits, 1, seed=self.hparams.seed)
      elif mode == "greedy":
        next_y = math_ops.argmax(logits, 1)
      elif mode == "target":
        next_y = array_ops.slice(y, [0, i], [-1, 1])
      else:
        raise NotImplementedError
      next_y = math_ops.to_int32(next_y)
      next_y = array_ops.reshape(next_y, [self.hparams.num_children])
      actions = actions.write(i, next_y)
      log_probs += nn_ops.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=next_y)
      return i + 1, next_c, next_h, actions, log_probs
Example #43
 def body(i, context_rep, context_att):
     a = context_outputs_iter.read(i)
     b = aspect_avg_iter.read(i)
     l = math_ops.to_int32(context_lens_iter.read(i))
     context_score = tf.reshape(
         tf.nn.tanh(
             tf.matmul(tf.matmul(a, weights['context_score']),
                       tf.reshape(b, [-1, 1])) + biases['context_score']),
         [1, -1])
     context_att_temp = tf.concat([
         tf.nn.softmax(tf.slice(context_score, [0, 0], [1, l])),
         tf.zeros([1, self.max_context_len - l])
     ], 1)
     context_att = context_att.write(i, context_att_temp)
     context_rep = context_rep.write(i, tf.matmul(context_att_temp, a))
     return (i + 1, context_rep, context_att)
Example #44
 def _get_class_id(self, predictions_dict):
   # Handle different multiclass strategies.
   if (self._learner_config.multi_class_strategy ==
       learner_pb2.LearnerConfig.TREE_PER_CLASS and
       self._logits_dimension != 1):
     # Choose the class for which the tree is built (one vs rest).
     return math_ops.to_int32(
         predictions_dict[NUM_TREES_ATTEMPTED] % self._logits_dimension)
   return constant_op.constant(-1, dtype=dtypes.int32)
 def input_fn():
   random_sequence = random_ops.random_uniform(
       [batch_size, sequence_length], 0, 2, dtype=dtypes.int32, seed=seed)
   inputs = array_ops.expand_dims(math_ops.to_float(random_sequence), 2)
   labels = math_ops.to_int32(
       array_ops.squeeze(
           math_ops.reduce_sum(
               inputs, reduction_indices=[1]) > (sequence_length / 2.0)))
   return {'inputs': inputs}, labels
Example #46
 def _loss(probs, targets):
   if targets.get_shape().ndims > 1:
     targets = array_ops.squeeze(targets, squeeze_dims=[1])
   one_hot_labels = array_ops.one_hot(
       math_ops.to_int32(targets),
       num_classes,
       on_value=1.,
       off_value=0.,
       dtype=dtypes.float32)
   return loss_fn(probs, one_hot_labels)
Example #47
 def func_body(iteration, scores_margin):
   # swap the current medoid with the candidate cluster member
   candidate_medoid = math_ops.to_int32(cluster_member_ids[iteration])
   tmp_chosen_ids = update_1d_tensor(chosen_ids, cluster_idx, candidate_medoid)
   predictions = get_cluster_assignment(pairwise_distances, tmp_chosen_ids)
   metric_score = compute_clustering_score(labels, predictions, margin_type)
   pad_before = array_ops.zeros([iteration])
   pad_after = array_ops.zeros([num_candidates - 1 - iteration])
   return iteration + 1, scores_margin + array_ops.concat(
       [pad_before, [1.0 - metric_score], pad_after], 0)
Example #48
 def func_body(iteration, scores_margin):
   # swap the current medoid with the candidate cluster member
   candidate_medoid = math_ops.to_int32(cluster_member_ids[iteration])
   tmp_chosen_ids = update_1d_tensor(chosen_ids, cluster_idx, candidate_medoid)
   predictions = get_cluster_assignment(pairwise_distances, tmp_chosen_ids)
   metric_score = compute_clustering_score(labels, predictions, margin_type)
   pad_before = array_ops.zeros([iteration])
   pad_after = array_ops.zeros([num_candidates - 1 - iteration])
   return iteration + 1, scores_margin + array_ops.concat(
       [pad_before, [1.0 - metric_score], pad_after], 0)
Example #49
def my_rnn(alphabetEnc, cell, inputs, initial_state=None, dtype=None,
           sequence_length=None, scope=None):

  if not isinstance(cell, rnn_cell.RNNCell):
    raise TypeError("cell must be an instance of RNNCell")
  if not isinstance(inputs, list):
    raise TypeError("inputs must be a list")
  if not inputs:
    raise ValueError("inputs must not be empty")

  outputs = []
  with vs.variable_scope(scope or "RNN"):
    fixed_batch_size = inputs[0].get_shape().with_rank_at_least(1)[0]
    if fixed_batch_size.value:
      batch_size = fixed_batch_size.value
    else:
      batch_size = array_ops.shape(inputs[0])[0]
    if initial_state is not None:
      state = initial_state
    else:
      if not dtype:
        raise ValueError("If no initial_state is provided, dtype must be.")
      state = cell.zero_state(batch_size, dtype)

    if sequence_length is not None:
      sequence_length = math_ops.to_int32(sequence_length)

    if sequence_length is not None:  # Prepare variables
      zero_output = array_ops.zeros(
          array_ops.pack([batch_size, cell.output_size]), inputs[0].dtype)
      zero_output.set_shape(
          tensor_shape.TensorShape([fixed_batch_size.value, cell.output_size]))


      min_sequence_length = math_ops.reduce_min(sequence_length)
      max_sequence_length = math_ops.reduce_max(sequence_length)


    for time, input_ in enumerate(inputs):
      if time > 0: vs.get_variable_scope().reuse_variables()
      # pylint: disable=cell-var-from-loop
      call_cell = lambda: cell([ input_ , alphabetEnc[time] ], state)
      # pylint: enable=cell-var-from-loop
      if sequence_length is not None:
        (output, state) = _rnn_step(
            time, sequence_length, min_sequence_length, max_sequence_length,
            zero_output, state, call_cell)
      else:
        (output, state) = call_cell()

      outputs.append(output)

    return (outputs, state)
Example #50
    def ctc_lambda_func(self, args):
        y_pred, labels, input_length, label_length = args
        y_pred = y_pred[:, :, :]

        label_length = math_ops.to_int32(
            array_ops.squeeze(label_length, axis=-1))
        input_length = math_ops.to_int32(
            array_ops.squeeze(input_length, axis=-1))

        sparse_labels = math_ops.to_int32(
            ctc_label_dense_to_sparse(labels, label_length))

        y_pred = math_ops.log(
            array_ops.transpose(y_pred, perm=[1, 0, 2]) + epsilon())

        return array_ops.expand_dims(
            ctc.ctc_loss(inputs=y_pred,
                         labels=sparse_labels,
                         sequence_length=input_length,
                         ignore_longer_outputs_than_inputs=True), 1)
Example #51
def generate_sequence_output(encoder_outputs,
                             encoder_state,
                             num_decoder_symbols,
                             sequence_length,
                             num_heads=1,
                             dtype=dtypes.float32,
                             use_attention=True,
                             loop_function=None,
                             scope=None,
                             DNN_at_output=False,
                             forward_only=False):
  with variable_scope.variable_scope(scope or "non-attention_RNN"):
    attention_encoder_outputs = list()
    sequence_attention_weights = list()
    
    # copy over logits once out of sequence_length
    if encoder_outputs[0].get_shape().ndims != 1:
      (fixed_batch_size, output_size) = encoder_outputs[0].get_shape().with_rank(2)
    else:
      fixed_batch_size = encoder_outputs[0].get_shape().with_rank_at_least(1)[0]

    if fixed_batch_size.value: 
      batch_size = fixed_batch_size.value
    else:
      batch_size = array_ops.shape(encoder_outputs[0])[0]
    if sequence_length is not None:
      sequence_length = math_ops.to_int32(sequence_length)
    if sequence_length is not None:  # Prepare variables
      zero_logit = array_ops.zeros(
          array_ops.pack([batch_size, num_decoder_symbols]), encoder_outputs[0].dtype)
      zero_logit.set_shape(
          tensor_shape.TensorShape([fixed_batch_size.value, num_decoder_symbols]))
      min_sequence_length = math_ops.reduce_min(sequence_length)
      max_sequence_length = math_ops.reduce_max(sequence_length)
  
    for time, input_ in enumerate(encoder_outputs):
      if time > 0: variable_scope.get_variable_scope().reuse_variables()
      
      # pylint: disable=cell-var-from-loop
      if not DNN_at_output:
        generate_logit = lambda: linear_transformation(encoder_outputs[time], output_size, num_decoder_symbols)
      else:
        generate_logit = lambda: multilayer_perceptron(encoder_outputs[time], output_size, 200, num_decoder_symbols, forward_only=forward_only)
      # pylint: enable=cell-var-from-loop
      if sequence_length is not None:
        logit = _step(
            time, sequence_length, min_sequence_length, max_sequence_length, zero_logit, generate_logit)
      else:
        logit = generate_logit()
      attention_encoder_outputs.append(logit)   
    if DNN_at_output:  
      regularizers = get_multilayer_perceptron_regularizers()
    else:
      regularizers = get_linear_transformation_regularizers()
  return attention_encoder_outputs, sequence_attention_weights, regularizers
Example #52
0
def my_ctc_decode(y_pred,
                  input_length,
                  greedy=False,
                  beam_width=50,
                  top_paths=1):
    """Decodes the output of a softmax.
  Can use either greedy search (also known as best path)
  or a constrained dictionary search.
  Arguments:
      y_pred: tensor `(samples, time_steps, num_categories)`
          containing the prediction, or output of the softmax.
      input_length: tensor `(samples, )` containing the sequence length for
          each batch item in `y_pred`.
      greedy: perform much faster best-path search if `true`.
          This does not use a dictionary.
      beam_width: if `greedy` is `false`: a beam search decoder will be used
          with a beam of this width.
      top_paths: if `greedy` is `false`,
          how many of the most probable paths will be returned.
  Returns:
      Tuple:
          List: if `greedy` is `true`, returns a list of one element that
              contains the decoded sequence.
              If `false`, returns the `top_paths` most probable
              decoded sequences.
              Important: blank labels are returned as `-1`.
          Tensor `(top_paths, )` that contains
              the log probability of each decoded sequence.
  """
    y_pred = math_ops.log(
        array_ops.transpose(y_pred, perm=[1, 0, 2]) + epsilon())
    input_length = math_ops.to_int32(input_length)

    if greedy:
        (decoded,
         log_prob) = ctc.ctc_greedy_decoder(inputs=y_pred,
                                            sequence_length=input_length)
    else:
        (decoded,
         log_prob) = my_ctc_beam_search_decoder(inputs=y_pred,
                                                sequence_length=input_length,
                                                beam_width=beam_width,
                                                top_paths=top_paths)
    decoded_dense = [
        sparse_ops.sparse_to_dense(st.indices,
                                   st.dense_shape,
                                   st.values,
                                   default_value=-1) for st in decoded
    ]
    return (decoded_dense, log_prob)
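A hedged usage sketch: `my_ctc_decode` mirrors `tf.keras.backend.ctc_decode`, so the call pattern looks like the following (toy shapes, TF 2.x assumed):

import tensorflow as tf

batch, timesteps, num_classes = 2, 20, 11
y_pred = tf.nn.softmax(tf.random.normal([batch, timesteps, num_classes]), axis=-1)
input_length = tf.fill([batch], timesteps)

# Greedy (best-path) decoding; pass greedy=False to use the beam search decoder.
decoded, log_prob = tf.keras.backend.ctc_decode(
    y_pred, input_length, greedy=True, beam_width=50, top_paths=1)
best_path = decoded[0]   # dense [batch, max_decoded_len]; padding/blanks appear as -1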
Example #53
0
def matrix_square_root(mat_a, mat_a_size, iter_count=100, ridge_epsilon=1e-4):
    """Iterative method to get matrix square root.

  Stable iterations for the matrix square root, Nicholas J. Higham

  Page 231, Eq 2.6b
  http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.6.8799&rep=rep1&type=pdf

  Args:
    mat_a: the symmetric PSD matrix whose matrix square root is to be computed.
    mat_a_size: size of mat_a.
    iter_count: Maximum number of iterations.
    ridge_epsilon: Ridge epsilon added to make the matrix positive definite.

  Returns:
    mat_a^0.5
  """
    def _iter_condition(i, unused_mat_y, unused_old_mat_y, unused_mat_z,
                        unused_old_mat_z, err, old_err):
        # This method requires that we check for divergence every step.
        return math_ops.logical_and(i < iter_count, err < old_err)

    def _iter_body(i, mat_y, unused_old_mat_y, mat_z, unused_old_mat_z, err,
                   unused_old_err):
        current_iterate = 0.5 * (3.0 * identity -
                                 math_ops.matmul(mat_z, mat_y))
        current_mat_y = math_ops.matmul(mat_y, current_iterate)
        current_mat_z = math_ops.matmul(current_iterate, mat_z)
        # Compute the error in approximation.
        mat_sqrt_a = current_mat_y * math_ops.sqrt(norm)
        mat_a_approx = math_ops.matmul(mat_sqrt_a, mat_sqrt_a)
        residual = mat_a - mat_a_approx
        current_err = math_ops.sqrt(math_ops.reduce_sum(
            residual * residual)) / norm
        return i + 1, current_mat_y, mat_y, current_mat_z, mat_z, current_err, err

    identity = linalg_ops.eye(math_ops.to_int32(mat_a_size))
    mat_a = mat_a + ridge_epsilon * identity
    norm = math_ops.sqrt(math_ops.reduce_sum(mat_a * mat_a))
    mat_init_y = mat_a / norm
    mat_init_z = identity
    init_err = norm

    _, _, prev_mat_y, _, _, _, _ = control_flow_ops.while_loop(
        _iter_condition, _iter_body, [
            0, mat_init_y, mat_init_y, mat_init_z, mat_init_z, init_err,
            init_err + 1.0
        ])
    return prev_mat_y * math_ops.sqrt(norm)
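The loop above is the coupled Newton-Schulz iteration from Higham (Eq. 2.6b): after normalizing by the Frobenius norm, mat_y converges to (mat_a / norm)^0.5, which is rescaled by sqrt(norm) at the end. A NumPy sketch of the same recurrence, useful for checking the math offline:

import numpy as np

def np_matrix_square_root(mat_a, iter_count=100, ridge_epsilon=1e-4):
    identity = np.eye(mat_a.shape[0])
    mat_a = mat_a + ridge_epsilon * identity
    norm = np.sqrt(np.sum(mat_a * mat_a))        # Frobenius norm, as in the TF version
    mat_y, mat_z = mat_a / norm, identity
    for _ in range(iter_count):
        t = 0.5 * (3.0 * identity - mat_z @ mat_y)
        mat_y, mat_z = mat_y @ t, t @ mat_z
    return mat_y * np.sqrt(norm)

a = np.array([[4.0, 1.0], [1.0, 3.0]])           # symmetric PSD test matrix
sqrt_a = np_matrix_square_root(a)
print(np.allclose(sqrt_a @ sqrt_a, a + 1e-4 * np.eye(2)))   # True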
Example #54
0
 def body(i, entities_rep, entities_att):
     a1 = entities1_outputs_iter.read(i)
     a2 = entities2_outputs_iter.read(i)
     b = context_avg_iter.read(i)
     l1 = math_ops.to_int32(entities1_lens_iter.read(i))
     l2 = math_ops.to_int32(entities2_lens_iter.read(i))
     print(l1)
     e1 = tf.matmul(a1, weights['entities1_score'])
     e2 = tf.matmul(a2, weights['entities2_score'])
     e12 = tf.matmul(e1, e2)
     # entities_score = tf.reshape(tf.nn.tanh(tf.matmul(e12, tf.reshape(b, [-1, 1])) + biases['entities1_score'] + biases['entities2_score']), [1, -1])
     entities_score = tf.reshape(
         tf.nn.tanh(
             tf.matmul(e12, tf.reshape(b, [-1, 1])) +
             biases['context_score']), [1, -1])
     print(entities_score.shape)
     entities_att_temp = tf.concat([
         tf.nn.softmax(tf.slice(entities_score, [0, 0], [1, l1])),
         tf.zeros([1, self.max_entities_len - l1])
     ], 1)
     entities_att = entities_att.write(i, entities_att_temp)
     entities_rep = entities_rep.write(
         i, tf.matmul(tf.matmul(a1, a2), entities_att_temp))
     return (i + 1, entities_rep, entities_att)
Example #55
0
 def refresh_shortlist():
     """Update the shortlist with the highest scores in id_to_score."""
     new_scores, new_ids = nn_ops.top_k(self.id_to_score,
                                        self.shortlist_size)
     smallest_new_score = math_ops.reduce_min(new_scores)
     new_length = math_ops.reduce_sum(
         math_ops.to_int32(
             math_ops.greater(new_scores, dtypes.float32.min)))
     u1 = self.sl_ids.assign(
         math_ops.to_int64(array_ops.concat([[new_length], new_ids],
                                            0)))
     u2 = self.sl_scores.assign(
         array_ops.concat([[smallest_new_score], new_scores], 0))
     self.last_ops = [u1, u2]
     return control_flow_ops.group(u1, u2)
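The shortlist layout packs the count of valid entries into slot 0 of `sl_ids` and the smallest retained score into slot 0 of `sl_scores`; entries that have never been scored sit at the float32 minimum and are excluded from the count. A NumPy sketch of the same refresh step:

import numpy as np

def refresh_shortlist_np(id_to_score, shortlist_size):
    sentinel = np.finfo(np.float32).min
    new_ids = np.argsort(id_to_score)[::-1][:shortlist_size]      # top-k ids by score
    new_scores = id_to_score[new_ids]
    new_length = np.sum(new_scores > sentinel)                    # how many are real entries
    sl_ids = np.concatenate([[new_length], new_ids])              # slot 0 = valid count
    sl_scores = np.concatenate([[new_scores.min()], new_scores])  # slot 0 = smallest kept
    return sl_ids, sl_scores

scores_in = np.float32([0.3, 0.9, np.finfo(np.float32).min, 0.5])     # one unscored slot
sl_ids, sl_scores = refresh_shortlist_np(scores_in, shortlist_size=4)  # sl_ids[0] == 3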
Example #56
0
    def make_grouping_predictions(self, input_layer, reuse=None):
        """model that predicts grouping (grouping_actions).

    Args:
      input_layer: group_input_layer
      reuse: reuse

    Returns:
       grouping_actions: actions
       grouping_log_probs: log probabilities corresponding to actions
    """
        with variable_scope.variable_scope(self.hparams.name, reuse=True):
            # input_layer: tensor of size [1, num_ops, hidden_size]
            w_grouping_ff = variable_scope.get_variable("w_grouping_ff")
            w_grouping_softmax = variable_scope.get_variable(
                "w_grouping_softmax")

        batch_size = array_ops.shape(input_layer)[0]
        embedding_dim = array_ops.shape(input_layer)[2]

        reshaped = array_ops.reshape(
            input_layer, [batch_size * self.num_ops, embedding_dim])
        ff_output = math_ops.matmul(reshaped, w_grouping_ff)
        logits = math_ops.matmul(ff_output, w_grouping_softmax)
        if self.hparams.logits_std_noise > 0:
            num_in_logits = math_ops.cast(array_ops.size(logits),
                                          dtype=dtypes.float32)
            avg_norm = math_ops.divide(linalg_ops.norm(logits),
                                       math_ops.sqrt(num_in_logits))
            logits_noise = random_ops.random_normal(
                array_ops.shape(logits),
                stddev=self.hparams.logits_std_noise * avg_norm)
            logits = control_flow_ops.cond(
                self.global_step > self.hparams.stop_noise_step,
                lambda: logits, lambda: logits + logits_noise)
        logits = array_ops.reshape(
            logits, [batch_size * self.num_ops, self.num_groups])
        actions = random_ops.multinomial(logits, 1, seed=self.hparams.seed)
        actions = math_ops.to_int32(actions)
        actions = array_ops.reshape(actions, [batch_size, self.num_ops])
        action_label = array_ops.reshape(actions, [-1])
        log_probs = nn_ops.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=action_label)
        log_probs = array_ops.reshape(log_probs, [batch_size, -1])
        log_probs = math_ops.reduce_sum(log_probs, 1)
        grouping_actions = actions
        grouping_log_probs = log_probs
        return grouping_actions, grouping_log_probs
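The sampling pattern at the end, drawing one group per op from the logits and then scoring the draw, can be reproduced with public TF ops. Note that `sparse_softmax_cross_entropy_with_logits` returns the negative log-probability of each sampled action, so the summed quantity named log_probs is really a negative log-probability; a hedged sketch with toy shapes:

import tensorflow as tf

batch_size, num_ops, num_groups = 1, 4, 3
logits = tf.random.normal([batch_size * num_ops, num_groups])

actions = tf.random.categorical(logits, 1)                       # one sampled group per op
actions = tf.cast(tf.reshape(actions, [batch_size, num_ops]), tf.int32)

neg_log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=logits, labels=tf.reshape(actions, [-1]))             # -log p(action) per op
grouping_log_probs = tf.reduce_sum(
    tf.reshape(neg_log_probs, [batch_size, -1]), axis=1)         # summed over ops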
Example #57
0
def _form_group_indices_nd(is_valid, group_size, shuffle=True):
    """Forms the indices for groups for gather_nd or scatter_nd.

  Args:
    is_valid: A boolean `Tensor` for entry validity with shape [batch_size,
      list_size].
    group_size: A scalar int `Tensor` for the number of examples in a group.
    shuffle: A boolean that indicates whether valid indices should be shuffled
      when forming group indices.

  Returns:
    A tuple of Tensors (indices, mask). The first has shape [batch_size,
    num_groups, group_size, 2] and it can be used in gather_nd or scatter_nd for
    group features. The second has the shape of [batch_size, num_groups] with
    value True for valid groups.
  """
    with ops.name_scope(None, 'form_group_indices', (is_valid, group_size)):
        is_valid = ops.convert_to_tensor(is_valid)
        batch_size, list_size = array_ops.unstack(array_ops.shape(is_valid))
        num_valid_entries = math_ops.reduce_sum(math_ops.to_int32(is_valid),
                                                axis=1)

        # A tuple of Tensors (batch_rw_indices, batch_indices_mask). The first
        # has shape [batch_size, size, rw_size] and the second has shape
        # [batch_size, size].
        rw_indices, mask = _rolling_window_indices(list_size, group_size,
                                                   num_valid_entries)
        # Valid indices of the tensor are shuffled and put on the top.
        # [batch_size, list_size, 2]. A deterministic op-level seed is set mainly for
        # unittest purpose. We can find a better way to avoid setting this seed
        # explicitly.
        # A tensor of indices with shape [batch_size, list_size, 2]. The returned
        # tensor can be used with `tf.gather_nd` and `tf.scatter_nd` to compose a new
        # [batch_size, list_size] tensor. The values in the last dimension are the
        # indices for an element in the input tensor.
        shuffled_indices = utils.organize_valid_indices(is_valid,
                                                        shuffle=shuffle,
                                                        seed=87124)
        # Construct indices for gather_nd.
        # [batch_size, num_groups, group_size, 2, 1]
        group_indices_nd = array_ops.expand_dims(rw_indices, axis=3)
        group_indices_nd = array_ops.concat([
            array_ops.reshape(math_ops.range(batch_size), [-1, 1, 1, 1]) *
            array_ops.ones_like(group_indices_nd), group_indices_nd
        ], 3)

        indices = array_ops.gather_nd(shuffled_indices, group_indices_nd)
        return indices, mask
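The `group_indices_nd` construction just pairs every per-row position with its batch index so the result can drive `gather_nd` on a batched tensor. A small sketch of that indexing pattern, independent of the private helpers used above:

import tensorflow as tf

batch_size, list_size, group_size = 2, 5, 3
scores = tf.reshape(tf.range(batch_size * list_size, dtype=tf.float32),
                    [batch_size, list_size])
positions = tf.constant([[0, 2, 4], [1, 1, 3]])     # per-batch positions to gather

# Pair each position with its batch index -> indices of shape [batch, group, 2].
batch_ids = tf.tile(tf.reshape(tf.range(batch_size), [-1, 1]), [1, group_size])
indices_nd = tf.stack([batch_ids, positions], axis=-1)

gathered = tf.gather_nd(scores, indices_nd)         # [[0., 2., 4.], [6., 6., 8.]]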
Example #58
0
 def aspect_body(i, aspect_rep, aspect_att, weights, biases):
     a = aspect_outputs_iter.read(i)
     b = context_avg_iter.read(i)
     l = math_ops.to_int32(aspect_lens_iter.read(i))
     aspect_score = tf.reshape(
         tf.nn.tanh(
             tf.matmul(tf.matmul(a, weights['aspect_score']),
                       tf.reshape(b, [-1, 1])) +
             biases['aspect_score']), [1, -1])
     aspect_att_temp = tf.concat([
         tf.nn.softmax(tf.slice(aspect_score, [0, 0], [1, l])),
         tf.zeros([1, self.cfg.MaxAspectLength - l])
     ], 1)
     aspect_att = aspect_att.write(i, aspect_att_temp)
     aspect_rep = aspect_rep.write(i, tf.matmul(aspect_att_temp, a))
     return (i + 1, aspect_rep, aspect_att, weights, biases)
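The slice-softmax-pad pattern above attends only over the first `l` valid positions and zero-pads the tail up to the maximum aspect length. A NumPy sketch of that masked softmax:

import numpy as np

def masked_softmax(scores, valid_len):
    """Softmax over the first valid_len scores; exact zeros for the padded tail."""
    valid = scores[:valid_len]
    exp = np.exp(valid - valid.max())
    att = exp / exp.sum()
    return np.concatenate([att, np.zeros(len(scores) - valid_len)])

att = masked_softmax(np.array([1.0, 2.0, 0.5, 0.0, 0.0]), valid_len=3)
# att sums to 1 over the first three positions; the last two stay exactly zero.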
Example #59
0
def dynamic_rnn_decoder(decoder_inputs,
                        cell,
                        initial_state=None,
                        dtype=dtypes.float32,
                        sequence_length=None,
                        loop_function=None,
                        parallel_iterations=None,
                        swap_memory=False,
                        time_major=False,
                        scope=None):
    """RNN decoder for seq2seq model.
	This function is functionally identical to the function `rnn_decoder` above, but performs fully dynamic unrolling of `inputs`.
	Unlike `rnn_decoder`, the input `inputs` is not a Python list of `Tensors`. Instead it is a single `Tensor` where the maximum time is either the first or second dimension (see the parameter `time_major`). The corresponding output is a single `Tensor` having the same number of time steps and batch size.

	The parameter `sequence_length` is required and dynamic calculation is automatically performed.

	Args:
		decoder_inputs: the RNN decoder inputs.
		If time_major == False (default), this must be a tensor of shape:
		`[batch_size,max_time,cell_input_size]`
		If time_major == True, this must be a tensor of shape:
		`[max_time,batch_size,cell.input_size]`
	"""
    if not isinstance(cell, rnn_cell.RNNCell):
        raise TypeError("cell must be an instance of RNNCell")
    if not time_major:
        inputs = array_ops.transpose(decoder_inputs, [1, 0, 2])
    else:
        inputs = decoder_inputs
    parallel_iterations = parallel_iterations or 32
    if sequence_length is not None:
        sequence_length = math_ops.to_int32(sequence_length)
        sequence_length = array_ops.identity(sequence_length,
                                             name="sequence_length")
    with variable_scope.variable_scope(scope
                                       or "dynamic_rnn_decoder") as varscope:
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)

        outputs, state = _dynamic_rnn_decoder_loop(inputs, initial_state, cell,
                                                   sequence_length,
                                                   loop_function,
                                                   parallel_iterations,
                                                   swap_memory, dtype)

        if not time_major:
            outputs = array_ops.transpose(outputs, [1, 0, 2])
        return outputs, state
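When `time_major=False`, the decoder transposes `[batch, time, depth]` inputs into `[time, batch, depth]` for the internal loop and transposes the outputs back at the end; a minimal sketch of that convention:

import tensorflow as tf

batch, max_time, depth = 2, 7, 16
decoder_inputs = tf.random.normal([batch, max_time, depth])      # batch-major input

inputs = tf.transpose(decoder_inputs, [1, 0, 2])                 # -> [max_time, batch, depth]
# ... run the time-major decode loop over `inputs` ...
outputs = tf.transpose(inputs, [1, 0, 2])                        # back to [batch, max_time, depth]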