コード例 #1
0
ファイル: crf.py プロジェクト: AlbertXiebnu/tensorflow
def crf_unary_score(tag_indices, sequence_lengths, inputs):
  """Sums the unary potentials selected by each tag sequence.

  The potentials are flattened to one dimension and the score of every
  (example, step) pair is fetched with a single `gather`. Steps beyond the
  true sequence length are zeroed out by a mask before summing.

  Args:
    tag_indices: A [batch_size, max_seq_len] matrix of tag indices.
    sequence_lengths: A [batch_size] vector of true sequence lengths.
    inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials.
  Returns:
    unary_scores: A [batch_size] vector of unary scores.
  """
  dims = array_ops.shape(inputs)
  batch_size = dims[0]
  max_seq_len = dims[1]
  num_tags = dims[2]

  # Start of each example ([batch_size, 1]) and of each step
  # ([1, max_seq_len]) inside the flattened potentials; broadcasting the sum
  # yields the start of every (example, step) row of tags.
  example_offsets = array_ops.expand_dims(
      math_ops.range(batch_size) * max_seq_len * num_tags, 1)
  step_offsets = array_ops.expand_dims(
      math_ops.range(max_seq_len) * num_tags, 0)
  flat_positions = array_ops.reshape(
      example_offsets + step_offsets + tag_indices, [-1])

  gathered = array_ops.gather(array_ops.reshape(inputs, [-1]), flat_positions)
  per_step_scores = array_ops.reshape(gathered, [batch_size, max_seq_len])

  masks = _lengths_to_masks(sequence_lengths, array_ops.shape(tag_indices)[1])
  return math_ops.reduce_sum(per_step_scores * masks, 1)
コード例 #2
0
ファイル: crf_test.py プロジェクト: 1000sprites/tensorflow
  def testCrfLogNorm(self):
    """Checks `crf_log_norm` against a brute-force log-sum-exp of all scores."""
    inputs = np.array(
        [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
    transition_params = np.array(
        [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
    num_words, num_tags = inputs.shape
    sequence_lengths = np.array(3, dtype=np.int32)

    def _sequence_score(tag_prefix):
      # Pad the tag sequence with zeros up to the full number of words.
      padded_tags = list(tag_prefix)
      padded_tags.extend([0] * (num_words - sequence_lengths))
      return crf.crf_sequence_score(
          inputs=array_ops.expand_dims(inputs, 0),
          tag_indices=array_ops.expand_dims(padded_tags, 0),
          sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
          transition_params=constant_op.constant(transition_params))

    with self.test_session() as sess:
      # Compare the dynamic program with brute force computation.
      all_sequence_scores = [
          _sequence_score(tag_prefix)
          for tag_prefix in itertools.product(
              range(num_tags), repeat=sequence_lengths)
      ]
      brute_force_log_norm = math_ops.reduce_logsumexp(all_sequence_scores)

      log_norm = array_ops.squeeze(
          crf.crf_log_norm(
              inputs=array_ops.expand_dims(inputs, 0),
              sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
              transition_params=constant_op.constant(transition_params)),
          [0])
      tf_brute_force_log_norm, tf_log_norm = sess.run(
          [brute_force_log_norm, log_norm])

      self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)
コード例 #3
0
def _cross_squared_distance_matrix(x, y):
  """Pairwise squared distance between two (batch) matrices' rows (2nd dim).

  Computes the pairwise distances between rows of x and rows of y using the
  expansion ||a - b||^2 = a'a - 2 a'b + b'b, so no [n, m, d] intermediate is
  materialised.

  Args:
    x: [batch_size, n, d] float `Tensor`
    y: [batch_size, m, d] float `Tensor`

  Returns:
    squared_dists: [batch_size, n, m] float `Tensor`, where
    squared_dists[b,i,j] = ||x[b,i,:] - y[b,j,:]||^2
  """
  # Squared row norms, shaped [batch_size, n, 1] and [batch_size, 1, m] so
  # that they broadcast against the [batch_size, n, m] cross term.
  x_sq_norms = array_ops.expand_dims(
      math_ops.reduce_sum(math_ops.square(x), 2), 2)
  y_sq_norms = array_ops.expand_dims(
      math_ops.reduce_sum(math_ops.square(y), 2), 1)

  cross_term = math_ops.matmul(x, y, adjoint_b=True)

  # ||x_bi - y_bj||^2 = x_bi'x_bi - 2 x_bi'y_bj + y_bj'y_bj
  return x_sq_norms - 2 * cross_term + y_sq_norms
コード例 #4
0
def _smart_select(pred, fn_then, fn_else):
  """Selects fn_then() or fn_else() based on the value of pred.

  The purpose of this function is the same as `utils.smart_cond`. However, at
  the moment there is a bug (b/36297356) that seems to kick in only when
  `smart_cond` delegates to `tf.cond`, which sometimes results in the training
  hanging when using parameter servers. This function will output the result
  of `fn_then` or `fn_else` if `pred` is known at graph construction time.
  Otherwise, it will use `tf.where` which will result in some redundant work
  (both branches will be computed but only one selected). However, the tensors
  involved will usually be small (means and variances in batchnorm), so the
  cost will be small and will not be incurred at all if `pred` is a constant.

  Args:
    pred: A boolean scalar `Tensor`.
    fn_then: A callable to use when pred==True.
    fn_else: A callable to use when pred==False.

  Returns:
    A `Tensor` whose value is fn_then() or fn_else() based on the value of pred.
  """
  pred_value = utils.constant_value(pred)
  if pred_value is not None:
    # The predicate is known at graph construction time, so only the selected
    # branch is built. Truthiness is tested rather than identity with `False`:
    # an identity test misses statically known falsy values that are not the
    # Python `False` singleton (for example a NumPy boolean) and would send
    # them down the redundant `where` path below.
    return fn_then() if pred_value else fn_else()

  # Dynamic predicate: compute both branches and select one. A leading axis of
  # size 1 is added so that `where` can select with a rank-1 predicate, and is
  # removed again from the result.
  t_then = array_ops.expand_dims(fn_then(), 0)
  t_else = array_ops.expand_dims(fn_else(), 0)
  pred = array_ops.reshape(pred, [1])
  result = array_ops.where(pred, t_then, t_else)
  return array_ops.squeeze(result, [0])
コード例 #5
0
ファイル: uniform.py プロジェクト: 0ruben/tensorflow
  def sample(self, n, seed=None, name="sample"):
    """Sample `n` observations from the Uniform Distributions.

    Standard uniform values are drawn and then shifted by `self.a` and scaled
    by `self.range()`.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: a `Tensor` of shape `(n,) + self.batch_shape + self.event_shape`
          with values of type `self.dtype`.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self.a, self.b, n], name):
        n = ops.convert_to_tensor(n, name="n")
        static_n = tensor_util.constant_value(n)

        sample_shape = array_ops.concat(
            0, [array_ops.pack([n]), self.batch_shape()])
        unit_samples = random_ops.random_uniform(
            shape=sample_shape, dtype=self.dtype, seed=seed)

        # Provide some hints to shape inference
        unit_samples.set_shape(
            tensor_shape.vector(static_n).concatenate(self.get_batch_shape()))

        # Leading axis of size 1 lets the parameters broadcast over samples.
        low = array_ops.expand_dims(self.a, 0)
        width = array_ops.expand_dims(self.range(), 0)
        return low + width * unit_samples
コード例 #6
0
ファイル: multinomial.py プロジェクト: apollos/tensorflow
 def _variance(self):
   """Builds the matrix returned as the distribution's variance.

   Off-diagonal entries are the negated outer product of `self._mean_val`
   and `p`; the diagonal is `self._mean_val * (1 - p)`.
   """
   # Broadcast p against n so both share the same batch shape.
   broadcast_p = self.p * array_ops.expand_dims(array_ops.ones_like(self.n), -1)
   mean_column = array_ops.expand_dims(self._mean_val, -1)
   p_row = array_ops.expand_dims(broadcast_p, -2)
   negated_outer = -math_ops.batch_matmul(mean_column, p_row)
   diagonal = self._mean_val - self._mean_val * broadcast_p
   return array_ops.batch_matrix_set_diag(negated_outer, diagonal)
コード例 #7
0
ファイル: math_utils.py プロジェクト: AutumnQYN/tensorflow
def power_sums_tensor(array_size, power_matrix, multiplier):
  r"""Computes \sum_{i=0}^{N-1} A^i B (A^i)^T for N = 0, ..., array_size.

  The powers A^1 .. A^(array_size - 1) are produced with a scan, each term
  A^i B (A^i)^T is formed with batched matrix products, and the running sums
  are obtained with a cumulative sum.

  Args:
    array_size: The number of non-trivial sums to pre-compute.
    power_matrix: The "A" matrix above.
    multiplier: The "B" matrix above
  Returns:
    A Tensor with S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T
      S[0] is the zero matrix
      S[1] is B
      S[2] is A B A^T + B
      ...and so on
  """
  array_size = math_ops.cast(array_size, dtypes.int32)
  power_matrix = ops.convert_to_tensor(power_matrix)

  # A^0, used to seed the scan; give it the static shape of A.
  identity = linalg_ops.eye(
      array_ops.shape(power_matrix)[0], dtype=power_matrix.dtype)
  identity.set_shape(power_matrix.get_shape())

  def _next_power(previous_power, _):
    return math_ops.matmul(previous_power, power_matrix)

  # Stacked A^1, A^2, ..., A^(array_size - 1).
  transition_powers = functional_ops.scan(
      _next_power,
      math_ops.range(array_size - 1),
      initializer=identity)

  # The i = 0 term is just B; the remaining terms are A^i B (A^i)^T.
  first_term = array_ops.expand_dims(multiplier, 0)
  higher_terms = math_ops.matmul(
      batch_times_matrix(transition_powers, multiplier),
      transition_powers,
      adjoint_b=True)
  partial_sums = math_ops.cumsum(
      array_ops.concat([first_term, higher_terms], 0))

  # Prepend S[0], the empty sum.
  empty_sum = array_ops.expand_dims(array_ops.zeros_like(multiplier), 0)
  return array_ops.concat([empty_sum, partial_sums], 0)
コード例 #8
0
ファイル: init_ops.py プロジェクト: moses-sun/tensorflow
  def __call__(self, shape, dtype=None, partition_info=None):
    """Builds a kernel that is zero except for one scaled orthogonal slice.

    A random square matrix is QR-factorised, the sign of `Q` is fixed using
    the diagonal of `R`, the first `shape[-2]` rows are kept and scaled, and
    the result is scattered into the centre position of the leading
    dimensions of `shape`.

    Args:
      shape: Shape of the tensor to initialize; must have 3, 4 or 5 entries,
        with `shape[-2] <= shape[-1]`.
      dtype: Type of the generated values; defaults to `self.dtype`.
      partition_info: Unused by this method.

    Returns:
      A tensor of the requested shape.

    Raises:
      ValueError: if `shape` does not have 3 to 5 dimensions or if
        `shape[-2] > shape[-1]`.
    """
    if dtype is None:
      dtype = self.dtype

    rank = len(shape)
    if rank < 3 or rank > 5:
      raise ValueError("The tensor to initialize must be at least "
                       "three-dimensional and at most five-dimensional")

    in_filters = shape[-2]
    out_filters = shape[-1]
    if in_filters > out_filters:
      raise ValueError("In_filters cannot be greater than out_filters.")

    # QR-factorise a random square matrix.
    gaussian = random_ops.random_normal(
        [out_filters, out_filters], dtype=dtype, seed=self.seed)
    q, r = linalg_ops.qr(gaussian, full_matrices=False)
    # Make Q uniform
    q *= math_ops.sign(array_ops.diag_part(r))
    q = q[:in_filters, :]
    q *= math_ops.sqrt(math_ops.cast(self.gain, dtype=dtype))

    # Index of the centre element along every leading dimension.
    center = [(shape[axis] - 1) // 2 for axis in range(rank - 2)]
    return array_ops.scatter_nd([center], array_ops.expand_dims(q, 0), shape)
コード例 #9
0
ファイル: init_ops.py プロジェクト: moses-sun/tensorflow
  def _orthogonal_kernel(self, ksize, cin, cout):
    """Construct orthogonal kernel for convolution.

    Args:
      ksize: kernel size
      cin: number of input channels
      cout: number of output channels
    Returns:
      an [ksize, ksize, cin, cout] orthogonal kernel.
    Raises:
      ValueError: if cin > cout.
    """
    if cin > cout:
      raise ValueError("The number of input channels cannot exceed "
                       "the number of output channels.")
    orth = self._orthogonal_matrix(cout)[0:cin, :]
    if ksize == 1:
      # A 1x1 kernel is just the matrix with two leading unit axes.
      return array_ops.expand_dims(array_ops.expand_dims(orth, 0), 0)

    def _random_block():
      # Block built from two freshly drawn symmetric projections.
      return self._block_orth(self._symmetric_projection(cout),
                              self._symmetric_projection(cout))

    # Start from one block and convolve in ksize - 2 more.
    p = _random_block()
    for _ in range(ksize - 2):
      p = self._matrix_conv(p, _random_block())

    # Left-multiply every spatial entry by the cin x cout matrix.
    positions = [(i, j) for i in range(ksize) for j in range(ksize)]
    for position in positions:
      p[position] = math_ops.matmul(orth, p[position])

    return self._dict_to_tensor(p, ksize, ksize)
コード例 #10
0
ファイル: nn_grad.py プロジェクト: DjangoPeng/tensorflow
def _NthElementGrad(op, grad):
  """Return the gradients for NthElement.

  Args:
    op: The NthElementOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the NthElementOp

  Returns:
    A list of two tensors, the first being the gradient w.r.t. the input,
    the second being the gradient w.r.t. the N (None).
  """
  values = op.inputs[0]
  nth_values = array_ops.expand_dims(op.outputs[0], -1)

  # Mark every element equal to the selected value along the last dimension.
  # If several elements tie, the gradient is divided evenly between them.
  is_selected = math_ops.cast(math_ops.equal(nth_values, values), grad.dtype)
  selected_count = array_ops.expand_dims(
      math_ops.reduce_sum(is_selected, -1), -1)

  incoming = array_ops.expand_dims(grad, -1)
  return [math_ops.div(is_selected, selected_count) * incoming, None]
コード例 #11
0
ファイル: nn_grad.py プロジェクト: adit-chandra/tensorflow
def _SoftmaxCrossEntropyWithLogitsGrad(op, grad_loss, grad_grad):
  """Gradient function for SoftmaxCrossEntropyWithLogits.

  Args:
    op: The op being differentiated; `op.inputs[0]` holds the logits and
      `op.outputs[1]` the softmax gradient.
    grad_loss: The backprop for cost.
    grad_grad: The backprop for the softmax gradient; may be None.

  Returns:
    A pair: the gradient w.r.t. the logits and the gradient w.r.t. the labels.
  """
  logits = op.inputs[0]
  # grad_loss is multiplied with the gradients stored in output[1].
  # The second derivative is just the softmax derivative w.r.t. logits.
  grad = _BroadcastMul(grad_loss, op.outputs[1])

  def _FeedsZeros(g):
    # Some introspection to check if the gradient is feeding zeros
    if context.executing_eagerly():
      # TODO(apassos) add an efficient way to detect eager zeros here.
      return False
    if g.op.type in ("ZerosLike", "Zeros"):
      return True
    static_value = tensor_util.constant_value(g)
    if static_value is None:
      return False
    return (static_value == 0).all()

  # The second-order term is skipped when it is absent or known to be zero.
  if grad_grad is not None and not _FeedsZeros(grad_grad):
    softmax = nn_ops.softmax(logits)
    weighted_sum = array_ops.squeeze(
        math_ops.matmul(
            array_ops.expand_dims(grad_grad, 1),
            array_ops.expand_dims(softmax, 2)),
        axis=1)
    grad += (grad_grad - weighted_sum) * softmax

  return grad, _BroadcastMul(grad_loss, -nn_ops.log_softmax(logits))
コード例 #12
0
def cudnn_lstm(inputs, input_h, input_c, kernel, recurrent_kernel, bias, units):
  """Runs an LSTM through the `cudnn_rnn` op.

  The first two axes of `inputs` are swapped before the op and swapped back
  on its output; the initial states get a leading axis of size 1. `kernel`
  and `recurrent_kernel` are each cut into four column blocks of width
  `units`, and `bias` into eight blocks of length `units`, before being
  packed by `_canonical_to_params`.

  Returns:
    A tuple `(outputs, [h, c], runtime)` where `runtime` is a string constant
    with the value 'cudnn'.
  """
  def _column_blocks(matrix):
    # Four consecutive column blocks; the last one is open-ended.
    return [
        matrix[:, :units],
        matrix[:, units:units * 2],
        matrix[:, units * 2:units * 3],
        matrix[:, units * 3:],
    ]

  inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
  input_h = array_ops.expand_dims(input_h, axis=0)
  input_c = array_ops.expand_dims(input_c, axis=0)

  weight_blocks = _column_blocks(kernel) + _column_blocks(recurrent_kernel)
  # Eight consecutive bias blocks; the first and last are open-ended.
  bias_blocks = (
      [bias[:units]] +
      [bias[units * index:units * (index + 1)] for index in range(1, 7)] +
      [bias[units * 7:]])

  params = _canonical_to_params(
      weights=weight_blocks,
      biases=bias_blocks,
      shape=constant_op.constant([-1]))

  outputs, h, c, _ = gen_cudnn_rnn_ops.cudnn_rnn(
      inputs, input_h=input_h, input_c=input_c, params=params)
  outputs = array_ops.transpose(outputs, perm=[1, 0, 2])
  runtime = constant_op.constant(
      'cudnn', dtype=dtypes.string, name='runtime')
  # Drop the leading axis that was added to the states above.
  return outputs, [h[0], c[0]], runtime
コード例 #13
0
  def _operator_and_matrix(self, build_info, dtype, use_placeholder):
    """Builds a scaled-identity operator and the dense matrix it represents.

    Args:
      build_info: Object whose `shape` attribute gives the square (batch)
        matrix shape to build.
      dtype: Type of the generated values.
      use_placeholder: If true, the operator receives its multiplier through
        a `placeholder_with_default` of unknown static shape.

    Returns:
      A pair `(operator, matrix)`.
    """
    shape = list(build_info.shape)
    assert shape[-1] == shape[-2]

    num_rows = shape[-1]
    batch_shape = shape[:-2]

    # Uniform values that are at least length 1 from the origin.  Allows the
    # operator to be well conditioned.
    # Shape batch_shape
    multiplier = linear_operator_test_util.random_sign_uniform(
        shape=batch_shape, minval=1., maxval=2., dtype=dtype)

    # With a placeholder the operator sees the same values but no static shape.
    if use_placeholder:
      lin_op_multiplier = array_ops.placeholder_with_default(
          multiplier, shape=None)
    else:
      lin_op_multiplier = multiplier

    operator = linalg_lib.LinearOperatorScaledIdentity(
        num_rows, lin_op_multiplier)

    # Two trailing unit axes let the multiplier broadcast over the identity.
    scale = array_ops.expand_dims(array_ops.expand_dims(multiplier, -1), -1)
    identity = linalg_ops.eye(num_rows, batch_shape=batch_shape, dtype=dtype)

    return operator, scale * identity
コード例 #14
0
 def call(self, inputs, mask=None):
   """Applies attention to `inputs`, honouring the optional masks.

   Args:
     inputs: List of `[query, value]` or `[query, value, key]`; when the key
       is omitted the value is used as the key.
     mask: Optional list whose first two entries are the query mask and the
       value mask.

   Returns:
     The result of `self._apply_scores`, multiplied by the query mask when
     one is given.
   """
   self._validate_call_args(inputs=inputs, mask=mask)
   q, v = inputs[0], inputs[1]
   k = inputs[2] if len(inputs) > 2 else v
   if mask:
     q_mask, v_mask = mask[0], mask[1]
   else:
     q_mask = v_mask = None

   scores = self._calculate_scores(query=q, key=k)
   if v_mask is not None:
     # Mask of shape [batch_size, 1, Tv].
     v_mask = array_ops.expand_dims(v_mask, axis=-2)

   causal_mask = None
   if self.causal:
     # Creates a lower triangular mask, so position i cannot attend to
     # positions j>i. This prevents the flow of information from the future
     # into the past.
     scores_shape = array_ops.shape(scores)
     # causal_mask_shape = [1, Tq, Tv].
     causal_mask_shape = array_ops.concat(
         [array_ops.ones_like(scores_shape[:-2]), scores_shape[-2:]],
         axis=0)
     causal_mask = _lower_triangular_mask(causal_mask_shape)

   scores_mask = _merge_masks(v_mask, causal_mask)
   result = self._apply_scores(scores=scores, value=v, scores_mask=scores_mask)
   if q_mask is None:
     return result
   # Mask of shape [batch_size, Tq, 1].
   q_mask = array_ops.expand_dims(q_mask, axis=-1)
   return result * math_ops.cast(q_mask, dtype=result.dtype)
コード例 #15
0
ファイル: array_grad.py プロジェクト: Wajih-O/tensorflow
def _ExtractImagePatchesGrad(op, grad):
  """Gradient function for the extract-image-patches op.

  The mapping from input pixels to patch entries is recovered by running the
  same patch extraction on a tensor of pixel indices. The resulting
  (input index, output index) pairs form a sparse 0/1 matrix, and the input
  gradient is that matrix multiplied by the flattened incoming gradient.

  Args:
    op: The op being differentiated; `op.inputs[0]` is the 4-D input and the
      attributes "ksizes", "strides", "rates" and "padding" are read from it.
    grad: The gradient with respect to the op's output.

  Returns:
    A single-element list holding the gradient with respect to the input.
  """
  # Static shape: only rows_in and cols_in are used from this unpacking;
  # batch_size and channels are overwritten with dynamic values just below.
  batch_size, rows_in, cols_in, channels = [
      dim.value for dim in op.inputs[0].shape.dims
  ]
  input_bhwc = array_ops.shape(op.inputs[0])
  batch_size = input_bhwc[0]
  channels = input_bhwc[3]

  # Create indices matrix for input tensor.
  # Note that 0 is preserved for padding location,
  # so indices for input start from 1 to 1 + rows_in * cols_in.
  input_indices_num = 1 + rows_in * cols_in
  input_idx = array_ops.reshape(math_ops.range(1, input_indices_num,
                                               dtype=ops.dtypes.int64),
                                (1, rows_in, cols_in, 1))
  # Extract patches from the index image with the op's own attributes, so
  # each output entry records which input pixel it was read from.
  input_idx_patched = gen_array_ops.extract_image_patches(
      input_idx,
      op.get_attr("ksizes"),
      op.get_attr("strides"),
      op.get_attr("rates"),
      op.get_attr("padding"))

  # Create indices matrix for output tensor.
  _, rows_out, cols_out, _ = [dim.value for dim in op.outputs[0].shape.dims]
  _, ksize_r, ksize_c, _ = op.get_attr("ksizes")
  # Indices for output start from 0.
  output_indices_num = rows_out * cols_out * ksize_r * ksize_c
  output_idx = array_ops.reshape(math_ops.range(output_indices_num,
                                                dtype=ops.dtypes.int64),
                                 (1, rows_out, cols_out, ksize_r * ksize_c))

  # Construct mapping table for indices: (input -> output).
  idx_matrix = array_ops.concat(
      [array_ops.expand_dims(input_idx_patched, axis=-1),
       array_ops.expand_dims(output_idx, axis=-1)],
      axis=-1)
  # One (input index, output index) pair per row.
  idx_map = array_ops.reshape(idx_matrix, (-1, 2))

  # Sparse matrix with a one at every (input index, output index) pair.
  sp_shape = (input_indices_num, output_indices_num)
  sp_mat_full = sparse_tensor.SparseTensor(
      idx_map,
      array_ops.ones([output_indices_num], dtype=grad.dtype),
      sp_shape)
  # Remove all padding locations [0, :].
  sp_mat = sparse_ops.sparse_slice(sp_mat_full,
                                   (1, 0),
                                   (input_indices_num - 1, output_indices_num))

  # Move the batch axis next to the channel axis so the gradient can be
  # flattened to [output_indices_num, batch_size * channels].
  grad_expanded = array_ops.transpose(
      array_ops.reshape(
          grad, (batch_size, rows_out, cols_out, ksize_r, ksize_c, channels)),
      (1, 2, 3, 4, 0, 5))
  grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels))

  # Sum, for every input pixel, the gradients of the entries that read it.
  jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat)

  # Restore the [batch, rows, cols, channels] layout of the input.
  grad_out = array_ops.reshape(jac, (rows_in, cols_in, batch_size, channels))
  grad_out = array_ops.transpose(grad_out, (2, 0, 1, 3))

  return [grad_out]
コード例 #16
0
ファイル: crf_test.py プロジェクト: bikong2/tensorflow
  def testCrfLogLikelihood(self):
    """Checks that the likelihoods of all tag sequences sum to one."""
    inputs = np.array(
        [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
    transition_params = np.array(
        [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
    sequence_lengths = np.array(3, dtype=np.int32)
    num_words, num_tags = inputs.shape

    def _log_likelihood(tag_prefix):
      # Pad the tag sequence with zeros up to the full number of words.
      padded_tags = list(tag_prefix)
      padded_tags.extend([0] * (num_words - sequence_lengths))
      sequence_log_likelihood, _ = crf.crf_log_likelihood(
          inputs=array_ops.expand_dims(inputs, 0),
          tag_indices=array_ops.expand_dims(padded_tags, 0),
          sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
          transition_params=constant_op.constant(transition_params))
      return sequence_log_likelihood

    with self.test_session() as sess:
      # Make sure all probabilities sum to 1.
      all_sequence_log_likelihoods = [
          _log_likelihood(tag_prefix)
          for tag_prefix in itertools.product(
              range(num_tags), repeat=sequence_lengths)
      ]
      total_log_likelihood = math_ops.reduce_logsumexp(
          all_sequence_log_likelihoods)
      tf_total_log_likelihood = sess.run(total_log_likelihood)
      self.assertAllClose(tf_total_log_likelihood, 0.0)
コード例 #17
0
  def _testDrawBoundingBoxColorCycling(self, img):
    """Tests if cycling works appropriately.

    For every box count from 1 to one past the size of the color table, the
    op's output is compared with an image whose border is filled with the
    color selected by `(num_boxes - 1) % num_colors`.

    Args:
      img: 3-D numpy image on which to draw.
    """
    # THIS TABLE MUST MATCH draw_bounding_box_op.cc
    color_table = np.asarray([[1, 1, 0, 1], [0, 0, 1, 1], [1, 0, 0, 1],
                              [0, 1, 0, 1], [0.5, 0, 0.5, 1], [0.5, 0.5, 0, 1],
                              [0.5, 0, 0, 1], [0, 0, 0.5, 1], [0, 1, 1, 1],
                              [1, 0, 1, 1]])
    assert len(img.shape) == 3
    depth = img.shape[2]
    assert depth <= color_table.shape[1]
    assert depth in (1, 3, 4)
    ## Set red channel to 1 if image is GRY.
    if depth == 1:
      color_table[:, 0] = 1

    num_colors = color_table.shape[0]
    for num_boxes in range(1, num_colors + 2):
      # Expected image: border filled with the color for this box count.
      image = np.copy(img)
      color_index = (num_boxes - 1) % num_colors
      test_drawn_image = self._fillBorder(image, color_table[color_index,
                                                             0:depth])

      # num_boxes identical boxes, each covering the whole image.
      bboxes = np.vstack([np.asarray([0, 0, 1, 1])] * num_boxes)
      bboxes = array_ops.expand_dims(math_ops.to_float(bboxes), 0)

      # Generate draw_bounding_box_op drawn image
      image = ops.convert_to_tensor(image)
      image = image_ops_impl.convert_image_dtype(image, dtypes.float32)
      image = array_ops.expand_dims(image, 0)
      image = image_ops.draw_bounding_boxes(image, bboxes)
      with self.test_session(use_gpu=False) as sess:
        op_drawn_image = np.squeeze(sess.run(image), 0)
        self.assertAllEqual(test_drawn_image, op_drawn_image)
コード例 #18
0
 def center_bias(self, center_bias_var, gradients, hessians):
   """Centers the bias using the batch means of gradients and hessians.

   Args:
     center_bias_var: Forwarded unchanged to `self._center_bias_fn`.
     gradients: Gradients for the full batch; averaged over axis 0.
     hessians: Hessians for the full batch; averaged over axis 0.

   Returns:
     Whatever `self._center_bias_fn` returns.
   """
   def _batch_mean(values):
     # Mean over axis 0, keeping that axis with size 1.
     return array_ops.expand_dims(math_ops.reduce_mean(values, 0), 0)

   # For in memory, we already have a full batch of gradients and hessians,
   # so just take a mean and proceed with centering.
   mean_gradients = _batch_mean(gradients)
   mean_hessians = _batch_mean(hessians)
   return self._center_bias_fn(center_bias_var, mean_gradients, mean_hessians)
コード例 #19
0
  def _operator_and_mat_and_feed_dict(self, shape, dtype, use_placeholder):
    """Builds a scaled-identity operator, its dense matrix and a feed dict.

    Args:
      shape: Square (batch) matrix shape to build.
      dtype: Type of the generated values.
      use_placeholder: If true, the operator is rebuilt on a placeholder that
        is fed with the evaluated multiplier.

    Returns:
      A triple `(operator, mat, feed_dict)`; `feed_dict` is None unless
      `use_placeholder` is true.
    """
    shape = list(shape)
    assert shape[-1] == shape[-2]

    num_rows = shape[-1]
    batch_shape = shape[:-2]

    # Uniform values that are at least length 1 from the origin.  Allows the
    # operator to be well conditioned.
    # Shape batch_shape
    multiplier = linear_operator_test_util.random_sign_uniform(
        shape=batch_shape, minval=1., maxval=2., dtype=dtype)

    operator = linalg_lib.LinearOperatorScaledIdentity(num_rows, multiplier)

    feed_dict = None
    if use_placeholder:
      # Evaluate the multiplier once and feed that value to a placeholder, so
      # the operator and the dense matrix are built from the same numbers.
      multiplier_ph = array_ops.placeholder(dtype=dtype)
      multiplier = multiplier.eval()
      operator = linalg_lib.LinearOperatorScaledIdentity(
          num_rows, multiplier_ph)
      feed_dict = {multiplier_ph: multiplier}

    # Two trailing unit axes let the multiplier broadcast over the identity.
    scale = array_ops.expand_dims(array_ops.expand_dims(multiplier, -1), -1)
    identity = linalg_ops.eye(num_rows, batch_shape=batch_shape, dtype=dtype)
    mat = scale * identity

    return operator, mat, feed_dict
コード例 #20
0
ファイル: pooling.py プロジェクト: ChengYuXiang/tensorflow
  def call(self, inputs):
    """Pools 3-D inputs by routing them through a 4-D pooling function.

    A dummy axis of size 1 is inserted, `self.pool_function` pools along the
    remaining spatial axis, and the dummy axis is removed again.
    """
    # There is no TF op for 1D pooling, hence we make the inputs 4D.
    channels_last = self.data_format == 'channels_last'
    # NWC becomes NHWC (dummy axis 1); NCW becomes NCHW (dummy axis 2).
    dummy_axis = 1 if channels_last else 2
    inputs = array_ops.expand_dims(inputs, dummy_axis)

    # pool on the W dim
    if channels_last:
      pool_shape = (1, 1) + self.pool_size + (1,)
      strides = (1, 1) + self.strides + (1,)
      data_format = 'NHWC'
    else:
      pool_shape = (1, 1, 1) + self.pool_size
      strides = (1, 1, 1) + self.strides
      data_format = 'NCHW'

    outputs = self.pool_function(
        inputs,
        ksize=pool_shape,
        strides=strides,
        padding=self.padding.upper(),
        data_format=data_format)

    return array_ops.squeeze(outputs, dummy_axis)
コード例 #21
0
ファイル: shape_ops.py プロジェクト: AlbertXiebnu/tensorflow
def frames(signal, frame_length, frame_step, name=None):
  """Frame a signal into overlapping frames.

  May be used in front of spectral functions.

  For example:

  ```python
  pcm = tf.placeholder(tf.float32, [None, 9152])
  frames = tf.contrib.signal.frames(pcm, 512, 180)
  magspec = tf.abs(tf.spectral.rfft(frames, [512]))
  image = tf.expand_dims(magspec, 3)
  ```

  Args:
    signal: A `Tensor` of shape `[batch_size, signal_length]`.
    frame_length: An `int32` or `int64` `Tensor`. The length of each frame.
    frame_step: An `int32` or `int64` `Tensor`. The step between frames.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of frames with shape `[batch_size, num_frames, frame_length]`.

  Raises:
    ValueError: if signal does not have rank 2.
  """
  with ops.name_scope(name, "frames", [signal, frame_length, frame_step]):
    signal = ops.convert_to_tensor(signal, name="signal")
    frame_length = ops.convert_to_tensor(frame_length, name="frame_length")
    frame_step = ops.convert_to_tensor(frame_step, name="frame_step")

    signal_rank = signal.shape.ndims

    if signal_rank != 2:
      # The rank is an int (or None when unknown), so it has to be formatted
      # into the message; concatenating it to a str would raise TypeError
      # instead of the documented ValueError.
      raise ValueError(
          "expected signal to have rank 2 but was %s" % signal_rank)

    signal_length = array_ops.shape(signal)[1]

    # Number of frames needed to cover the signal, rounding the last partial
    # frame up.
    num_frames = math_ops.ceil((signal_length - frame_length) / frame_step)
    num_frames = 1 + math_ops.cast(num_frames, dtypes.int32)

    # Zero-pad the end of the signal so the last frame is complete.
    pad_length = (num_frames - 1) * frame_step + frame_length
    pad_signal = array_ops.pad(signal, [[0, 0], [0,
                                                 pad_length - signal_length]])

    # indices[f, k] = f * frame_step + k: position of sample k of frame f.
    indices_frame = array_ops.expand_dims(math_ops.range(frame_length), 0)
    indices_frames = array_ops.tile(indices_frame, [num_frames, 1])

    indices_step = array_ops.expand_dims(
        math_ops.range(num_frames) * frame_step, 1)
    indices_steps = array_ops.tile(indices_step, [1, frame_length])

    indices = indices_frames + indices_steps

    # TODO(androbin): remove `transpose` when `gather` gets `axis` support
    pad_signal = array_ops.transpose(pad_signal)
    signal_frames = array_ops.gather(pad_signal, indices)
    signal_frames = array_ops.transpose(signal_frames, perm=[2, 0, 1])

    return signal_frames
コード例 #22
0
ファイル: uniform.py プロジェクト: AliMiraftab/tensorflow
 def _sample_n(self, n, seed=None):
   """Draws `n` samples by shifting and scaling standard uniform values.

   Args:
     n: Number of samples to draw; becomes the leading dimension.
     seed: Seed forwarded to `random_uniform`.

   Returns:
     `self.a + self.range() * u`, with `u` of shape `[n] + batch_shape`.
   """
   sample_shape = array_ops.concat(([n], self.batch_shape()), 0)
   unit_samples = random_ops.random_uniform(
       shape=sample_shape, dtype=self.dtype, seed=seed)
   # Leading axis of size 1 lets the parameters broadcast over samples.
   low = array_ops.expand_dims(self.a, 0)
   width = array_ops.expand_dims(self.range(), 0)
   return low + width * unit_samples
コード例 #23
0
def _mask_probs(probs, eos_token, finished):
  """Masks log probabilities.

  The result is that finished beams allocate all probability mass to eos and
  unfinished beams remain unchanged.

  The selection is done with `where` rather than by multiplying with 0/1
  masks: a multiplicative mask turns any `-inf` log probability into NaN
  (`0 * -inf`), which would then leak into the result through the sum.

  Args:
    probs: Log probabilities of shape `[batch_size, beam_width, vocab_size]`
    eos_token: An int32 id corresponding to the EOS token to allocate
      probability to.
    finished: A boolean tensor of shape `[batch_size, beam_width]` that
      specifies which elements in the beam are finished already.

  Returns:
    A tensor of shape `[batch_size, beam_width, vocab_size]`, where unfinished
    beams stay unchanged and finished beams are replaced with a tensor with all
    probability on the EOS token.
  """
  vocab_size = array_ops.shape(probs)[2]
  # All finished examples are replaced with a vector that has all
  # probability on EOS
  finished_row = array_ops.one_hot(
      eos_token,
      vocab_size,
      dtype=probs.dtype,
      on_value=0.,
      off_value=probs.dtype.min)
  # Tile the EOS row and the finished flags to the full
  # [batch_size, beam_width, vocab_size] shape so that all three arguments of
  # `where` have identical shapes.
  finished_probs = array_ops.tile(
      array_ops.reshape(finished_row, [1, 1, -1]),
      array_ops.concat([array_ops.shape(finished), [1]], 0))
  finished_mask = array_ops.tile(
      array_ops.expand_dims(finished, 2), [1, 1, vocab_size])
  return array_ops.where(finished_mask, finished_probs, probs)
コード例 #24
0
def _build_multilabel_adjacency(sparse_labels):
  """Assembles a dense pairwise-overlap matrix from sparse label vectors.

  Entry `(i, j)` is the reduced sum of
  `sparse_minimum(sparse_labels[i], sparse_labels[j])`, which for boolean
  inputs acts as an AND followed by a count, i.e. a dot product. The matrix
  is built one entry at a time: each scalar is padded out to the full
  `[n, n]` shape and accumulated.

  Args:
    sparse_labels: List of 1-D boolean sparse tensors.

  Returns:
    adjacency_matrix: 2-D dense `Tensor` of shape `[n, n]`, where `n` is
    `len(sparse_labels)`.
  """
  n = len(sparse_labels)
  adjacency = array_ops.zeros([n, n])
  for row in range(n):
    rows_below = n - row - 1
    for col in range(n):
      overlap = sparse_ops.sparse_minimum(sparse_labels[row],
                                          sparse_labels[col])
      entry = math_ops.to_float(sparse_ops.sparse_reduce_sum(overlap))
      # Lift the scalar to shape [1, 1] so it can be padded into position.
      entry = array_ops.expand_dims(entry, 0)
      entry = array_ops.expand_dims(entry, 1)
      paddings = [[row, rows_below], [col, n - col - 1]]
      adjacency += array_ops.pad(entry, paddings, 'CONSTANT')

  return adjacency
コード例 #25
0
ファイル: callbacks.py プロジェクト: LugarkPirog/tensorflow
  def set_model(self, model):
    """Binds `model` to the callback and prepares summaries and the writer.

    When `histogram_freq` is truthy and no merged summary exists yet, a
    histogram summary is registered for every layer weight (plus an image
    summary when `write_images` is set) and for every layer that has an
    `output` attribute. The merged summary op and the `FileWriter` are then
    created regardless of that condition.

    Args:
      model: The model to attach; stored on `self.model`.
    """
    self.model = model
    self.sess = K.get_session()

    needs_summaries = self.histogram_freq and self.merged is None
    if needs_summaries:
      for layer in self.model.layers:
        for weight in layer.weights:
          tf_summary.histogram(weight.name, weight)
          if not self.write_images:
            continue
          image = array_ops.squeeze(weight)
          # The shape is read once, before any transpose/expand below.
          static_shape = image.get_shape()
          if len(static_shape) > 1 and static_shape[0] > static_shape[1]:
            image = array_ops.transpose(image)
          if len(static_shape) == 1:
            image = array_ops.expand_dims(image, 0)
          # Wrap with a leading and a trailing axis of size 1.
          image = array_ops.expand_dims(image, 0)
          image = array_ops.expand_dims(image, -1)
          tf_summary.image(weight.name, image)

        if hasattr(layer, 'output'):
          tf_summary.histogram('{}_out'.format(layer.name), layer.output)
    self.merged = tf_summary.merge_all()

    # The session graph is handed to the writer only when write_graph is set.
    if self.write_graph:
      writer_args = (self.log_dir, self.sess.graph)
    else:
      writer_args = (self.log_dir,)
    self.writer = tf_summary.FileWriter(*writer_args)
コード例 #26
0
 def testCrfSequenceScore(self):
   """Checks `crf.crf_sequence_score` against `calculateSequenceScore`."""
   transition_params = np.array(
       [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
   # Each case is (sequence_lengths, inputs, tag_indices). The first is the
   # regular case, the second exercises a length-1 sequence.
   cases = [
       (np.array(3, dtype=np.int32),
        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]],
                 dtype=np.float32),
        np.array([1, 2, 1, 0], dtype=np.int32)),
       (np.array(1, dtype=np.int32),
        np.array([[4, 5, -3]], dtype=np.float32),
        np.array([1], dtype=np.int32)),
   ]
   for sequence_lengths, inputs, tag_indices in cases:
     with self.test_session() as sess:
       # The op is fed a batch of one, so add a leading batch axis to every
       # argument and squeeze it away from the result.
       batched_score = crf.crf_sequence_score(
           inputs=array_ops.expand_dims(inputs, 0),
           tag_indices=array_ops.expand_dims(tag_indices, 0),
           sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
           transition_params=constant_op.constant(transition_params))
       score_op = array_ops.squeeze(batched_score, [0])
       actual = sess.run(score_op)
       expected = self.calculateSequenceScore(
           inputs, transition_params, tag_indices, sequence_lengths)
       self.assertAllClose(actual, expected)
コード例 #27
0
ファイル: dirichlet.py プロジェクト: yxiong/tensorflow
 def _variance(self):
     """Builds a matrix from the normalized concentration parameters.

     With `scale = alpha_sum * sqrt(1 + alpha_sum)` and `a = alpha / scale`,
     the off-diagonal entries are `-a_i * a_j` (negated outer product of `a`
     with itself) and the diagonal is overwritten with
     `a_i * (alpha_sum / scale - a_i)`.
     """
     scale = self.alpha_sum * math_ops.sqrt(1.0 + self.alpha_sum)
     alpha = self.alpha / scale
     # Outer product via matmul of a column vector with a row vector.
     column = array_ops.expand_dims(alpha, dim=-1)
     row = array_ops.expand_dims(alpha, dim=-2)
     negated_outer = -math_ops.batch_matmul(column, row)
     diagonal = alpha * (self.alpha_sum / scale - alpha)
     return array_ops.batch_matrix_set_diag(negated_outer, diagonal)
コード例 #28
0
 def loop_fn(i):
   """Runs `model_fn` on the `i`-th slice of the inputs and initial state.

   Every tensor in the enclosing `inputs` and `init_state` is gathered at
   index `i` and given back a leading axis of size 1 before the call.
   """

   def _slice_at_i(tensor):
     return array_ops.expand_dims(array_ops.gather(tensor, i), 0)

   loop_inputs = [_slice_at_i(x) for x in inputs]
   state_parts = [_slice_at_i(x) for x in init_state]
   loop_init_state = rnn_cell.LSTMStateTuple(*state_parts)
   return model_fn(loop_inputs, loop_init_state)
コード例 #29
0
  def __init__(self,
               num_rows,
               multiplier,
               is_non_singular=None,
               is_self_adjoint=None,
               is_positive_definite=None,
               assert_proper_shapes=False,
               name="LinearOperatorScaledIdentity"):
    """Initialize a `LinearOperatorScaledIdentity`.

    `num_rows` fixes the size of each identity matrix; `multiplier` supplies
    the `dtype`, the batch shape and the scale of each matrix. The leading
    (batch) dimensions can be broadcast.

    Args:
      num_rows:  Scalar non-negative integer `Tensor`.  Number of rows in the
        corresponding identity matrix.
      multiplier:  `Tensor` of shape `[B1,...,Bb]`, or `[]` (a scalar).
      is_non_singular:  Expect that this operator is non-singular.
      is_self_adjoint:  Expect that this operator is equal to its hermitian
        transpose.
      is_positive_definite:  Expect that this operator is positive definite.
      assert_proper_shapes:  Python `bool`.  If `False`, only perform static
        checks that initialization and method arguments have proper shape.
        If `True`, and static checks are inconclusive, add asserts to the graph.
      name: A name for this `LinearOperator`

    Raises:
      ValueError:  If `num_rows` is determined statically to be non-scalar, or
        negative.
    """
    self._assert_proper_shapes = assert_proper_shapes

    with ops.name_scope(name, values=[multiplier, num_rows]):
      self._multiplier = ops.convert_to_tensor(multiplier, name="multiplier")

      # The base class takes its dtype from the converted multiplier.
      super(LinearOperatorScaledIdentity, self).__init__(
          dtype=self._multiplier.dtype,
          is_non_singular=is_non_singular,
          is_self_adjoint=is_self_adjoint,
          is_positive_definite=is_positive_definite,
          name=name)

      # Append two unit axes: [B1,...,Bb] -> [B1,...,Bb, 1, 1].
      multiplier_column = array_ops.expand_dims(self.multiplier, -1)
      self._multiplier_matrix = array_ops.expand_dims(multiplier_column, -1)
      self._multiplier_matrix_conj = math_ops.conj(self._multiplier_matrix)
      self._abs_multiplier = math_ops.abs(self.multiplier)

      self._num_rows = linear_operator_util.shape_tensor(
          num_rows, name="num_rows")
      self._num_rows_static = tensor_util.constant_value(self._num_rows)
      self._check_num_rows_possibly_add_asserts()
      # `self._num_rows` is read again after the check on purpose, in case
      # the check rebinds the attribute.
      self._num_rows_cast_to_dtype = math_ops.cast(self._num_rows, self.dtype)
      self._num_rows_cast_to_real_dtype = math_ops.cast(
          self._num_rows, self.dtype.real_dtype)
コード例 #30
0
ファイル: losses.py プロジェクト: AndrewTwinz/tensorflow
def per_example_maxent_loss(labels, weights, logits, num_classes, eps=1e-15):
  """Per-example maximum entropy (multiclass logistic) loss.

  The loss is `-log(p)`, where `p` is the softmax probability assigned to the
  labelled class, clipped to `[eps, 1 - eps]`.

  Args:
    labels: Rank 2 (N, 1) or Rank 1 (N) tensor of per-example labels.
    weights: Rank 2 (N, 1) tensor of per-example weights, or `None` to leave
      the loss unweighted.
    logits: Rank 2 (N, K) tensor of per-example predictions, K - num of
      classes.
    num_classes: number of classes in classification task. Used to expand label
      indices into one-hot encodings.
    eps: tolerance, used as a minimum possible value.

  Returns:
    loss: A Rank 2 (N, 1) tensor of per-example maxent loss
    update_op: A `no_op`, returned in the position of an update operation.
  """
  labels = math_ops.to_int64(labels)
  # Labels that are not rank 2 get an extra axis at position 1.
  if len(labels.get_shape()) != 2:
    labels = array_ops.expand_dims(labels, 1)
  # Turn class indices into one-hot rows, then collapse axis 1 again.
  one_hot_targets = array_ops.one_hot(indices=labels, depth=num_classes)
  labels = math_ops.reduce_sum(
      input_tensor=one_hot_targets, reduction_indices=[1])
  labels = math_ops.to_float(labels)

  # Softmax by hand; eps is added to the normalizer before dividing.
  exp_logits = math_ops.exp(logits)
  partition = math_ops.reduce_sum(exp_logits, 1, keepdims=True)
  softmax_predictions = math_ops.divide(exp_logits,
                                        math_ops.add(partition, eps))

  # The one-hot labels select the probability of the labelled class.
  true_class_prob = math_ops.reduce_sum(labels * softmax_predictions, 1)

  # Constant tensors holding the clipping bounds eps and 1 - eps.
  lower_bound = array_ops.zeros_like(
      true_class_prob, dtype=logits.dtype) + eps
  upper_bound = array_ops.ones_like(
      true_class_prob, dtype=logits.dtype) - eps

  # Clip from below: maximum(eps, prob).
  above_floor = (true_class_prob >= eps)
  true_class_prob = array_ops.where(above_floor, true_class_prob, lower_bound)

  # Clip from above: minimum(1 - eps, prob).
  below_ceiling = (true_class_prob <= 1 - eps)
  true_class_prob = array_ops.where(below_ceiling, true_class_prob,
                                    upper_bound)

  unweighted_loss = array_ops.expand_dims(-math_ops.log(true_class_prob), 1)
  if weights is not None:
    return unweighted_loss * weights, control_flow_ops.no_op()
  return unweighted_loss, control_flow_ops.no_op()
コード例 #31
0
    def __init__(self,
                 cell,
                 embedding,
                 start_tokens,
                 end_token,
                 initial_state,
                 beam_width,
                 output_layer=None,
                 emo_output_layer=None,
                 emo_choice_layer=None,
                 length_penalty_weight=0.0):
        """Initialize the ECMBeamSearchDecoder.

        Args:
          cell: An `RNNCell` instance.
          embedding: A callable that takes a vector tensor of `ids` (argmax
            ids), or the `params` argument for `embedding_lookup`.
          start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
          end_token: `int32` scalar, the token that marks end of decoding.
          initial_state: A (possibly nested tuple of...) tensors and
            TensorArrays.
          beam_width:  Python integer, the number of beams.
          output_layer: (Optional) An instance of `tf.layers.Layer`, i.e.,
            `tf.layers.Dense`.  Stored as the projection onto the generic
            vocabulary.
          emo_output_layer: (Optional) Stored as the projection onto the
            emotion vocabulary.  Not type-checked here.
          emo_choice_layer: (Optional) Stored as the projection that yields
            the probability, in (0, 1), of choosing an emotion word.  Not
            type-checked here.
          length_penalty_weight: Float weight to penalize length. Disabled
            with 0.0.

        Raises:
          TypeError: if `cell` is not an instance of `RNNCell`,
            or `output_layer` is not an instance of `tf.layers.Layer`.
          ValueError: If `start_tokens` is not a vector or
            `end_token` is not a scalar.
        """
        if not rnn_cell_impl._like_rnncell(cell):  # pylint: disable=protected-access
            raise TypeError("cell must be an RNNCell, received: %s" %
                            type(cell))
        if output_layer is not None:
            if not isinstance(output_layer, layers_base.Layer):
                raise TypeError("output_layer must be a Layer, received: %s" %
                                type(output_layer))

        self._cell = cell
        # Projection onto the generic vocabulary.
        self._output_layer = output_layer
        # ECM-specific layers: projection onto the emotion vocabulary, and the
        # projection giving the probability (between 0 and 1) of choosing an
        # emotion word.
        self._emo_output_layer = emo_output_layer
        self._emo_choice_layer = emo_choice_layer

        # A non-callable `embedding` is treated as lookup params.
        self._embedding_fn = (
            embedding if callable(embedding) else
            (lambda ids: embedding_ops.embedding_lookup(embedding, ids)))

        self._start_tokens = ops.convert_to_tensor(
            start_tokens, dtype=dtypes.int32, name="start_tokens")
        if self._start_tokens.get_shape().ndims != 1:
            raise ValueError("start_tokens must be a vector")
        self._end_token = ops.convert_to_tensor(
            end_token, dtype=dtypes.int32, name="end_token")
        if self._end_token.get_shape().ndims != 0:
            raise ValueError("end_token must be a scalar")

        self._batch_size = array_ops.size(start_tokens)
        self._beam_width = beam_width
        self._length_penalty_weight = length_penalty_weight
        self._initial_cell_state = nest.map_structure(
            self._maybe_split_batch_beams, initial_state,
            self._cell.state_size)

        # Repeat each start token across the beam axis:
        # [batch_size] -> [batch_size, beam_width].
        start_tokens_column = array_ops.expand_dims(self._start_tokens, 1)
        self._start_tokens = array_ops.tile(start_tokens_column,
                                            [1, self._beam_width])
        self._start_inputs = self._embedding_fn(self._start_tokens)
        # All-False flags (zeros of dtype bool): no beam starts out finished.
        finished_shape = [self._batch_size, self._beam_width]
        self._finished = array_ops.zeros(finished_shape, dtype=dtypes.bool)
コード例 #32
0
  def __init__(self,
               cell,
               embedding,
               start_tokens,
               end_token,
               initial_state,
               beam_width,
               output_layer=None,
               length_penalty_weight=0.0,
               reorder_tensor_arrays=True):
    """Initialize the BeamSearchDecoder.

    Args:
      cell: An `RNNCell` instance.
      embedding: A callable that takes a vector tensor of `ids` (argmax ids),
        or the `params` argument for `embedding_lookup`.
      start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
      end_token: `int32` scalar, the token that marks end of decoding.
      initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
      beam_width:  Python integer, the number of beams.
      output_layer: (Optional) An instance of `tf.layers.Layer`, i.e.,
        `tf.layers.Dense`.  Optional layer to apply to the RNN output prior
        to storing the result or sampling.
      length_penalty_weight: Float weight to penalize length. Disabled with 0.0.
      reorder_tensor_arrays: If `True`, `TensorArray`s' elements within the cell
        state will be reordered according to the beam search path. If the
        `TensorArray` can be reordered, the stacked form will be returned.
        Otherwise, the `TensorArray` will be returned as is. Set this flag to
        `False` if the cell state contains `TensorArray`s that are not amenable
        to reordering.

    Raises:
      TypeError: if `cell` is not an instance of `RNNCell`,
        or `output_layer` is not an instance of `tf.layers.Layer`.
      ValueError: If `start_tokens` is not a vector or
        `end_token` is not a scalar.
    """
    rnn_cell_impl.assert_like_rnncell("cell", cell)  # pylint: disable=protected-access
    if output_layer is not None:
      if not isinstance(output_layer, layers_base.Layer):
        raise TypeError(
            "output_layer must be a Layer, received: %s" % type(output_layer))

    self._cell = cell
    self._output_layer = output_layer
    self._reorder_tensor_arrays = reorder_tensor_arrays

    # A non-callable `embedding` is treated as lookup params.
    self._embedding_fn = (
        embedding if callable(embedding) else
        (lambda ids: embedding_ops.embedding_lookup(embedding, ids)))

    self._start_tokens = ops.convert_to_tensor(
        start_tokens, dtype=dtypes.int32, name="start_tokens")
    if self._start_tokens.get_shape().ndims != 1:
      raise ValueError("start_tokens must be a vector")
    self._end_token = ops.convert_to_tensor(
        end_token, dtype=dtypes.int32, name="end_token")
    if self._end_token.get_shape().ndims != 0:
      raise ValueError("end_token must be a scalar")

    self._batch_size = array_ops.size(start_tokens)
    self._beam_width = beam_width
    self._length_penalty_weight = length_penalty_weight
    self._initial_cell_state = nest.map_structure(
        self._maybe_split_batch_beams, initial_state, self._cell.state_size)

    # Repeat each start token across the beam axis:
    # [batch_size] -> [batch_size, beam_width].
    start_tokens_column = array_ops.expand_dims(self._start_tokens, 1)
    self._start_tokens = array_ops.tile(start_tokens_column,
                                        [1, self._beam_width])
    self._start_inputs = self._embedding_fn(self._start_tokens)

    # One-hot on index 0 with on_value=False / off_value=True: beam 0 of every
    # batch entry starts unfinished, every other beam starts flagged finished.
    first_beam_index = array_ops.zeros([self._batch_size], dtype=dtypes.int32)
    self._finished = array_ops.one_hot(
        first_beam_index,
        depth=self._beam_width,
        on_value=False,
        off_value=True,
        dtype=dtypes.bool)
コード例 #33
0
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
  """Performs a single step of Beam Search Decoding.

  The step scores every (beam, word) continuation, keeps the `beam_width`
  best ones per batch entry, and gathers log probs, finished flags, lengths
  and cell states of the parent beams those continuations came from.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int.  The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A tuple `(output, next_state)`: a `BeamSearchDecoderOutput` holding the
    selected scores, predicted word ids and parent beam ids, and the new
    `BeamSearchDecoderState`.
  """
  # None when the batch size is not known at graph construction time; only
  # used for the set_shape calls below.
  static_batch_size = tensor_util.constant_value(batch_size)

  # Calculate the current lengths of the predictions
  prediction_lengths = beam_state.lengths
  previously_finished = beam_state.finished

  # Calculate the total log probs for the new hypotheses
  # Final Shape: [batch_size, beam_width, vocab_size]
  step_log_probs = nn_ops.log_softmax(logits)
  step_log_probs = _mask_probs(step_log_probs, end_token, previously_finished)
  total_probs = array_ops.expand_dims(beam_state.log_probs, 2) + step_log_probs

  # Calculate the continuation lengths by adding to all continuing beams.
  # The one_hot yields 0 at the end_token position and 1 everywhere else, and
  # the mask then zeroes the whole row of beams that were already finished.
  vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
  lengths_to_add = array_ops.one_hot(
      indices=array_ops.fill([batch_size, beam_width], end_token),
      depth=vocab_size,
      on_value=np.int64(0),
      off_value=np.int64(1),
      dtype=dtypes.int64)
  add_mask = math_ops.to_int64(math_ops.logical_not(previously_finished))
  lengths_to_add *= array_ops.expand_dims(add_mask, 2)
  new_prediction_lengths = (
      lengths_to_add + array_ops.expand_dims(prediction_lengths, 2))

  # Calculate the scores for each beam
  scores = _get_scores(
      log_probs=total_probs,
      sequence_lengths=new_prediction_lengths,
      length_penalty_weight=length_penalty_weight)

  # NOTE(review): `time` is converted to a tensor here but is not read again
  # in this function; no time-dependent branch is visible in this step.
  time = ops.convert_to_tensor(time, name="time")
  # Merge the beam and vocab axes so that top_k below ranks every
  # (beam, word) continuation of a batch entry together.
  scores_flat = array_ops.reshape(scores, [batch_size, -1])

  # Pick the next beams according to the specified successors function
  next_beam_size = ops.convert_to_tensor(
      beam_width, dtype=dtypes.int32, name="beam_width")
  next_beam_scores, word_indices = nn_ops.top_k(scores_flat, k=next_beam_size)

  next_beam_scores.set_shape([static_batch_size, beam_width])
  word_indices.set_shape([static_batch_size, beam_width])

  # Pick out the probs, beam_ids, and states according to the chosen predictions
  next_beam_probs = _tensor_gather_helper(
      gather_indices=word_indices,
      gather_from=total_probs,
      batch_size=batch_size,
      range_size=beam_width * vocab_size,
      gather_shape=[-1],
      name="next_beam_probs")
  # Note: just doing the following
  #   math_ops.to_int32(word_indices % vocab_size,
  #       name="next_beam_word_ids")
  # would be a lot cleaner but for reasons unclear, that hides the results of
  # the op which prevents capturing it with tfdbg debug ops.
  raw_next_word_ids = math_ops.mod(
      word_indices, vocab_size, name="next_beam_word_ids")
  next_word_ids = math_ops.to_int32(raw_next_word_ids)
  # A flat index is beam * vocab_size + word, so dividing by vocab_size
  # recovers the parent beam.
  # NOTE(review): whether `/` is floor or true division depends on the
  # module's division semantics; the to_int32 cast is applied either way.
  next_beam_ids = math_ops.to_int32(
      word_indices / vocab_size, name="next_beam_parent_ids")

  # Append new ids to current predictions
  # From here on `previously_finished` holds the flag of each selected
  # hypothesis' parent beam, in the new beam order.
  previously_finished = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=previously_finished,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_finished = math_ops.logical_or(
      previously_finished,
      math_ops.equal(next_word_ids, end_token),
      name="next_beam_finished")

  # Calculate the length of the next predictions.
  # 1. Finished beams remain unchanged.
  # 2. Beams that are now finished (EOS predicted) have their length
  #    increased by 1.
  # 3. Beams that are not yet finished have their length increased by 1.
  lengths_to_add = math_ops.to_int64(math_ops.logical_not(previously_finished))
  next_prediction_len = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=beam_state.lengths,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_prediction_len += lengths_to_add

  # Pick out the cell_states according to the next_beam_ids. We use a
  # different gather_shape here because the cell_state tensors, i.e.
  # the tensors that would be gathered from, all have dimension
  # greater than two and we need to preserve those dimensions.
  # pylint: disable=g-long-lambda
  next_cell_state = nest.map_structure(
      lambda gather_from: _maybe_tensor_gather_helper(
          gather_indices=next_beam_ids,
          gather_from=gather_from,
          batch_size=batch_size,
          range_size=beam_width,
          gather_shape=[batch_size * beam_width, -1]),
      next_cell_state)
  # pylint: enable=g-long-lambda

  next_state = BeamSearchDecoderState(
      cell_state=next_cell_state,
      log_probs=next_beam_probs,
      lengths=next_prediction_len,
      finished=next_finished)

  output = BeamSearchDecoderOutput(
      scores=next_beam_scores,
      predicted_ids=next_word_ids,
      parent_ids=next_beam_ids)

  return output, next_state
コード例 #34
0
ファイル: loss_functions.py プロジェクト: sgcm520/tensorflow2
 def multiply_fisher_factor_replicated_one_hot(self, index):
   """Returns `sqrt(p * (1 - p))` for one class, placed into a zero tensor.

   Args:
     index: Length-1 sequence holding the class (column) index.

   Returns:
     The result of `insert_slice_in_zeros`, called with the `[..., 1]` slice
     for column `index[0]` of `self._probs`, axis 1, the static size of axis 1
     of `self._logits`, and `index[0]`.
   """
   assert len(index) == 1, "Length of index was {}".format(len(index))
   class_id = index[0]
   # Keep a trailing axis of size 1 on the selected probability column.
   class_probs = array_ops.expand_dims(self._probs[:, class_id], -1)
   factor_slice = math_ops.sqrt(class_probs * (1 - class_probs))
   num_classes = int(self._logits.shape[1])
   return insert_slice_in_zeros(factor_slice, 1, num_classes, class_id)
コード例 #35
0
ファイル: base_head.py プロジェクト: snath99920/Chatbot-
def check_dense_labels_match_logits_and_reshape(labels, logits,
                                                expected_labels_dimension):
    """Checks labels shape matches logits, and reshapes if needed.

    Consider logits of shape [D0, D1, ... DN, logits_dimension]. Then labels
    shape must be [D0, D1, ... DN, expected_labels_dimension].
    If expected_labels_dimension=1, labels could be [D0, D1, ... DN] and this
    method reshapes them to [D0, D1, ... DN, 1].

    In eager mode every check is done immediately and raises `ValueError`.
    In graph mode the final dimension is checked statically when it is known,
    and the rank and full-shape checks are added to the graph as assertions.

    Args:
      labels: labels Tensor.
      logits: logits Tensor.
      expected_labels_dimension: Integer.

    Returns:
      Validated and reshaped labels Tensor.

    Raises:
      ValueError: If labels is None.
      ValueError: If labels is a SparseTensor.
      ValueError: If labels shape is statically defined and fails validation.
      OpError: If labels shape is not statically defined and fails validation.
    """
    if labels is None:
        raise ValueError(_LABEL_NONE_ERR_MSG)
    with ops.name_scope('labels', values=(labels, logits)) as scope:
        labels = sparse_tensor.convert_to_tensor_or_sparse_tensor(labels)
        if isinstance(labels, sparse_tensor.SparseTensor):
            raise ValueError(
                _SPARSE_LABEL_ERR_MSG.format(expected_labels_dimension,
                                             expected_labels_dimension,
                                             expected_labels_dimension))
        # Eager mode.
        if context.executing_eagerly():
            labels_rank = labels._rank()  # pylint: disable=protected-access
            logits_rank = logits._rank()  # pylint: disable=protected-access
            # Labels exactly one rank short of logits get a trailing axis of
            # size 1.
            if (labels_rank is not None and logits_rank is not None
                    and labels_rank == logits_rank - 1):
                labels = array_ops.expand_dims(labels, -1)
                labels_rank += 1
            labels_shape = labels._shape_tuple()  # pylint: disable=protected-access
            if labels_rank < 2:
                raise ValueError(
                    'labels must have rank at least 2.  Received rank {}, '
                    'shape {}'.format(labels_rank, labels_shape))
            if labels_shape[-1] != expected_labels_dimension:
                raise ValueError(
                    _MISMATCHED_LABEL_DIM_ERR_MSG.format(
                        expected_labels_dimension, labels_shape[-1]))
            # The full labels shape must equal the logits shape with its last
            # dimension swapped for expected_labels_dimension.
            logits_shape = logits._shape_tuple()  # pylint: disable=protected-access
            expected_labels_shape = logits_shape[:-1] + (
                expected_labels_dimension, )
            if expected_labels_shape != labels_shape:
                raise ValueError(
                    '{}, expected_labels_shape: {}. labels_shape: {}.'.format(
                        _LABEL_SHAPE_ERR_MSG.format(expected_labels_dimension),
                        expected_labels_shape, labels_shape))
            return labels

        # Graph mode.
        # Same rank fix-up as above, based on the statically known ranks.
        if (labels.shape.ndims is not None and logits.shape.ndims is not None
                and labels.shape.ndims == logits.shape.ndims - 1):
            labels = array_ops.expand_dims(labels, -1)
        assert_rank = check_ops.assert_rank_at_least(
            labels,
            2,
            message=_LABEL_SHAPE_ERR_MSG.format(expected_labels_dimension))
        with ops.control_dependencies([assert_rank]):
            # Fail at graph-construction time when the static final dimension
            # is known and already wrong.
            static_shape = labels.shape
            if static_shape.ndims is not None:
                final_dim = static_shape[-1]
                if (final_dim is not None) and (final_dim !=
                                                expected_labels_dimension):
                    raise ValueError(
                        _MISMATCHED_LABEL_DIM_ERR_MSG.format(
                            expected_labels_dimension, final_dim))
            # Compare the dynamic shapes through an assertion op.
            logits_shape = array_ops.shape(logits)
            expected_labels_shape = array_ops.concat(
                [logits_shape[:-1], [expected_labels_dimension]], axis=0)
            labels_shape = array_ops.shape(labels)
            assert_dimension = check_ops.assert_equal(
                expected_labels_shape,
                labels_shape,
                message=_LABEL_SHAPE_ERR_MSG.format(expected_labels_dimension),
                data=[
                    'expected_labels_shape: ', expected_labels_shape,
                    'labels_shape: ', labels_shape
                ])
            # The returned identity op is created under the shape assertion's
            # control dependency (nested inside the rank assertion's).
            with ops.control_dependencies([assert_dimension]):
                return array_ops.identity(labels, name=scope)
コード例 #36
0
ファイル: ragged_math_ops.py プロジェクト: saikiran2711/Web
def ragged_reduce_aggregate(reduce_op,
                            unsorted_segment_op,
                            rt_input,
                            axis,
                            keepdims,
                            separator=None,
                            name=None):
    """Aggregates across axes of a RaggedTensor using the given `Tensor` ops.

    Reduces `rt_input` along the dimensions given in `axis`.  The rank of the
    tensor is reduced by 1 for each entry in `axis`.  If `axis` is not
    specified, then all dimensions are reduced, and a scalar value is returned.

    This op assumes that `reduce_op` and `unsorted_segment_op` are associative;
    if not, then reducing multiple axes will return incorrect results.  (In
    particular, reducing multiple axes is currently implemented by reducing the
    axes one at a time.)

    Args:
      reduce_op: The tensorflow `op` that should be used to reduce values in
        uniform dimensions.  Must have the same signature and basic behavior as
        `reduce_sum`, `reduce_max`, etc.
      unsorted_segment_op: The tensorflow `op` that should be used to combine
        values in ragged dimensions.  Must have the same signature and basic
        behavior as `unsorted_segment_sum`, `unsorted_segment_max`, etc.
      rt_input: A `Tensor` or `RaggedTensor` containing the values to be
        reduced.
      axis: The axis or axes to reduce.  May be `None` (to reduce all axes), an
        `int` (to reduce a single axis), a `list` or `tuple` of `int` (to
        reduce a given set of axes), or a `Tensor` with a constant value.  Must
        be in the range `[0, rt_input.rank)`.
      keepdims: If true, retains reduced dimensions with length 1.
      separator: An optional string. Defaults to None. The separator to use
        when joining. The separator must not be set for non-string data types.
        (i.e. if separator is not None then it uses string ops)
      name: A name prefix for the returned tensor (optional).

    Returns:
      A `RaggedTensor` containing the reduced values.  The returned tensor
      has the same dtype as `rt_input`, and its shape is given by removing the
      dimensions specified in `axis` from `rt_input.shape`.  The `ragged_rank`
      of the returned tensor is given by subtracting any ragged dimensions
      specified in `axis` from `rt_input.ragged_rank`.  If `rt_input` is not
      ragged, the result of `reduce_op` is returned directly.

    Raises:
      ValueError: If `axis` contains a `Tensor` whose value is not constant.
    """
    # Non-ragged input: delegate straight to the dense reduction op.
    if not ragged_tensor.is_ragged(rt_input):
        if separator is None:
            return reduce_op(rt_input, axis, keepdims=keepdims, name=name)
        else:
            # When separator is not None, We infer that dtype is string and
            # reduce_join will be called.
            return reduce_op(rt_input,
                             axis,
                             keepdims=keepdims,
                             name=name,
                             separator=separator)

    # A Tensor-valued axis must be resolvable to a Python value here.
    if isinstance(axis, ops.Tensor):
        axis = tensor_util.constant_value(axis)
        if axis is None:
            raise ValueError('axis must be known at graph construction time.')
        if isinstance(axis, np.ndarray):
            axis = axis.tolist()

    # When reducing all axes, just ignore splits & reduce the inner values.
    if axis is None:
        result = reduce_op(rt_input.flat_values,
                           None,
                           keepdims=keepdims,
                           name=name)
        if keepdims:
            # Expand the result to the input number of dimensions.
            for _ in rt_input.shape[1:]:
                result = array_ops.expand_dims(result, axis=0)
        return result

    with ops.name_scope(name, 'RaggedReduce', [rt_input, axis]):
        if isinstance(axis, (tuple, list)):
            if not axis:
                # An empty axis list reduces nothing.
                return rt_input
            elif len(axis) == 1:
                axis = axis[0]
            else:
                # When reducing multiple axes, as we reduce one at a time (see below),
                # the negative axis has to be converted to positive at the first run
                # as the sort with negative axis will have different orders.
                # See GitHub issue 27497.
                axis = [
                    array_ops.get_positive_axis(a, rt_input.shape.ndims,
                                                'axis[%s]' % i,
                                                'rank(input_tensor)')
                    for i, a in enumerate(axis)
                ]
                # When reducing multiple axes, just reduce one at a time.  This is less
                # efficient, and only works for associative ops.  (In particular, it
                # does not work for reduce_mean.)  However, reducing multiple axes at
                # once will probably require a nontrivial c++ op.
                # The largest axis is reduced first, then the remaining ones
                # recursively. `name` is not forwarded to these inner calls.
                axis = sorted(axis)
                inner_reduced = ragged_reduce_aggregate(
                    reduce_op, unsorted_segment_op, rt_input, axis[-1],
                    keepdims, separator)
                return ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                               inner_reduced, axis[:-1],
                                               keepdims, separator)

        rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            rt_input, name='rt_input')

        axis = array_ops.get_positive_axis(axis,
                                           rt_input.shape.ndims,
                                           ndims_name='rank(input_tensor)')

        if axis == 0:
            # out[i_1, i_2, ..., i_N] = sum_{j} rt_input[j, i_1, i_2, ..., i_N]
            row_lengths = rt_input.row_splits[1:] - rt_input.row_splits[:-1]
            num_segments = math_ops.maximum(math_ops.reduce_max(row_lengths),
                                            0)
            # NOTE(review): `range` must return an object with `.values`, so
            # it presumably resolves to a module-level ragged `range` rather
            # than the builtin -- confirm against the enclosing module.
            segment_ids = range(row_lengths).values
            result = _ragged_segment_aggregate(unsorted_segment_op,
                                               rt_input.values, segment_ids,
                                               num_segments, separator)
            if keepdims:
                result = array_ops.expand_dims(result, axis=0)
            return result
        elif axis == 1:
            # out[i_0, i_1, i_2, ..., i_N] = sum_{j} rt_input[i_0, j, i_2, ..., i_N]
            # One segment per row: number of splits minus one.
            num_segments = array_ops.shape(rt_input.row_splits)[0] - 1
            segment_ids = segment_id_ops.row_splits_to_segment_ids(
                rt_input.row_splits)
            result = _ragged_segment_aggregate(unsorted_segment_op,
                                               rt_input.values, segment_ids,
                                               num_segments, separator)
            if keepdims:
                result = array_ops.expand_dims(result, axis=1)
            return result
        else:
            # out[i_0, ..., i_[axis-1], i_axis+1], ..., i_N] =
            #     sum_{j} rt_input [i_0, ..., i_[axis-1], j, i_axis+1], ..., i_N]
            # Deeper axes: recurse into `values` with the axis shifted down by
            # one and re-wrap the result with the same outer row partition.
            return rt_input.with_values(
                ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                        rt_input.values, axis - 1, keepdims,
                                        separator))
コード例 #37
0
ファイル: base_head.py プロジェクト: snath99920/Chatbot-
def get_weights_and_check_match_logits(features,
                                       weight_column,
                                       logits,
                                       allow_per_logit_weights=False):
    """Looks up the weights tensor and validates its shape against `logits`.

  With logits shaped [D0, D1, ... DN, logits_dimension], the accepted weights
  shapes are:
  * [D0, D1, ... DN, logits_dimension], only if `allow_per_logit_weights=True`.
  * [D0, D1, ... DN, 1]
  * [D0, D1, ... DN]: expanded to [D0, D1, ... DN, 1] so that it broadcasts
    against logits.

  In eager mode a shape mismatch raises immediately; in graph mode the check is
  attached to the returned tensor as an assertion op.

  Args:
    features: The features dict the weights are read from.
    weight_column: A string key or a numeric column naming the weights. When
      `None`, this function returns the scalar `1.`.
    logits: logits Tensor.
    allow_per_logit_weights: Boolean. Whether weights may also have the full
      logits shape `[D0, D1, ... DN, logits_dimension]`.

  Returns:
    The weights tensor converted with `math_ops.to_float` and shape-checked,
    or `1.` when `weight_column` is `None`.

  Raises:
    TypeError: If `weight_column` is neither a string nor a numeric column.
    ValueError: If the weights dtype is neither floating nor integer, or (in
      eager mode) if the weights shape is not one of the accepted shapes.
  """
    err_msg = ('weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or '
               '[D0, D1, ... DN, logits_dimension]'
               if allow_per_logit_weights else
               'weights shape must be [D0, D1, ... DN] or [D0, D1, ... DN, 1]')
    scope_values = tuple(six.itervalues(features)) + (logits, )
    with ops.name_scope('weights', values=scope_values) as scope:
        # No weight column means "unweighted": every example counts as 1.
        if weight_column is None:
            return 1.

        # TODO(b/117839674): update feature_column
        if isinstance(weight_column, six.string_types):
            weight_column = feature_column_lib.numeric_column(
                key=weight_column, shape=(1, ))
        accepted_column_types = (feature_column_lib.NumericColumn,
                                 _NumericColumn)
        if not isinstance(weight_column, accepted_column_types):
            raise TypeError(
                'Weight column must be either a string or NumericColumn.'
                ' Given type: {}.'.format(type(weight_column)))

        builder = _LazyBuilder(features)
        weights = weight_column._get_dense_tensor(builder)  # pylint: disable=protected-access
        weights_dtype = weights.dtype
        if not (weights_dtype.is_floating or weights_dtype.is_integer):
            raise ValueError('Weight column should be castable to float. '
                             'Given dtype: {}'.format(weights.dtype))
        weights = math_ops.to_float(weights, name='weights')

        if context.executing_eagerly():
            # Eager mode: shapes are concrete tuples, so validate in Python.
            weights_shape = weights._shape_tuple()  # pylint: disable=protected-access
            logits_shape = logits._shape_tuple()  # pylint: disable=protected-access
            weights_rank = weights._rank()  # pylint: disable=protected-access
            logits_rank = logits._rank()  # pylint: disable=protected-access

            def _shape_mismatch():
                # Single place that formats the eager-mode shape error.
                return ValueError(
                    '{}, logits_shape: {}. weights_shape: {}.'.format(
                        err_msg, logits_shape, weights_shape))

            ranks_known = (weights_rank is not None
                           and logits_rank is not None)
            if ranks_known and weights_rank == logits_rank - 1:
                # Weights lack the trailing axis: compare the leading dims,
                # then append a size-1 axis.
                if logits_shape[:-1] != weights_shape:
                    raise _shape_mismatch()
                return array_ops.expand_dims(weights, -1, name=scope)

            supported_weights_shape = logits_shape[:-1] + (1, )
            shape_ok = supported_weights_shape == weights_shape
            if allow_per_logit_weights:
                shape_ok = logits_shape == weights_shape or shape_ok
            if not shape_ok:
                raise _shape_mismatch()
            return weights

        # Graph mode: shapes may be dynamic, so validate with assertion ops.
        weights_shape = array_ops.shape(weights, name='weights_shape')
        logits_shape = array_ops.shape(logits, name='logits_shape')
        shape_report = [
            'logits_shape: ', logits_shape, 'weights_shape: ', weights_shape
        ]
        static_weights_rank = weights.shape.ndims
        static_logits_rank = logits.shape.ndims
        if (static_weights_rank is not None and static_logits_rank is not None
                and static_weights_rank == static_logits_rank - 1):
            assert_dimension = check_ops.assert_equal(logits_shape[:-1],
                                                      weights_shape,
                                                      message=err_msg,
                                                      data=shape_report)
            with ops.control_dependencies([assert_dimension]):
                return array_ops.expand_dims(weights, -1, name=scope)

        supported_weights_shape = array_ops.concat([logits_shape[:-1], [1]],
                                                   axis=0)
        if allow_per_logit_weights:
            # Either the full logits shape or the trailing-1 shape is fine.
            matches_logits = math_ops.reduce_all(
                math_ops.equal(logits_shape, weights_shape))
            matches_supported = math_ops.reduce_all(
                math_ops.equal(supported_weights_shape, weights_shape))
            condition = math_ops.reduce_any(
                [matches_logits, matches_supported])
            assert_dimension = control_flow_ops.Assert(
                condition=condition, data=[err_msg] + shape_report)
        else:
            assert_dimension = check_ops.assert_equal(supported_weights_shape,
                                                      weights_shape,
                                                      message=err_msg,
                                                      data=shape_report)
        with ops.control_dependencies([assert_dimension]):
            return array_ops.identity(weights, name=scope)
コード例 #38
0
ファイル: embedding_ops.py プロジェクト: wucng/tensorflow-1
def _sampled_scattered_embedding_lookup(params,
                                        values,
                                        dimension=None,
                                        sampled_candidates=None,
                                        hash_key=None,
                                        name=None):
    """Embeds each entry of `values` through hashed parameter lookups.

  Component i of the embedding of a value v is the weight stored at the index
  obtained by fingerprinting the pair (v, i) ("feature hashing", see
  http://arxiv.org/pdf/1504.04788.pdf). When `sampled_candidates` is given only
  the listed components are looked up; otherwise all `dimension` components
  are.

  Because indices come from a hash, no vocabulary has to be fixed up front and
  the memory used is bounded by the size of `params`, at the cost of one hash
  per embedding scalar rather than one per token.

  If `params` is a list, it is a partition of the embedding parameters: all
  tensors have the same length, except that the leading ones may hold one
  extra element (e.g. 10 parameters over 4 tensors: `[3, 3, 2, 2]`).

  Args:
    params: A `Tensor`, `list` of `Tensors`, or `PartitionedVariable`.
      Each tensor must be of rank 1 with fully-defined shape.
    values: `Tensor` of values to be embedded with shape `[d0, ..., dn]`.
    dimension: Embedding dimension. Required when `sampled_candidates` is
      `None`; ignored (overwritten) otherwise.
    sampled_candidates: Optional `Tensor` of component indices to look up,
      with shape `[d0, ..., dn, N]`. If `None`, all components are used.
    hash_key: Optional key forwarded to the feature-cross hashing op.
    name: An optional name for this op.

  Returns:
    A `Tensor` with shape `[d0, ..., dn, dimension]`, or `[d0, ..., dn, N]`
    when `sampled_candidates` is given.

  Raises:
    ValueError: if neither `dimension` nor `sampled_candidates` is given, if
      `dimension` is not positive, or if the partition sizes are invalid.
  """
    if isinstance(params, variables.PartitionedVariable):
        params = list(params)
    if not isinstance(params, list):
        params = [params]

    with ops.name_scope(name, "scattered_embedding_lookup",
                        params + [dimension, values]):
        # Remember the caller's shape, then work on a column of values.
        values_shape = array_ops.shape(values)
        values = array_ops.reshape(values, [-1, 1])

        if sampled_candidates is not None:
            # The embedding size is the last axis of sampled_candidates.
            candidate_dims = array_ops.shape(sampled_candidates)
            last_axis = math_ops.sub(array_ops.rank(sampled_candidates), 1)
            dimension = candidate_dims[last_axis]
            sampled_candidates_shape = array_ops.shape(sampled_candidates)
            dimension_tensor = array_ops.reshape(dimension, shape=[1])
            expected_shape = array_ops.concat_v2(
                [values_shape, dimension_tensor], 0)
            shapes_agree = math_ops.reduce_all(
                math_ops.equal(sampled_candidates_shape, expected_shape))
            shape_check = control_flow_ops.Assert(shapes_agree, [
                "The shape of sampled_candidates: ", sampled_candidates_shape,
                " does not match the shape of values: ", values_shape
            ])
            with ops.control_dependencies([shape_check]):
                # Flatten the candidates the same way values were flattened.
                sampled_candidates = array_ops.reshape(sampled_candidates,
                                                       [-1, dimension])
        else:
            if dimension is None:
                raise ValueError(
                    "You must specify either dimension or sampled_candidates.")
            if dimension <= 0:
                raise ValueError("Dimension must be >0. Given is %d" %
                                 dimension)
            # One row [0, 1, ..., dimension - 1] per flattened value.
            all_components = array_ops.expand_dims(
                math_ops.range(0, dimension), 0)
            sampled_candidates = array_ops.tile(all_components,
                                                array_ops.shape(values))

        # Collect the (static) length of every partition.
        partition_sizes = []
        for partition in params:
            partition_shape = partition.get_shape()
            partition_shape.assert_has_rank(1)
            partition_shape.assert_is_fully_defined()
            partition_sizes.append(partition_shape[0].value)
        num_partitions = len(params)
        num_params = sum(partition_sizes)  # Total number of parameters.

        # Every partition must have the size an even split would give it.
        for index, actual_size in enumerate(partition_sizes):
            expected_size = (num_params - index - 1) // num_partitions + 1
            if actual_size != expected_size:
                raise ValueError(
                    "Tensor %d in params has size %d, expected %d." %
                    (index, actual_size, expected_size))

        # With two values v1 and v2 and 3 dimensions, this crosses
        # [[0, 1, 2], [0, 1, 2]] with [[v1], [v2]].
        ids = sparse_feature_cross_op.sparse_feature_cross(
            [sampled_candidates, values],
            hashed_output=True,
            num_buckets=num_params,
            hash_key=hash_key)
        ids = sparse_ops.sparse_tensor_to_dense(ids)

        # Index validation is skipped: ids are hashed into num_params buckets
        # and the partition sizes were verified above.
        result = embedding_ops.embedding_lookup(params,
                                                ids,
                                                partition_strategy="div",
                                                validate_indices=False)

        output_shape = array_ops.concat_v2([values_shape, [dimension]], 0)
        return array_ops.reshape(result, output_shape)
 def _tile(feature):
     """Repeats `feature` `num_unroll` times along a new axis 1, dropping axis 2.

     `num_unroll` comes from the enclosing scope. The squeeze requires axis 2
     of the tiled tensor to have size 1.
     """
     with_unroll_axis = array_ops.expand_dims(feature, 1)
     repeated = array_ops.tile(with_unroll_axis, [1, num_unroll, 1])
     return array_ops.squeeze(repeated, axis=2)
コード例 #40
0
        def unit(hidden_state):
            """Builds the output-vocabulary distribution for one decoder state.

            The result is a two-level mixture:

              copy_dist  = p_db   * kb_dist       + (1 - p_db)   * context_copy_dist
              final_dist = p_gens * generate_dist + (1 - p_gens) * copy_dist

            where `generate_dist` is a softmax over the generation vocabulary
            (zero-padded up to `out_vocab_size`), `context_copy_dist` scatters
            the attention weights over the input utterance onto the token ids
            in `self.inp_utt`, and `kb_dist` scatters the attention weights
            over knowledge-base entries onto the ids in `self.kb`.

            Args:
              hidden_state: Decoder hidden state, used as a rank-2 tensor.
                Presumably [batch_size, hidden_dim] -- TODO confirm.

            Returns:
              The mixed distribution; the scattered components have shape
              [batch_size, out_vocab_size].
            """
            # ---- Attention over the encoder states --------------------------
            # Broadcast the hidden state over axis 1 of encoder_states and
            # score each position with a two-layer tanh MLP.
            hidden_state_expanded_attn = tf.tile(
                array_ops.expand_dims(hidden_state, 1),
                [1, tf.shape(self.encoder_states)[1], 1])
            attn_rep = tf.concat(
                [self.encoder_states, hidden_state_expanded_attn], axis=2)
            attn_rep = tf.nn.tanh(
                tf.einsum(
                    'ijk,kl->ijl',
                    tf.nn.tanh(tf.einsum("ijk,kl->ijl", attn_rep, self.W1)),
                    self.W2))
            u_i = tf.squeeze(tf.einsum('ijk,kl->ijl', attn_rep, self.w), 2)
            inp_len_mask = tf.sequence_mask(self.inp_len,
                                            tf.shape(self.inp_utt)[2],
                                            dtype=tf.float32)
            attn_mask = tf.reshape(inp_len_mask, shape=[self.batch_size, -1])
            # Masked softmax, computed in float64: exp(score) * mask,
            # normalised by the per-row sum.
            exp_u_i_masked = tf.multiply(
                tf.cast(attn_mask, dtype=tf.float64),
                tf.exp(tf.cast(u_i, dtype=tf.float64)))
            a = tf.cast(tf.einsum('i,ij->ij',
                                  tf.pow(tf.reduce_sum(exp_u_i_masked, 1), -1),
                                  exp_u_i_masked),
                        dtype=tf.float32)
            # Attention-weighted sum of the encoder states.
            inp_attn = tf.reduce_sum(
                tf.einsum('ij,ijk->ijk', a, self.encoder_states), 1)

            # ---- Generation distribution ------------------------------------
            generate_dist = tf.nn.softmax(
                math_ops.matmul(tf.concat([hidden_state, inp_attn], axis=1),
                                self.U) + self.b1)
            # Pad with zeros so it lines up with the full output vocabulary.
            extra_zeros = tf.zeros(
                [self.batch_size, self.out_vocab_size - self.generate_size])
            extended_generate_dist = tf.concat([generate_dist, extra_zeros],
                                               axis=1)

            # ---- Attention over KB results (beta) ---------------------------
            hidden_state_expanded_result = tf.tile(
                array_ops.expand_dims(hidden_state, 1),
                [1, tf.shape(self.kb)[1], 1])
            inp_attn_expanded_result = tf.tile(
                array_ops.expand_dims(inp_attn, 1),
                [1, tf.shape(self.kb)[1], 1])
            result_attn_rep = tf.concat([
                self.result_rep, hidden_state_expanded_result,
                inp_attn_expanded_result
            ],
                                        axis=2)
            result_attn_rep = tf.nn.tanh(
                tf.einsum(
                    "ijk,kl->ijl",
                    tf.nn.tanh(
                        tf.einsum("ijk,kl->ijl", result_attn_rep, self.W_1)),
                    self.W_12))
            beta_logits = tf.squeeze(
                tf.einsum('ijk,kl->ijl', result_attn_rep, self.r_1), 2)
            beta_masked = tf.multiply(
                tf.cast(self.kb_mask, dtype=tf.float64),
                tf.exp(tf.cast(beta_logits, dtype=tf.float64)))
            beta = tf.cast(tf.einsum('i,ij->ij',
                                     tf.pow(tf.reduce_sum(beta_masked, 1), -1),
                                     beta_masked),
                           dtype=tf.float32)

            # ---- Attention over the keys of each KB result (gamma) ----------
            hidden_state_expanded_keys = tf.tile(
                array_ops.expand_dims(array_ops.expand_dims(hidden_state, 1),
                                      1),
                [1, tf.shape(self.kb)[1],
                 tf.shape(self.kb)[2], 1])
            inp_attn_expanded_keys = tf.tile(
                array_ops.expand_dims(array_ops.expand_dims(inp_attn, 1), 1),
                [1, tf.shape(self.kb)[1],
                 tf.shape(self.kb)[2], 1])
            result_key_rep = tf.concat([
                self.keys_emb, hidden_state_expanded_keys,
                inp_attn_expanded_keys
            ],
                                       axis=3)
            result_key_rep = tf.nn.tanh(
                tf.einsum(
                    'ijkl,lm->ijkm',
                    tf.nn.tanh(
                        tf.einsum('ijkl,lm->ijkm', result_key_rep, self.W_2)),
                    self.W_22))
            gamma_logits = tf.squeeze(
                tf.einsum('ijkl,lm->ijkm', result_key_rep, self.r_2), 3)
            gamma_masked = tf.multiply(
                tf.cast(self.keys_mask, dtype=tf.float64),
                tf.exp(tf.cast(gamma_logits, dtype=tf.float64)))
            # Per-result key attention (normalised over axis 2), scaled by the
            # result-level weight beta.
            gamma = tf.einsum(
                'ij,ijk->ijk', beta,
                tf.cast(tf.einsum('ij,ijk->ijk',
                                  tf.pow(tf.reduce_sum(gamma_masked, 2), -1),
                                  gamma_masked),
                        dtype=tf.float32))

            # Column of batch indices [[0], [1], ...], shared by both scatters
            # below, together with their common output shape.
            batch_nums = array_ops.expand_dims(
                tf.range(0, limit=self.batch_size, dtype=tf.int64), 1)
            dist_shape = [self.batch_size, self.out_vocab_size]

            # ---- Copy-from-context distribution -----------------------------
            # Place each input position's attention weight at the vocabulary
            # id of the token found there.
            batch_nums_tiled_context = tf.tile(
                batch_nums, [1, tf.shape(self.encoder_states)[1]])
            flat_inp_utt = tf.reshape(self.inp_utt,
                                      shape=[self.batch_size, -1])
            indices_context = tf.stack(
                [batch_nums_tiled_context, flat_inp_utt], axis=2)
            context_copy_dist = tf.scatter_nd(indices_context, a, dist_shape)

            # Beta-weighted summary of the KB results.
            db_rep = tf.reduce_sum(
                tf.einsum('ij,ijk->ijk', beta, self.result_rep), 1)

            # ---- Mixture gates ----------------------------------------------
            # The complements are plain elementwise subtractions; this gives
            # the same values as mapping `lambda x: 1 - x` over the rows with
            # tf.map_fn, without building a per-row loop.
            p_db = tf.nn.sigmoid(
                tf.matmul(tf.concat([hidden_state, inp_attn, db_rep], axis=1),
                          self.W4) + self.b3)
            p_db = tf.tile(p_db, [1, self.out_vocab_size])
            one_minus_pdb = 1 - p_db

            p_gens = tf.nn.sigmoid(
                tf.matmul(tf.concat([hidden_state, inp_attn, db_rep], axis=1),
                          self.W3) + self.b2)
            p_gens = tf.tile(p_gens, [1, self.out_vocab_size])
            one_minus_pgens = 1 - p_gens

            # ---- Copy-from-KB distribution ----------------------------------
            kb_ids = tf.reshape(self.kb, shape=[self.batch_size, -1])
            num_kb_ids = tf.shape(kb_ids)[1]
            batch_nums_tiled = tf.tile(batch_nums, [1, num_kb_ids])
            indices = tf.stack([batch_nums_tiled, kb_ids], axis=2)
            updates = tf.reshape(gamma, shape=[self.batch_size, -1])
            kb_dist = tf.scatter_nd(indices, updates, dist_shape)
            # Scale each batch row by self.db_empty (presumably 0 for examples
            # with an empty KB result set -- TODO confirm).
            kb_dist = tf.einsum('i,ij->ij', self.db_empty, kb_dist)

            # ---- Final mixture ----------------------------------------------
            copy_dist = tf.multiply(p_db, kb_dist) + tf.multiply(
                one_minus_pdb, context_copy_dist)
            final_dist = tf.multiply(p_gens,
                                     extended_generate_dist) + tf.multiply(
                                         one_minus_pgens, copy_dist)

            return final_dist
コード例 #41
0
    def _process_input_helper(self,
                              update_row_factors,
                              sp_input=None,
                              transpose_input=False,
                              row_weights=None):
        """Creates the graph for processing a sparse slice of input.

    Solves the normal equations for the factors touched by `sp_input`, builds
    the op that writes them back, and builds the loss terms for the slice.

    Args:
      update_row_factors: if True, update or project the row_factors, else
        update or project the column factors.
      sp_input: Please refer to comments for update_row_factors,
        update_col_factors, project_row_factors, and project_col_factors for
        restrictions. Must be a `SparseTensor` (asserted below) despite the
        `None` default.
      transpose_input: If True, the input is logically transposed and then the
        corresponding rows/columns of the transposed input are updated.
      row_weights: If not None, this is the row/column weights to be used for
        the update or projection. If None, use the corresponding weights from
        the model. Note that the feature (column/row) weights will be
        determined by the model. When not None, it can either be a scalar or
        a rank-1 tensor with the same number of elements as the number of rows
        or columns to be updated/projected.

    Returns:
      A tuple consisting of the following elements:
      new_values: New values for the row/column factors.
      update_op: An op that assigns the newly computed values to the row/column
        factors.
      unregularized_loss: A tensor (scalar) that contains the normalized
        minibatch loss corresponding to sp_input, without the regularization
        term. Add the regularization term below to yield the loss.
      regularization: A tensor (scalar) that contains the normalized
        regularization term for the minibatch loss corresponding to sp_input.
      sum_weights: The sum of the weights corresponding to sp_input. This
        can be used with unregularized loss to calculate the root weighted
        squared error.
    """
        assert isinstance(sp_input, sparse_tensor.SparseTensor)

        # Select the "left" factors being solved for and the cached "right"
        # factors, weights and gramian they are solved against.
        if update_row_factors:
            left = self._row_factors
            right_factors = self._col_factors_cache
            row_wt = self._row_wt_cache
            col_wt = self._col_wt_cache
            total_rows = self._input_rows
            total_cols = self._input_cols
            sharding_func = WALSModel._get_sharding_func(
                self._input_rows, self._num_row_shards)
            gramian = self._col_gramian_cache
        else:
            left = self._col_factors
            right_factors = self._row_factors_cache
            row_wt = self._col_wt_cache
            col_wt = self._row_wt_cache
            total_rows = self._input_cols
            total_cols = self._input_rows
            sharding_func = WALSModel._get_sharding_func(
                self._input_cols, self._num_col_shards)
            gramian = self._row_gramian_cache
            # Updating column factors is handled as the row update on the
            # transposed problem, so the transpose flag is flipped.
            transpose_input = not transpose_input

        # Note that the row indices of sp_input are based on the original full input
        # Here we reindex the rows and give them contiguous ids starting at 0.
        # We use tf.unique to achieve this reindexing. Note that this is done so
        # that the downstream kernel can assume that the input is "dense" along the
        # row dimension.
        row_ids, col_ids = array_ops.split(value=sp_input.indices,
                                           num_or_size_splits=2,
                                           axis=1)
        # update_*_indices: the distinct original ids; all_*_ids: for every
        # nonzero entry, its position within those distinct ids.
        update_row_indices, all_row_ids = array_ops.unique(row_ids[:, 0])
        update_col_indices, all_col_ids = array_ops.unique(col_ids[:, 0])
        col_ids = array_ops.expand_dims(
            math_ops.cast(all_col_ids, dtypes.int64), 1)
        row_ids = array_ops.expand_dims(
            math_ops.cast(all_row_ids, dtypes.int64), 1)

        # update_indices: factors to be solved for; gather_indices: factors
        # looked up from the cache on the other side.
        if transpose_input:
            update_indices = update_col_indices
            row_shape = [
                math_ops.cast(
                    array_ops.shape(update_row_indices)[0], dtypes.int64)
            ]
            gather_indices = update_row_indices
        else:
            update_indices = update_row_indices
            row_shape = [
                math_ops.cast(
                    array_ops.shape(update_col_indices)[0], dtypes.int64)
            ]
            gather_indices = update_col_indices

        num_rows = math_ops.cast(
            array_ops.shape(update_indices)[0], dtypes.int64)
        col_shape = [num_rows]
        right = embedding_ops.embedding_lookup(right_factors,
                                               gather_indices,
                                               partition_strategy="div")
        # Rebuild the sparse input on the compacted (reindexed) coordinates.
        new_sp_indices = array_ops.concat([row_ids, col_ids], 1)
        new_sp_shape = (array_ops.concat([row_shape, col_shape], 0)
                        if transpose_input else array_ops.concat(
                            [col_shape, row_shape], 0))
        new_sp_input = sparse_tensor.SparseTensor(indices=new_sp_indices,
                                                  values=sp_input.values,
                                                  dense_shape=new_sp_shape)

        # Compute lhs and rhs of the normal equations
        total_lhs = (self._unobserved_weight * gramian)
        if self._regularization_matrix is not None:
            total_lhs += self._regularization_matrix
        if self._row_weights is None:
            # Special case of ALS. Use a much simpler update rule.
            total_rhs = (self._unobserved_weight *
                         sparse_ops.sparse_tensor_dense_matmul(
                             new_sp_input, right, adjoint_a=transpose_input))
            # TODO(rmlarsen): handle transposing in tf.matrix_solve instead of
            # transposing explicitly.
            # TODO(rmlarsen): multi-thread tf.matrix_solve.
            new_left_values = array_ops.transpose(
                linalg_ops.matrix_solve(total_lhs,
                                        array_ops.transpose(total_rhs)))
        else:
            if row_weights is None:
                # TODO(yifanchen): Add special handling for single shard without using
                # embedding_lookup and perform benchmarks for those cases. Same for
                # col_weights lookup below.
                row_weights_slice = embedding_ops.embedding_lookup(
                    row_wt, update_indices, partition_strategy="div")
            else:
                # Caller-supplied weights: a scalar is broadcast to one weight
                # per updated index, a rank-1 tensor is used as given (cast
                # to float32). Higher ranks are rejected by the assertion.
                num_indices = array_ops.shape(update_indices)[0]
                with ops.control_dependencies([
                        check_ops.assert_less_equal(
                            array_ops.rank(row_weights), 1)
                ]):
                    row_weights_slice = control_flow_ops.cond(
                        math_ops.equal(array_ops.rank(row_weights), 0), lambda:
                        (array_ops.ones([num_indices]) * row_weights),
                        lambda: math_ops.cast(row_weights, dtypes.float32))

            col_weights = embedding_ops.embedding_lookup(
                col_wt, gather_indices, partition_strategy="div")
            partial_lhs, total_rhs = (
                gen_factorization_ops.wals_compute_partial_lhs_and_rhs(
                    right,
                    col_weights,
                    self._unobserved_weight,
                    row_weights_slice,
                    new_sp_input.indices,
                    new_sp_input.values,
                    num_rows,
                    transpose_input,
                    name="wals_compute_partial_lhs_rhs"))
            # Add the shared lhs term to every per-row partial lhs, then solve
            # one linear system per updated row.
            total_lhs = array_ops.expand_dims(total_lhs, 0) + partial_lhs
            total_rhs = array_ops.expand_dims(total_rhs, -1)
            new_left_values = array_ops.squeeze(
                linalg_ops.matrix_solve(total_lhs, total_rhs), [2])

        update_op_name = "row_update" if update_row_factors else "col_update"
        update_op = self.scatter_update(left,
                                        update_indices,
                                        new_left_values,
                                        sharding_func,
                                        name=update_op_name)

        # Create the loss subgraph
        loss_sp_input = (sparse_ops.sparse_transpose(new_sp_input)
                         if transpose_input else new_sp_input)
        # sp_approx is the low rank estimate of the input matrix, formed by
        # computing the product <\\(u_i, v_j\\)> for (i, j) in loss_sp_input.indices.
        sp_approx_vals = gen_factorization_ops.masked_matmul(
            new_left_values,
            right,
            loss_sp_input.indices,
            transpose_a=False,
            transpose_b=True)
        sp_approx = sparse_tensor.SparseTensor(loss_sp_input.indices,
                                               sp_approx_vals,
                                               loss_sp_input.dense_shape)
        sp_approx_sq = math_ops.square(sp_approx)
        sp_residual = sparse_ops.sparse_add(loss_sp_input, sp_approx * (-1))
        sp_residual_sq = math_ops.square(sp_residual)
        # NOTE(review): row_weights_slice and col_weights are only assigned on
        # the weighted path above (self._row_weights is not None). If
        # self._col_weights were set while self._row_weights is None,
        # col_weights would be unbound here -- confirm the model keeps the two
        # in sync.
        row_wt_mat = (constant_op.constant(0.) if self._row_weights is None
                      else array_ops.expand_dims(row_weights_slice, 1))
        col_wt_mat = (constant_op.constant(0.) if self._col_weights is None
                      else array_ops.expand_dims(col_weights, 0))

        # We return the normalized loss
        partial_row_gramian = math_ops.matmul(new_left_values,
                                              new_left_values,
                                              transpose_a=True)
        # Scales slice-level quantities by total_rows / rows-in-this-slice.
        normalization_factor = total_rows / math_ops.cast(
            num_rows, dtypes.float32)

        unregularized_loss = (
            self._unobserved_weight * (  # pyformat line break
                sparse_ops.sparse_reduce_sum(sp_residual_sq) -  # pyformat break
                sparse_ops.sparse_reduce_sum(sp_approx_sq) +  # pyformat break
                math_ops.trace(math_ops.matmul(partial_row_gramian, gramian)))
            + sparse_ops.sparse_reduce_sum(
                row_wt_mat *
                (sp_residual_sq * col_wt_mat))) * normalization_factor

        if self._regularization is not None:
            regularization = self._regularization * (
                math_ops.trace(partial_row_gramian) * normalization_factor +
                math_ops.trace(gramian))
        else:
            regularization = constant_op.constant(0.)

        # Every cell contributes the unobserved weight; observed cells add
        # their row * column weight when both weight sets exist.
        sum_weights = self._unobserved_weight * math_ops.cast(
            total_rows * total_cols, dtypes.float32)
        if self._row_weights is not None and self._col_weights is not None:
            ones = sparse_tensor.SparseTensor(
                indices=loss_sp_input.indices,
                values=array_ops.ones(array_ops.shape(loss_sp_input.values)),
                dense_shape=loss_sp_input.dense_shape)
            sum_weights += sparse_ops.sparse_reduce_sum(
                row_wt_mat * (ones * col_wt_mat)) * normalization_factor

        return (new_left_values, update_op, unregularized_loss, regularization,
                sum_weights)
コード例 #42
0
def expand_dims(input: ragged_tensor.Ragged, axis, name=None):  # pylint: disable=redefined-builtin
    """Inserts a size-1 dimension into a potentially ragged tensor's shape.

    Given a potentially ragged tensor `input`, this operation returns a
    tensor holding the same values with one extra dimension of size 1
    inserted at position `axis` of `input`'s shape.

    The table below shows how `ragged.expand_dims` changes the shape of
    different inputs.  Ragged dimensions are written in parentheses.

    input.shape             | axis | result.shape
    ----------------------- | ---- | -----------------------------
    `[D1, D2]`              |  `0` | `[1, D1, D2]`
    `[D1, D2]`              |  `1` | `[D1, 1, D2]`
    `[D1, D2]`              |  `2` | `[D1, D2, 1]`
    `[D1, (D2), (D3), D4]`  |  `0` | `[1, D1, (D2), (D3), D4]`
    `[D1, (D2), (D3), D4]`  |  `1` | `[D1, 1, (D2), (D3), D4]`
    `[D1, (D2), (D3), D4]`  |  `2` | `[D1, (D2), 1, (D3), D4]`
    `[D1, (D2), (D3), D4]`  |  `3` | `[D1, (D2), (D3), 1, D4]`
    `[D1, (D2), (D3), D4]`  |  `4` | `[D1, (D2), (D3), D4, 1]`

    Args:
      input: The potentially ragged tensor that should be expanded with a
        new dimension.
      axis: An integer constant indicating where the new dimension should be
        inserted.
      name: A name for the operation (optional).

    Returns:
      A tensor with the same values as `input`, with an added dimension of
      size 1 at `axis`.

    #### Examples:

    >>> rt = tf.ragged.constant([[1, 2], [3]])
    >>> print(rt.shape)
    (2, None)

    >>> expanded = tf.expand_dims(rt, axis=0)
    >>> print(expanded.shape, expanded)
    (1, 2, None) <tf.RaggedTensor [[[1, 2], [3]]]>

    >>> expanded = tf.expand_dims(rt, axis=1)
    >>> print(expanded.shape, expanded)
    (2, 1, None) <tf.RaggedTensor [[[1, 2]], [[3]]]>

    >>> expanded = tf.expand_dims(rt, axis=2)
    >>> print(expanded.shape, expanded)
    (2, None, 1) <tf.RaggedTensor [[[1], [2]], [[3]]]>
    """
    with ops.name_scope(name, 'RaggedExpandDims', [input]):
        input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            input, name='input')

        # Non-ragged inputs are handed straight to the regular op.
        if not ragged_tensor.is_ragged(input):
            return array_ops.expand_dims(input, axis)

        # `axis` is resolved against the rank of the *result*, which is one
        # more than the input rank (None when the input rank is unknown).
        input_rank = input.shape.ndims
        result_rank = None if input_rank is None else input_rank + 1
        axis = array_ops.get_positive_axis(
            axis, result_rank, ndims_name='rank(input)')

        if axis == 0:
            # New outermost dimension: one row that holds every input row.
            return ragged_tensor.RaggedTensor.from_uniform_row_length(
                input,
                uniform_row_length=input.nrows(),
                nrows=1,
                validate=False)

        if axis == 1:
            # Each input row becomes the only element of its own row.
            return ragged_tensor.RaggedTensor.from_uniform_row_length(
                input,
                uniform_row_length=1,
                nrows=input.nrows(),
                validate=False)

        # The new dimension lies inside `input.values`: expand there with the
        # axis shifted by one, recursing while the values are still ragged.
        inner_values = input.values
        if ragged_tensor.is_ragged(inner_values):
            expanded_values = expand_dims(inner_values, axis - 1)
        else:
            expanded_values = array_ops.expand_dims(inner_values, axis - 1)
        return input.with_values(expanded_values)
コード例 #43
0
def combine_segments(segments, start_of_sequence_id, end_of_segment_id):
  """Combine one or more input segments for a model's input sequence.

  `combine_segments` joins the tokens of one or more input segments into a
  single sequence of token values and produces matching segment ids.
  It can follow a `Trimmer`, which limits segment lengths and emits
  `RaggedTensor` outputs, and can be followed by `ModelInputPacker`.

  See `Detailed Experimental Setup` in `BERT: Pre-training of Deep Bidirectional
  Transformers for Language Understanding`
  (https://arxiv.org/pdf/1810.04805.pdf) for more examples of combined
  segments.

  The segments are concatenated along axis 1, with `start_of_sequence_id`
  placed before the first segment and `end_of_segment_id` placed after every
  segment. A second tensor, built the same way, carries the index of the
  segment each position belongs to; the start token is given id 0 and each
  end token takes the id of the segment it closes.

  Example usage:

  ```
  segment_a = [[1, 2],
               [3, 4,],
               [5, 6, 7, 8, 9]]

  segment_b = [[10, 20,],
               [30, 40, 50, 60,],
               [70, 80]]
  expected_combined, expected_ids = combine_segments(
      [segment_a, segment_b], start_of_sequence_id=101, end_of_segment_id=102)

  # segment_a and segment_b have been combined w/ special tokens describing
  # the beginning of a sequence and end of a sequence inserted.
  expected_combined=[
   [101, 1, 2, 102, 10, 20, 102],
   [101, 3, 4, 102, 30, 40, 50, 60, 102],
   [101, 5, 6, 7, 8, 9, 102, 70, 80, 102],
  ]

  # ids describing which items belong to which segment.
  expected_ids=[
   [0, 0, 0, 0, 1, 1, 1],
   [0, 0, 0, 0, 1, 1, 1, 1, 1],
   [0, 0, 0, 0, 0, 0, 0, 1, 1, 1]]
  ```

  Args:
    segments: A list of `RaggedTensor`s with the tokens of the input segments.
      All elements must have the same dtype (int32 or int64), same rank, and
      same dimension 0 (namely batch size). Slice `segments[i][j, ...]`
      contains the tokens of the i-th input segment to the j-th example in the
      batch.
    start_of_sequence_id: a python int or scalar Tensor containing the id used
      to denote the start of a sequence (e.g. `[CLS]` token in BERT
      terminology).
    end_of_segment_id: a python int or scalar Tensor containing the id used to
      denote end of a segment (e.g. the `[SEP]` token in BERT terminology).

  Returns:
    a tuple of (combined_segments, segment_ids), where:

    combined_segments: A `RaggedTensor` with segments combined and special
      tokens inserted.
    segment_ids:  A `RaggedTensor` w/ the same shape as `combined_segments`
      and containing int ids for each item detailing the segment that they
      correspond to.

  Raises:
    ValueError: If `segments` is empty, or if the dtype of the first segment
      is neither int32 nor int64.
  """
  if len(segments) < 1:
    raise ValueError("`segments` must be a nonempty list.")

  # Only the first segment is inspected; its dtype is used for every special
  # token created below.
  first_segment = segments[0]
  segment_dtype = first_segment.dtype
  if segment_dtype not in (dtypes.int32, dtypes.int64):
    raise ValueError(
        "`segments` must have elements with dtype of int32 or "
        "int64")

  start_of_sequence_id = ops.convert_to_tensor(
      start_of_sequence_id, dtype=segment_dtype)
  end_of_segment_id = ops.convert_to_tensor(
      end_of_segment_id, dtype=segment_dtype)
  start_id = math_ops.cast(start_of_sequence_id, segment_dtype)
  end_id = math_ops.cast(end_of_segment_id, segment_dtype)

  # One special token per batch row, then one trailing size-1 axis per ragged
  # dimension so the columns can be concatenated with the segments on axis 1.
  start_column = array_ops.tile([start_id], [first_segment.nrows()])
  end_column = array_ops.tile([end_id], [first_segment.nrows()])
  for _ in range(first_segment.ragged_rank):
    start_column = array_ops.expand_dims(start_column, 1)
    end_column = array_ops.expand_dims(end_column, 1)
  special_token_ones = array_ops.ones_like(start_column)

  # Tokens: start token, then every segment followed by an end token.
  token_pieces = [start_column]
  for segment in segments:
    token_pieces.extend((segment, end_column))
  segments_combined = array_ops.concat(token_pieces, 1)

  # Ids: 0 for the start token, then for segment i the id i for its tokens
  # and for the end token that follows it.
  id_pieces = [special_token_ones * 0]
  for segment_index, segment in enumerate(segments):
    id_pieces.append(array_ops.ones_like(segment) * segment_index)
    id_pieces.append(special_token_ones * segment_index)
  segment_ids = array_ops.concat(id_pieces, 1)

  return segments_combined, segment_ids
コード例 #44
0
    def __init__(self,
                 num_rows,
                 multiplier,
                 is_non_singular=None,
                 is_self_adjoint=None,
                 is_positive_definite=None,
                 is_square=True,
                 assert_proper_shapes=False,
                 name="LinearOperatorScaledIdentity"):
        r"""Initialize a `LinearOperatorScaledIdentity`.

        The operator is built from `num_rows`, which determines the size of
        each identity matrix, and `multiplier`, which defines the `dtype`,
        batch shape, and scale of each matrix.

        This operator is able to broadcast the leading (batch) dimensions.

        Args:
          num_rows:  Scalar non-negative integer `Tensor`.  Number of rows in
            the corresponding identity matrix.
          multiplier:  `Tensor` of shape `[B1,...,Bb]`, or `[]` (a scalar).
          is_non_singular:  Expect that this operator is non-singular.
          is_self_adjoint:  Expect that this operator is equal to its
            hermitian transpose.
          is_positive_definite:  Expect that this operator is positive
            definite, meaning the quadratic form `x^H A x` has positive real
            part for all nonzero `x`.  Note that we do not require the
            operator to be self-adjoint to be positive-definite.  See:
            https://en.wikipedia.org/wiki/Positive-definite_matrix#Extension_for_non_symmetric_matrices
          is_square:  Expect that this operator acts like square [batch]
            matrices.
          assert_proper_shapes:  Python `bool`.  If `False`, only perform
            static checks that initialization and method arguments have
            proper shape.  If `True`, and static checks are inconclusive, add
            asserts to the graph.
          name: A name for this `LinearOperator`

        Raises:
          ValueError:  If `num_rows` is determined statically to be
            non-scalar, or negative.
          ValueError:  If `is_square` is falsy.
        """
        self._assert_proper_shapes = assert_proper_shapes

        if not is_square:
            raise ValueError("A ScaledIdentity operator is always square.")

        with ops.name_scope(name, values=[multiplier, num_rows]):
            self._multiplier = ops.convert_to_tensor(
                multiplier, name="multiplier")

            # The base class takes its dtype from the multiplier; the hints
            # are forwarded exactly as the caller supplied them.
            hints = dict(
                is_non_singular=is_non_singular,
                is_self_adjoint=is_self_adjoint,
                is_positive_definite=is_positive_definite,
                is_square=is_square)
            super(LinearOperatorScaledIdentity, self).__init__(
                dtype=self._multiplier.dtype, name=name, **hints)

            # Two trailing unit axes give the multiplier shape
            # [B1,...Bb, 1, 1].
            multiplier_column = array_ops.expand_dims(self.multiplier, -1)
            self._multiplier_matrix = array_ops.expand_dims(
                multiplier_column, -1)
            self._multiplier_matrix_conj = math_ops.conj(
                self._multiplier_matrix)
            self._abs_multiplier = math_ops.abs(self.multiplier)

            # The row count is stored as a shape tensor, as a static value
            # (when one can be extracted), and cast to the operator dtypes.
            self._num_rows = linear_operator_util.shape_tensor(
                num_rows, name="num_rows")
            self._num_rows_static = tensor_util.constant_value(self._num_rows)
            self._check_num_rows_possibly_add_asserts()
            self._num_rows_cast_to_dtype = math_ops.cast(
                self._num_rows, self.dtype)
            self._num_rows_cast_to_real_dtype = math_ops.cast(
                self._num_rows, self.dtype.real_dtype)
コード例 #45
0
 def jac_mul(tangent):
     """Multiplies `jac_fwd` by `tangent` and restores `tangent`'s shape.

     `tangent` is flattened into a single column, left-multiplied by the
     matrix `jac_fwd` taken from the enclosing scope (presumably a forward
     Jacobian -- its definition is not visible here), and the product is
     reshaped to `tangent.shape`.
     """
     tangent_column = array_ops.expand_dims(
         array_ops.reshape(tangent, shape=[-1]), 1)
     product = math_ops.matmul(jac_fwd, tangent_column)
     return array_ops.reshape(product, tangent.shape)
コード例 #46
0
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None):
    """Returns op to update the given confusion matrix variables.

    For every pair of values in y_true and y_pred:

    true_positive: y_true == True and y_pred > thresholds
    false_negatives: y_true == True and y_pred <= thresholds
    true_negatives: y_true == False and y_pred <= thresholds
    false_positive: y_true == False and y_pred > thresholds

    The results are weighted and summed. When several thresholds are given,
    the same computation is repeated for every threshold.

    For estimation of these metrics over a stream of data, the function
    creates an `update_op` operation that updates the given variables.

    If `sample_weight` is `None`, weights default to 1.
    Use weights of 0 to mask values.

    Args:
      variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid
        keys and corresponding variables to update as values.
      y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
      y_pred: A floating point `Tensor` of arbitrary shape and whose values
        are in the range `[0, 1]`.
      thresholds: A float value or a python list or tuple of float thresholds
        in `[0, 1]`, or NEG_INF (used when top_k is set).
      top_k: Optional int, indicates that the positive labels should be
        limited to the top k predictions.
      class_id: Optional int, limits the prediction and labels to the class
        specified by this argument.
      sample_weight: Optional `Tensor` whose rank is either 0, or the same
        rank as `y_true`, and must be broadcastable to `y_true` (i.e., all
        dimensions must be either `1`, or the same as the corresponding
        `y_true` dimension).

    Returns:
      Update op, or `None` when `variables_to_update` is `None`.

    Raises:
      ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
        `sample_weight` is not `None` and its shape doesn't match `y_pred`,
        or if `variables_to_update` contains invalid keys.
    """
    if variables_to_update is None:
        return

    y_true = math_ops.cast(y_true, dtype=dtypes.float32)
    y_pred = math_ops.cast(y_pred, dtype=dtypes.float32)
    y_pred.shape.assert_is_compatible_with(y_true.shape)

    # Key validation: at least one recognised key, and no unrecognised ones.
    valid_keys = list(ConfusionMatrix)
    if not any(key for key in variables_to_update if key in valid_keys):
        raise ValueError(
            'Please provide at least one valid confusion matrix '
            'variable to update. Valid variable key options are: "{}". '
            'Received: "{}"'.format(valid_keys, variables_to_update.keys()))

    invalid_keys = [
        key for key in variables_to_update if key not in valid_keys
    ]
    if invalid_keys:
        raise ValueError(
            'Invalid keys: {}. Valid variable key options are: "{}"'.format(
                invalid_keys, valid_keys))

    # Range checks on the predictions are attached as control dependencies
    # of the squeeze/expand step.
    lower_bound_check = check_ops.assert_greater_equal(
        y_pred,
        math_ops.cast(0.0, dtype=y_pred.dtype),
        message='predictions must be >= 0')
    upper_bound_check = check_ops.assert_less_equal(
        y_pred,
        math_ops.cast(1.0, dtype=y_pred.dtype),
        message='predictions must be <= 1')
    with ops.control_dependencies([lower_bound_check, upper_bound_check]):
        y_pred, y_true, sample_weight = squeeze_or_expand_dimensions(
            y_pred, y_true, sample_weight)

    if top_k is not None:
        y_pred = _filter_top_k(y_pred, top_k)
    if class_id is not None:
        y_true = y_true[..., class_id]
        y_pred = y_pred[..., class_id]

    thresholds = to_list(thresholds)
    num_thresholds = len(thresholds)
    num_predictions = array_ops.size(y_pred)

    # Flatten predictions and labels into single rows.
    flat_predictions = array_ops.reshape(y_pred, [1, -1])
    flat_labels = array_ops.reshape(
        math_ops.cast(y_true, dtype=dtypes.bool), [1, -1])

    # Build [num_thresholds, num_predictions] grids: thresholds repeated
    # along the columns, predictions and labels repeated along the rows.
    threshold_column = array_ops.expand_dims(
        array_ops.constant(thresholds), 1)
    thresh_tiled = array_ops.tile(threshold_column,
                                  array_ops.stack([1, num_predictions]))
    preds_tiled = array_ops.tile(flat_predictions, [num_thresholds, 1])
    pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)
    label_is_pos = array_ops.tile(flat_labels, [num_thresholds, 1])

    if sample_weight is None:
        weights_tiled = None
    else:
        weights = weights_broadcast_ops.broadcast_weights(
            math_ops.cast(sample_weight, dtype=dtypes.float32), y_pred)
        weights_tiled = array_ops.tile(array_ops.reshape(weights, [1, -1]),
                                       [num_thresholds, 1])

    def _accumulate(label_mask, pred_mask, cell_weights, variable):
        """Adds the (weighted) per-threshold match count to `variable`."""
        matches = math_ops.cast(math_ops.logical_and(label_mask, pred_mask),
                                dtype=dtypes.float32)
        if cell_weights is not None:
            matches *= cell_weights
        return variable.assign_add(math_ops.reduce_sum(matches, 1))

    wants_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
    wants_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
    wants_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

    # Negated masks are only created when a requested entry needs them.
    conditions = {
        ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
    }
    if wants_fn or wants_tn:
        pred_is_neg = math_ops.logical_not(pred_is_pos)
        conditions[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos,
                                                       pred_is_neg)
    if wants_fp or wants_tn:
        label_is_neg = math_ops.logical_not(label_is_pos)
        conditions[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg,
                                                       pred_is_pos)
        if wants_tn:
            conditions[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg,
                                                          pred_is_neg)

    # Entries prepared only as a by-product (e.g. FN when only TN was asked
    # for) are filtered out here.
    update_ops = [
        _accumulate(label_mask, pred_mask, weights_tiled,
                    variables_to_update[matrix_cond])
        for matrix_cond, (label_mask, pred_mask) in conditions.items()
        if matrix_cond in variables_to_update
    ]
    return control_flow_ops.group(update_ops)
コード例 #47
0
ファイル: losses_utils.py プロジェクト: zyx5256/tensorflow
def squeeze_or_expand_dimensions(y_pred, y_true=None, sample_weight=None):
    """Squeeze or expand last dimension if needed.

    1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
    (using `remove_squeezable_dimensions`).
    2. Squeezes or expands last dim of `sample_weight` if its rank differs by
    1 from the new rank of `y_pred`.
    If `sample_weight` is scalar, it is kept scalar.

    This will use static shape if available. Otherwise, it will add graph
    operations, which could result in a performance hit.

    Args:
      y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
      y_true: Optional label `Tensor` whose dimensions match `y_pred`.
      sample_weight: Optional weight scalar or `Tensor` whose dimensions match
        `y_pred`. Its `.shape` attribute is read directly, so it must already
        be a tensor-like object.

    Returns:
      Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly
      has the last dimension squeezed,
      `sample_weight` could be extended by one dimension.
      If `sample_weight` is None, (y_pred, y_true) is returned.
    """
    y_pred_shape = y_pred.shape
    y_pred_rank = y_pred_shape.ndims
    if y_true is not None:

        # If sparse matrix is provided as `y_true`, the last dimension in `y_pred`
        # may be > 1. Eg: y_true = [0, 1, 2] (shape=(3,)),
        # y_pred = [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]] (shape=(3, 3))
        # In this case, we should not try to remove squeezable dimension.
        y_true_shape = y_true.shape
        y_true_rank = y_true_shape.ndims
        if (y_true_rank is not None) and (y_pred_rank is not None):
            # Use static rank for `y_true` and `y_pred`.
            if (y_pred_rank - y_true_rank != 1) or y_pred_shape[-1] == 1:
                y_true, y_pred = remove_squeezable_dimensions(y_true, y_pred)
        else:
            # Use dynamic rank.
            rank_diff = array_ops.rank(y_pred) - array_ops.rank(y_true)
            squeeze_dims = lambda: remove_squeezable_dimensions(  # pylint: disable=g-long-lambda
                y_true, y_pred)
            is_last_dim_1 = math_ops.equal(1, array_ops.shape(y_pred)[-1])
            maybe_squeeze_dims = lambda: control_flow_ops.cond(  # pylint: disable=g-long-lambda
                is_last_dim_1, squeeze_dims, lambda: (y_true, y_pred))
            y_true, y_pred = control_flow_ops.cond(
                math_ops.equal(1, rank_diff), maybe_squeeze_dims, squeeze_dims)

        # Step 1 may have squeezed `y_pred`, so re-read its static rank.
        # Step 2 must compare the weights against the *new* rank; the
        # pre-squeeze value would make the static branch below expand or
        # skip squeezing `sample_weight` relative to a shape `y_pred` no
        # longer has. When the rank is no longer statically known this
        # becomes None and the dynamic branch (which already uses the
        # updated `y_pred`) is taken.
        y_pred_rank = y_pred.shape.ndims

    if sample_weight is None:
        return y_pred, y_true

    weights_shape = sample_weight.shape
    weights_rank = weights_shape.ndims
    if weights_rank == 0:  # If weights is scalar, do nothing.
        return y_pred, y_true, sample_weight

    if (y_pred_rank is not None) and (weights_rank is not None):
        # Use static rank.
        if weights_rank - y_pred_rank == 1:
            sample_weight = array_ops.squeeze(sample_weight, [-1])
        elif y_pred_rank - weights_rank == 1:
            sample_weight = array_ops.expand_dims(sample_weight, [-1])
        return y_pred, y_true, sample_weight

    # Use dynamic rank.
    weights_rank_tensor = array_ops.rank(sample_weight)
    rank_diff = weights_rank_tensor - array_ops.rank(y_pred)
    maybe_squeeze_weights = lambda: array_ops.squeeze(sample_weight, [-1])

    def _maybe_expand_weights():
        # Weights one rank short of `y_pred` get a trailing size-1 axis;
        # any other rank difference leaves them untouched.
        expand_weights = lambda: array_ops.expand_dims(sample_weight, [-1])
        return control_flow_ops.cond(math_ops.equal(rank_diff, -1),
                                     expand_weights, lambda: sample_weight)

    def _maybe_adjust_weights():
        # Weights one rank above `y_pred` lose their last axis; otherwise
        # fall through to the expansion check.
        return control_flow_ops.cond(math_ops.equal(rank_diff,
                                                    1), maybe_squeeze_weights,
                                     _maybe_expand_weights)

    # squeeze or expand last dim of `sample_weight` if its rank differs by 1
    # from the new rank of `y_pred`.
    sample_weight = control_flow_ops.cond(
        math_ops.equal(weights_rank_tensor, 0), lambda: sample_weight,
        _maybe_adjust_weights)
    return y_pred, y_true, sample_weight
コード例 #48
0
ファイル: lstm_ops.py プロジェクト: flavz27/master_PA
  def call(self, inputs, initial_state=None, dtype=None, sequence_length=None):
    """Run this LSTM on inputs, starting from the given state.

    Args:
      inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`,
        or a list of tensors that is stacked into one.
      initial_state: a tuple `(initial_cell_state, initial_output)` with tensors
        of shape `[batch_size, self._num_units]`. If this is not provided, a
        zero initial state of type `dtype` is created.
      dtype: The data type for the initial state and expected output. Required
        if `initial_state` is not provided or RNN state has a heterogeneous
        dtype.
      sequence_length: Specifies the length of each sequence in inputs. An
        `int32` or `int64` vector (tensor) size `[batch_size]`, values in
        `[0, time_len)`.
        Defaults to `time_len` for each element.

    Returns:
      A pair containing:

      - Output: A `3-D` tensor of shape `[time_len, batch_size, output_size]`
        or a list of time_len tensors of shape `[batch_size, output_size]`,
        to match the type of the `inputs`.
      - Final state: a tuple `(cell_state, output)` matching `initial_state`.

    Raises:
      ValueError: in case of shape mismatches
    """
    inputs_were_list = isinstance(inputs, list)
    if inputs_were_list:
      inputs = array_ops.stack(inputs)

    inputs_shape = inputs.get_shape().with_rank(3)
    if not inputs_shape[2]:
      raise ValueError("Expecting inputs_shape[2] to be set: %s" % inputs_shape)

    def _static_or_dynamic_dim(axis):
      """Returns the static size of `axis`, or a slice of the runtime shape."""
      static_size = inputs_shape.dims[axis].value
      if static_size is None:
        return array_ops.shape(inputs)[axis]
      return static_size

    batch_size = _static_or_dynamic_dim(1)
    time_len = _static_or_dynamic_dim(0)

    # Fill in whichever of `initial_state` / `dtype` the caller left out.
    if initial_state is not None:
      if len(initial_state) != 2:
        raise ValueError(
            "Expecting initial_state to be a tuple with length 2 or None")
      if dtype is None:
        dtype = initial_state[0].dtype
    else:
      if dtype is None:
        raise ValueError("Either initial_state or dtype needs to be specified")
      zero_state = array_ops.zeros(
          array_ops.stack([batch_size, self.num_units]), dtype=dtype)
      initial_state = zero_state, zero_state

    # Run the cell over the whole input.
    if sequence_length is not None:
      sequence_length = ops.convert_to_tensor(sequence_length)
    initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
    cell_states, outputs = self._call_cell(
        inputs, initial_cell_state, initial_output, dtype, sequence_length)

    if sequence_length is None:
      # No sequence_lengths used: final state is the last state
      final_cell_state = cell_states[-1]
      final_output = outputs[-1]
    else:
      # Zero the outputs that lie beyond each sequence's length. The mask is
      # transposed to put time first, then repeated across the unit axis.
      time_major_mask = array_ops.transpose(
          array_ops.sequence_mask(sequence_length, time_len, dtype=dtype),
          [1, 0])
      mask = array_ops.tile(
          array_ops.expand_dims(time_major_mask, [-1]),
          [1, 1, self.num_units])
      outputs *= mask
      # Prepend the initial states so that indexing works correctly: the
      # last valid state sits at sequence_length - 1, which can even be -1,
      # corresponding to the initial state.
      padded_cell_states = array_ops.concat(
          [array_ops.expand_dims(initial_cell_state, [0]), cell_states], 0)
      padded_outputs = array_ops.concat(
          [array_ops.expand_dims(initial_output, [0]), outputs], 0)
      final_cell_state = self._gather_states(padded_cell_states,
                                             sequence_length, batch_size)
      final_output = self._gather_states(padded_outputs, sequence_length,
                                         batch_size)

    if inputs_were_list:
      # Input was a list, so return a list
      outputs = array_ops.unstack(outputs)

    final_state = rnn_cell_impl.LSTMStateTuple(final_cell_state, final_output)
    return outputs, final_state
コード例 #49
0
ファイル: metrics.py プロジェクト: tomchen1000/tensorflow
def squeeze_or_expand_dimensions(y_pred, y_true, sample_weight):
  """Squeeze or expand last dimension if needed.

  1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
  (using `confusion_matrix.remove_squeezable_dimensions`).
  2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1
  from the new rank of `y_pred`.
  If `sample_weight` is scalar, it is kept scalar.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
    y_true: Optional label `Tensor` whose dimensions match `y_pred`.
    sample_weight: Optional weight scalar or `Tensor` whose dimensions match
      `y_pred`. Converted to a tensor when it is not `None`.

  Returns:
    Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has
    the last dimension squeezed,
    `sample_weight` could be extended by one dimension.
  """
  if y_true is not None:
    # squeeze last dim of `y_pred` or `y_true` if their rank differs by 1
    y_true, y_pred = confusion_matrix.remove_squeezable_dimensions(
        y_true, y_pred)

  if sample_weight is None:
    return y_pred, y_true, None

  sample_weight = ops.convert_to_tensor(sample_weight)
  weights_rank = sample_weight.get_shape().ndims
  if weights_rank == 0:  # Scalar weights are returned untouched.
    return y_pred, y_true, sample_weight

  y_pred_rank = y_pred.get_shape().ndims
  if y_pred_rank is None or weights_rank is None:
    # At least one rank is unknown statically, so the decision is made with
    # graph ops.
    weights_rank_tensor = array_ops.rank(sample_weight)
    rank_diff = weights_rank_tensor - array_ops.rank(y_pred)

    def _unchanged():
      return sample_weight

    def _squeezed():
      return array_ops.squeeze(sample_weight, [-1])

    def _expanded():
      return array_ops.expand_dims(sample_weight, [-1])

    def _expand_if_one_short():
      return control_flow_ops.cond(
          math_ops.equal(rank_diff, -1), _expanded, _unchanged)

    def _squeeze_or_expand():
      return control_flow_ops.cond(
          math_ops.equal(rank_diff, 1), _squeezed, _expand_if_one_short)

    # squeeze or expand last dim of `sample_weight` if its rank differs by 1
    # from the new rank of `y_pred`; runtime-scalar weights are kept as-is.
    sample_weight = control_flow_ops.cond(
        math_ops.equal(weights_rank_tensor, 0), _unchanged,
        _squeeze_or_expand)
    return y_pred, y_true, sample_weight

  # Both ranks are known statically.
  rank_gap = weights_rank - y_pred_rank
  if rank_gap == 1:
    sample_weight = array_ops.squeeze(sample_weight, [-1])
  elif rank_gap == -1:
    sample_weight = array_ops.expand_dims(sample_weight, [-1])
  return y_pred, y_true, sample_weight
コード例 #50
0
ファイル: losses_utils.py プロジェクト: zyx5256/tensorflow
 def _maybe_expand_weights():
     """Appends a trailing axis to `sample_weight` when `rank_diff` is -1.

     Both `sample_weight` and `rank_diff` come from the enclosing scope. The
     choice is made with a graph-level conditional; for any other
     `rank_diff` the weights are returned unchanged.
     """
     one_rank_short = math_ops.equal(rank_diff, -1)
     return control_flow_ops.cond(
         one_rank_short,
         lambda: array_ops.expand_dims(sample_weight, [-1]),
         lambda: sample_weight)
コード例 #51
0
def _beam_search_step(time, logits, beam_state, batch_size, beam_width,
                      end_token, length_penalty_weight):
    """Advances beam search decoding by a single time step.

    Args:
      time: Beam search time step, should start at 0. While `time` is not
        greater than 0, only the first beam is considered for continuations.
      logits: Logits at the current time step. The size of the last
        dimension is read statically and used as the vocabulary size.
      beam_state: Current state of the beam search. Its `log_probs`,
        `lengths`, `finished` and `cell_state` fields are read.
      batch_size: The batch size for this input.
      beam_width: The size of the beams.
      end_token: The int32 end token.
      length_penalty_weight: Float weight to penalize length, forwarded to
        `_get_scores`. Disabled with 0.0.

    Returns:
      A tuple `(output, next_state)`: a `BeamSearchDecoderOutput` with the
      selected scores, predicted ids and parent beam ids, and a
      `BeamSearchDecoderState` with the updated log probs, lengths and
      finished flags (`cell_state` is copied over from `beam_state`).
    """
    known_batch_size = tensor_util.constant_value(batch_size)

    # State carried over from the previous step.
    current_lengths = beam_state.lengths
    finished_before = beam_state.finished

    # Total log probability of every candidate continuation.
    # Final Shape: [batch_size, beam_width, vocab_size]
    step_log_probs = _mask_probs(
        nn_ops.log_softmax(logits), end_token, finished_before)
    candidate_log_probs = (
        array_ops.expand_dims(beam_state.log_probs, 2) + step_log_probs)

    # Length of every candidate: the one-hot below is 0 at the end token and
    # 1 everywhere else, and beams that were already finished add nothing.
    vocab_size = logits.get_shape().as_list()[-1]
    end_token_grid = array_ops.tile(
        array_ops.reshape(end_token, [1, 1]), [batch_size, beam_width])
    length_increments = array_ops.one_hot(end_token_grid, vocab_size, 0, 1)
    still_active = 1 - math_ops.to_int32(finished_before)
    length_increments = (
        array_ops.expand_dims(still_active, 2) * length_increments)
    candidate_lengths = (
        array_ops.expand_dims(current_lengths, 2) + length_increments)

    # Score every candidate.
    scores = _get_scores(
        log_probs=candidate_log_probs,
        sequence_lengths=candidate_lengths,
        length_penalty_weight=length_penalty_weight)

    all_beam_scores = array_ops.reshape(scores, [batch_size, -1])
    # During the first time step only the initial beam is considered.
    past_first_step = ops.convert_to_tensor(time) > 0
    flat_scores = control_flow_ops.cond(
        past_first_step, lambda: all_beam_scores, lambda: scores[:, 0])

    # Keep the `beam_width` best candidates per batch entry.
    next_beam_scores, word_indices = nn_ops.top_k(flat_scores, k=beam_width)
    selection_shape = [known_batch_size, beam_width]
    next_beam_scores.set_shape(selection_shape)
    word_indices.set_shape(selection_shape)

    # Look up the log probs of the chosen candidates, then split each flat
    # index into the word id and the beam it continues.
    next_beam_probs = _tensor_gather_helper(
        gather_indices=word_indices,
        gather_from=candidate_log_probs,
        range_input=batch_size,
        range_size=beam_width * vocab_size,
        final_shape=[known_batch_size, beam_width])

    next_word_ids = math_ops.to_int32(word_indices % vocab_size)
    next_beam_ids = math_ops.to_int32(word_indices / vocab_size)

    # A new beam is finished if its parent was, or if it just emitted the
    # end token.
    parent_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=finished_before,
        range_input=batch_size,
        range_size=beam_width,
        final_shape=[known_batch_size, beam_width])
    emitted_end_token = math_ops.equal(next_word_ids, end_token)
    next_finished = math_ops.logical_or(parent_finished, emitted_end_token)

    # Length of the next predictions:
    # 1. Finished beams remain unchanged
    # 2. Beams that are now finished (EOS predicted) remain unchanged
    # 3. Beams that are not yet finished have their length increased by 1
    not_end_token = math_ops.to_int32(
        math_ops.not_equal(next_word_ids, end_token))
    length_delta = (1 - math_ops.to_int32(next_finished)) * not_end_token
    parent_lengths = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=beam_state.lengths,
        range_input=batch_size,
        range_size=beam_width,
        final_shape=[known_batch_size, beam_width])
    next_prediction_len = parent_lengths + length_delta

    next_state = BeamSearchDecoderState(
        cell_state=beam_state.cell_state,
        log_probs=next_beam_probs,
        lengths=next_prediction_len,
        finished=next_finished)

    output = BeamSearchDecoderOutput(
        scores=next_beam_scores,
        predicted_ids=next_word_ids,
        parent_ids=next_beam_ids)

    return output, next_state
コード例 #52
0
    def posterior_from_prior_state(self, prior_state, prior_state_var,
                                   observation, observation_model,
                                   predicted_observations, observation_noise):
        """Compute a posterior over states given an observation.

        The posterior covariance is computed either with the simplified
        update or with a Joseph form update, depending on
        `self._simplified_posterior_covariance_computation`.

        Args:
          prior_state: Prior state mean [batch size x state dimension]
          prior_state_var: Prior state covariance [batch size x state
              dimension x state dimension]
          observation: The observed value corresponding to the predictions
              given [batch size x observation dimension]
          observation_model: The [batch size x observation dimension x model
              state dimension] Tensor indicating how a particular state is
              mapped to (pre-noise) observations for each part of the batch.
          predicted_observations: An (observation mean, observation variance)
              tuple computed based on the current state, usually the output
              of observed_from_state.
          observation_noise: A [batch size x observation dimension x
              observation dimension] or [observation dimension x observation
              dimension] Tensor with covariance matrices to use for each part
              of the batch (a two-dimensional input will be broadcast). Only
              used by the Joseph form update.

        Returns:
          Posterior mean and covariance (dimensions matching the first two
          arguments).

        """
        observed_mean, observed_var = predicted_observations
        # Difference between the actual observation and its predicted mean.
        residual = observation - observed_mean
        # TODO(allenl): Can more of this be done using matrix_solve_ls?
        kalman_solve_rhs = math_ops.matmul(observation_model,
                                           prior_state_var,
                                           adjoint_b=True)
        # This matrix_solve adjoint doesn't make a difference symbolically (since
        # observed_var is a covariance matrix, and should be symmetric), but
        # filtering on multivariate series is unstable without it. See
        # test_multivariate_symmetric_covariance_float64 in kalman_filter_test.py
        # for an example of the instability (fails with adjoint=False).
        kalman_gain_transposed = linalg_ops.matrix_solve(matrix=observed_var,
                                                         rhs=kalman_solve_rhs,
                                                         adjoint=True)
        # Posterior mean: prior mean plus the gain applied to the residual.
        # The residual gets a trailing axis for the matmul, which is squeezed
        # away again afterwards.
        posterior_state = prior_state + array_ops.squeeze(math_ops.matmul(
            kalman_gain_transposed,
            array_ops.expand_dims(residual, -1),
            adjoint_a=True),
                                                          axis=[-1])
        # kalman_gain * observation_model
        gain_obs = math_ops.matmul(kalman_gain_transposed,
                                   observation_model,
                                   adjoint_a=True)
        # Identity sized by dimension 1 of gain_obs; the `[None]` index adds
        # a leading axis of size 1 so it broadcasts in the subtraction below.
        identity_extradim = linalg_ops.eye(array_ops.shape(gain_obs)[1],
                                           dtype=gain_obs.dtype)[None]
        identity_minus_factor = identity_extradim - gain_obs
        if self._simplified_posterior_covariance_computation:
            # posterior covariance =
            #   (I - kalman_gain * observation_model) * prior_state_var
            posterior_state_var = math_ops.matmul(identity_minus_factor,
                                                  prior_state_var)
        else:
            observation_noise = ops.convert_to_tensor(observation_noise)
            # A Joseph form update, which provides better numeric stability than the
            # simplified optimal Kalman gain update, at the cost of a few extra
            # operations. Joseph form updates are valid for any gain (not just the
            # optimal Kalman gain), and so are more forgiving of numerical errors in
            # computing the optimal Kalman gain.
            #
            # posterior covariance =
            #   (I - kalman_gain * observation_model) * prior_state_var
            #     * (I - kalman_gain * observation_model)^T
            #   + kalman_gain * observation_noise * kalman_gain^T
            left_multiplied_state_var = math_ops.matmul(
                identity_minus_factor, prior_state_var)
            multiplied_state_var = math_ops.matmul(identity_minus_factor,
                                                   left_multiplied_state_var,
                                                   adjoint_b=True)

            # Noise term for an `observation_noise` that is not rank 2 (i.e.
            # one covariance matrix per batch entry): plain batched matmul.
            def _batch_observation_noise_update():
                return (multiplied_state_var + math_ops.matmul(
                    math_ops.matmul(kalman_gain_transposed,
                                    observation_noise,
                                    adjoint_a=True), kalman_gain_transposed))

            # Noise term for a rank-2 `observation_noise`: a single matrix
            # applied to every batch entry via batch_times_matrix.
            def _matrix_observation_noise_update():
                return (multiplied_state_var + math_ops.matmul(
                    math_utils.batch_times_matrix(
                        kalman_gain_transposed, observation_noise, adj_x=True),
                    kalman_gain_transposed))

            if observation_noise.get_shape().ndims is None:
                # Rank unknown at graph construction time: choose the branch
                # at run time based on the dynamic rank.
                posterior_state_var = control_flow_ops.cond(
                    math_ops.equal(array_ops.rank(observation_noise),
                                   2), _matrix_observation_noise_update,
                    _batch_observation_noise_update)
            else:
                # If static shape information exists, it gets checked in each cond()
                # branch, so we need a special case to avoid graph-build-time
                # exceptions.
                if observation_noise.get_shape().ndims == 2:
                    posterior_state_var = _matrix_observation_noise_update()
                else:
                    posterior_state_var = _batch_observation_noise_update()
        return posterior_state, posterior_state_var
コード例 #53
0
ファイル: dirichlet.py プロジェクト: ytsheng/tensorflow
 def _mean(self):
     """Return `alpha` divided by `alpha_sum` expanded with a trailing axis."""
     numerator = self.alpha
     denominator = array_ops.expand_dims(self.alpha_sum, -1)
     return numerator / denominator
コード例 #54
0
def embedding_lookup_sparse(params,
                            sp_ids,
                            sp_weights,
                            partition_strategy="mod",
                            name=None,
                            combiner=None,
                            max_norm=None):
    """Computes embeddings for the given ids and weights.

    This op assumes that there is at least one id for each row in the dense
    tensor represented by sp_ids (i.e. there are no rows with empty features),
    and that all the indices of sp_ids are in canonical row-major order.

    It also assumes that all id values lie in the range [0, p0), where p0
    is the sum of the size of params along dimension 0.

    Args:
      params: A single tensor representing the complete embedding tensor,
        or a list of P tensors all of same shape except for the first
        dimension, representing sharded embedding tensors.  Alternatively, a
        `PartitionedVariable`, created by partitioning along dimension 0. Each
        element must be appropriately sized for the given
        `partition_strategy`.
      sp_ids: N x M SparseTensor of int64 ids (typically from
        FeatureValueToId), where N is typically batch size and M is arbitrary.
      sp_weights: either a SparseTensor of float / double weights, or None to
        indicate all weights should be taken to be 1. If specified, sp_weights
        must have exactly the same shape and indices as sp_ids.
      partition_strategy: A string specifying the partitioning strategy,
        relevant if `len(params) > 1`. Currently `"div"` and `"mod"` are
        supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more
        details.
      name: Optional name for the op.
      combiner: A string specifying the reduction op. Currently "mean",
        "sqrtn" and "sum" are supported. If None, a warning is logged and
        "mean" is used.
        "sum" computes the weighted sum of the embedding results for each row.
        "mean" is the weighted sum divided by the total weight.
        "sqrtn" is the weighted sum divided by the square root of the sum of
        the squares of the weights.
      max_norm: If provided, each embedding is normalized to have l2 norm
        equal to max_norm before combining.

    Returns:
      A dense tensor representing the combined embeddings for the
      sparse ids. For each row in the dense tensor represented by sp_ids, the
      op looks up the embeddings for all ids in that row, multiplies them by
      the corresponding weight, and combines these embeddings as specified.

      In other words, if

        shape(combined params) = [p0, p1, ..., pm]

      and

        shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn]

      then

        shape(output) = [d0, d1, ..., dn-1, p1, ..., pm].

      For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are

        [0, 0]: id 1, weight 2.0
        [0, 1]: id 3, weight 0.5
        [1, 0]: id 0, weight 1.0
        [2, 3]: id 1, weight 3.0

      with `combiner`="mean", then the output will be a 3x20 matrix where

        output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5)
        output[1, :] = (params[0, :] * 1.0) / 1.0
        output[2, :] = (params[1, :] * 3.0) / 3.0

    Raises:
      TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither
        None nor SparseTensor.
      ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}.
    """
    if combiner is None:
        # No combiner requested: warn about the announced default change and
        # fall back to "mean".
        logging.warn("The default value of combiner will change from \"mean\" "
                     "to \"sqrtn\" after 2016/11/01.")
        combiner = "mean"
    if combiner not in ("mean", "sqrtn", "sum"):
        raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'")
    # Normalize `params` to a plain list of tensors/variables.
    if isinstance(params, variables.PartitionedVariable):
        params = list(params)  # Iterate to get the underlying Variables.
    if not isinstance(params, list):
        params = [params]
    if not isinstance(sp_ids, sparse_tensor.SparseTensor):
        raise TypeError("sp_ids must be SparseTensor")
    ignore_weights = sp_weights is None
    if not ignore_weights:
        if not isinstance(sp_weights, sparse_tensor.SparseTensor):
            raise TypeError("sp_weights must be either None or SparseTensor")
        # Static compatibility checks between the ids and the weights.
        sp_ids.values.get_shape().assert_is_compatible_with(
            sp_weights.values.get_shape())
        sp_ids.indices.get_shape().assert_is_compatible_with(
            sp_weights.indices.get_shape())
        sp_ids.dense_shape.get_shape().assert_is_compatible_with(
            sp_weights.dense_shape.get_shape())
        # TODO(yleon): Add enhanced node assertions to verify that sp_ids and
        # sp_weights have equal indices and shapes.

    with ops.name_scope(name, "embedding_lookup_sparse",
                        params + [sp_ids]) as name:
        # The first index column of every sparse entry (its row) serves as
        # the segment id for the reductions below.
        segment_ids = sp_ids.indices[:, 0]
        if segment_ids.dtype != dtypes.int32:
            segment_ids = math_ops.cast(segment_ids, dtypes.int32)

        ids = sp_ids.values
        if ignore_weights:
            # Without weights each distinct id is looked up only once; `idx`
            # maps every original position into the deduplicated `ids`.
            ids, idx = array_ops.unique(ids)
        else:
            idx = None

        embeddings = embedding_lookup(params,
                                      ids,
                                      partition_strategy=partition_strategy,
                                      max_norm=max_norm)
        if not ignore_weights:
            weights = sp_weights.values
            if weights.dtype != embeddings.dtype:
                weights = math_ops.cast(weights, embeddings.dtype)

            # Reshape weights to allow broadcast
            # (shape(weights) followed by rank(embeddings) - 1 ones).
            ones = array_ops.fill(
                array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1)
            bcast_weights_shape = array_ops.concat(
                [array_ops.shape(weights), ones], 0)

            orig_weights_shape = weights.get_shape()
            weights = array_ops.reshape(weights, bcast_weights_shape)

            # Set the weight shape, since after reshaping to bcast_weights_shape,
            # the shape becomes None.
            if embeddings.get_shape().ndims is not None:
                weights.set_shape(
                    orig_weights_shape.concatenate(
                        [1 for _ in range(embeddings.get_shape().ndims - 1)]))

            embeddings *= weights

            # In every branch the caller-visible `name` is attached to the
            # final op only.
            if combiner == "sum":
                embeddings = math_ops.segment_sum(embeddings,
                                                  segment_ids,
                                                  name=name)
            elif combiner == "mean":
                embeddings = math_ops.segment_sum(embeddings, segment_ids)
                weight_sum = math_ops.segment_sum(weights, segment_ids)
                embeddings = math_ops.div(embeddings, weight_sum, name=name)
            elif combiner == "sqrtn":
                embeddings = math_ops.segment_sum(embeddings, segment_ids)
                weights_squared = math_ops.pow(weights, 2)
                weight_sum = math_ops.segment_sum(weights_squared, segment_ids)
                weight_sum_sqrt = math_ops.sqrt(weight_sum)
                embeddings = math_ops.div(embeddings,
                                          weight_sum_sqrt,
                                          name=name)
            else:
                # Not reachable for combiners accepted by the check above.
                assert False, "Unrecognized combiner"
        else:
            # Unweighted path: combine the deduplicated embeddings through
            # `idx` with the sparse segment ops.
            assert idx is not None
            if combiner == "sum":
                embeddings = math_ops.sparse_segment_sum(embeddings,
                                                         idx,
                                                         segment_ids,
                                                         name=name)
            elif combiner == "mean":
                embeddings = math_ops.sparse_segment_mean(embeddings,
                                                          idx,
                                                          segment_ids,
                                                          name=name)
            elif combiner == "sqrtn":
                embeddings = math_ops.sparse_segment_sqrt_n(embeddings,
                                                            idx,
                                                            segment_ids,
                                                            name=name)
            else:
                # Not reachable for combiners accepted by the check above.
                assert False, "Unrecognized combiner"

        return embeddings
コード例 #55
0
def resize_images(images,
                  new_height,
                  new_width,
                  method=ResizeMethod.BILINEAR,
                  align_corners=False):
  """Resize `images` to `new_width`, `new_height` using the specified `method`.

  Resized images will be distorted if their original aspect ratio is not
  the same as `new_width`, `new_height`.  To avoid distortions see
  [`resize_image_with_crop_or_pad`](#resize_image_with_crop_or_pad).

  `method` can be one of:

  *   `ResizeMethod.BILINEAR`: [Bilinear interpolation.]
      (https://en.wikipedia.org/wiki/Bilinear_interpolation)
  *   `ResizeMethod.NEAREST_NEIGHBOR`: [Nearest neighbor interpolation.]
      (https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
  *   `ResizeMethod.BICUBIC`: [Bicubic interpolation.]
      (https://en.wikipedia.org/wiki/Bicubic_interpolation)
  *   `ResizeMethod.AREA`: Area interpolation.

  If both the current and the requested height/width are statically known
  and equal, the input is returned without adding a resize op.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or
            3-D Tensor of shape `[height, width, channels]`.
    new_height: scalar integer, either a Python integer or a Tensor.
    new_width: scalar integer, either a Python integer or a Tensor.
    method: ResizeMethod.  Defaults to `ResizeMethod.BILINEAR`.
    align_corners: bool. If true, exactly align all 4 corners of the input and
                   output. Defaults to `false`.

  Raises:
    ValueError: if the shape of `images` is incompatible with the
      shape arguments to this function
    ValueError: if `new_height` or `new_width` is not a scalar integer.
    ValueError: if an unsupported resize method is specified.

  Returns:
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """
  if images.get_shape().ndims is None:
    raise ValueError('\'images\' contains no shape.')
  # TODO(shlens): Migrate this functionality to the underlying Op's.
  is_batch = True
  if len(images.get_shape()) == 3:
    # Temporarily add a batch dimension; it is squeezed away before returning.
    is_batch = False
    images = array_ops.expand_dims(images, 0)

  _, height, width, _ = _ImageDimensions(images)

  # Handle tensor-valued sizes as well as Python integers.
  try:
    new_width = ops.convert_to_tensor(new_width, dtypes.int32,
                                      name='new_width')
    new_width.get_shape().assert_has_rank(0)
  except (TypeError, ValueError):
    raise ValueError('new_width must be a scalar integer')
  try:
    new_height = ops.convert_to_tensor(new_height, dtypes.int32,
                                       name='new_height')
    new_height.get_shape().assert_has_rank(0)
  except (TypeError, ValueError):
    raise ValueError('new_height must be a scalar integer')

  new_width_const = tensor_util.constant_value(new_width)
  new_height_const = tensor_util.constant_value(new_height)

  # Skip the resize only when the size is provably unchanged. Any of these
  # values may be None when it is not statically known (for instance
  # `constant_value` of a non-constant size tensor); without the explicit
  # guard, `None == None` would be taken as "same size" and the input would
  # be returned unresized.
  sizes_known = all(
      value is not None
      for value in (width, height, new_width_const, new_height_const))
  if sizes_known and width == new_width_const and height == new_height_const:
    if not is_batch:
      images = array_ops.squeeze(images, squeeze_dims=[0])
    return images

  new_size = array_ops.pack([new_height, new_width])

  if method == ResizeMethod.BILINEAR:
    images = gen_image_ops.resize_bilinear(images,
                                           new_size,
                                           align_corners=align_corners)
  elif method == ResizeMethod.NEAREST_NEIGHBOR:
    images = gen_image_ops.resize_nearest_neighbor(images,
                                                   new_size,
                                                   align_corners=align_corners)
  elif method == ResizeMethod.BICUBIC:
    images = gen_image_ops.resize_bicubic(images,
                                          new_size,
                                          align_corners=align_corners)
  elif method == ResizeMethod.AREA:
    images = gen_image_ops.resize_area(images,
                                       new_size,
                                       align_corners=align_corners)
  else:
    raise ValueError('Resize method is not implemented.')

  # NOTE(mrry): The shape functions for the resize ops cannot unpack
  # the packed values in `new_size`, so set the shape here.
  images.set_shape([None, new_height_const, new_width_const, None])

  if not is_batch:
    images = array_ops.squeeze(images, squeeze_dims=[0])
  return images
コード例 #56
0
ファイル: mel_ops.py プロジェクト: neuroph12/CNNDDDD
def linear_to_mel_weight_matrix(num_mel_bins=20,
                                num_spectrogram_bins=129,
                                sample_rate=8000,
                                lower_edge_hertz=125.0,
                                upper_edge_hertz=3800.0,
                                dtype=dtypes.float32,
                                name=None):
    """Builds a matrix warping linear-scale spectrograms to the [mel scale][mel].

    The result re-weights `num_spectrogram_bins` linearly sampled frequency
    bins covering `[0, sample_rate / 2]` into `num_mel_bins` bins covering
    `[lower_edge_hertz, upper_edge_hertz]` on the [mel scale][mel].

    Right-multiplying a spectrogram `S` of linear-scale spectrum values
    (e.g. STFT magnitudes) by the returned matrix `A` yields a
    "mel spectrogram" `M`:

        # `S` has shape [frames, num_spectrogram_bins]
        # `M` has shape [frames, num_mel_bins]
        M = tf.matmul(S, A)

    For a `Tensor` of arbitrary rank, use `tf.tensordot`:

        # S has shape [..., num_spectrogram_bins].
        # M has shape [..., num_mel_bins].
        M = tf.tensordot(S, A, 1)
        # tf.tensordot does not support shape inference for this case yet.
        M.set_shape(S.shape[:-1].concatenate(A.shape[-1:]))

    Args:
      num_mel_bins: Python int. Number of bands in the resulting mel
        spectrum.
      num_spectrogram_bins: An integer `Tensor`. Number of bins in the source
        spectrogram data, understood to be `fft_size // 2 + 1`, i.e. the
        spectrogram only contains the nonredundant FFT bins.
      sample_rate: Python float. Samples per second of the input signal used
        to create the spectrogram. It determines the actual frequency of
        each spectrogram bin, and hence how bins map onto the mel scale.
      lower_edge_hertz: Python float. Lower bound on the frequencies to be
        included in the mel spectrum; the lower edge of the lowest
        triangular band.
      upper_edge_hertz: Python float. The desired top edge of the highest
        frequency band.
      dtype: The `DType` of the result matrix. Must be a floating point type.
      name: An optional name for the operation.

    Returns:
      A `Tensor` of shape `[num_spectrogram_bins, num_mel_bins]`.

    Raises:
      ValueError: If num_mel_bins/num_spectrogram_bins/sample_rate are not
        positive, lower_edge_hertz is negative, frequency edges are
        incorrectly ordered, or upper_edge_hertz is larger than the Nyquist
        frequency.

    [mel]: https://en.wikipedia.org/wiki/Mel_scale
    """
    with ops.name_scope(name, 'linear_to_mel_weight_matrix') as scope:
        # `num_spectrogram_bins` is not validated here: it is handed to
        # `math_ops.linspace`, which already validates it (both in its shape
        # function and in its kernel).
        _validate_arguments(num_mel_bins, sample_rate, lower_edge_hertz,
                            upper_edge_hertz, dtype)

        # There are no Tensor inputs, so graph optimization can constant fold
        # this whole function.
        sample_rate_t = ops.convert_to_tensor(
            sample_rate, dtype, name='sample_rate')
        lower_hz = ops.convert_to_tensor(
            lower_edge_hertz, dtype, name='lower_edge_hertz')
        upper_hz = ops.convert_to_tensor(
            upper_edge_hertz, dtype, name='upper_edge_hertz')
        zero = ops.convert_to_tensor(0.0, dtype)

        # HTK excludes the spectrogram DC bin, so the first bin is dropped
        # here and padded back in as zeros at the end.
        num_dropped_bins = 1
        nyquist_hz = sample_rate_t / 2.0
        all_bin_frequencies = math_ops.linspace(zero, nyquist_hz,
                                                num_spectrogram_bins)
        kept_bin_frequencies = all_bin_frequencies[num_dropped_bins:]
        bin_mels = array_ops.expand_dims(
            _hertz_to_mel(kept_bin_frequencies), 1)

        # Each band is a (lower_edge, center, upper_edge) triple whose center
        # doubles as the edge of both neighbouring bands, so the range
        # [lower_edge_hertz, upper_edge_hertz] is sampled at num_mel_bins + 2
        # points and framed into overlapping triples.
        mel_edge_points = math_ops.linspace(
            _hertz_to_mel(lower_hz), _hertz_to_mel(upper_hz),
            num_mel_bins + 2)
        edge_triples = shape_ops.frame(
            mel_edge_points, frame_length=3, frame_step=1)

        # One [1, num_mel_bins] row per triple component.
        triple_columns = array_ops.split(edge_triples, 3, axis=1)
        lower_edge_mel, center_mel, upper_edge_mel = [
            array_ops.reshape(column, [1, num_mel_bins])
            for column in triple_columns
        ]

        # Rising and falling slopes for every spectrogram bin; the line
        # segments are linear in the mel domain, not in Hertz.
        rising = (bin_mels - lower_edge_mel) / (center_mel - lower_edge_mel)
        falling = (upper_edge_mel - bin_mels) / (upper_edge_mel - center_mel)

        # Intersect the two segments with each other and with zero.
        triangles = math_ops.minimum(rising, falling)
        weights = math_ops.maximum(zero, triangles)

        # Re-add the zeroed lower bins that were sliced out above.
        return array_ops.pad(
            weights, [[num_dropped_bins, 0], [0, 0]], name=scope)
コード例 #57
0
ファイル: gradients_test.py プロジェクト: prannayk/tensorflow
 def loop_fn(i):
     """Apply `model_fn` to entry `i` of `inp`, given a new leading axis."""
     selected = array_ops.gather(inp, i)
     batched = array_ops.expand_dims(selected, 0)
     return model_fn(batched)
コード例 #58
0
ファイル: linalg_grad.py プロジェクト: linyia01/tensorflow-1
def _SvdGrad(op, grad_s, grad_u, grad_v):
    """Gradient for the singular value decomposition.

    Args:
      op: The SVD op being differentiated. Its first input is the matrix `a`,
        its three outputs are read as `s`, `u` and `v`, and its `compute_uv`
        and `full_matrices` attributes select the code path.
      grad_s: Incoming gradient paired with `s`.
      grad_u: Incoming gradient paired with `u`.
      grad_v: Incoming gradient paired with `v`.

    Returns:
      The gradient with respect to `a`, with its static shape set to the
      shape of `a`.

    Raises:
      NotImplementedError: When `compute_uv` is set and either `a` is
        complex, the inner matrix dimensions are not statically known, or
        `full_matrices` is set and the inner dimensions differ by more
        than 1.
    """

    # The derivation for the compute_uv=False case, and most of
    # the derivation for the full_matrices=True case, are in
    # Giles' paper (see reference at top of file).  A derivation for
    # the full_matrices=False case is available at
    # https://j-towns.github.io/papers/svd-derivative.pdf
    a = op.inputs[0]
    a_shape = a.get_shape().with_rank_at_least(2)
    grad_s_mat = array_ops.matrix_diag(grad_s)

    if not op.get_attr("compute_uv"):
        # The op did not produce u and v, so recompute the decomposition to
        # obtain them; only grad_s contributes in this case.
        s, u, v = linalg_ops.svd(a, compute_uv=True)
        grad_a = math_ops.matmul(
            u, math_ops.matmul(grad_s_mat, v, adjoint_b=True))
        grad_a.set_shape(a_shape)
        return grad_a

    full_matrices = op.get_attr("full_matrices")

    # TODO(rmlarsen): Make this work with complex types.
    if a.dtype.is_complex:
        raise NotImplementedError(
            "SVD gradient is not implemented for complex types and "
            "compute_uv=True.")
    # Merge the static shape information of `a` with that of the incoming
    # gradients to get the most specific batch shape and inner dimensions.
    grad_u_shape = grad_u.get_shape().with_rank_at_least(2)
    grad_v_shape = grad_v.get_shape().with_rank_at_least(2)
    m = a_shape.dims[-2].merge_with(grad_u_shape[-2])
    n = a_shape.dims[-1].merge_with(grad_v_shape[-2])
    batch_shape = a_shape[:-2].merge_with(grad_u_shape[:-2]).merge_with(
        grad_v_shape[:-2])
    a_shape = batch_shape.concatenate([m, n])

    # From here on m and n are plain Python values (or None if unknown).
    m = a_shape.dims[-2].value
    n = a_shape.dims[-1].value
    # TODO(rmlarsen): Make this work with placeholders.
    if m is None or n is None:
        raise NotImplementedError(
            "SVD gradient has not been implemented for input with unknown "
            "inner matrix shape.")

    s = op.outputs[0]
    u = op.outputs[1]
    v = op.outputs[2]

    use_adjoint = False
    if m > n:
        # Compute the gradient for A^H = V * S^T * U^H, and (implicitly) take the
        # Hermitian transpose of the gradient at the end.
        # After the swaps below, m <= n holds for the rest of the function.
        use_adjoint = True
        m, n = n, m
        u, v = v, u
        grad_u, grad_v = grad_v, grad_u

    with ops.control_dependencies([grad_s, grad_u, grad_v]):
        if full_matrices and abs(m - n) > 1:
            raise NotImplementedError(
                "svd gradient is not implemented for abs(m - n) > 1 "
                "when full_matrices is True")
        s_mat = array_ops.matrix_diag(s)
        s2 = math_ops.square(s)

        # NOTICE: Because of the term involving f, the gradient becomes
        # infinite (or NaN in practice) when singular values are not unique.
        # Mathematically this should not be surprising, since for (k-fold)
        # degenerate singular values, the corresponding singular vectors are
        # only defined up a (k-dimensional) subspace. In practice, this can
        # lead to numerical instability when singular values are close but not
        # exactly equal.
        # Also, even with distinct singular values, the diagonal of f can have Inf
        # values before setting to zero, which hurt when differentiating through
        # this op. To avoid that, we add eye to the matrix before taking
        # the reciprocal.
        s_shape = array_ops.shape(s)
        eye = _linalg.eye(s_shape[-1], batch_shape=s_shape[:-1], dtype=s.dtype)
        # f[..., i, j] = 1 / (s2[j] - s2[i]) off the diagonal, 0 on it.
        f = array_ops.matrix_set_diag(
            math_ops.reciprocal(
                array_ops.expand_dims(s2, -2) - array_ops.expand_dims(s2, -1) +
                eye), array_ops.zeros_like(s))
        s_inv_mat = array_ops.matrix_diag(math_ops.reciprocal(s))

        # Only the first m columns of v (and of its gradient) enter term1.
        v1 = v[..., :, :m]
        grad_v1 = grad_v[..., :, :m]

        u_gu = math_ops.matmul(u, grad_u, adjoint_a=True)
        v_gv = math_ops.matmul(v1, grad_v1, adjoint_a=True)

        f_u = f * u_gu
        f_v = f * v_gv

        term1_nouv = (grad_s_mat +
                      math_ops.matmul(f_u + _linalg.adjoint(f_u), s_mat) +
                      math_ops.matmul(s_mat, f_v + _linalg.adjoint(f_v)))

        term1 = math_ops.matmul(
            u, math_ops.matmul(term1_nouv, v1, adjoint_b=True))

        if m == n:
            grad_a_before_transpose = term1
        else:
            # Non-square case: add the correction term built from grad_v1
            # (and, with full_matrices, from the remaining columns m:n of v).
            gv1t = array_ops.matrix_transpose(grad_v1)
            gv1t_v1 = math_ops.matmul(gv1t, v1)
            term2_nous = gv1t - math_ops.matmul(gv1t_v1, v1, adjoint_b=True)

            if full_matrices:
                v2 = v[..., :, m:n]
                grad_v2 = grad_v[..., :, m:n]

                v1t_gv2 = math_ops.matmul(v1, grad_v2, adjoint_a=True)
                term2_nous -= math_ops.matmul(v1t_gv2, v2, adjoint_b=True)

            u_s_inv = math_ops.matmul(u, s_inv_mat)
            term2 = math_ops.matmul(u_s_inv, term2_nous)

            grad_a_before_transpose = term1 + term2

        if use_adjoint:
            # Undo the A -> A^H substitution made when m > n.
            grad_a = array_ops.matrix_transpose(grad_a_before_transpose)
        else:
            grad_a = grad_a_before_transpose

        grad_a.set_shape(a_shape)
        return grad_a
コード例 #59
0
 def _sparse(i):
     """Build a `SparseTensorValue` of dense shape `[i]` from `i`.

     The indices are `range(i)` (int64) with a trailing axis added, and the
     values are a length-`i` vector filled with `i` itself.
     """
     positions = math_ops.range(i, dtype=dtypes.int64)
     index_column = array_ops.expand_dims(positions, 1)
     filled_values = array_ops.fill([math_ops.to_int32(i)], i)
     return sparse_tensor.SparseTensorValue(
         indices=index_column, values=filled_values, dense_shape=[i])
コード例 #60
0
def _parse_single_example_raw(serialized,
                              names=None,
                              sparse_keys=None,
                              sparse_types=None,
                              dense_keys=None,
                              dense_types=None,
                              dense_defaults=None,
                              dense_shapes=None,
                              name=None):
    """Parses a single `Example` proto.

    The scalar inputs are given a leading dimension of size 1, passed to
    `_parse_example_raw`, and that leading dimension is then removed again
    from every dense and sparse output.

    Args:
      serialized: A scalar string Tensor, a single serialized Example.
        See `_parse_example_raw` documentation for more details.
      names: (Optional) A scalar string Tensor, the associated name.
        See `_parse_example_raw` documentation for more details.
      sparse_keys: See `_parse_example_raw` documentation for more details.
      sparse_types: See `_parse_example_raw` documentation for more details.
      dense_keys: See `_parse_example_raw` documentation for more details.
      dense_types: See `_parse_example_raw` documentation for more details.
      dense_defaults: See `_parse_example_raw` documentation for more
        details.
      dense_shapes: See `_parse_example_raw` documentation for more details.
      name: A name for this operation (optional). Used for the enclosing
        name scope and also forwarded to `_parse_example_raw`.

    Returns:
      A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

    Raises:
      ValueError: if any feature is invalid, or if `serialized` or `names`
        has a statically known rank other than 0.
    """
    with ops.name_scope(name, "ParseSingleExample", [serialized, names]):
        serialized = ops.convert_to_tensor(serialized)
        serialized_rank = serialized.get_shape().ndims
        if serialized_rank is None:
            # Rank is not known statically: attach a run-time assertion so
            # the scalar requirement is still enforced when the op executes.
            serialized_is_scalar = control_flow_ops.Assert(
                math_ops.equal(array_ops.rank(serialized), 0),
                ["Input serialized must be a scalar"],
                name="SerializedIsScalar")
            serialized = control_flow_ops.with_dependencies(
                [serialized_is_scalar],
                serialized,
                name="SerializedDependencies")
        elif serialized_rank != 0:
            raise ValueError("Input serialized must be a scalar")
        batched_serialized = array_ops.expand_dims(serialized, 0)

        batched_names = names
        if names is not None:
            names = ops.convert_to_tensor(names)
            names_rank = names.get_shape().ndims
            if names_rank is None:
                # Same deferred check as for `serialized`.
                names_is_scalar = control_flow_ops.Assert(
                    math_ops.equal(array_ops.rank(names), 0),
                    ["Input names must be a scalar"],
                    name="NamesIsScalar")
                names = control_flow_ops.with_dependencies(
                    [names_is_scalar],
                    names,
                    name="NamesDependencies")
            elif names_rank != 0:
                raise ValueError("Input names must be a scalar")
            batched_names = array_ops.expand_dims(names, 0)

        outputs = _parse_example_raw(
            batched_serialized,
            names=batched_names,
            sparse_keys=sparse_keys,
            sparse_types=sparse_types,
            dense_keys=dense_keys,
            dense_types=dense_types,
            dense_defaults=dense_defaults,
            dense_shapes=dense_shapes,
            name=name)

        if dense_keys is not None:
            for dense_key in dense_keys:
                # Replace characters outside the allowed set so the key can
                # be embedded in an op name.
                op_suffix = re.sub("[^A-Za-z0-9_.\\-/]", "_", dense_key)
                # Drop the size-1 leading dimension added above.
                outputs[dense_key] = array_ops.squeeze(
                    outputs[dense_key], [0], name="Squeeze_%s" % op_suffix)

        if sparse_keys is not None:
            for sparse_key in sparse_keys:
                op_suffix = re.sub("[^A-Za-z0-9_.\\-/]", "_", sparse_key)
                batched_sparse = outputs[sparse_key]
                # Strip the first column of the indices and the first entry
                # of the shape, i.e. the parts that refer to the leading
                # dimension added above.
                unbatched_indices = array_ops.slice(
                    batched_sparse.indices, [0, 1], [-1, -1],
                    name="Slice_Indices_%s" % op_suffix)
                sparse_values = batched_sparse.values
                unbatched_shape = array_ops.slice(
                    batched_sparse.shape, [1], [-1],
                    name="Squeeze_Shape_%s" % op_suffix)
                outputs[sparse_key] = sparse_tensor.SparseTensor(
                    unbatched_indices, sparse_values, unbatched_shape)
        return outputs