def _GetVal(use_xla):
      with self.cached_session():
        t0 = array_ops.placeholder(np.float32, shape=input_sizes)
        t1 = constant_op.constant(filter_sizes, shape=[len(filter_sizes)])
        t2 = array_ops.placeholder(np.float32, shape=output_sizes)
        native_t0 = t0
        native_t2 = t2
        strides = [1, stride, stride, 1]

        if use_xla:
          if data_format == "NCHW":
            # Transpose from NWHC input to NCHW
            # Ex. [4, 5, 5, 48] to [4, 48, 5, 5]
            native_t0 = array_ops.transpose(t0, [0, 3, 1, 2])
            native_t2 = array_ops.transpose(t2, [0, 3, 1, 2])
            strides = [1, 1, stride, stride]
          with self.test_scope():
            backprop = nn_ops.depthwise_conv2d_native_backprop_filter(
                native_t0,
                t1,
                native_t2,
                strides=strides,
                padding=padding,
                data_format=data_format)
        else:
          # For CPU, the format NCHW is not supported. Therefore we always use
          # NHWC here.
          backprop = nn_ops.depthwise_conv2d_native_backprop_filter(
              native_t0, t1, native_t2, strides=strides, padding=padding)
        ret = backprop.eval({t0: x0, t2: x2})
        self.assertShapeEqual(ret, backprop)
        return ret
Exemple #2
0
def reduce_to_final(images, num_filters_out, nhidden=None, scope=None):
  """Reduce an image to a final state by running two LSTMs.

  Args:
    images: (num_images, height, width, depth) tensor
    num_filters_out: output layer depth
    nhidden: hidden layer depth (defaults to num_filters_out)
    scope: optional scope name

  Returns:
    A (num_images, num_filters_out) batch.
  """
  with variable_scope.variable_scope(scope, "ReduceToFinal", [images]):
    nhidden = nhidden or num_filters_out
    batch_size, height, width, depth = _shape(images)
    transposed = array_ops.transpose(images, [1, 0, 2, 3])
    reshaped = array_ops.reshape(transposed,
                                 [height, batch_size * width, depth])
    with variable_scope.variable_scope("reduce1"):
      reduced = lstm1d.sequence_to_final(reshaped, nhidden)
      transposed_hidden = array_ops.reshape(reduced,
                                            [batch_size, width, nhidden])
      hidden = array_ops.transpose(transposed_hidden, [1, 0, 2])
    with variable_scope.variable_scope("reduce2"):
      output = lstm1d.sequence_to_final(hidden, num_filters_out)
    return output
Exemple #3
0
def rot90(image, k=1):
  """Rotate an image counter-clockwise by 90 degrees.

  Args:
    image: A 3-D tensor of shape `[height, width, channels].`
    k: Number of times the image is rotated by 90 degrees.

  Returns:
    A rotated 3-D tensor of the same type and shape as `image`.
  """
  image = ops.convert_to_tensor(image, name='image')
  _Check3DImage(image, require_static=False)
  k %= 4
  if k == 0:
    return image
  elif k == 1:
    return array_ops.transpose(
        array_ops.reverse(image, [False, True, False]),
        [1, 0, 2], name='rot90')
  elif k == 2:
    return array_ops.reverse(image, [True, True, False], name='rot90')
  elif k == 3:
    return array_ops.reverse(
        array_ops.transpose(image, [1, 0, 2], name='rot90'),
        [False, True, False])
  def call(self, inputs):
    if self.data_format == 'channels_first':
      # Reshape to channels last
      inputs = array_ops.transpose(inputs, (0, 2, 3, 1))

    # Apply the actual ops.
    outputs = nn.separable_conv2d(
        inputs,
        self.depthwise_kernel,
        self.pointwise_kernel,
        strides=(1,) + self.strides + (1,),
        padding=self.padding.upper(),
        rate=self.dilation_rate)

    if self.data_format == 'channels_first':
      # Reshape to channels first
      outputs = array_ops.transpose(outputs, (0, 3, 1, 2))

    if self.bias:
      outputs = nn.bias_add(
          outputs,
          self.bias,
          data_format=utils.convert_data_format(self.data_format, ndim=4))

    if self.activation is not None:
      return self.activation(outputs)
    return outputs
Exemple #5
0
def _state_to_olabel_unique(labels, num_labels, states, unique):
  """Sum state log probs to ilabel log probs using unique label indices."""

  num_label_states = _get_dim(labels, 1) + 1
  label_states = states[:, :, 1:num_label_states]
  blank_states = states[:, :, num_label_states:]

  unique_y, unique_idx = unique
  mul_reduce = _sum_states(unique_idx, label_states)

  num_frames = states.shape[0]
  batch_size = states.shape[1]
  num_states = num_label_states - 1
  batch_state_major = array_ops.transpose(mul_reduce, perm=[1, 2, 0])
  batch_state_major = array_ops.reshape(
      batch_state_major, [batch_size * num_states, num_frames])
  batch_offset = math_ops.range(batch_size, dtype=unique_y.dtype) * num_labels
  indices = unique_y + array_ops.expand_dims(batch_offset, axis=-1)
  indices = array_ops.reshape(indices, [-1, 1])
  scatter = array_ops.scatter_nd(
      indices=indices,
      updates=batch_state_major,
      shape=[batch_size * num_labels, num_frames])
  scatter = array_ops.reshape(scatter, [batch_size, num_labels, num_frames])
  scatter = array_ops.where(
      math_ops.equal(scatter, 0.0),
      array_ops.fill(array_ops.shape(scatter), math_ops.log(0.0)),
      scatter)
  label_olabels = array_ops.transpose(scatter, [2, 0, 1])
  label_olabels = label_olabels[:, :, 1:]

  blank_olabels = math_ops.reduce_logsumexp(
      blank_states, axis=2, keepdims=True)

  return array_ops.concat([blank_olabels, label_olabels], axis=-1)
Exemple #6
0
def frames(signal, frame_length, frame_step, name=None):
  """Frame a signal into overlapping frames.

  May be used in front of spectral functions.

  For example:

  ```python
  pcm = tf.placeholder(tf.float32, [None, 9152])
  frames = tf.contrib.signal.frames(pcm, 512, 180)
  magspec = tf.abs(tf.spectral.rfft(frames, [512]))
  image = tf.expand_dims(magspec, 3)
  ```

  Args:
    signal: A `Tensor` of shape `[batch_size, signal_length]`.
    frame_length: An `int32` or `int64` `Tensor`. The length of each frame.
    frame_step: An `int32` or `int64` `Tensor`. The step between frames.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of frames with shape `[batch_size, num_frames, frame_length]`.

  Raises:
    ValueError: if signal does not have rank 2.
  """
  with ops.name_scope(name, "frames", [signal, frame_length, frame_step]):
    signal = ops.convert_to_tensor(signal, name="signal")
    frame_length = ops.convert_to_tensor(frame_length, name="frame_length")
    frame_step = ops.convert_to_tensor(frame_step, name="frame_step")

    signal_rank = signal.shape.ndims

    if signal_rank != 2:
      raise ValueError("expected signal to have rank 2 but was " + signal_rank)

    signal_length = array_ops.shape(signal)[1]

    num_frames = math_ops.ceil((signal_length - frame_length) / frame_step)
    num_frames = 1 + math_ops.cast(num_frames, dtypes.int32)

    pad_length = (num_frames - 1) * frame_step + frame_length
    pad_signal = array_ops.pad(signal, [[0, 0], [0,
                                                 pad_length - signal_length]])

    indices_frame = array_ops.expand_dims(math_ops.range(frame_length), 0)
    indices_frames = array_ops.tile(indices_frame, [num_frames, 1])

    indices_step = array_ops.expand_dims(
        math_ops.range(num_frames) * frame_step, 1)
    indices_steps = array_ops.tile(indices_step, [1, frame_length])

    indices = indices_frames + indices_steps

    # TODO(androbin): remove `transpose` when `gather` gets `axis` support
    pad_signal = array_ops.transpose(pad_signal)
    signal_frames = array_ops.gather(pad_signal, indices)
    signal_frames = array_ops.transpose(signal_frames, perm=[2, 0, 1])

    return signal_frames
Exemple #7
0
  def _SparseMatMul(t1, t2, transpose_a=False, transpose_b=False):
    """Helper function to create SparseMatMul op."""

    assert t1 in is_sparse and t2 in is_sparse
    t1_sparse = is_sparse[t1]
    t2_sparse = is_sparse[t2]
    if not t1_sparse and not t2_sparse:
      return math_ops.matmul(t1, t2,
                             transpose_a=transpose_a,
                             transpose_b=transpose_b)
    transpose_out = False
    if not t1_sparse:
      transpose_out = True
      t1, t2 = t2, t1
      t1_sparse, t2_sparse = t2_sparse, t1_sparse
      assert t1_sparse
      transpose_a, transpose_b = not transpose_b, not transpose_a

    if transpose_b:
      t2 = array_ops.transpose(t2)
      transpose_b = False
    m = math_ops.matmul(t1, t2,
                        transpose_a=transpose_a,
                        transpose_b=transpose_b,
                        a_is_sparse=t1_sparse,
                        b_is_sparse=t2_sparse)
    if transpose_out:
      m = array_ops.transpose(m)
    return m
Exemple #8
0
def separable_lstm(images, num_filters_out,
                   kernel_size=None, nhidden=None, scope=None):
  """Run bidirectional LSTMs first horizontally then vertically.

  Args:
    images: (num_images, height, width, depth) tensor
    num_filters_out: output layer depth
    kernel_size: A list of length 2 holding the [kernel_height, kernel_width] of
      of the pooling. Can be an int if both values are the same. Set to None for
      not using blocks
    nhidden: hidden layer depth
    scope: optional scope name

  Returns:
    (num_images, height/kernel_height, width/kernel_width,
    num_filters_out) tensor
  """
  with variable_scope.variable_scope(scope, "SeparableLstm", [images]):
    if nhidden is None:
      nhidden = num_filters_out
    if kernel_size is not None:
      images = get_blocks(images, kernel_size)
    hidden = horizontal_lstm(images, nhidden)
    with variable_scope.variable_scope("vertical"):
      transposed = array_ops.transpose(hidden, [0, 2, 1, 3])
      output_transposed = horizontal_lstm(transposed, num_filters_out)
    output = array_ops.transpose(output_transposed, [0, 2, 1, 3])
    return output
Exemple #9
0
  def _sample_n(self, n, seed):
    batch_shape = self.batch_shape_tensor()
    event_shape = self.event_shape_tensor()
    batch_ndims = array_ops.shape(batch_shape)[0]

    ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
    shape = array_ops.concat([[n], batch_shape, event_shape], 0)

    # Complexity: O(nbk**2)
    x = random_ops.random_normal(shape=shape,
                                 mean=0.,
                                 stddev=1.,
                                 dtype=self.dtype,
                                 seed=seed)

    # Complexity: O(nbk)
    # This parametrization is equivalent to Chi2, i.e.,
    # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
    expanded_df = self.df * array_ops.ones(
        self.scale_operator.batch_shape_tensor(),
        dtype=self.df.dtype.base_dtype)
    g = random_ops.random_gamma(shape=[n],
                                alpha=self._multi_gamma_sequence(
                                    0.5 * expanded_df, self.dimension),
                                beta=0.5,
                                dtype=self.dtype,
                                seed=distribution_util.gen_new_seed(
                                    seed, "wishart"))

    # Complexity: O(nbk**2)
    x = array_ops.matrix_band_part(x, -1, 0)  # Tri-lower.

    # Complexity: O(nbk)
    x = array_ops.matrix_set_diag(x, math_ops.sqrt(g))

    # Make batch-op ready.
    # Complexity: O(nbk**2)
    perm = array_ops.concat([math_ops.range(1, ndims), [0]], 0)
    x = array_ops.transpose(x, perm)
    shape = array_ops.concat([batch_shape, [event_shape[0]], [-1]], 0)
    x = array_ops.reshape(x, shape)

    # Complexity: O(nbM) where M is the complexity of the operator solving a
    # vector system. E.g., for LinearOperatorDiag, each matmul is O(k**2), so
    # this complexity is O(nbk**2). For LinearOperatorLowerTriangular,
    # each matmul is O(k^3) so this step has complexity O(nbk^3).
    x = self.scale_operator.matmul(x)

    # Undo make batch-op ready.
    # Complexity: O(nbk**2)
    shape = array_ops.concat([batch_shape, event_shape, [n]], 0)
    x = array_ops.reshape(x, shape)
    perm = array_ops.concat([[ndims - 1], math_ops.range(0, ndims - 1)], 0)
    x = array_ops.transpose(x, perm)

    if not self.cholesky_input_output_matrices:
      # Complexity: O(nbk^3)
      x = math_ops.matmul(x, x, adjoint_b=True)

    return x
def cudnn_lstm(inputs, input_h, input_c, kernel, recurrent_kernel, bias, units):
  inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
  input_h = array_ops.expand_dims(input_h, axis=0)
  input_c = array_ops.expand_dims(input_c, axis=0)

  params = _canonical_to_params(
      weights=[
          kernel[:, :units],
          kernel[:, units:units * 2],
          kernel[:, units * 2:units * 3],
          kernel[:, units * 3:],
          recurrent_kernel[:, :units],
          recurrent_kernel[:, units:units * 2],
          recurrent_kernel[:, units * 2:units * 3],
          recurrent_kernel[:, units * 3:],
      ],
      biases=[
          bias[:units],
          bias[units:units * 2],
          bias[units * 2:units * 3],
          bias[units * 3:units * 4],
          bias[units * 4:units * 5],
          bias[units * 5:units * 6],
          bias[units * 6:units * 7],
          bias[units * 7:],
      ],
      shape=constant_op.constant([-1]))

  outputs, h, c, _ = gen_cudnn_rnn_ops.cudnn_rnn(
      inputs, input_h=input_h, input_c=input_c, params=params)
  outputs = array_ops.transpose(outputs, perm=[1, 0, 2])
  h = h[0]
  c = c[0]
  return outputs, [h, c], constant_op.constant(
      'cudnn', dtype=dtypes.string, name='runtime')
Exemple #11
0
def _ExtractImagePatchesGrad(op, grad):
  batch_size, rows_in, cols_in, channels = [
      dim.value for dim in op.inputs[0].shape.dims
  ]
  input_bhwc = array_ops.shape(op.inputs[0])
  batch_size = input_bhwc[0]
  channels = input_bhwc[3]

  # Create indices matrix for input tensor.
  # Note that 0 is preserved for padding location,
  # so indices for input start from 1 to 1 + rows_in * cols_in.
  input_indices_num = 1 + rows_in * cols_in
  input_idx = array_ops.reshape(math_ops.range(1, input_indices_num,
                                               dtype=ops.dtypes.int64),
                                (1, rows_in, cols_in, 1))
  input_idx_patched = gen_array_ops.extract_image_patches(
      input_idx,
      op.get_attr("ksizes"),
      op.get_attr("strides"),
      op.get_attr("rates"),
      op.get_attr("padding"))

  # Create indices matrix for output tensor.
  _, rows_out, cols_out, _ = [dim.value for dim in op.outputs[0].shape.dims]
  _, ksize_r, ksize_c, _ = op.get_attr("ksizes")
  # Indices for output start from 0.
  output_indices_num = rows_out * cols_out * ksize_r * ksize_c
  output_idx = array_ops.reshape(math_ops.range(output_indices_num,
                                                dtype=ops.dtypes.int64),
                                 (1, rows_out, cols_out, ksize_r * ksize_c))

  # Construct mapping table for indices: (input -> output).
  idx_matrix = array_ops.concat(
      [array_ops.expand_dims(input_idx_patched, axis=-1),
       array_ops.expand_dims(output_idx, axis=-1)],
      axis=-1)
  idx_map = array_ops.reshape(idx_matrix, (-1, 2))

  sp_shape = (input_indices_num, output_indices_num)
  sp_mat_full = sparse_tensor.SparseTensor(
      idx_map,
      array_ops.ones([output_indices_num], dtype=grad.dtype),
      sp_shape)
  # Remove all padding locations [0, :].
  sp_mat = sparse_ops.sparse_slice(sp_mat_full,
                                   (1, 0),
                                   (input_indices_num - 1, output_indices_num))

  grad_expanded = array_ops.transpose(
      array_ops.reshape(
          grad, (batch_size, rows_out, cols_out, ksize_r, ksize_c, channels)),
      (1, 2, 3, 4, 0, 5))
  grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels))

  jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat)

  grad_out = array_ops.reshape(jac, (rows_in, cols_in, batch_size, channels))
  grad_out = array_ops.transpose(grad_out, (2, 0, 1, 3))

  return [grad_out]
  def make_inverse_update_ops(self):
    """Create and return update ops corresponding to registered computations."""
    ops = []

    num_inverses = len(self._inverses_by_damping)
    matrix_power_registered = bool(self._matpower_by_exp_and_damping)
    use_eig = (
        self._eigendecomp or matrix_power_registered or
        num_inverses >= EIGENVALUE_DECOMPOSITION_THRESHOLD)

    if use_eig:
      self.register_eigendecomp()  # ensures self._eigendecomp is set
      eigenvalues, eigenvectors = self._eigendecomp  # pylint: disable=unpacking-non-sequence

      for damping, inv in self._inverses_by_damping.items():
        ops.append(
            inv.assign(
                math_ops.matmul(eigenvectors / (eigenvalues + damping),
                                array_ops.transpose(eigenvectors))))

      for (exp, damping), matpower in self._matpower_by_exp_and_damping.items():
        ops.append(
            matpower.assign(
                math_ops.matmul(eigenvectors *
                                (eigenvalues + damping)**exp,
                                array_ops.transpose(eigenvectors))))
      # These ops share computation and should be run on a single device.
      ops = [control_flow_ops.group(*ops)]
    else:
      for damping, inv in self._inverses_by_damping.items():
        ops.append(inv.assign(utils.posdef_inv(self._cov, damping)))

    return ops
Exemple #13
0
  def make_inverse_update_ops(self):
    """Create and return update ops corresponding to registered computations."""
    ops = super(InverseProvidingFactor, self).make_inverse_update_ops()

    num_inverses = len(self._inverses_by_damping)
    matrix_power_registered = bool(self._matpower_by_exp_and_damping)
    use_eig = (self._eigendecomp or matrix_power_registered or
               num_inverses >= EIGENVALUE_DECOMPOSITION_THRESHOLD)

    if use_eig:
      self.register_eigendecomp()  # ensures self._eigendecomp is set
      eigenvalues, eigenvectors = self._eigendecomp  # pylint: disable=unpacking-non-sequence

      # The matrix self._cov is positive semidefinite by construction, but the
      # numerical eigenvalues could be negative due to numerical errors, so here
      # we clip them to be at least EIGENVALUE_CLIPPING_THRESHOLD.
      clipped_eigenvalues = math_ops.maximum(eigenvalues,
                                             EIGENVALUE_CLIPPING_THRESHOLD)

      for damping, inv in self._inverses_by_damping.items():
        ops.append(
            inv.assign(
                math_ops.matmul(eigenvectors / (clipped_eigenvalues + damping),
                                array_ops.transpose(eigenvectors))))

      for (exp, damping), matpower in self._matpower_by_exp_and_damping.items():
        ops.append(
            matpower.assign(
                math_ops.matmul(eigenvectors * (clipped_eigenvalues + damping)**
                                exp, array_ops.transpose(eigenvectors))))
    else:
      for damping, inv in self._inverses_by_damping.items():
        ops.append(inv.assign(utils.posdef_inv(self._cov, damping)))

    return ops
Exemple #14
0
def functional_rnn(cell, inputs, sequence_length=None,
                   initial_state=None, dtype=None, time_major=False,
                   scope=None, use_tpu=False):
  """Same interface as `tf.nn.dynamic_rnn`."""
  with variable_scope.variable_scope(scope or 'rnn'):
    if not time_major:
      inputs = nest.map_structure(
          lambda t: array_ops.transpose(t, [1, 0, 2]), inputs)
    inputs_flat = nest.flatten(inputs)
    batch_size = array_ops.shape(inputs_flat[0])[1]
    if initial_state is None:
      initial_state = cell.zero_state(batch_size, dtype)
    func_cell = _FunctionalRnnCell(cell, inputs, initial_state)
  if sequence_length is not None:
    max_length = math_ops.reduce_max(sequence_length)
  else:
    max_length = None
  extended_acc_state, extended_final_state = recurrent.Recurrent(
      theta=func_cell.theta,
      state0=func_cell.extended_initial_state,
      inputs=inputs,
      cell_fn=func_cell.cell_step,
      max_input_length=max_length,
      use_tpu=use_tpu)
  tf_output, tf_state = _PostProcessOutput(
      extended_acc_state, extended_final_state, func_cell,
      inputs_flat[0].shape[0], sequence_length)

  if time_major:
    tf_output = array_ops.transpose(tf_output, [1, 0, 2])
  return tf_output, tf_state
    def runFiniteDifferences(self, shapes, dtypes=(dtypes_lib.float32, dtypes_lib.float64), scalarTest=False):
        with self.test_session(use_gpu=False):
            for shape in shapes:
                for batch in False, True:
                    for dtype in dtypes:
                        if not scalarTest:
                            x = constant_op.constant(np.random.randn(shape[0], shape[1]), dtype)
                            tensor = math_ops.matmul(x, array_ops.transpose(x)) / shape[0]
                        else:
                            # This is designed to be a faster test for larger matrices.
                            x = constant_op.constant(np.random.randn(), dtype)
                            R = constant_op.constant(np.random.randn(shape[0], shape[1]), dtype)
                            e = math_ops.mul(R, x)
                            tensor = math_ops.matmul(e, array_ops.transpose(e)) / shape[0]

                        # Inner-most matrices in tensor are positive definite.
                        if batch:
                            tensor = array_ops.tile(array_ops.expand_dims(tensor, 0), [4, 1, 1])
                        y = linalg_ops.cholesky(tensor)
                        if scalarTest:
                            y = math_ops.reduce_mean(y)
                        error = gradient_checker.compute_gradient_error(x, x._shape_as_list(), y, y._shape_as_list())
                        tf_logging.info("error = %f", error)
                        if dtype == dtypes_lib.float64:
                            self.assertLess(error, 1e-5)
                        else:
                            self.assertLess(error, 3e-3)
def build_graph(device, input_shape, perm, datatype, num_iters):
  """builds a graph containing a sequence of conv2d operations.

  Args:
    device: String, the device to run on.
    input_shape: Shape of the input tensor.
    perm: A list of ints with the same length as input tensor's dimension.
    datatype: numpy data type of the input tensor.
    num_iters: number of iterations to run transpose.

  Returns:
    An array of tensors to run()
  """
  with ops.device("/%s:0" % device):
    total_size = np.prod(input_shape)
    inp = np.arange(1, total_size + 1, dtype=datatype).reshape(input_shape)
    t = constant_op.constant(inp, shape=input_shape)

    outputs = []
    transpose_op = array_ops.transpose(t, perm)
    outputs.append(transpose_op)
    for _ in range(1, num_iters):
      with ops.control_dependencies([transpose_op]):
        transpose_op = array_ops.transpose(t, perm)
        outputs.append(transpose_op)
    return control_flow_ops.group(*outputs)
def gather_tree_from_array(t, parent_ids, sequence_length):
  """Calculates the full beams for `TensorArray`s.

  Args:
    t: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of
      shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]`
      where `s` is the depth shape.
    parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`.
    sequence_length: The sequence length of shape `[batch_size, beam_width]`.

  Returns:
    A `Tensor` which is a stacked `TensorArray` of the same size and type as
    `t` and where beams are sorted in each `Tensor` according to `parent_ids`.
  """
  max_time = parent_ids.shape[0].value or array_ops.shape(parent_ids)[0]
  batch_size = parent_ids.shape[1].value or array_ops.shape(parent_ids)[1]
  beam_width = parent_ids.shape[2].value or array_ops.shape(parent_ids)[2]

  # Generate beam ids that will be reordered by gather_tree.
  beam_ids = array_ops.expand_dims(
      array_ops.expand_dims(math_ops.range(beam_width), 0), 0)
  beam_ids = array_ops.tile(beam_ids, [max_time, batch_size, 1])

  mask = array_ops.sequence_mask(
      sequence_length, maxlen=max_time, dtype=dtypes.int32)
  mask = array_ops.transpose(mask, perm=[2, 0, 1])

  # Use beam_width + 1 to mark the end of beam.
  masked_beam_ids = (beam_ids * mask) + (1 - mask) * (beam_width + 1)

  max_sequence_lengths = math_ops.to_int32(
      math_ops.reduce_max(sequence_length, axis=1))
  sorted_beam_ids = beam_search_ops.gather_tree(
      step_ids=masked_beam_ids,
      parent_ids=parent_ids,
      max_sequence_lengths=max_sequence_lengths,
      end_token=beam_width + 1)

  # For out of range steps, simply copy the same beam.
  sorted_beam_ids = array_ops.where(
      math_ops.cast(mask, dtypes.bool), x=sorted_beam_ids, y=beam_ids)

  # Generate indices for gather_nd.
  time_ind = array_ops.tile(array_ops.reshape(
      math_ops.range(max_time), [-1, 1, 1]), [1, batch_size, beam_width])
  batch_ind = array_ops.tile(array_ops.reshape(
      math_ops.range(batch_size), [-1, 1, 1]), [1, max_time, beam_width])
  batch_ind = array_ops.transpose(batch_ind, perm=[1, 0, 2])
  indices = array_ops.stack([time_ind, batch_ind, sorted_beam_ids], -1)

  # Gather from a tensor with collapsed additional dimensions.
  gather_from = t
  final_shape = array_ops.shape(gather_from)
  gather_from = array_ops.reshape(
      gather_from, [max_time, batch_size, beam_width, -1])
  ordered = array_ops.gather_nd(gather_from, indices)
  ordered = array_ops.reshape(ordered, final_shape)

  return ordered
 def testError(self):
   with self.assertRaises(ValueError):
     array_ops.transpose(
         np.arange(0., 30).reshape([2, 3, 5]), [[0, 1], [2, 3]])
   with self.assertRaises(ValueError):
     array_ops.transpose(np.arange(0., 30).reshape([2, 3, 5]), [0, 1, 3])
   self._testError(
       np.arange(0., 30).reshape([2, 3, 5]), [0, 1, 1], "2 is missing")
Exemple #19
0
def _GatherV2Grad(op, grad):
  """Gradient for GatherV2 op."""
  # params can be large, so colocate the shape calculation with it.
  #
  # params can be very large for sparse model, array_ops.shape raises
  # exception on the Windows platform when any dimension is larger than
  # int32. params_shape is not used in optimizer apply_sparse gradients,
  # so it's fine to convert it back to int32 regardless of truncation.
  params = op.inputs[0]
  with ops.colocate_with(params):
    params_shape = array_ops.shape(params, out_type=ops.dtypes.int64)
    params_shape = math_ops.to_int32(params_shape)

  indices = op.inputs[1]
  indices_size = array_ops.expand_dims(array_ops.size(indices), 0)
  axis = op.inputs[2]
  axis_static = tensor_util.constant_value(axis)

  # For axis 0 gathers, build an appropriately shaped IndexedSlices.
  if axis_static == 0:
    values_shape = array_ops.concat([indices_size, params_shape[1:]], 0)
    values = array_ops.reshape(grad, values_shape)
    indices = array_ops.reshape(indices, indices_size)
    return [ops.IndexedSlices(values, indices, params_shape), None, None]

  outer_shape = params_shape[:axis]
  outer_dims = array_ops.size(outer_shape)
  inner_shape = params_shape[axis:][1:]
  inner_dims = array_ops.size(inner_shape)

  outer_axes_indices = math_ops.range(outer_dims)
  inner_axes_indices = math_ops.range(outer_dims + 1,
                                      outer_dims + 1 + inner_dims)

  values_shape = array_ops.concat([outer_shape, indices_size, inner_shape], 0)
  values = array_ops.reshape(grad, values_shape)
  indices = array_ops.reshape(indices, indices_size)

  # We need to sum up every slice `values[..., i, ....]` corresponding to
  # `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not
  # support an axis parameter, we transpose the gather dimension to the front,
  # then use `unsorted_segment_sum` to build a
  # [gather_axis, outer_axes, inner_axes] tensor with all the gradients
  # affecting each index in `gather_axis` summed up.
  transpose_dims = array_ops.concat(
      [[outer_dims], outer_axes_indices, inner_axes_indices], 0)
  values_transpose = array_ops.transpose(values, transpose_dims)
  num_segments = params_shape[axis]

  params_grad = math_ops.unsorted_segment_sum(
      values_transpose, indices, num_segments)

  # Inverts the above transpose by moving dimension 0 back to its original
  # position.
  invert_transpose_dims = array_ops.concat(
      [outer_axes_indices + 1, [0], inner_axes_indices], 0)
  params_grad = array_ops.transpose(params_grad, invert_transpose_dims)
  return [params_grad, None, None]
Exemple #20
0
  def set_model(self, model):
    """Sets Keras model and creates summary ops."""

    self.model = model
    self.sess = K.get_session()
    # only make histogram summary op if it hasn't already been made
    if self.histogram_freq and self.merged is None:
      for layer in self.model.layers:
        for weight in layer.weights:
          mapped_weight_name = weight.name.replace(':', '_')
          tf_summary.histogram(mapped_weight_name, weight)
          if self.write_images:
            w_img = array_ops.squeeze(weight)
            shape = K.int_shape(w_img)
            if len(shape) == 2:  # dense layer kernel case
              if shape[0] > shape[1]:
                w_img = array_ops.transpose(w_img)
                shape = K.int_shape(w_img)
              w_img = array_ops.reshape(w_img, [1, shape[0], shape[1], 1])
            elif len(shape) == 3:  # convnet case
              if K.image_data_format() == 'channels_last':
                # switch to channels_first to display
                # every kernel as a separate image
                w_img = array_ops.transpose(w_img, perm=[2, 0, 1])
                shape = K.int_shape(w_img)
              w_img = array_ops.reshape(w_img,
                                        [shape[0], shape[1], shape[2], 1])
            elif len(shape) == 1:  # bias case
              w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1])
            else:
              # not possible to handle 3D convnets etc.
              continue

            shape = K.int_shape(w_img)
            assert len(shape) == 4 and shape[-1] in [1, 3, 4]
            tf_summary.image(mapped_weight_name, w_img)

        if self.write_grads:
          for weight in layer.trainable_weights:
            mapped_weight_name = weight.name.replace(':', '_')
            grads = model.optimizer.get_gradients(model.total_loss, weight)

            def is_indexed_slices(grad):
              return type(grad).__name__ == 'IndexedSlices'

            grads = [grad.values if is_indexed_slices(grad) else grad
                     for grad in grads]
            tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads)

        if hasattr(layer, 'output'):
          tf_summary.histogram('{}_out'.format(layer.name), layer.output)
    self.merged = tf_summary.merge_all()

    if self.write_graph:
      self.writer = self._writer_class(self.log_dir, self.sess.graph)
    else:
      self.writer = self._writer_class(self.log_dir)
Exemple #21
0
  def _sample_n(self, n, seed):
    batch_shape = self.batch_shape()
    event_shape = self.event_shape()
    batch_ndims = array_ops.shape(batch_shape)[0]

    ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
    shape = array_ops.concat(((n,), batch_shape, event_shape), 0)

    # Complexity: O(nbk^2)
    x = random_ops.random_normal(shape=shape,
                                 mean=0.,
                                 stddev=1.,
                                 dtype=self.dtype,
                                 seed=seed)

    # Complexity: O(nbk)
    # This parametrization is equivalent to Chi2, i.e.,
    # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
    g = random_ops.random_gamma(shape=(n,),
                                alpha=self._multi_gamma_sequence(
                                    0.5 * self.df, self.dimension),
                                beta=0.5,
                                dtype=self.dtype,
                                seed=distribution_util.gen_new_seed(
                                    seed, "wishart"))

    # Complexity: O(nbk^2)
    x = array_ops.matrix_band_part(x, -1, 0)  # Tri-lower.

    # Complexity: O(nbk)
    x = array_ops.matrix_set_diag(x, math_ops.sqrt(g))

    # Make batch-op ready.
    # Complexity: O(nbk^2)
    perm = array_ops.concat((math_ops.range(1, ndims), (0,)), 0)
    x = array_ops.transpose(x, perm)
    shape = array_ops.concat((batch_shape, (event_shape[0], -1)), 0)
    x = array_ops.reshape(x, shape)

    # Complexity: O(nbM) where M is the complexity of the operator solving a
    # vector system.  E.g., for OperatorPDDiag, each matmul is O(k^2), so
    # this complexity is O(nbk^2). For OperatorPDCholesky, each matmul is
    # O(k^3) so this step has complexity O(nbk^3).
    x = self.scale_operator_pd.sqrt_matmul(x)

    # Undo make batch-op ready.
    # Complexity: O(nbk^2)
    shape = array_ops.concat((batch_shape, event_shape, (n,)), 0)
    x = array_ops.reshape(x, shape)
    perm = array_ops.concat(((ndims - 1,), math_ops.range(0, ndims - 1)), 0)
    x = array_ops.transpose(x, perm)

    if not self.cholesky_input_output_matrices:
      # Complexity: O(nbk^3)
      x = math_ops.matmul(x, x, adjoint_b=True)

    return x
Exemple #22
0
def _FusedBatchNormGrad(op, *grad):
  """Return the gradients for the 3 inputs of BatchNorm.

  Args:
    op: The BatchNormOp for which we need to compute gradients.
    *grad: An argument list for tensors of gradients wrt the outputs
          with grad[0] as grad_y.

  Returns:
    grad_x: gradient for x, which is scale * rsqrt(variance + epsilon) *
            [grad_y - mean(grad_y) - (x - mean(x)) *
            mean(grad_y * (x - mean(x))) / (variance + epsilon)]
            in training mode; grad_y * scale * rsqrt(pop_variance + epsilon)
            in freeze mode.

    grad_scale: gradient for scale, which is sum(grad_y * (x - mean(x)) *
                rsqrt(variance + epsilon)) in training mode;
                sum(grad_y * (x - pop_mean) * rsqrt(pop_variance + epsilon))
                in freeze mode.

    grad_offset: gradient for offset, which is sum(grad_y) in training mode;
                 sum(grad_y) in freeze mode.
  """
  x = op.inputs[0]
  grad_y = grad[0]
  scale = op.inputs[1]
  epsilon = op.get_attr("epsilon")
  data_format = op.get_attr("data_format")
  is_training = op.get_attr("is_training")
  if is_training:
    return gen_nn_ops.fused_batch_norm_grad(
        grad_y,
        x,
        scale,
        op.outputs[3],
        op.outputs[4],
        epsilon=epsilon,
        data_format=data_format,
        is_training=is_training)
  else:
    pop_mean = op.inputs[3]
    pop_var = op.inputs[4]
    if data_format == b"NCHW":
      x = array_ops.transpose(x, [0, 2, 3, 1])
      grad_y = array_ops.transpose(grad_y, [0, 2, 3, 1])
    dx, dscale, doffset, _, _ = gen_nn_ops.fused_batch_norm_grad(
        grad_y,
        x,
        scale,
        pop_mean,
        pop_var,
        epsilon=epsilon,
        data_format='NHWC',
        is_training=is_training)
    if data_format == b"NCHW":
      dx = array_ops.transpose(dx, [0, 3, 1, 2])
    return dx, dscale, doffset, None, None
 def spatial_conv(batch, gain):
   s = array_ops.shape(batch)
   padded = array_ops.pad(batch, [[0, 0], [2, 2], [2, 2], [0, 0]], 'REFLECT')
   xt = array_ops.transpose(padded, [0, 3, 1, 2])
   xt = array_ops.reshape(xt, [s[0] * s[3], s[1] + 4, s[2] + 4, 1])
   conv_out = nn_ops.conv2d(xt, gaussian_filter * gain, [1] * 4, 'VALID')
   conv_xt = array_ops.reshape(conv_out, [s[0], s[3], s[1], s[2]])
   conv_xt = array_ops.transpose(conv_xt, [0, 2, 3, 1])
   return conv_xt
Exemple #24
0
def _SparseTensorDenseMatMulGrad(op, grad):
  """Gradients for the dense tensor in the SparseTensorDenseMatMul op.

  If either input is complex, no gradient is provided.

  Args:
    op: the SparseTensorDenseMatMul op
    grad: the incoming gradient

  Returns:
    Gradient for each of the 4 input tensors:
      (sparse_indices, sparse_values, sparse_shape, dense_tensor)
    The gradients for indices and shape are None.

  Raises:
    TypeError: When the two operands don't have the same type.
  """
  sp_t = ops.SparseTensor(*op.inputs[:3])
  adj_a = op.get_attr("adjoint_a")
  adj_b = op.get_attr("adjoint_b")

  a_type = sp_t.values.dtype.base_dtype
  b_type = op.inputs[3].dtype.base_dtype
  if a_type != b_type:
    raise TypeError("SparseTensorDenseMatMul op received operands with "
                    "different types: ", a_type, " and ", b_type)
  is_complex = a_type == ops.dtypes.complex64
  if is_complex:
    raise NotImplementedError("SparseTensorDenseMatMul op does not support "
                              "complex gradients.")

  # gradient w.r.t. dense
  b_grad = sparse_ops.sparse_tensor_dense_matmul(sp_t, grad,
                                                 adjoint_a=not adj_a)
  if adj_b:
    b_grad = array_ops.transpose(b_grad)

  # gradient w.r.t. sparse values
  a_indices = op.inputs[0]
  b = op.inputs[3]

  rows = a_indices[:, 0]
  cols = a_indices[:, 1]

  # TODO(zongheng, ebrevdo): add conjugates in the right places when complex
  # values are allowed.
  # TODO(zongheng): these gather calls could potentially duplicate rows/cols in
  # memory.  If there is a need, we should look into implementing this more
  # intelligently to avoid duplicating data.
  parts_a = array_ops.gather(grad, rows if not adj_a else cols)
  parts_b = array_ops.gather(b if not adj_b else array_ops.transpose(b),
                             cols if not adj_a else rows)
  a_values_grad = math_ops.reduce_sum(parts_a * parts_b, reduction_indices=1)

  # gradients w.r.t. (a_indices, a_values, a_shape, b)
  return (None, a_values_grad, None, b_grad)
 def _inference_ref(self, x, scale, offset, mean, var, epsilon, data_format):
   if data_format not in ['NHWC', 'NCHW']:
     raise ValueError('data_format must be NCHW or NHWC, '
                      'got %s.' % data_format)
   if data_format == 'NCHW':
     x = array_ops.transpose(x, [0, 2, 3, 1])
   y = self._batch_norm(x, mean, var, offset, scale, epsilon)
   if data_format == 'NCHW':
     y = array_ops.transpose(y, [0, 3, 1, 2])
   return y.eval()
Exemple #26
0
def _my_matmul_grad(op, dl_dc):
    a = op.inputs[0]
    b = op.inputs[1]
    
    #dl_da = math_ops.matmul( dl_dc, array_ops.transpose(b, [1,0]))
    #dl_db = math_ops.matmul( array_ops.transpose(a, [1,0]), dl_dc )
    dl_da = my_matmul( dl_dc, array_ops.transpose(b, [1,0]))
    dl_db = my_matmul( array_ops.transpose(a, [1,0]), dl_dc )
    
    return dl_da, dl_db
Exemple #27
0
  def _cudnn_to_tf_weights(self, *cu_weights):
    r"""Stitching cudnn canonical weights to generate tf canonical weights."""
    w_i, w_r, w_h, r_i, r_r, r_h = cu_weights

    # pylint: disable=invalid-name
    W_i = array_ops.concat([w_i, r_i], axis=1)
    W_r = array_ops.concat([w_r, r_r], axis=1)
    # pylint: enable=invalid-name
    return (array_ops.transpose(array_ops.concat([W_i, W_r], axis=0)),
            array_ops.transpose(w_h), array_ops.transpose(r_h))
  def test4DGPU(self):
    # If no GPU available, skip the test
    if not test.is_gpu_available(cuda_only=True):
      return
    large_shapes = [[4, 10, 10, 3], [4, 10, 10, 8], [4, 10, 10, 13],
                    [4, 3, 10, 10], [4, 8, 10, 10], [4, 13, 10, 10]] * 3
    perms = [[0, 3, 1, 2]] * 3 + [[0, 2, 3, 1]] * 3 + [[3, 1, 2, 0]] * 6 + [[
        1, 2, 3, 0
    ]] * 3 + [[2, 3, 0, 1]] * 3

    for input_shape, perm in zip(large_shapes, perms):
      total_size = np.prod(input_shape)
      inp = np.arange(1, total_size + 1, dtype=np.float32).reshape(input_shape)
      np_ans = self._np_transpose(inp, perm)
      with self.test_session(use_gpu=True):
        inx = ops.convert_to_tensor(inp)
        y = array_ops.transpose(inx, perm)
        tf_ans = y.eval()
      self.assertAllEqual(np_ans, tf_ans)
      self.assertShapeEqual(np_ans, y)

    # shapes related to Inception (taken from conv_ops_test.py)
    inception_shapes = [[4, 5, 5, 124], [4, 8, 8, 38], [4, 8, 8, 38], [
        4, 8, 8, 204
    ], [4, 8, 8, 44], [4, 8, 8, 204], [4, 8, 8, 204], [4, 8, 8, 204], [
        4, 8, 8, 176
    ], [4, 8, 8, 176], [4, 8, 8, 176], [4, 8, 8, 176], [4, 17, 17, 19], [
        4, 17, 17, 19
    ], [4, 17, 17, 124], [4, 17, 17, 12], [4, 17, 17, 124], [4, 17, 17, 22], [
        4, 17, 17, 19
    ], [4, 17, 17, 19], [4, 17, 17, 121], [4, 17, 17, 121], [4, 17, 17, 22], [
        4, 17, 17, 19
    ], [4, 17, 17, 19], [4, 17, 17, 115], [4, 17, 17, 115], [4, 17, 17, 19], [
        4, 17, 17, 16
    ], [4, 17, 17, 115], [4, 17, 17, 102], [4, 17, 17, 12], [4, 17, 17, 102], [
        4, 17, 17, 12
    ], [4, 17, 17, 102], [4, 17, 17, 12], [4, 17, 17, 76], [4, 17, 17, 12], [
        4, 17, 17, 12
    ], [4, 17, 17, 76], [4, 17, 17, 76], [4, 35, 35, 9], [4, 35, 35, 28], [
        4, 35, 35, 6
    ], [4, 35, 35, 28], [4, 35, 35, 25], [4, 35, 35, 4], [4, 35, 35, 25],
                        [4, 35, 35, 9], [4, 35, 35, 19], [4, 35, 35, 19],
                        [4, 35, 35, 19], [4, 73, 73, 6], [4, 73, 73,
                                                          6], [4, 147, 147, 2]]
    for input_shape in inception_shapes:
      perm = [0, 3, 1, 2]
      total_size = np.prod(input_shape)
      inp = np.arange(1, total_size + 1, dtype=np.float32).reshape(input_shape)
      np_ans = self._np_transpose(inp, perm)
      with self.test_session(use_gpu=True):
        inx = ops.convert_to_tensor(inp)
        y = array_ops.transpose(inx, perm)
        tf_ans = y.eval()
      self.assertAllEqual(np_ans, tf_ans)
      self.assertShapeEqual(np_ans, y)
 def testSpaceToDepthTranspose(self):
   x = np.arange(5 * 10 * 16 * 7, dtype=np.float32).reshape([5, 10, 16, 7])
   block_size = 2
   paddings = np.zeros((2, 2), dtype=np.int32)
   y1 = self.space_to_batch(x, paddings, block_size=block_size)
   y2 = array_ops.transpose(
       array_ops.space_to_depth(
           array_ops.transpose(x, [3, 1, 2, 0]), block_size=block_size),
       [3, 1, 2, 0])
   with self.test_session(use_gpu=True):
     self.assertAllEqual(y1.eval(), y2.eval())
 def _training_ref(self, x, scale, offset, epsilon, data_format):
   if data_format not in ['NHWC', 'NCHW']:
     raise ValueError('data_format must be NCHW or NHWC, '
                      'got %s.' % data_format)
   if data_format == 'NCHW':
     x = array_ops.transpose(x, [0, 2, 3, 1])
   mean, var = nn_impl.moments(x, [0, 1, 2], keep_dims=False)
   y = nn_impl.batch_normalization(x, mean, var, offset, scale, epsilon)
   if data_format == 'NCHW':
     y = array_ops.transpose(y, [0, 3, 1, 2])
   return y.eval(), mean.eval(), var.eval()
Exemple #31
0
 def run_fn():
     with lsgt.LossScaleGradientTape(loss_scale) as g:
         y = array_ops.transpose(x) * 2.
     return g.gradient(y, x)
Exemple #32
0
def dynamic_rnn(cell,
                inputs,
                sequence_length=None,
                initial_state=None,
                dtype=None,
                parallel_iterations=None,
                swap_memory=False,
                time_major=False,
                scope=None):
    """Creates a recurrent neural network specified by RNNCell `cell`.

  This function is functionally identical to the function `rnn` above, but
  performs fully dynamic unrolling of `inputs`.

  Unlike `rnn`, the input `inputs` is not a Python list of `Tensors`, one for
  each frame.  Instead, `inputs` may be a single `Tensor` where
  the maximum time is either the first or second dimension (see the parameter
  `time_major`).  Alternatively, it may be a (possibly nested) tuple of
  Tensors, each of them having matching batch and time dimensions.
  The corresponding output is either a single `Tensor` having the same number
  of time steps and batch size, or a (possibly nested) tuple of such tensors,
  matching the nested structure of `cell.output_size`.

  The parameter `sequence_length` is optional and is used to copy-through state
  and zero-out outputs when past a batch element's sequence length. So it's more
  for correctness than performance, unlike in rnn().

  Args:
    cell: An instance of RNNCell.
    inputs: The RNN inputs.

      If `time_major == False` (default), this must be a `Tensor` of shape:
        `[batch_size, max_time, ...]`, or a nested tuple of such
        elements.

      If `time_major == True`, this must be a `Tensor` of shape:
        `[max_time, batch_size, ...]`, or a nested tuple of such
        elements.

      This may also be a (possibly nested) tuple of Tensors satisfying
      this property.  The first two dimensions must match across all the inputs,
      but otherwise the ranks and other shape components may differ.
      In this case, input to `cell` at each time-step will replicate the
      structure of these tuples, except for the time dimension (from which the
      time is taken).

      The input to `cell` at each time step will be a `Tensor` or (possibly
      nested) tuple of Tensors each with dimensions `[batch_size, ...]`.
    sequence_length: (optional) An int32/int64 vector sized `[batch_size]`.
    initial_state: (optional) An initial state for the RNN.
      If `cell.state_size` is an integer, this must be
      a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
      If `cell.state_size` is a tuple, this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell.state_size`.
    dtype: (optional) The data type for the initial state and expected output.
      Required if initial_state is not provided or RNN state has a heterogeneous
      dtype.
    parallel_iterations: (Default: 32).  The number of iterations to run in
      parallel.  Those operations which do not have any temporal dependency
      and can be run in parallel, will be.  This parameter trades off
      time for space.  Values >> 1 use more memory but take less time,
      while smaller values use less memory but computations take longer.
    swap_memory: Transparently swap the tensors produced in forward inference
      but needed for back prop from GPU to CPU.  This allows training RNNs
      which would typically not fit on a single GPU, with very minimal (or no)
      performance penalty.
    time_major: The shape format of the `inputs` and `outputs` Tensors.
      If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
      If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
      Using `time_major = True` is a bit more efficient because it avoids
      transposes at the beginning and end of the RNN calculation.  However,
      most TensorFlow data is batch-major, so by default this function
      accepts input and emits output in batch-major form.
    scope: VariableScope for the created subgraph; defaults to "RNN".

  Returns:
    A pair (outputs, state) where:

      outputs: The RNN output `Tensor`.

        If time_major == False (default), this will be a `Tensor` shaped:
          `[batch_size, max_time, cell.output_size]`.

        If time_major == True, this will be a `Tensor` shaped:
          `[max_time, batch_size, cell.output_size]`.

        Note, if `cell.output_size` is a (possibly nested) tuple of integers
        or `TensorShape` objects, then `outputs` will be a tuple having the
        same structure as `cell.output_size`, containing Tensors having shapes
        corresponding to the shape data in `cell.output_size`.

      state: The final state.  If `cell.state_size` is an int, this
        will be shaped `[batch_size, cell.state_size]`.  If it is a
        `TensorShape`, this will be shaped `[batch_size] + cell.state_size`.
        If it is a (possibly nested) tuple of ints or `TensorShape`, this will
        be a tuple having the corresponding shapes.

  Raises:
    TypeError: If `cell` is not an instance of RNNCell.
    ValueError: If inputs is None or an empty list.
  """

    if not isinstance(cell, rnn_cell.RNNCell):
        raise TypeError("cell must be an instance of RNNCell")

    # By default, time_major==False and inputs are batch-major: shaped
    #   [batch, time, depth]
    # For internal calculations, we transpose to [time, batch, depth]
    flat_input = nest.flatten(inputs)

    if not time_major:
        # (B,T,D) => (T,B,D)
        flat_input = tuple(
            array_ops.transpose(input_, [1, 0, 2]) for input_ in flat_input)

    parallel_iterations = parallel_iterations or 32
    if sequence_length is not None:
        sequence_length = math_ops.to_int32(sequence_length)
        if sequence_length.get_shape().ndims not in (None, 1):
            raise ValueError(
                "sequence_length must be a vector of length batch_size, "
                "but saw shape: %s" % sequence_length.get_shape())
        sequence_length = array_ops.identity(  # Just to find it in the graph.
            sequence_length,
            name="sequence_length")

    # Create a new scope in which the caching device is either
    # determined by the parent scope, or is set to place the cached
    # Variable using the same placement as for the rest of the RNN.
    with vs.variable_scope(scope or "RNN") as varscope:
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)
        input_shape = tuple(array_ops.shape(input_) for input_ in flat_input)
        batch_size = input_shape[0][1]

        for input_ in input_shape:
            if input_[1].get_shape() != batch_size.get_shape():
                raise ValueError("All inputs should have the same batch size")

        if initial_state is not None:
            state = initial_state
        else:
            if not dtype:
                raise ValueError(
                    "If no initial_state is provided, dtype must be.")
            state = cell.zero_state(batch_size, dtype)

        def _assert_has_shape(x, shape):
            x_shape = array_ops.shape(x)
            packed_shape = array_ops.pack(shape)
            return control_flow_ops.Assert(
                math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), [
                    "Expected shape for Tensor %s is " % x.name, packed_shape,
                    " but saw shape: ", x_shape
                ])

        if sequence_length is not None:
            # Perform some shape validation
            with ops.control_dependencies(
                [_assert_has_shape(sequence_length, [batch_size])]):
                sequence_length = array_ops.identity(sequence_length,
                                                     name="CheckSeqLen")

        inputs = nest.pack_sequence_as(structure=inputs,
                                       flat_sequence=flat_input)

        (outputs, final_state) = _dynamic_rnn_loop(
            cell,
            inputs,
            state,
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory,
            sequence_length=sequence_length,
            dtype=dtype)

        # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth].
        # If we are performing batch-major calculations, transpose output back
        # to shape [batch, time, depth]
        if not time_major:
            # (T,B,D) => (B,T,D)
            flat_output = nest.flatten(outputs)
            flat_output = [
                array_ops.transpose(output, [1, 0, 2])
                for output in flat_output
            ]
            outputs = nest.pack_sequence_as(structure=outputs,
                                            flat_sequence=flat_output)

        return (outputs, final_state)
 def f(x, y):
   return math_ops.matmul(
       x, array_ops.reshape(array_ops.transpose(y), [384, 1536]))
    def _VerifyValues(self,
                      tensor_in_sizes,
                      filter_in_sizes,
                      stride,
                      padding,
                      data_type,
                      use_gpu,
                      grouped_conv=False,
                      data_format="NHWC"):
        """Verifies the output values of the convolution function.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride.
      padding: Padding type.
      data_type: The data type to use.
      use_gpu: Whether to use GPU.
      grouped_conv: Whether to use cuDNN 7's grouped convolution.
      data_format: The data_format of the input. "NHWC" or "NCHW".
    """
        input_size = 1
        filter_size = 1
        for s in tensor_in_sizes:
            input_size *= s
        for s in filter_in_sizes:
            filter_size *= s
        # Initializes the input and filter tensor with numbers incrementing from 1.
        x1 = [f * 1.0 / input_size for f in range(1, input_size + 1)]
        x2 = [f * 1.0 / filter_size for f in range(1, filter_size + 1)]
        ops.reset_default_graph()
        graph = ops.get_default_graph()
        with self.session(graph=graph, use_gpu=use_gpu) as sess:
            tolerance = {
                dtypes.float16: 4e-2,
                dtypes.float32: 1e-5,
                dtypes.float64: 1e-12,
            }[data_type]

            t1 = constant_op.constant(x1,
                                      shape=tensor_in_sizes,
                                      dtype=data_type)
            t1.set_shape(tensor_in_sizes)
            t2 = constant_op.constant(x2,
                                      shape=filter_in_sizes,
                                      dtype=data_type)

            native_t1 = t1
            strides = [1, stride, stride, 1]
            if data_format == "NCHW":
                # Transpose from NHWC input to NCHW
                # Ex. [4, 5, 5, 48] to [4, 48, 5, 5]
                native_t1 = array_ops.transpose(t1, [0, 3, 1, 2])
                strides = [1, 1, stride, stride]

            with sess.graph._kernel_label_map(
                {"DepthwiseConv2dNative": "cudnn_grouped_convolution"}
                    if grouped_conv else {}):
                conv_native = nn_ops.depthwise_conv2d_native(
                    native_t1,
                    t2,
                    strides=strides,
                    data_format=data_format,
                    padding=padding)

            if data_format == "NCHW":
                # Transpose back from NCHW to NHWC
                conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

            try:
                native_result = self.evaluate(conv_native)
            except errors.InvalidArgumentError as e:
                # Grouped convolution kernel is only registered for cuDNN 7. Silently
                # return when we are running on an earlier version or without GPU.
                if e.message.startswith(
                        "No OpKernel was registered to support Op 'DepthwiseConv2dNative'"
                ):
                    tf_logging.warn("Skipping grouped convolution test")
                    return
                raise e

            conv_interface = nn_impl.depthwise_conv2d(
                t1, t2, strides=[1, stride, stride, 1], padding=padding)
            interface_result = self.evaluate(conv_interface)

        tf_logging.info(
            "data_type: %r, use_gpu: %r, grouped_conv: %r, max diff = %f",
            data_type, use_gpu, grouped_conv,
            np.amax(np.absolute(native_result - interface_result)))
        self.assertArrayNear(np.ravel(native_result),
                             np.ravel(interface_result), tolerance)
        self.assertShapeEqual(native_result, conv_native)
        self.assertShapeEqual(native_result, conv_interface)
 def call(self, inputs):
     if not isinstance(inputs, (list, tuple)):
         raise ValueError(
             'A merge layer should be called on a list of inputs.')
     if self._reshape_required:
         reshaped_inputs = []
         input_ndims = list(map(K.ndim, inputs))
         if None not in input_ndims:
             # If ranks of all inputs are available,
             # we simply expand each of them at axis=1
             # until all of them have the same rank.
             max_ndim = max(input_ndims)
             for x in inputs:
                 x_ndim = K.ndim(x)
                 for _ in range(max_ndim - x_ndim):
                     x = array_ops.expand_dims(x, axis=1)
                 reshaped_inputs.append(x)
             return self._merge_function(reshaped_inputs)
         else:
             # Transpose all inputs so that batch size is the last dimension.
             # (batch_size, dim1, dim2, ... ) -> (dim1, dim2, ... , batch_size)
             transposed = False
             for x in inputs:
                 x_ndim = K.ndim(x)
                 if x_ndim is None:
                     x_shape = array_ops.shape(x)
                     batch_size = x_shape[0]
                     new_shape = K.concatenate([
                         x_shape[1:],
                         array_ops.expand_dims(batch_size, axis=-1)
                     ])
                     x_transposed = array_ops.reshape(
                         x,
                         array_ops.stack([
                             batch_size,
                             math_ops.reduce_prod(x_shape[1:])
                         ],
                                         axis=0))
                     x_transposed = array_ops.transpose(x_transposed,
                                                        perm=(1, 0))
                     x_transposed = array_ops.reshape(
                         x_transposed, new_shape)
                     reshaped_inputs.append(x_transposed)
                     transposed = True
                 elif x_ndim > 1:
                     dims = list(range(1, x_ndim)) + [0]
                     reshaped_inputs.append(
                         array_ops.transpose(x, perm=dims))
                     transposed = True
                 else:
                     # We don't transpose inputs if they are 1D vectors or scalars.
                     reshaped_inputs.append(x)
             y = self._merge_function(reshaped_inputs)
             y_ndim = K.ndim(y)
             if transposed:
                 # If inputs have been transposed, we have to transpose the output too.
                 if y_ndim is None:
                     y_shape = array_ops.shape(y)
                     y_ndim = array_ops.shape(y_shape)[0]
                     batch_size = y_shape[y_ndim - 1]
                     new_shape = K.concatenate([
                         array_ops.expand_dims(batch_size, axis=-1),
                         y_shape[:y_ndim - 1]
                     ])
                     y = array_ops.reshape(y, (-1, batch_size))
                     y = array_ops.transpose(y, perm=(1, 0))
                     y = array_ops.reshape(y, new_shape)
                 elif y_ndim > 1:
                     dims = [y_ndim - 1] + list(range(y_ndim - 1))
                     y = array_ops.transpose(y, perm=dims)
             return y
     else:
         return self._merge_function(inputs)
def fill_lower_triangular(x,
                          validate_args=False,
                          name="fill_lower_triangular"):
    """Creates a (batch of) lower triangular matrix from a vector of inputs.

  If `x.get_shape()` is `[b1, b2, ..., bK, d]` then the output shape is `[b1,
  b2, ..., bK, n, n]` where `n` is such that `d = n(n+1)/2`, i.e.,
  `n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))`.

  Although the non-batch complexity is O(n^2), large constants and sub-optimal
  vectorization means the complexity of this function is 5x slower than zeroing
  out the upper triangular, i.e., `tf.matrix_band_part(X, -1, 0)`.  This
  function becomes competitive only when several matmul/cholesky/etc ops can be
  ellided in constructing the input.  Example: wiring a fully connected layer as
  a covariance matrix; this function reduces the final layer by 2x and possibly
  reduces the network arch complexity considerably.  In most cases it is better
  to simply build a full matrix and zero out the upper triangular elements,
  e.g., `tril = tf.matrix_band_part(full, -1, 0)`, rather than directly
  construct a lower triangular.

  Example:

  ```python
  fill_lower_triangular([1, 2, 3, 4, 5, 6])
  # Returns: [[1, 0, 0],
  #           [2, 3, 0],
  #           [4, 5, 6]]
  ```

  For comparison, a pure numpy version of this function can be found in
  `distribution_util_test.py`, function `_fill_lower_triangular`.

  Args:
    x: `Tensor` representing lower triangular elements.
    validate_args: `Boolean`, default `False`.  Whether to ensure the shape of
      `x` can be mapped to a lower triangular matrix (controls non-static checks
      only).
    name: `String`. The name to give this op.

  Returns:
    tril: `Tensor` with lower triangular elements filled from `x`.

  Raises:
    ValueError: if shape if `x` has static shape which cannot be mapped to a
      lower triangular matrix.
  """
    # TODO(jvdillon): Replace this code with dedicated op when it exists.
    with ops.name_scope(name, values=(x, )):
        x = ops.convert_to_tensor(x, name="x")
        if (x.get_shape().ndims is not None
                and x.get_shape()[-1].value is not None):
            d = x.get_shape()[-1].value
            # d = n(n+1)/2 implies n is:
            n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))
            d_inferred = n * (n + 1) / 2
            if d != d_inferred:
                raise ValueError(
                    "Input cannot be mapped to a lower triangular; "
                    "n*(n+1)/2 = %d != %d" % (d_inferred, d))
            final_shape = x.get_shape()[:-1].concatenate(
                tensor_shape.TensorShape([n, n]))
        else:
            d = math_ops.cast(array_ops.shape(x)[-1], dtype=dtypes.float32)
            # d = n(n+1)/2 implies n is:
            n = math_ops.cast(0.5 * (dtypes.sqrt(1. + 8. * d) - 1.),
                              dtype=dtypes.int32)
            if validate_args:
                is_valid_input_shape = check_ops.assert_equal(
                    n * (n + 1) / 2,
                    d,
                    message="Input cannot be mapped to a lower triangular.")
                n = control_flow_ops.with_dependencies([is_valid_input_shape],
                                                       n)
            final_shape = x.get_shape()[:-1].concatenate(
                tensor_shape.TensorShape([None, None]))

        def tril_ids(n):
            """Internal helper to create vector of linear indices into y."""
            # Build the ids statically; chose 512 because it implies 1MiB.
            if not contrib_framework.is_tensor(n) and n <= 512:
                ids = np.arange(n**2, dtype=np.int32)
                rows = (ids / n).astype(np.int32)  # Implicit floor.
                # We need to stop incrementing the index when we encounter
                # upper-triangular elements.  The idea here is to compute the
                # lower-right number of zeros then by "symmetry" subtract this from the
                # total number of zeros, n(n-1)/2.
                # Then we note that: n(n-1)/2 - (n-r)*(n-r-1)/2 = r(2n-r-1)/2
                offset = (rows * (2 * n - rows - 1) / 2).astype(np.int32)
                # We could also zero out when (rows < cols) == (rows < ids-n*rows).
                # mask = (ids <= (n + 1) * rows).astype(np.int32)
            else:
                ids = math_ops.range(n**2)
                rows = math_ops.cast(ids / n, dtype=dtypes.int32)
                offset = math_ops.cast(rows * (2 * n - rows - 1) / 2,
                                       dtype=dtypes.int32)
            return ids - offset

        # Special-case non-batch case.
        if x.get_shape().ndims == 1:
            y = array_ops.gather(x, array_ops.reshape(tril_ids(n), [n, n]))
            y = array_ops.matrix_band_part(y, -1, 0)
            y.set_shape(y.get_shape().merge_with(final_shape))
            return y

        # Make ids for each batch dim.
        if (x.get_shape().ndims is not None
                and x.get_shape()[:-1].is_fully_defined()):
            batch_shape = np.asarray(x.get_shape()[:-1].as_list(),
                                     dtype=np.int32)
            m = np.prod(batch_shape).astype(np.int32)
        else:
            batch_shape = array_ops.shape(x)[:-1]
            m = array_ops.reduce_prod(array_ops.shape(x)[:-1])
        batch_ids = math_ops.range(m)

        # Assemble the tril_ids into batch,tril_id pairs.
        idx = array_ops.pack([
            array_ops.tile(array_ops.expand_dims(batch_ids, 1), [1, n * n]),
            array_ops.tile(array_ops.expand_dims(tril_ids(n), 0), [m, 1])
        ])
        idx = array_ops.transpose(idx, [1, 2, 0])

        # Gather up, reshape, and return.
        y = array_ops.reshape(x, [-1, d])
        y = array_ops.gather_nd(y, idx)
        y = array_ops.reshape(y, array_ops.concat(0, [batch_shape, [n, n]]))
        y = array_ops.matrix_band_part(y, -1, 0)
        y.set_shape(y.get_shape().merge_with(final_shape))
        return y
Exemple #37
0
  def get_batch_loss(self, features, mode, state):
    """Computes predictions and a loss.

    Args:
      features: A dictionary (such as is produced by a chunker) with the
        following key/value pairs (shapes are given as required for training):
          TrainEvalFeatures.TIMES: A [batch size, self.window_size] integer
            Tensor with times for each observation. To train on longer
            sequences, the data should first be chunked.
          TrainEvalFeatures.VALUES: A [batch size, self.window_size,
            self.num_features] Tensor with values for each observation.
        When evaluating, `TIMES` and `VALUES` must have a window size of at
        least self.window_size, but it may be longer, in which case the last
        window_size - self.input_window_size times (or fewer if this is not
        divisible by self.output_window_size) will be evaluated on with
        non-overlapping output windows (and will have associated
        predictions). This is primarily to support qualitative
        evaluation/plotting, and is not a recommended way to compute evaluation
        losses (since there is no overlap in the output windows, which for
        window-based models is an undesirable bias).
      mode: The tf.estimator.ModeKeys mode to use (TRAIN or EVAL).
      state: Unused
    Returns:
      A model.ModelOutputs object.
    Raises:
      ValueError: If `mode` is not TRAIN or EVAL, or if static shape information
      is incorrect.
    """
    features = {feature_name: ops.convert_to_tensor(feature_value)
                for feature_name, feature_value in features.items()}
    times = features[TrainEvalFeatures.TIMES]
    exogenous_regressors = self._process_exogenous_features(
        times=times,
        features={key: value for key, value in features.items()
                  if key not in [TrainEvalFeatures.TIMES,
                                 TrainEvalFeatures.VALUES,
                                 PredictionFeatures.STATE_TUPLE]})
    if mode == estimator_lib.ModeKeys.TRAIN:
      # For training, we require the window size to be self.window_size as
      # iterating sequentially on larger windows could introduce a bias.
      return self._process_window(
          features, mode=mode, exogenous_regressors=exogenous_regressors)
    elif mode == estimator_lib.ModeKeys.EVAL:
      # For evaluation, we allow the user to pass in a larger window, in which
      # case we try to cover as much of the window as possible without
      # overlap. Quantitative evaluation is more efficient/correct with fixed
      # windows matching self.window_size (as with training), but this looping
      # allows easy plotting of "in-sample" predictions.
      times.get_shape().assert_has_rank(2)
      static_window_size = times.get_shape().dims[1].value
      if (static_window_size is not None
          and static_window_size < self.window_size):
        raise ValueError(
            ("ARModel requires a window of at least input_window_size + "
             "output_window_size to evaluate on (input_window_size={}, "
             "output_window_size={}, and got shape {} for feature '{}' (batch "
             "size, window size)).").format(
                 self.input_window_size, self.output_window_size,
                 times.get_shape(), TrainEvalFeatures.TIMES))
      num_iterations = ((array_ops.shape(times)[1] -  self.input_window_size)
                        // self.output_window_size)
      output_size = num_iterations * self.output_window_size
      # Rather than dealing with overlapping windows of output, discard a bit at
      # the beginning if output windows don't cover evenly.
      crop_length = output_size + self.input_window_size
      features = {feature_name: feature_value[:, -crop_length:]
                  for feature_name, feature_value in features.items()}
      # Note that, unlike the ARModel's predict() while_loop, each iteration
      # here can run in parallel, since we are not feeding predictions or state
      # from previous iterations.
      def _while_condition(iteration_number, loss_ta, mean_ta, covariance_ta):
        del loss_ta, mean_ta, covariance_ta  # unused
        return iteration_number < num_iterations

      def _while_body(iteration_number, loss_ta, mean_ta, covariance_ta):
        """Perform a processing step on a single window of data."""
        base_offset = iteration_number * self.output_window_size
        model_outputs = self._process_window(
            features={
                feature_name:
                feature_value[:, base_offset:base_offset + self.window_size]
                for feature_name, feature_value in features.items()},
            mode=mode,
            exogenous_regressors=exogenous_regressors[
                :, base_offset:base_offset + self.window_size])
        # This code needs to be updated if new predictions are added in
        # self._process_window
        assert len(model_outputs.predictions) == 3
        assert "mean" in model_outputs.predictions
        assert "covariance" in model_outputs.predictions
        assert "observed" in model_outputs.predictions
        return (iteration_number + 1,
                loss_ta.write(
                    iteration_number, model_outputs.loss),
                mean_ta.write(
                    iteration_number, model_outputs.predictions["mean"]),
                covariance_ta.write(
                    iteration_number, model_outputs.predictions["covariance"]))
      _, loss_ta, mean_ta, covariance_ta = control_flow_ops.while_loop(
          _while_condition, _while_body,
          [0,
           tensor_array_ops.TensorArray(dtype=self.dtype, size=num_iterations),
           tensor_array_ops.TensorArray(dtype=self.dtype, size=num_iterations),
           tensor_array_ops.TensorArray(dtype=self.dtype, size=num_iterations)])
      values = math_ops.cast(features[TrainEvalFeatures.VALUES],
                             dtype=self.dtype)
      batch_size = array_ops.shape(times)[0]
      prediction_shape = [batch_size, self.output_window_size * num_iterations,
                          self.num_features]
      (previous_state_times,
       previous_state_values,
       previous_state_exogenous_regressors) = state
      # Make sure returned state always has windows of self.input_window_size,
      # even if we were passed fewer than self.input_window_size points this
      # time.
      if self.input_window_size > 0:
        new_state_times = array_ops.concat(
            [previous_state_times,
             math_ops.cast(times, dtype=dtypes.int64)],
            axis=1)[:, -self.input_window_size:]
        new_state_times.set_shape((None, self.input_window_size))
        new_state_values = array_ops.concat(
            [previous_state_values,
             self._scale_data(values)], axis=1)[:, -self.input_window_size:, :]
        new_state_values.set_shape((None, self.input_window_size,
                                    self.num_features))
        new_exogenous_regressors = array_ops.concat(
            [previous_state_exogenous_regressors,
             exogenous_regressors], axis=1)[:, -self.input_window_size:, :]
        new_exogenous_regressors.set_shape(
            (None,
             self.input_window_size,
             self.exogenous_size))
      else:
        # There is no state to keep, and the strided slices above do not handle
        # input_window_size=0.
        new_state_times = previous_state_times
        new_state_values = previous_state_values
        new_exogenous_regressors = previous_state_exogenous_regressors
      return model.ModelOutputs(
          loss=math_ops.reduce_mean(loss_ta.stack(), axis=0),
          end_state=(new_state_times,
                     new_state_values,
                     new_exogenous_regressors),
          predictions={
              "mean": array_ops.reshape(
                  array_ops.transpose(mean_ta.stack(), [1, 0, 2, 3]),
                  prediction_shape),
              "covariance": array_ops.reshape(
                  array_ops.transpose(covariance_ta.stack(), [1, 0, 2, 3]),
                  prediction_shape),
              "observed": values[:, -output_size:]},
          prediction_times=times[:, -output_size:])
    else:
      raise ValueError(
          "Unknown mode '{}' passed to get_batch_loss.".format(mode))
Exemple #38
0
    def call(self,
             inputs,
             initial_state=None,
             dtype=None,
             sequence_length=None):
        """Run this LSTM on inputs, starting from the given state.

    Args:
      inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`.
      initial_state: a tuple `(initial_cell_state, initial_output)` with tensors
        of shape `[batch_size, self._num_units]`. If this is not provided, the
        cell is expected to create a zero initial state of type `dtype`.
      dtype: The data type for the initial state and expected output. Required
        if `initial_state` is not provided or RNN state has a heterogeneous
        dtype.
      sequence_length: Specifies the length of each sequence in inputs. An
        `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0,
        time_len).`
        Defaults to `time_len` for each element.

    Returns:
      A pair containing:

      - Output: A `3-D` tensor of shape `[time_len, batch_size, output_size]`
        or a list of time_len tensors of shape `[batch_size, output_size]`,
        to match the type of the `inputs`.
      - Final state: a tuple `(cell_state, output)` matching `initial_state`.

    Raises:
      ValueError: in case of shape mismatches
    """
        is_list = isinstance(inputs, list)
        if is_list:
            inputs = array_ops.stack(inputs)
        inputs_shape = inputs.get_shape().with_rank(3)
        if not inputs_shape[2]:
            raise ValueError("Expecting inputs_shape[2] to be set: %s" %
                             inputs_shape)
        batch_size = inputs_shape[1].value
        if batch_size is None:
            batch_size = array_ops.shape(inputs)[1]
        time_len = inputs_shape[0].value
        if time_len is None:
            time_len = array_ops.shape(inputs)[0]

        # Provide default values for initial_state and dtype
        if initial_state is None:
            if dtype is None:
                raise ValueError(
                    "Either initial_state or dtype needs to be specified")
            z = array_ops.zeros(array_ops.stack([batch_size, self.num_units]),
                                dtype=dtype)
            initial_state = z, z
        else:
            if len(initial_state) != 2:
                raise ValueError(
                    "Expecting initial_state to be a tuple with length 2 or None"
                )
            if dtype is None:
                dtype = initial_state[0].dtype

        # create the actual cell
        if sequence_length is not None:
            sequence_length = ops.convert_to_tensor(sequence_length)
        initial_cell_state, initial_output = initial_state  # pylint: disable=unpacking-non-sequence
        cell_states, outputs = self._call_cell(inputs, initial_cell_state,
                                               initial_output, dtype,
                                               sequence_length)

        if sequence_length is not None:
            # Mask out the part beyond sequence_length
            mask = array_ops.transpose(
                array_ops.sequence_mask(sequence_length, time_len,
                                        dtype=dtype), [1, 0])
            mask = array_ops.tile(array_ops.expand_dims(mask, [-1]),
                                  [1, 1, self.num_units])
            outputs *= mask
            # Prepend initial states to cell_states and outputs for indexing to work
            # correctly,since we want to access the last valid state at
            # sequence_length - 1, which can even be -1, corresponding to the
            # initial state.
            mod_cell_states = array_ops.concat(
                [array_ops.expand_dims(initial_cell_state, [0]), cell_states],
                0)
            mod_outputs = array_ops.concat(
                [array_ops.expand_dims(initial_output, [0]), outputs], 0)
            final_cell_state = self._gather_states(mod_cell_states,
                                                   sequence_length, batch_size)
            final_output = self._gather_states(mod_outputs, sequence_length,
                                               batch_size)
        else:
            # No sequence_lengths used: final state is the last state
            final_cell_state = cell_states[-1]
            final_output = outputs[-1]

        if is_list:
            # Input was a list, so return a list
            outputs = array_ops.unstack(outputs)

        final_state = rnn_cell_impl.LSTMStateTuple(final_cell_state,
                                                   final_output)
        return outputs, final_state
  def _VerifyValues(self,
                    tensor_in_sizes,
                    filter_in_sizes,
                    stride,
                    padding,
                    use_gpu,
                    data_format="NHWC"):
    """Verifies the output values of the convolution function.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride.
      padding: Padding type.
      use_gpu: Whether to use GPU.
      data_format: The data_format of the input.  "NHWC" or "NCHW".
    """
    total_size_1 = 1
    total_size_2 = 1
    for s in tensor_in_sizes:
      total_size_1 *= s
    for s in filter_in_sizes:
      total_size_2 *= s
    # Initializes the input and filter tensor with numbers incrementing from 1.
    x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
    x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
    with self.test_session(use_gpu=use_gpu) as sess:
      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
      t1.set_shape(tensor_in_sizes)
      t2 = constant_op.constant(x2, shape=filter_in_sizes)

      native_t1 = t1
      strides = [1, stride, stride, 1]
      if data_format == "NCHW":
        # Transpose from NWHC input to NCHW
        # Ex. [4, 5, 5, 48] to [4, 48, 5, 5]
        native_t1 = array_ops.transpose(t1, [0, 3, 1, 2])
        strides = [1, 1, stride, stride]

      conv_native = nn_ops.depthwise_conv2d_native(
          native_t1,
          t2,
          strides=strides,
          data_format=data_format,
          padding=padding)

      if data_format == "NCHW":
        # Transpose back from NCHW to NHWC
        conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

      conv_interface = nn_impl.depthwise_conv2d(
          t1, t2, strides=[1, stride, stride, 1], padding=padding)

      native_result = sess.run(conv_native)
      interface_result = sess.run(conv_interface)

    print("depthwise conv_2d: ", tensor_in_sizes, "*", filter_in_sizes,
          ", stride:", stride, ", padding: ", padding, ", max diff: ",
          np.amax(np.absolute(native_result - interface_result)))
    self.assertArrayNear(
        np.ravel(native_result), np.ravel(interface_result), 1e-5)
    self.assertShapeEqual(native_result, conv_native)
    self.assertShapeEqual(native_result, conv_interface)
Exemple #40
0
    def batch_jacobian(self,
                       target,
                       source,
                       unconnected_gradients=UnconnectedGradients.NONE,
                       parallel_iterations=None,
                       experimental_use_pfor=True):
        """Computes and stacks per-example jacobians.

    See [wikipedia article](http://en.wikipedia.org/wiki/jacobian_matrix_and_determinant) for the
    definition of a Jacobian. This function is essentially an efficient
    implementation of the following:

    `tf.stack([self.jacobian(y[i], x[i]) for i in range(x.shape[0])])`.

    Note that compared to `GradientTape.jacobian` which computes gradient of
    each output value w.r.t each input value, this function is useful when
    `target[i,...]` is independent of `source[j,...]` for `j != i`. This
    assumption allows more efficient computation as compared to
    `GradientTape.jacobian`. The output, as well as intermediate activations,
    are lower dimensional and avoid a bunch of redundant zeros which would
    result in the jacobian computation given the independence assumption.

    Example usage:

    ```python
    with tf.GradientTape() as g:
      x = tf.constant([[1., 2.], [3., 4.]], dtype=tf.float32)
      g.watch(x)
      y = x * x
    batch_jacobian = g.batch_jacobian(y, x)
    # batch_jacobian is [[[2,  0], [0,  4]], [[6,  0], [0,  8]]]
    ```

    Args:
      target: A tensor with rank 2 or higher and with shape [b, y1, ..., y_n].
        `target[i,...]` should only depend on `source[i,...]`.
      source: A tensor with rank 2 or higher and with shape [b, x1, ..., x_m].
      unconnected_gradients: a value which can either hold 'none' or 'zero' and
        alters the value which will be returned if the target and sources are
        unconnected. The possible values and effects are detailed in
        'UnconnectedGradients' and it defaults to 'none'.
      parallel_iterations: A knob to control how many iterations are dispatched
        in parallel. This knob can be used to control the total memory usage.
      experimental_use_pfor: If true, uses pfor for computing the Jacobian. Else
        uses a tf.while_loop.

    Returns:
      A tensor `t` with shape [b, y_1, ..., y_n, x1, ..., x_m] where `t[i, ...]`
      is the jacobian of `target[i, ...]` w.r.t. `source[i, ...]`, i.e. stacked
      per-example jacobians.

    Raises:
      RuntimeError: If called on a non-persistent tape with eager execution
        enabled and without enabling experimental_use_pfor.
      ValueError: If vectorization of jacobian computation fails or if first
        dimension of `target` and `source` do not match.
    """
        target_shape = target.shape
        if target_shape.rank is None:
            dim = tensor_shape.Dimension(None)
        else:
            dim = target_shape.dims[0]
        if not (target_shape.with_rank_at_least(2)
                and source.shape.with_rank_at_least(2)
                and dim.is_compatible_with(source.shape[0])):
            raise ValueError("Need first dimension of target shape (%s) and "
                             "source shape (%s) to match." %
                             (target.shape, source.shape))
        if target_shape.is_fully_defined():
            batch_size = int(target_shape[0])
            target_row_size = target_shape.num_elements() // batch_size
        else:
            target_shape = array_ops.shape(target)
            batch_size = target_shape[0]
            target_row_size = array_ops.size(target) // batch_size
        source_shape = array_ops.shape(source)
        # Flatten target to 2-D.
        # Note that we push and pop the tape here and below. This is needed since we
        # need gradients through the enclosed operations.
        self._push_tape()
        with ops.control_dependencies(
            [check_ops.assert_equal(batch_size, source_shape[0])]):
            target = array_ops.reshape(target, [batch_size, target_row_size])
        self._pop_tape()

        def loop_fn(i):
            self._push_tape()
            y = array_ops.gather(target, i, axis=1)
            self._pop_tape()
            return self.gradient(y,
                                 source,
                                 unconnected_gradients=unconnected_gradients)

        if experimental_use_pfor:
            try:
                output = pfor_ops.pfor(loop_fn,
                                       target_row_size,
                                       parallel_iterations=parallel_iterations)
            except ValueError as err:
                six.reraise(
                    ValueError,
                    ValueError(
                        str(err) +
                        "\nEncountered an exception while vectorizing the "
                        "batch_jacobian computation. Vectorization can be disabled by "
                        "setting experimental_use_pfor to False."),
                    sys.exc_info()[2])
        else:
            if context.executing_eagerly() and not self._persistent:
                raise RuntimeError(
                    "GradientTape must be created with persistent=True"
                    " to compute the batch_jacobian with eager execution enabled and "
                    " with experimental_use_pfor set to False.")
            output = pfor_ops.for_loop(loop_fn,
                                       target.dtype,
                                       target_row_size,
                                       parallel_iterations=parallel_iterations)
        new_shape = array_ops.concat([target_shape, source_shape[1:]], axis=0)
        if output is None:
            return array_ops.zeros(new_shape)
        else:
            output = array_ops.reshape(output,
                                       [target_row_size, batch_size, -1])
            output = array_ops.transpose(output, [1, 0, 2])
            return array_ops.reshape(output, new_shape)
Exemple #41
0
 def f(a, b):
   return array_ops.transpose(a, b)
Exemple #42
0
    def log_prob(self, x, name='log_prob'):
        """Log of the probability density/mass function.

    Args:
      x: `float` or `double` `Tensor`.
      name: The name to give this op.

    Returns:
      log_prob: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
        values of type `self.dtype`.
    """
        with ops.name_scope(self.name):
            with ops.name_scope(name, values=[x] + list(self.inputs.values())):
                x = ops.convert_to_tensor(x, name='x')
                contrib_tensor_util.assert_same_float_dtype(
                    (self.scale_operator_pd, x))
                if self.cholesky_input_output_matrices:
                    x_sqrt = x
                else:
                    # Complexity: O(nbk^3)
                    x_sqrt = linalg_ops.batch_cholesky(x)

                batch_shape = self.batch_shape()
                event_shape = self.event_shape()
                ndims = array_ops.rank(x_sqrt)
                # sample_ndims = ndims - batch_ndims - event_ndims
                sample_ndims = ndims - array_ops.shape(batch_shape)[0] - 2
                sample_shape = array_ops.slice(array_ops.shape(x_sqrt), [0],
                                               [sample_ndims])

                # We need to be able to pre-multiply each matrix by its corresponding
                # batch scale matrix.  Since a Distribution Tensor supports multiple
                # samples per batch, this means we need to reshape the input matrix `x`
                # so that the first b dimensions are batch dimensions and the last two
                # are of shape [dimension, dimensions*number_of_samples]. Doing these
                # gymnastics allows us to do a batch_solve.
                #
                # After we're done with sqrt_solve (the batch operation) we need to undo
                # this reshaping so what we're left with is a Tensor partitionable by
                # sample, batch, event dimensions.

                # Complexity: O(nbk^2) since transpose must access every element.
                scale_sqrt_inv_x_sqrt = x_sqrt
                perm = array_ops.concat(0, (math_ops.range(
                    sample_ndims, ndims), math_ops.range(0, sample_ndims)))
                scale_sqrt_inv_x_sqrt = array_ops.transpose(
                    scale_sqrt_inv_x_sqrt, perm)
                shape = array_ops.concat(0, (batch_shape, (math_ops.cast(
                    self.dimension, dtype=dtypes.int32), -1)))
                scale_sqrt_inv_x_sqrt = array_ops.reshape(
                    scale_sqrt_inv_x_sqrt, shape)

                # Complexity: O(nbM*k) where M is the complexity of the operator solving
                # a vector system.  E.g., for OperatorPDDiag, each solve is O(k), so
                # this complexity is O(nbk^2). For OperatorPDCholesky, each solve is
                # O(k^2) so this step has complexity O(nbk^3).
                scale_sqrt_inv_x_sqrt = self.scale_operator_pd.sqrt_solve(
                    scale_sqrt_inv_x_sqrt)

                # Undo make batch-op ready.
                # Complexity: O(nbk^2)
                shape = array_ops.concat(
                    0, (batch_shape, event_shape, sample_shape))
                scale_sqrt_inv_x_sqrt = array_ops.reshape(
                    scale_sqrt_inv_x_sqrt, shape)
                perm = array_ops.concat(
                    0, (math_ops.range(ndims - sample_ndims, ndims),
                        math_ops.range(0, ndims - sample_ndims)))
                scale_sqrt_inv_x_sqrt = array_ops.transpose(
                    scale_sqrt_inv_x_sqrt, perm)

                # Write V = SS', X = LL'. Then:
                # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
                #              = tr[inv(S) L L' inv(S)']
                #              = tr[(inv(S) L) (inv(S) L)']
                #              = sum_{ik} (inv(S) L)_{ik}^2
                # The second equality follows from the cyclic permutation property.
                # Complexity: O(nbk^2)
                trace_scale_inv_x = math_ops.reduce_sum(
                    math_ops.square(scale_sqrt_inv_x_sqrt),
                    reduction_indices=[-2, -1])

                # Complexity: O(nbk)
                half_log_det_x = math_ops.reduce_sum(math_ops.log(
                    array_ops.batch_matrix_diag_part(x_sqrt)),
                                                     reduction_indices=[-1])

                # Complexity: O(nbk^2)
                log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
                            0.5 * trace_scale_inv_x -
                            self.log_normalizing_constant())

                # Set shape hints.
                # Try to merge what we know from the input then what we know from the
                # parameters of this distribution.
                if x.get_shape().ndims is not None:
                    log_prob.set_shape(x.get_shape()[:-2])
                if (log_prob.get_shape().ndims is not None
                        and self.get_batch_shape().ndims is not None
                        and self.get_batch_shape().ndims > 0):
                    log_prob.get_shape(
                    )[-self.get_batch_shape().ndims:].merge_with(
                        self.get_batch_shape())

                return log_prob
Exemple #43
0
def norm(tensor,
         ord='euclidean',
         axis=None,
         keepdims=None,
         name=None,
         keep_dims=None):
    r"""Computes the norm of vectors, matrices, and tensors.

  This function can compute several different vector norms (the 1-norm, the
  Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and
  matrix norms (Frobenius, 1-norm, 2-norm and inf-norm).

  Args:
    tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128`
    ord: Order of the norm. Supported values are 'fro', 'euclidean',
      `1`, `2`, `np.inf` and any positive real number yielding the corresponding
      p-norm. Default is 'euclidean' which is equivalent to Frobenius norm if
      `tensor` is a matrix and equivalent to 2-norm for vectors.
      Some restrictions apply:
        a) The Frobenius norm `fro` is not defined for vectors,
        b) If axis is a 2-tuple (matrix norm), only 'euclidean', 'fro', `1`,
           `2`, `np.inf` are supported.
      See the description of `axis` on how to compute norms for a batch of
      vectors or matrices stored in a tensor.
    axis: If `axis` is `None` (the default), the input is considered a vector
      and a single vector norm is computed over the entire set of values in the
      tensor, i.e. `norm(tensor, ord=ord)` is equivalent to
      `norm(reshape(tensor, [-1]), ord=ord)`.
      If `axis` is a Python integer, the input is considered a batch of vectors,
      and `axis` determines the axis in `tensor` over which to compute vector
      norms.
      If `axis` is a 2-tuple of Python integers it is considered a batch of
      matrices and `axis` determines the axes in `tensor` over which to compute
      a matrix norm.
      Negative indices are supported. Example: If you are passing a tensor that
      can be either a matrix or a batch of matrices at runtime, pass
      `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are
      computed.
    keepdims: If True, the axis indicated in `axis` are kept with size 1.
      Otherwise, the dimensions in `axis` are removed from the output shape.
    name: The name of the op.
    keep_dims: Deprecated alias for `keepdims`.

  Returns:
    output: A `Tensor` of the same type as tensor, containing the vector or
      matrix norms. If `keepdims` is True then the rank of output is equal to
      the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar,
      if `axis` is an integer, the rank of `output` is one less than the rank
      of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less
      than the rank of `tensor`.

  Raises:
    ValueError: If `ord` or `axis` is invalid.

  @compatibility(numpy)
  Mostly equivalent to numpy.linalg.norm.
  Not supported: ord <= 0, 2-norm for matrices, nuclear norm.
  Other differences:
    a) If axis is `None`, treats the flattened `tensor` as a vector
     regardless of rank.
    b) Explicitly supports 'euclidean' norm as the default, including for
     higher order tensors.
  @end_compatibility
  """
    keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims,
                                                      'keep_dims', keep_dims)
    if keepdims is None:
        keepdims = False

    is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list))
                      and len(axis) == 2)
    if is_matrix_norm:
        axis = tuple(axis)
        if (not isinstance(axis[0], int) or not isinstance(axis[1], int)
                or axis[0] == axis[1]):
            raise ValueError(
                "'axis' must be None, an integer, or a tuple of 2 unique integers"
            )
        supported_matrix_norms = ['euclidean', 'fro', 1, 2, np.inf]
        if ord not in supported_matrix_norms:
            raise ValueError(
                "'ord' must be a supported matrix norm in %s, got %s" %
                (supported_matrix_norms, ord))
    else:
        if not (isinstance(axis, int) or axis is None):
            raise ValueError(
                "'axis' must be None, an integer, or a tuple of 2 unique integers"
            )

        supported_vector_norms = ['euclidean', 1, 2, np.inf]
        if (not np.isreal(ord)
                or ord <= 0) and ord not in supported_vector_norms:
            raise ValueError("'ord' must be a supported vector norm, got %s" %
                             ord)
        if axis is not None:
            axis = (axis, )

    with ops.name_scope(name, 'norm', [tensor]):
        tensor = ops.convert_to_tensor(tensor)

        if ord in ['fro', 'euclidean', 2, 2.0]:
            if is_matrix_norm and ord in [2, 2.0]:
                rank = array_ops.rank(tensor)
                positive_axis = map_fn.map_fn(
                    lambda i: control_flow_ops.cond(i >= 0, lambda: i, lambda:
                                                    i + rank),
                    ops.convert_to_tensor(axis))
                axes = math_ops.range(rank)
                perm_before = array_ops.concat([
                    array_ops.setdiff1d(axes, positive_axis)[0], positive_axis
                ],
                                               axis=0)
                perm_after = map_fn.map_fn(
                    lambda i: math_ops.cast(array_ops.squeeze(
                        array_ops.where(math_ops.equal(perm_before, i))),
                                            dtype=dtypes.int32), axes)
                permed = array_ops.transpose(tensor, perm=perm_before)
                matrix_2_norm = array_ops.expand_dims(math_ops.reduce_max(
                    math_ops.abs(
                        gen_linalg_ops.svd(permed, compute_uv=False)[0]),
                    axis=-1,
                    keepdims=True),
                                                      axis=-1)
                result = array_ops.transpose(matrix_2_norm, perm=perm_after)
            else:
                result = math_ops.sqrt(
                    math_ops.reduce_sum(tensor * math_ops.conj(tensor),
                                        axis,
                                        keepdims=True))
        else:
            result = math_ops.abs(tensor)
            if ord == 1:
                sum_axis = None if axis is None else axis[0]
                result = math_ops.reduce_sum(result, sum_axis, keepdims=True)
                if is_matrix_norm:
                    result = math_ops.reduce_max(result,
                                                 axis[-1],
                                                 keepdims=True)
            elif ord == np.inf:
                if is_matrix_norm:
                    result = math_ops.reduce_sum(result,
                                                 axis[1],
                                                 keepdims=True)
                max_axis = None if axis is None else axis[0]
                result = math_ops.reduce_max(result, max_axis, keepdims=True)
            else:
                # General p-norms (positive p only)
                result = math_ops.pow(
                    math_ops.reduce_sum(math_ops.pow(result, ord),
                                        axis,
                                        keepdims=True), 1.0 / ord)
        if not keepdims:
            result = array_ops.squeeze(result, axis)
        return result
 def convert(w):
     return array_ops.transpose(w) if transpose_weights else w
  def _VerifyValuesWithDilation(self,
                                tensor_in_sizes,
                                filter_in_sizes,
                                stride,
                                dilation,
                                padding,
                                data_type,
                                data_format="NHWC"):
    """Verifies the output values of the convolution function.

    Args:
      tensor_in_sizes: Input tensor dimensions in [batch, input_rows,
        input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols,
        input_depth, depth_multiplier].
      stride: Stride.
      dilation: Dilation.
      padding: Padding type.
      data_type: The data type to use.
      data_format: The data_format of the input. "NHWC" or "NCHW".
    """
    total_size_1 = 1
    total_size_2 = 1
    for s in tensor_in_sizes:
      total_size_1 *= s
    for s in filter_in_sizes:
      total_size_2 *= s
    # Initializes the input and filter tensor with numbers incrementing from 1.
    x1 = np.array([f * 1.0 for f in range(1, total_size_1 + 1)],
                  dtype=data_type).reshape(tensor_in_sizes)
    x2 = np.array([f * 1.0 for f in range(1, total_size_2 + 1)],
                  dtype=data_type).reshape(filter_in_sizes)
    with self.session() as sess:
      if data_type == np.float32:
        # TODO(b/64210055): Tolerance for TPU is high.
        tolerance = 1e-2
      else:
        self.assertEqual(data_type, np.float64)
        tolerance = 1e-8

      t1 = array_ops.placeholder(shape=tensor_in_sizes, dtype=data_type)
      t2 = array_ops.placeholder(shape=filter_in_sizes, dtype=data_type)

      native_t1 = t1
      strides = [1, stride, stride, 1]
      dilations = [dilation, dilation]
      if data_format == "NCHW":
        # Transpose from NWHC input to NCHW
        # Ex. [4, 5, 5, 48] to [4, 48, 5, 5]
        native_t1 = array_ops.transpose(t1, [0, 3, 1, 2])
        strides = [1, 1, stride, stride]

      with self.test_scope():
        conv_native = nn_impl.depthwise_conv2d(
            native_t1,
            t2,
            strides=strides,
            rate=dilations,
            data_format=data_format,
            padding=padding)

      if data_format == "NCHW":
        # Transpose back from NCHW to NHWC
        conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

      with ops.device("CPU"):
        # CPU only support NHWC format
        strides = [1, stride, stride, 1]
        conv_interface = nn_impl.depthwise_conv2d(
            t1, t2, strides=strides, rate=dilations, padding=padding)

      native_result = sess.run(conv_native, {t1: x1, t2: x2})
      interface_result = sess.run(conv_interface, {t1: x1, t2: x2})

    print("data_type:", data_type, "max diff = ",
          np.amax(np.absolute(native_result - interface_result)))
    self.assertAllClose(
        np.ravel(native_result), np.ravel(interface_result), rtol=tolerance)
Exemple #46
0
    def sample_n(self, n, seed=None, name='sample'):
        # pylint: disable=line-too-long
        """Generate `n` samples.

    Complexity: O(nbk^3)

    The sampling procedure is based on the [Bartlett decomposition](
    https://en.wikipedia.org/wiki/Wishart_distribution#Bartlett_decomposition)
    and [using a Gamma distribution to generate Chi2 random variates](
    https://en.wikipedia.org/wiki/Chi-squared_distribution#Gamma.2C_exponential.2C_and_related_distributions).

    Args:
      n: Scalar. Number of samples to draw from each distribution.
      seed: Python integer; random number generator seed.
      name: The name of this op.

    Returns:
      samples: a `Tensor` of shape `(n,) + self.batch_shape + self.event_shape`
          with values of type `self.dtype`.
    """
        with ops.name_scope(self.name):
            with ops.name_scope(name, values=[n] + list(self.inputs.values())):
                n = ops.convert_to_tensor(n, name='n')
                if n.dtype != dtypes.int32:
                    raise TypeError('n.dtype=%s which is not int32' % n.dtype)
                batch_shape = self.batch_shape()
                event_shape = self.event_shape()
                batch_ndims = array_ops.shape(batch_shape)[0]

                ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
                shape = array_ops.concat(0, ((n, ), batch_shape, event_shape))

                # Complexity: O(nbk^2)
                x = random_ops.random_normal(shape=shape,
                                             mean=0.,
                                             stddev=1.,
                                             dtype=self.dtype,
                                             seed=seed)

                # Complexity: O(nbk)
                # This parametrization is equivalent to Chi2, i.e.,
                # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
                g = random_ops.random_gamma(shape=(n, ),
                                            alpha=self._multi_gamma_sequence(
                                                0.5 * self.df, self.dimension),
                                            beta=0.5,
                                            dtype=self.dtype,
                                            seed=seed)

                # Complexity: O(nbk^2)
                x = array_ops.batch_matrix_band_part(x, -1, 0)  # Tri-lower.

                # Complexity: O(nbk)
                x = array_ops.batch_matrix_set_diag(x, math_ops.sqrt(g))

                # Make batch-op ready.
                # Complexity: O(nbk^2)
                perm = array_ops.concat(0, (math_ops.range(1, ndims), (0, )))
                x = array_ops.transpose(x, perm)
                shape = array_ops.concat(0,
                                         (batch_shape, (event_shape[0], -1)))
                x = array_ops.reshape(x, shape)

                # Complexity: O(nbM) where M is the complexity of the operator solving a
                # vector system.  E.g., for OperatorPDDiag, each matmul is O(k^2), so
                # this complexity is O(nbk^2). For OperatorPDCholesky, each matmul is
                # O(k^3) so this step has complexity O(nbk^3).
                x = self.scale_operator_pd.sqrt_matmul(x)

                # Undo make batch-op ready.
                # Complexity: O(nbk^2)
                shape = array_ops.concat(0, (batch_shape, event_shape, (n, )))
                x = array_ops.reshape(x, shape)
                perm = array_ops.concat(
                    0, ((ndims - 1, ), math_ops.range(0, ndims - 1)))
                x = array_ops.transpose(x, perm)

                if not self.cholesky_input_output_matrices:
                    # Complexity: O(nbk^3)
                    x = math_ops.batch_matmul(x, x, adj_y=True)

                # Set shape hints.
                if self.scale_operator_pd.get_shape().ndims is not None:
                    x.set_shape(
                        tensor_shape.TensorShape(
                            [tensor_util.constant_value(n)] +
                            self.scale_operator_pd.get_shape().as_list()))
                elif x.get_shape().ndims is not None:
                    x.get_shape()[0].merge_with(
                        tensor_shape.TensorDimension(
                            tensor_util.constant_value(n)))

                return x
  def _ConstructAndTestGradient(self,
                                input_shape,
                                filter_shape,
                                output_shape,
                                stride,
                                padding,
                                data_type,
                                test_input,
                                use_gpu,
                                data_format="NHWC"):
    input_size = 1
    for x in input_shape:
      input_size *= x
    filter_size = 1
    for x in filter_shape:
      filter_size *= x
    input_data = [x * 1.0 / input_size for x in range(0, input_size)]
    filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
    with self.test_session(use_gpu=use_gpu):
      if data_type == dtypes.float32:
        tolerance = 0.002
      else:
        self.assertEqual(data_type, dtypes.float64)
        tolerance = 1e-8

      input_tensor = constant_op.constant(
          input_data, shape=input_shape, dtype=data_type, name="input")
      filter_tensor = constant_op.constant(
          filter_data, shape=filter_shape, dtype=data_type, name="filter")

      native_input = input_tensor
      strides = [1, stride, stride, 1]
      if data_format == "NCHW":
        # Transpose from NWHC input to NCHW
        # Ex. [4, 5, 5, 48] to [4, 48, 5, 5]
        native_input = array_ops.transpose(input_tensor, [0, 3, 1, 2])
        input_shape = [
            input_shape[0], input_shape[3], input_shape[1], input_shape[2]
        ]
        output_shape = [
            output_shape[0], output_shape[3], output_shape[1], output_shape[2]
        ]
        strides = [1, 1, stride, stride]

      depthwise_conv2d = nn_ops.depthwise_conv2d_native(
          native_input,
          filter_tensor,
          strides,
          padding,
          data_format=data_format,
          name="depthwise_conv2d")

      self.assertEqual(output_shape, depthwise_conv2d.get_shape())
      if test_input:
        err = gradient_checker.compute_gradient_error(
            native_input, input_shape, depthwise_conv2d, output_shape)
      else:
        err = gradient_checker.compute_gradient_error(filter_tensor,
                                                      filter_shape,
                                                      depthwise_conv2d,
                                                      output_shape)
      print("depthwise conv_2d gradient error = ", err)
      self.assertLess(err, tolerance)
def HwioToOihw(in_tensor):
  return array_ops.transpose(in_tensor, [3, 2, 0, 1])
def rotate_transpose(x, shift, name="rotate_transpose"):
    """Circularly moves dims left or right.

  Effectively identical to:

  ```python
  numpy.transpose(x, numpy.roll(numpy.arange(len(x.shape)), shift))
  ```

  When `validate_args=False` additional graph-runtime checks are
  performed. These checks entail moving data from to GPU to CPU.

  Example:

    ```python
    x = ... # Tensor of shape [1, 2, 3, 4].
    rotate_transpose(x, -1)  # result shape: [2, 3, 4, 1]
    rotate_transpose(x, -2)  # result shape: [3, 4, 1, 2]
    rotate_transpose(x,  1)  # result shape: [4, 1, 2, 3]
    rotate_transpose(x,  2)  # result shape: [3, 4, 1, 2]
    rotate_transpose(x, 7) == rotate_transpose(x, 3)
    rotate_transpose(x, -7) == rotate_transpose(x, -3)
    ```

  Args:
    x: `Tensor`.
    shift: `Tensor`. Number of dimensions to transpose left (shift<0) or
      transpose right (shift>0).
    name: `String`. The name to give this op.

  Returns:
    rotated_x: Input `Tensor` with dimensions circularly rotated by shift.

  Raises:
    TypeError: if shift is not integer type.
  """
    with ops.name_scope(name, values=[x, shift]):
        x = ops.convert_to_tensor(x, name="x")
        shift = ops.convert_to_tensor(shift, name="shift")
        # We do not assign back to preserve constant-ness.
        check_ops.assert_integer(shift)
        shift_value_static = tensor_util.constant_value(shift)
        ndims = x.get_shape().ndims
        if ndims is not None and shift_value_static is not None:
            if ndims < 2: return x
            shift_value_static = np.sign(shift_value_static) * (
                abs(shift_value_static) % ndims)
            if shift_value_static == 0: return x
            perm = np.roll(np.arange(ndims), shift_value_static)
            return array_ops.transpose(x, perm=perm)
        else:
            # Consider if we always had a positive shift, and some specified
            # direction.
            # When shifting left we want the new array:
            #   last(x, n-shift) + first(x, shift)
            # and if shifting right then we want:
            #   last(x, shift) + first(x, n-shift)
            # Observe that last(a) == slice(a, n) and first(a) == slice(0, a).
            # Also, we can encode direction and shift as one: direction * shift.
            # Combining these facts, we have:
            #   a = cond(shift<0, -shift, n-shift)
            #   last(x, n-a) + first(x, a) == x[a:n] + x[0:a]
            # Finally, we transform shift by modulo length so it can be specified
            # independently from the array upon which it operates (like python).
            ndims = array_ops.rank(x)
            shift = math_ops.select(math_ops.less(shift, 0),
                                    math_ops.mod(-shift, ndims),
                                    ndims - math_ops.mod(shift, ndims))
            first = math_ops.range(0, shift)
            last = math_ops.range(shift, ndims)
            perm = array_ops.concat(0, (last, first))
            return array_ops.transpose(x, perm=perm)
def NchwToNchwVectC(in_tensor):
  n, c, h, w = in_tensor.shape.as_list()
  assert c % 4 == 0
  t = array_ops.reshape(in_tensor, [n, c // 4, 4, h, w])
  return array_ops.transpose(t, [0, 1, 3, 4, 2])
Exemple #51
0
def _ExtractImagePatchesGrad(op, grad):

  batch_size, rows_in, cols_in, channels = [
    dim.value for dim in op.inputs[0].get_shape()
  ]
  input_bhwc = array_ops.shape(op.inputs[0])
  batch_size = input_bhwc[0]
  channels = input_bhwc[3]

  _, rows_out, cols_out, _ = [
    dim.value for dim in op.outputs[0].get_shape()
  ]
  _, ksize_r, ksize_c, _ = op.get_attr('ksizes')
  _, stride_r, stride_h, _ = op.get_attr('strides')
  _, rate_r, rate_c, _ = op.get_attr('rates')
  padding = op.get_attr('padding')

  ksize_r_eff = ksize_r + (ksize_r - 1) * (rate_r - 1)
  ksize_c_eff = ksize_c + (ksize_c - 1) * (rate_c - 1)

  if padding == b'SAME':
    rows_out = int(ceil(rows_in / stride_r))
    cols_out = int(ceil(cols_in / stride_h))
    pad_rows = ((rows_out - 1) * stride_r + ksize_r_eff - rows_in) // 2
    pad_cols = ((cols_out - 1) * stride_h + ksize_c_eff - cols_in) // 2

  elif padding == b'VALID':
    rows_out = int(ceil((rows_in - ksize_r_eff + 1) / stride_r))
    cols_out = int(ceil((cols_in - ksize_c_eff + 1) / stride_h))
    pad_rows = (rows_out - 1) * stride_r + ksize_r_eff - rows_in
    pad_cols = (cols_out - 1) * stride_h + ksize_c_eff - cols_in

  pad_rows, pad_cols = max(0, pad_rows), max(0, pad_cols)

  grad_expanded = array_ops.transpose(
    array_ops.reshape(grad, (batch_size, rows_out,
                             cols_out, ksize_r, ksize_c, channels)),
    (1, 2, 3, 4, 0, 5)
  )
  grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels))

  row_steps = range(0, rows_out * stride_r, stride_r)
  col_steps = range(0, cols_out * stride_h, stride_h)

  idx = []
  for i in range(rows_out):
    for j in range(cols_out):
      r_low, c_low = row_steps[i] - pad_rows, col_steps[j] - pad_cols
      r_high, c_high = r_low + ksize_r_eff, c_low + ksize_c_eff

      idx.extend([(r * (cols_in) + c,
                   i * (cols_out * ksize_r * ksize_c) +
                   j * (ksize_r * ksize_c) +
                   ri * (ksize_c) + ci)
                  for (ri, r) in enumerate(range(r_low, r_high, rate_r))
                  for (ci, c) in enumerate(range(c_low, c_high, rate_c))
                  if 0 <= r and r < rows_in and 0 <= c and c < cols_in
      ])

  sp_shape = (rows_in * cols_in,
              rows_out * cols_out * ksize_r * ksize_c)

  sp_mat = sparse_tensor.SparseTensor(
    array_ops.constant(idx, dtype=ops.dtypes.int64),
    array_ops.ones((len(idx),), dtype=ops.dtypes.float32),
    sp_shape
  )

  jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat)

  grad_out = array_ops.reshape(
    jac, (rows_in, cols_in, batch_size, channels)
  )
  grad_out = array_ops.transpose(grad_out, (2, 0, 1, 3))

  return [grad_out]
Exemple #52
0
def _TransposeGrad(op, grad):
  """Returns unshuffle(grad)."""
  p = op.inputs[1]
  return [array_ops.transpose(grad, array_ops.invert_permutation(p)), None]
    def _ConstructAndTestGradient(self,
                                  input_shape,
                                  filter_shape,
                                  output_shape,
                                  stride,
                                  padding,
                                  data_type,
                                  test_input,
                                  use_gpu,
                                  grouped_conv=False,
                                  data_format="NHWC"):
        input_size = 1
        for x in input_shape:
            input_size *= x
        filter_size = 1
        for x in filter_shape:
            filter_size *= x
        input_data = [x * 1.0 / input_size for x in range(0, input_size)]
        filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
        ops.reset_default_graph()
        graph = ops.get_default_graph()
        with self.session(graph=graph, use_gpu=use_gpu) as sess:
            tolerance = {
                dtypes.float16: 4e-0,
                dtypes.float32: 8e-4,
                dtypes.float64: 1e-12,
            }[data_type]

            input_tensor = constant_op.constant(input_data,
                                                shape=input_shape,
                                                dtype=data_type,
                                                name="input")
            filter_tensor = constant_op.constant(filter_data,
                                                 shape=filter_shape,
                                                 dtype=data_type,
                                                 name="filter")

            native_input = input_tensor
            strides = [1, stride, stride, 1]
            if data_format == "NCHW":
                # Transpose from NHWC input to NCHW
                # Ex. [4, 5, 5, 48] to [4, 48, 5, 5]
                native_input = array_ops.transpose(input_tensor, [0, 3, 1, 2])
                input_shape = [
                    input_shape[0], input_shape[3], input_shape[1],
                    input_shape[2]
                ]
                output_shape = [
                    output_shape[0], output_shape[3], output_shape[1],
                    output_shape[2]
                ]
                strides = [1, 1, stride, stride]

            with sess.graph._kernel_label_map({
                    "DepthwiseConv2dNative":
                    "cudnn_grouped_convolution",
                    "DepthwiseConv2dNativeBackpropInput":
                    "cudnn_grouped_convolution",
                    "DepthwiseConv2dNativeBackpropFilter":
                    "cudnn_grouped_convolution",
            } if grouped_conv else {}):
                depthwise_conv2d = nn_ops.depthwise_conv2d_native(
                    native_input,
                    filter_tensor,
                    strides,
                    padding,
                    data_format=data_format,
                    name="depthwise_conv2d")

            self.assertEqual(output_shape, depthwise_conv2d.get_shape())

            try:
                if test_input:
                    err = gradient_checker.compute_gradient_error(
                        native_input, input_shape, depthwise_conv2d,
                        output_shape)
                else:
                    err = gradient_checker.compute_gradient_error(
                        filter_tensor, filter_shape, depthwise_conv2d,
                        output_shape)
            except errors.InvalidArgumentError as e:
                # Grouped convolution kernel is only registered for cuDNN 7. Silently
                # return when we are running on an earlier version or without GPU.
                if grouped_conv and e.message.startswith(
                        "No OpKernel was registered to support Op 'DepthwiseConv2dNative'"
                ):
                    tf_logging.warn("Skipping grouped convolution test")
                    return
                raise e

            tf_logging.info(
                "data_type: %r, use_gpu: %r, grouped_conv: %r, error = %f",
                data_type, use_gpu, grouped_conv, err)
            self.assertLess(err, tolerance)
Exemple #54
0
def _GatherV2Grad(op, grad):
  """Gradient for GatherV2 op."""
  # params can be large, so colocate the shape calculation with it.
  #
  # params can be very large for sparse model, array_ops.shape raises
  # exception on the Windows platform when any dimension is larger than
  # int32. params_shape is not used in optimizer apply_sparse gradients,
  # so it's fine to convert it back to int32 regardless of truncation.
  params = op.inputs[0]
  with ops.colocate_with(params):
    params_shape = array_ops.shape(params, out_type=ops.dtypes.int64)
    params_shape = math_ops.to_int32(params_shape)

  indices = op.inputs[1]
  indices_size = array_ops.expand_dims(array_ops.size(indices), 0)
  axis = op.inputs[2]
  axis_static = tensor_util.constant_value(axis)

  # For axis 0 gathers, build an appropriately shaped IndexedSlices.
  if axis_static == 0:
    if context.in_eager_mode():
      params_tail_shape = params_shape.cpu()[1:]
    else:
      params_tail_shape = params_shape[1:]
    values_shape = array_ops.concat([indices_size, params_tail_shape], 0)
    values = array_ops.reshape(grad, values_shape)
    indices = array_ops.reshape(indices, indices_size)
    return [ops.IndexedSlices(values, indices, params_shape), None, None]

  outer_shape = params_shape[:axis]
  outer_dims = array_ops.size(outer_shape)
  inner_shape = params_shape[axis:][1:]
  inner_dims = array_ops.size(inner_shape)

  outer_axes_indices = math_ops.range(outer_dims)
  inner_axes_indices = math_ops.range(outer_dims + 1,
                                      outer_dims + 1 + inner_dims)

  values_shape = array_ops.concat([outer_shape, indices_size, inner_shape], 0)
  values = array_ops.reshape(grad, values_shape)
  indices = array_ops.reshape(indices, indices_size)

  # We need to sum up every slice `values[..., i, ....]` corresponding to
  # `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not
  # support an axis parameter, we transpose the gather dimension to the front,
  # then use `unsorted_segment_sum` to build a
  # [gather_axis, outer_axes, inner_axes] tensor with all the gradients
  # affecting each index in `gather_axis` summed up.
  transpose_dims = array_ops.concat(
      [[outer_dims], outer_axes_indices, inner_axes_indices], 0)
  values_transpose = array_ops.transpose(values, transpose_dims)
  num_segments = params_shape[axis]

  params_grad = math_ops.unsorted_segment_sum(
      values_transpose, indices, num_segments)

  # Inverts the above transpose by moving dimension 0 back to its original
  # position.
  invert_transpose_dims = array_ops.concat(
      [outer_axes_indices + 1, [0], inner_axes_indices], 0)
  params_grad = array_ops.transpose(params_grad, invert_transpose_dims)
  return [params_grad, None, None]
Exemple #55
0
 def call(self, inputs):
     return array_ops.transpose(inputs, perm=(0, ) + self.dims)
  def multiply_inverse(self, vector):
    # pylint: disable=invalid-name

    Z = utils.layer_params_to_mat2d(vector)

    # Derivations were done for "batch_dim==1" case so we need to convert to
    # that orientation:
    Z = array_ops.transpose(Z)

    if self._option == SeriesFBApproximation.option1:

      # Note that L_A = A0^(-1/2) * U_A and L_G = G0^(-1/2) * U_G.
      L_A, psi_A = self._input_factor.get_option1quants(self._damping_input)
      L_G, psi_G = self._output_factor.get_option1quants(self._damping_output)

      def gamma(x):
        # We are assuming that each case has the same number of time-steps.
        # If this stops being the case one shouldn't simply replace this T
        # with its average value.  Instead, one needs to go back to the
        # definition of the gamma function from the paper.
        T = self._num_timesteps
        return (1 - x)**2 / (T * (1 - x**2) - 2 * x * (1 - x**T))

      # Y = gamma( psi_G*psi_A^T ) (computed element-wise)
      # Even though Y is Z-independent we are recomputing it from the psi's
      # each since Y depends on both A and G quantities, and it is relatively
      # cheap to compute.
      Y = gamma(array_ops.reshape(psi_G, [int(psi_G.shape[0]), -1]) * psi_A)

      # Z = L_G^T * Z * L_A
      # This is equivalent to the following computation from the original
      # pseudo-code:
      # Z = G0^(-1/2) * Z * A0^(-1/2)
      # Z = U_G^T * Z * U_A
      Z = math_ops.matmul(L_G, math_ops.matmul(Z, L_A), transpose_a=True)

      # Z = Z .* Y
      Z *= Y

      # Z = L_G * Z * L_A^T
      # This is equivalent to the following computation from the original
      # pseudo-code:
      # Z = U_G * Z * U_A^T
      # Z = G0^(-1/2) * Z * A0^(-1/2)
      Z = math_ops.matmul(L_G, math_ops.matmul(Z, L_A, transpose_b=True))

    elif self._option == SeriesFBApproximation.option2:

      # Note that P_A = A_1^T * A_0^(-1) and P_G = G_1^T * G_0^(-1),
      # and K_A = A_0^(-1/2) * E_A and K_G = G_0^(-1/2) * E_G.
      P_A, K_A, mu_A = self._input_factor.get_option2quants(self._damping_input)
      P_G, K_G, mu_G = self._output_factor.get_option2quants(
          self._damping_output)

      # Our approach differs superficially from the pseudo-code in the paper
      # in order to reduce the total number of matrix-matrix multiplies.
      # In particular, the first three computations in the pseudo code are
      # Z = G0^(-1/2) * Z * A0^(-1/2)
      # Z = Z - hPsi_G^T * Z * hPsi_A
      # Z = E_G^T * Z * E_A
      # Noting that hPsi = C0^(-1/2) * C1 * C0^(-1/2), so that
      # C0^(-1/2) * hPsi = C0^(-1) * C1 * C0^(-1/2) = P^T * C0^(-1/2)
      # the entire computation can be written as
      # Z = E_G^T * (G0^(-1/2) * Z * A0^(-1/2)
      #     - hPsi_G^T * G0^(-1/2) * Z * A0^(-1/2) * hPsi_A) * E_A
      #   = E_G^T * (G0^(-1/2) * Z * A0^(-1/2)
      #     - G0^(-1/2) * P_G * Z * P_A^T * A0^(-1/2)) * E_A
      #   = E_G^T * G0^(-1/2) * Z * A0^(-1/2) * E_A
      #     -  E_G^T* G0^(-1/2) * P_G * Z * P_A^T * A0^(-1/2) * E_A
      #   = K_G^T * Z * K_A  -  K_G^T * P_G * Z * P_A^T * K_A
      # This final expression is computed by the following two lines:
      # Z = Z - P_G * Z * P_A^T
      Z -= math_ops.matmul(P_G, math_ops.matmul(Z, P_A, transpose_b=True))
      # Z = K_G^T * Z * K_A
      Z = math_ops.matmul(K_G, math_ops.matmul(Z, K_A), transpose_a=True)

      # Z = Z ./ (1*1^T - mu_G*mu_A^T)
      # Be careful with the outer product.  We don't want to accidentally
      # make it an inner-product instead.
      tmp = 1.0 - array_ops.reshape(mu_G, [int(mu_G.shape[0]), -1]) * mu_A
      # Prevent some numerical issues by setting any 0.0 eigs to 1.0
      tmp += 1.0 * math_ops.cast(math_ops.equal(tmp, 0.0), dtype=tmp.dtype)
      Z /= tmp

      # We now perform the transpose/reverse version of the operations
      # derived above, whose derivation from the original pseudo-code is
      # analgous.
      # Z = K_G * Z * K_A^T
      Z = math_ops.matmul(K_G, math_ops.matmul(Z, K_A, transpose_b=True))

      # Z = Z - P_G^T * Z * P_A
      Z -= math_ops.matmul(P_G, math_ops.matmul(Z, P_A), transpose_a=True)

      # Z = normalize (1/E[T]) * Z
      # Note that this normalization is done because we compute the statistics
      # by averaging, not summing, over time. (And the gradient is presumably
      # summed over time, not averaged, and thus their scales are different.)
      Z /= math_ops.cast(self._num_timesteps, Z.dtype)

    # Convert back to the "batch_dim==0" orientation.
    Z = array_ops.transpose(Z)

    return utils.mat2d_to_layer_params(vector, Z)
    def _make_histogram_ops(self, model):
        """Defines histogram ops when histogram_freq > 0."""
        # only make histogram summary op if it hasn't already been made
        if self.histogram_freq and self.merged is None:
            for layer in self.model.layers:
                for weight in layer.weights:
                    mapped_weight_name = weight.name.replace(':', '_')
                    tf_summary.histogram(mapped_weight_name, weight)
                    if self.write_images:
                        w_img = array_ops.squeeze(weight)
                        shape = K.int_shape(w_img)
                        if len(shape) == 2:  # dense layer kernel case
                            if shape[0] > shape[1]:
                                w_img = array_ops.transpose(w_img)
                                shape = K.int_shape(w_img)
                            w_img = array_ops.reshape(
                                w_img, [1, shape[0], shape[1], 1])
                        elif len(shape) == 3:  # convnet case
                            if K.image_data_format() == 'channels_last':
                                # switch to channels_first to display
                                # every kernel as a separate image
                                w_img = array_ops.transpose(w_img,
                                                            perm=[2, 0, 1])
                                shape = K.int_shape(w_img)
                            w_img = array_ops.reshape(
                                w_img, [shape[0], shape[1], shape[2], 1])
                        elif len(shape) == 1:  # bias case
                            w_img = array_ops.reshape(w_img,
                                                      [1, shape[0], 1, 1])
                        else:
                            # not possible to handle 3D convnets etc.
                            continue

                        shape = K.int_shape(w_img)
                        assert len(shape) == 4 and shape[-1] in [1, 3, 4]
                        tf_summary.image(mapped_weight_name, w_img)

                if self.write_grads:
                    for weight in layer.trainable_weights:
                        mapped_weight_name = weight.name.replace(':', '_')
                        grads = model.optimizer.get_gradients(
                            model.total_loss, weight)

                        def is_indexed_slices(grad):
                            return type(grad).__name__ == 'IndexedSlices'

                        grads = [
                            grad.values if is_indexed_slices(grad) else grad
                            for grad in grads
                        ]
                        tf_summary.histogram(
                            '{}_grad'.format(mapped_weight_name), grads)

                if hasattr(layer, 'output'):
                    if isinstance(layer.output, list):
                        for i, output in enumerate(layer.output):
                            tf_summary.histogram(
                                '{}_out_{}'.format(layer.name, i), output)
                    else:
                        tf_summary.histogram('{}_out'.format(layer.name),
                                             layer.output)
Exemple #58
0
    def all_gather(self,
                   input_tensor,
                   axis,
                   communication_hint='AUTO',
                   timeout=0):
        """All-gather a dense tensor.

    This method must be called inside a tf.function.

    Args:
      input_tensor: a dense tensor. It must have the same rank on all replicas,
        and dimensions other than `axis` need to be the same as well.
      axis: 0-D int32 Tensor. Dimension along which to gather. Must be in the
        range [0, rank(value)).
      communication_hint: string providing hint to runtime for choosing
        collective implementation. Available options are `AUTO`, `NCCL`, and
        `RING`.
      timeout: a float. The timeout in seconds.

    Returns:
      The gathered Tensor.

    Raises:
      RuntimeError: if called in eager mode.
    """
        if context.executing_eagerly():
            raise RuntimeError('all_gather in eager mode is not supported')

        instance_key_tensor = self._collective_keys.get_instance_key(
            self._group_key, self._device)
        instance_key_shape = self._collective_keys.get_instance_key(
            self._group_key, self._device)
        with ops.device(self._device), \
             ops.control_dependencies([array_ops.identity(input_tensor)]):
            # 1. Transpose
            # E.g. Given an input_tensor with shape [2,2,5,1] and axis to gather is 3,
            # we use perm_pre=[3 0 1 2] to reshape it to [1,2,2,5], which
            # brings the 3rd dim first; afterwards we use perm_after=[1,2,3,0] to
            # place it back.
            perm_pre = array_ops.concat(
                ([axis], math_ops.range(axis),
                 math_ops.range(axis + 1, array_ops.rank(input_tensor))),
                axis=0)
            input_tensor_t = array_ops.transpose(input_tensor, perm=perm_pre)
            # 2. Pad
            gathered_shape = collective_ops.all_gather(
                array_ops.expand_dims_v2(array_ops.shape_v2(input_tensor_t),
                                         axis=0),
                self._group_size,
                self._group_key,
                instance_key_shape,
                communication_hint,
                timeout=timeout)
            first_dims = gathered_shape[:, 0]
            full_axis_dim = math_ops.reduce_max(first_dims)
            padded_input_tensor = _pad_util(input_tensor_t, full_axis_dim)

            # 3. Gather
            gather_padded_out_tensor = collective_ops.all_gather(
                padded_input_tensor,
                self._group_size,
                self._group_key,
                instance_key_tensor,
                communication_hint,
                timeout=timeout)
            # 4. Unpad
            split_tensors = []
            for i in range(first_dims.shape[0]):
                start_pos = i * full_axis_dim
                split_tensors.append(
                    gather_padded_out_tensor[start_pos:start_pos +
                                             first_dims[i]])
            out_tensor_t = array_ops.concat(split_tensors, 0)

            # 5. Transpose back
            perm_after = array_ops.concat(
                (math_ops.range(1, axis + 1), [0],
                 math_ops.range(axis + 1, array_ops.rank(input_tensor_t))),
                axis=0)
            return array_ops.transpose(out_tensor_t, perm=perm_after)
Exemple #59
0
def ctc_loss(labels, inputs, sequence_length,
             preprocess_collapse_repeated=False,
             ctc_merge_repeated=True,
             ignore_longer_outputs_than_inputs=False, time_major=True):
  """Computes the CTC (Connectionist Temporal Classification) Loss.

  This op implements the CTC loss as presented in the article:

  [A. Graves, S. Fernandez, F. Gomez, J. Schmidhuber.
  Connectionist Temporal Classification: Labeling Unsegmented Sequence Data
  with Recurrent Neural Networks. ICML 2006, Pittsburgh, USA, pp. 369-376.](http://www.cs.toronto.edu/~graves/icml_2006.pdf)

  Input requirements:

  ```
  sequence_length(b) <= time for all b

  max(labels.indices(labels.indices[:, 1] == b, 2))
    <= sequence_length(b) for all b.
  ```

  Notes:

  This class performs the softmax operation for you, so inputs should
  be e.g. linear projections of outputs by an LSTM.

  The `inputs` Tensor's innermost dimension size, `num_classes`, represents
  `num_labels + 1` classes, where num_labels is the number of true labels, and
  the largest value `(num_classes - 1)` is reserved for the blank label.

  For example, for a vocabulary containing 3 labels `[a, b, c]`,
  `num_classes = 4` and the labels indexing is `{a: 0, b: 1, c: 2, blank: 3}`.

  Regarding the arguments `preprocess_collapse_repeated` and
  `ctc_merge_repeated`:

  If `preprocess_collapse_repeated` is True, then a preprocessing step runs
  before loss calculation, wherein repeated labels passed to the loss
  are merged into single labels.  This is useful if the training labels come
  from, e.g., forced alignments and therefore have unnecessary repetitions.

  If `ctc_merge_repeated` is set False, then deep within the CTC calculation,
  repeated non-blank labels will not be merged and are interpreted
  as individual labels.  This is a simplified (non-standard) version of CTC.

  Here is a table of the (roughly) expected first order behavior:

  * `preprocess_collapse_repeated=False`, `ctc_merge_repeated=True`

    Classical CTC behavior: Outputs true repeated classes with blanks in
    between, and can also output repeated classes with no blanks in
    between that need to be collapsed by the decoder.

  * `preprocess_collapse_repeated=True`, `ctc_merge_repeated=False`

    Never learns to output repeated classes, as they are collapsed
    in the input labels before training.

  * `preprocess_collapse_repeated=False`, `ctc_merge_repeated=False`

    Outputs repeated classes with blanks in between, but generally does not
    require the decoder to collapse/merge repeated classes.

  * `preprocess_collapse_repeated=True`, `ctc_merge_repeated=True`

    Untested.  Very likely will not learn to output repeated classes.

  The `ignore_longer_outputs_than_inputs` option allows to specify the behavior
  of the CTCLoss when dealing with sequences that have longer outputs than
  inputs. If true, the CTCLoss will simply return zero gradient for those
  items, otherwise an InvalidArgument error is returned, stopping training.

  Args:
    labels: An `int32` `SparseTensor`.
      `labels.indices[i, :] == [b, t]` means `labels.values[i]` stores
      the id for (batch b, time t).
      `labels.values[i]` must take on values in `[0, num_labels)`.
      See `core/ops/ctc_ops.cc` for more details.
    inputs: 3-D `float` `Tensor`.
      If time_major == False, this will be a `Tensor` shaped:
        `[batch_size x max_time x num_classes]`.
      If time_major == True (default), this will be a `Tensor` shaped:
        `[max_time x batch_size x num_classes]`.
      The logits.
    sequence_length: 1-D `int32` vector, size `[batch_size]`.
      The sequence lengths.
    preprocess_collapse_repeated: Boolean.  Default: False.
      If True, repeated labels are collapsed prior to the CTC calculation.
    ctc_merge_repeated: Boolean.  Default: True.
    ignore_longer_outputs_than_inputs: Boolean. Default: False.
      If True, sequences with longer outputs than inputs will be ignored.
    time_major: The shape format of the `inputs` Tensors.
      If True, these `Tensors` must be shaped `[max_time, batch_size, num_classes]`.
      If False, these `Tensors` must be shaped `[batch_size, max_time, num_classes]`.
      Using `time_major = True` (default) is a bit more efficient because it avoids
      transposes at the beginning of the ctc_loss calculation.  However, most
      TensorFlow data is batch-major, so by this function also accepts inputs
      in batch-major form.

  Returns:
    A 1-D `float` `Tensor`, size `[batch]`, containing the negative log probabilities.

  Raises:
    TypeError: if labels is not a `SparseTensor`.
  """
  # The second, third, etc output tensors contain the gradients.  We use it in
  # _CTCLossGrad() below.
  if not isinstance(labels, sparse_tensor.SparseTensor):
    raise TypeError("Expected labels (first argument) to be a SparseTensor")

  # For internal calculations, we transpose to [time, batch, num_classes]
  if not time_major:
    inputs = array_ops.transpose(inputs, [1, 0, 2])  # (B,T,N) => (T,B,N)

  loss, _ = gen_ctc_ops._ctc_loss(
      inputs,
      labels.indices,
      labels.values,
      sequence_length,
      preprocess_collapse_repeated=preprocess_collapse_repeated,
      ctc_merge_repeated=ctc_merge_repeated,
      ignore_longer_outputs_than_inputs=ignore_longer_outputs_than_inputs)

  return loss
Exemple #60
0
def triplet_semihard_without_grad(embeddings_labels, sigma1=1.0, sigma2=1.0):
    '''
    This function is used to calculate the probabilities for positive and negative weighted embeddings
    Input:
    embeddings: the embeddings which represents the images in other space where distance is interpreable
    labels: corresponding label for each embeddings
    sigma: tells how much weight to be assigned based on distance. Low sigma will assign high weight to
            nearby sample.
    
    Output: positive and negative probabilties
    '''

    # First we extract the labels and embeddings
    labels = embeddings_labels[:, :1]

    labels = tf.cast(labels, dtype='int32')

    embeddings = embeddings_labels[:, 1:]

    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)

    # This matrix will have 1 when labels are same and 0 when they are different
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    # This matrix will have 1 when labels are different and 0 when they are same
    adjacency_not = math_ops.logical_not(adjacency)

    #Infer batch size
    batch_size = array_ops.size(labels)

    # For calculating positive probability
    affinity = math_ops.exp(-pdist_matrix / sigma1) - array_ops.diag(
        array_ops.ones([batch_size]))
    d_a_p = math_ops.multiply(math_ops.cast(adjacency, dtype=dtypes.float32),
                              affinity)
    pos_prob = math_ops.divide(d_a_p,
                               tf.reduce_sum(d_a_p, axis=1, keepdims=True))

    # Set pos-prob of nearest to 1 in case of nan.
    mask_is_nan = tf.tile(
        tf.math.is_nan(tf.reduce_sum(pos_prob, axis=1, keepdims=True)),
        [1, embeddings.shape.as_list()[0]])

    pdist_matrix_pos = math_ops.multiply(
        math_ops.cast(adjacency, dtype=dtypes.float32), pdist_matrix)

    select_nearest = tf.cast(tf.math.equal(
        pdist_matrix_pos, tf.reduce_max(pdist_matrix_pos,
                                        axis=1,
                                        keepdims=True)),
                             dtype=dtypes.float32)

    pos_prob = array_ops.where(mask_is_nan, select_nearest, pos_prob)

    # For calculating negative probability
    affinity = math_ops.exp(-pdist_matrix / sigma2) - array_ops.diag(
        array_ops.ones([batch_size]))
    d_a_n = math_ops.multiply(
        math_ops.cast(adjacency_not, dtype=dtypes.float32), affinity)
    neg_prob = math_ops.divide(d_a_n,
                               tf.reduce_sum(d_a_n, axis=1, keepdims=True))

    # Set neg-prob of nearest to 1 in case of nan.
    mask_is_nan = tf.tile(
        tf.math.is_nan(tf.reduce_sum(neg_prob, axis=1, keepdims=True)),
        [1, embeddings.shape.as_list()[0]])
    pdist_matrix_neg = math_ops.multiply(
        math_ops.cast(adjacency, dtype=dtypes.float32), pdist_matrix)

    select_nearest = tf.cast(tf.math.equal(
        pdist_matrix_pos, tf.reduce_max(pdist_matrix_neg,
                                        axis=1,
                                        keepdims=True)),
                             dtype=dtypes.float32)
    #     print (select_nearest)
    neg_prob = array_ops.where(mask_is_nan, select_nearest, neg_prob)

    return pos_prob, neg_prob