Example #1
    def _set_target(self, y, num_targets, target_weights):
        """Sets the target defined by at least one of y, b, and m."""
        if y is None and num_targets is None and target_weights is None:
            num_targets = tf.cast(self._n, dtype=self.dtype)

        # First we set the number of targets
        self._num_targets = num_targets
        if self._num_targets is None:
            if target_weights is not None:
                if isinstance(target_weights, tf.Tensor):
                    self._num_targets = tf.shape(target_weights)[-1]
                else:
                    self._num_targets = len(target_weights)
            elif y is not None:
                self._num_targets = tf.shape(y)[1]

        # Then we set the target vector itself. It must be sorted.
        if y is None:
            m = tf.cast(self._num_targets, dtype=self.dtype)
            y = tf.range(0, self._num_targets, dtype=self.dtype) / (m - 1)
        self.y = self._cast_may_repeat(y)
        if self._descending:
            self.y = tf.reverse(self.y, (1, ))

        # Last we set target_weights
        if target_weights is None:
            m = tf.cast(self._num_targets, dtype=self.dtype)
            target_weights = tf.ones(tf.shape(self.y), dtype=self.dtype) / m
        self.target_weights = self._cast_may_repeat(target_weights)
        if self._descending:
            self.target_weights = tf.reverse(self.target_weights, (1, ))
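
A minimal sketch (assuming TensorFlow 2.x) of the `tf.reverse(..., (1, ))` calls above: reversing along axis 1 flips each row, which is how the descending target order is produced.

import tensorflow as tf

y = tf.constant([[0.0, 0.5, 1.0],
                 [0.2, 0.4, 0.6]])
# Axis 1 is reversed per row; the batch axis is left intact.
print(tf.reverse(y, (1,)))  # [[1.0, 0.5, 0.0], [0.6, 0.4, 0.2]]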
Example #2
def flip(m, axis=None):  # pylint: disable=missing-docstring
  m = asarray(m).data

  if axis is None:
    return utils.tensor_to_ndarray(tf.reverse(m, tf.range(tf.rank(m))))

  axis = utils._canonicalize_axis(axis, tf.rank(m))  # pylint: disable=protected-access

  return utils.tensor_to_ndarray(tf.reverse(m, [axis]))
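
For reference, a sketch of the plain-NumPy behaviour this tf-numpy shim mirrors (assuming NumPy only):

import numpy as np

m = np.arange(6).reshape(2, 3)
np.flip(m)          # all axes reversed: [[5, 4, 3], [2, 1, 0]]
np.flip(m, axis=0)  # only axis 0 reversed: [[3, 4, 5], [0, 1, 2]]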
Example #3
def get_last_true_column(x):
    """Gets the last True value in each col as True and all others False.

  Similar to `get_first_true_column` except it is reversed.

  Args:
    x: A bool tensor with shape [num_steps, batch_size]

  Returns:
    A bool tensor of the same shape with the last True element in each col set
      to True and all others set to False.
  """
    # Reverse the row order.
    x = tf.reverse(x, axis=[0])
    x_first_true = get_first_true_column(x)
    # Reverse the rows back.
    return tf.reverse(x_first_true, axis=[0])
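
A standalone sketch of the reverse-rows trick above; the cumsum-based `first_true` below is only an illustration and is not the codebase's `get_first_true_column`.

import tensorflow as tf

x = tf.constant([[True, False],
                 [True, True],
                 [False, True]])
flipped = tf.reverse(x, axis=[0])
# Illustrative "first True per column": True where the running count of Trues
# first reaches 1.
first_true = tf.logical_and(
    tf.equal(tf.cumsum(tf.cast(flipped, tf.int32), axis=0), 1), flipped)
last_true = tf.reverse(first_true, axis=[0])
# last_true marks row 1 in column 0 and row 2 in column 1.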
Example #4
def business_day_mappers(weekend_mask=None, holidays=None):
    """Returns functions to map from ordinal to biz day and back."""
    if weekend_mask is None and holidays is None:
        return (lambda x: (x, tf.ones_like(x, dtype=tf.bool))), (lambda x: x)

    weekday_fwd, weekday_back = _week_day_mappers(weekend_mask)

    if holidays is None:
        return weekday_fwd, weekday_back

    # Apply the weekend adjustment to the holidays as well
    holidays_raw = tf.convert_to_tensor(holidays, dtype=tf.int32)
    holidays, is_weekday = weekday_fwd(holidays_raw)

    # Keep only the holidays that are not on weekends
    holidays = tf.boolean_mask(holidays, is_weekday)

    # The above step can lead to an empty holidays set which causes problems.
    # To mitigate this, we add a safe fake holiday.
    holidays = tf.concat([[0], holidays], axis=0)
    reverse_holidays = tf.reverse(-holidays, axis=[0])
    num_holidays = tf.size(holidays) - 1

    def bizday_fwd(x):
        """Calculates business day ordinal and whether it is a business day."""
        left = tf.searchsorted(holidays, x, side='left')
        right = num_holidays - tf.searchsorted(
            reverse_holidays, -x, side='left')
        is_bizday = tf.not_equal(left, right)
        bizday_ordinal = x - right
        return bizday_ordinal, is_bizday

    cum_holidays = tf.range(num_holidays + 1, dtype=holidays.dtype)
    bizday_at_holidays = holidays - cum_holidays

    def bizday_back(x):
        left = tf.searchsorted(bizday_at_holidays, x, side='left')
        ordinal = x + left - 1
        return ordinal

    def from_ordinal(ordinals):
        """Maps ordinals to business day and whether it is a work day."""
        ordinals = tf.convert_to_tensor(ordinals, dtype=tf.int32)
        weekday_values, is_weekday = weekday_fwd(ordinals)
        biz_ordinal, is_bizday = bizday_fwd(weekday_values)
        return biz_ordinal, (is_weekday & is_bizday)

    def to_ordinal(biz_values):
        """Maps from business day count to ordinals."""
        return weekday_back(bizday_back(biz_values))

    return from_ordinal, to_ordinal
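
A small numeric sketch of the two-sided `searchsorted` test inside `bizday_fwd` (the holiday values are hypothetical): `left` and `right` differ exactly when `x` is not a holiday.

import tensorflow as tf

holidays = tf.constant([0, 5, 9], dtype=tf.int32)   # includes the fake 0
reverse_holidays = tf.reverse(-holidays, axis=[0])  # [-9, -5, 0]
num_holidays = tf.size(holidays) - 1                # 2

x = tf.constant([4, 5, 6], dtype=tf.int32)
left = tf.searchsorted(holidays, x, side='left')                           # [1, 1, 2]
right = num_holidays - tf.searchsorted(reverse_holidays, -x, side='left')  # [0, 1, 1]
print(tf.not_equal(left, right))  # [True, False, True]: only 5 is a holiday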
Example #5
  def _cartesian_to_hyperspherical(self, x_c):
    """Helper for log_prob.

    As a convention here, the first element is the radius and the rest are
    angles.

    Assumes only one dimension representing a vector in cartesian space.

    Args:
      x_c: `Tensor` input in cartesian coordinates (weight space). Has shape
        sample_shape + batch_shape + flattened_event_shape.

    Returns:
      x_r: same dimensions converted to hyperspherical coordinates.
    """
    # x_i = Sum of x_j ** 2 for j = N - i
    base = tf.reverse(x_c, [-1])
    base = tf.math.cumsum(base**2, -1)

    # Now we flip back which gives us
    # [x_n**2 + x_{n-1}**2 + ... + x_1**2, ..., x_n**2]
    base = tf.reverse(base, [-1])
    base = tf.math.sqrt(base)
    theta = tf.math.atan(base[..., 1:] / x_c[..., 0:-1]) + (
        tf.constant(math.pi) / 2)  # The first element is base_1 / x_0

    # Correcting the last element. `tf.Tensor`s are immutable, so we rebuild
    # `theta` instead of assigning to `theta[..., -1]`.
    last_theta = tf.math.atan(
        x_c[..., -1] /
        (x_c[..., -2] + tf.math.sqrt((x_c[..., -1]**2) +
                                     (x_c[..., -2]**2)))) + (
                                         tf.constant(math.pi) / 2)
    theta = tf.concat([theta[..., :-1], last_theta[..., tf.newaxis]], axis=-1)

    # Now we add the radius to the start, which is just the first term of the
    # base vector that we haven't used.
    x_r = tf.concat([tf.expand_dims(base[..., 0], -1), theta], axis=-1)

    return x_r
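
The reverse/cumsum/reverse pattern above computes suffix sums of the squared coordinates; a minimal sketch:

import tensorflow as tf

x_c = tf.constant([1., 2., 3.])
# base[i] = x_c[i]**2 + x_c[i+1]**2 + ... + x_c[-1]**2
base = tf.reverse(tf.math.cumsum(tf.reverse(x_c, [-1])**2, -1), [-1])
print(base)  # [14., 13., 9.]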
Example #6
def fill_triangular(x, upper=False, name=None):
    """Creates a (batch of) triangular matrix from a vector of inputs.

  Created matrix can be lower- or upper-triangular. (It is more efficient to
  create the matrix as upper or lower, rather than transpose.)

  Triangular matrix elements are filled in a clockwise spiral. See example,
  below.

  If `x.shape` is `[b1, b2, ..., bB, d]` then the output shape is
  `[b1, b2, ..., bB, n, n]` where `n` is such that `d = n(n+1)/2`, i.e.,
  `n = int(np.sqrt(0.25 + 2. * d) - 0.5)`.

  Example:

  ```python
  fill_triangular([1, 2, 3, 4, 5, 6])
  # ==> [[4, 0, 0],
  #      [6, 5, 0],
  #      [3, 2, 1]]

  fill_triangular([1, 2, 3, 4, 5, 6], upper=True)
  # ==> [[1, 2, 3],
  #      [0, 5, 6],
  #      [0, 0, 4]]
  ```

  The key trick is to create an upper triangular matrix by concatenating `x`
  and a tail of itself, then reshaping.

  Suppose that we are filling the upper triangle of an `n`-by-`n` matrix `M`
  from a vector `x`. The matrix `M` contains n**2 entries total. The vector `x`
  contains `n * (n+1) / 2` entries. For concreteness, we'll consider `n = 5`
  (so `x` has `15` entries and `M` has `25`). We'll concatenate `x` and `x` with
  the first (`n = 5`) elements removed and reversed:

  ```python
  x = np.arange(15) + 1
  xc = np.concatenate([x, x[5:][::-1]])
  # ==> array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 14, 13,
  #            12, 11, 10, 9, 8, 7, 6])

  # (We add one to the arange result to disambiguate the zeros below the
  # diagonal of our upper-triangular matrix from the first entry in `x`.)

  # Now, when reshaped, this lays out as a matrix:
  y = np.reshape(xc, [5, 5])
  # ==> array([[ 1,  2,  3,  4,  5],
  #            [ 6,  7,  8,  9, 10],
  #            [11, 12, 13, 14, 15],
  #            [15, 14, 13, 12, 11],
  #            [10,  9,  8,  7,  6]])

  # Finally, zero the elements below the diagonal:
  y = np.triu(y, k=0)
  # ==> array([[ 1,  2,  3,  4,  5],
  #            [ 0,  7,  8,  9, 10],
  #            [ 0,  0, 13, 14, 15],
  #            [ 0,  0,  0, 12, 11],
  #            [ 0,  0,  0,  0,  6]])
  ```

  From this example we see that the resulting matrix is upper-triangular, and
  contains all the entries of x, as desired. The rest is details:
  - If `n` is even, `x` doesn't exactly fill an even number of rows (it fills
    `n / 2` rows and half of an additional row), but the whole scheme still
    works.
  - If we want a lower triangular matrix instead of an upper triangular,
    we remove the first `n` elements from `x` rather than from the reversed
    `x`.

  For additional comparisons, a pure numpy version of this function can be found
  in `distribution_util_test.py`, function `_fill_triangular`.

  Args:
    x: `Tensor` representing lower (or upper) triangular elements.
    upper: Python `bool` representing whether output matrix should be upper
      triangular (`True`) or lower triangular (`False`, default).
    name: Python `str`. The name to give this op.

  Returns:
    tril: `Tensor` with lower (or upper) triangular elements filled from `x`.

  Raises:
    ValueError: if `x` cannot be mapped to a triangular matrix.
  """

    with tf.name_scope(name or 'fill_triangular'):
        x = tf.convert_to_tensor(x, name='x')
        m = tf.compat.dimension_value(
            tensorshape_util.with_rank_at_least(x.shape, 1)[-1])
        if m is not None:
            # Formula derived by solving for n: m = n(n+1)/2.
            m = np.int32(m)
            n = np.sqrt(0.25 + 2. * m) - 0.5
            if n != np.floor(n):
                raise ValueError(
                    'Input right-most shape ({}) does not '
                    'correspond to a triangular matrix.'.format(m))
            n = np.int32(n)
            static_final_shape = tensorshape_util.concatenate(
                x.shape[:-1], [n, n])
        else:
            m = tf.shape(x)[-1]
            # For derivation, see above. Casting automatically lops off the 0.5, so we
            # omit it.  We don't validate n is an integer because this has
            # graph-execution cost; an error will be thrown from the reshape, below.
            n = tf.cast(tf.sqrt(0.25 + tf.cast(2 * m, dtype=tf.float32)),
                        dtype=tf.int32)
            static_final_shape = tensorshape_util.concatenate(
                tensorshape_util.with_rank_at_least(x.shape, 1)[:-1],
                [None, None])

        # Try it out in numpy:
        #  n = 3
        #  x = np.arange(n * (n + 1) / 2)
        #  m = x.shape[0]
        #  n = np.int32(np.sqrt(.25 + 2 * m) - .5)
        #  x_tail = x[(m - (n**2 - m)):]
        #  np.concatenate([x_tail, x[::-1]], 0).reshape(n, n)  # lower
        #  # ==> array([[3, 4, 5],
        #               [5, 4, 3],
        #               [2, 1, 0]])
        #  np.concatenate([x, x_tail[::-1]], 0).reshape(n, n)  # upper
        #  # ==> array([[0, 1, 2],
        #               [3, 4, 5],
        #               [5, 4, 3]])
        #
        # Note that we can't simply do `x[..., -(n**2 - m):]` because this doesn't
        # correctly handle `m == n == 1`. Hence, we do nonnegative indexing.
        # Furthermore observe that:
        #   m - (n**2 - m)
        #   = n**2 / 2 + n / 2 - (n**2 - n**2 / 2 + n / 2)
        #   = 2 (n**2 / 2 + n / 2) - n**2
        #   = n**2 + n - n**2
        #   = n
        ndims = prefer_static.rank(x)
        if upper:
            x_list = [x, tf.reverse(x[..., n:], axis=[ndims - 1])]
        else:
            x_list = [x[..., n:], tf.reverse(x, axis=[ndims - 1])]
        new_shape = (tensorshape_util.as_list(static_final_shape)
                     if tensorshape_util.is_fully_defined(static_final_shape)
                     else tf.concat([tf.shape(x)[:-1], [n, n]], axis=0))
        x = tf.reshape(tf.concat(x_list, axis=-1), new_shape)
        x = tf.linalg.band_part(x,
                                num_lower=(0 if upper else -1),
                                num_upper=(-1 if upper else 0))
        tensorshape_util.set_shape(x, static_final_shape)
        return x
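
A quick NumPy check (a sketch, not part of the library) of the shape relation the function relies on: `d = n (n + 1) / 2` triangular elements map back to `n = sqrt(0.25 + 2 d) - 0.5`.

import numpy as np

for n in (1, 2, 3, 10):
    d = n * (n + 1) // 2
    assert np.int32(np.sqrt(0.25 + 2. * d) - 0.5) == n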
Example #7
def fill_triangular_inverse(x, upper=False, name=None):
    """Creates a vector from a (batch of) triangular matrix.

  The vector is created from the lower-triangular or upper-triangular portion
  depending on the value of the parameter `upper`.

  If `x.shape` is `[b1, b2, ..., bB, n, n]` then the output shape is
  `[b1, b2, ..., bB, d]` where `d = n (n + 1) / 2`.

  Example:

  ```python
  fill_triangular_inverse(
    [[4, 0, 0],
     [6, 5, 0],
     [3, 2, 1]])

  # ==> [1, 2, 3, 4, 5, 6]

  fill_triangular_inverse(
    [[1, 2, 3],
     [0, 5, 6],
     [0, 0, 4]], upper=True)

  # ==> [1, 2, 3, 4, 5, 6]
  ```

  Args:
    x: `Tensor` representing lower (or upper) triangular elements.
    upper: Python `bool` representing whether output matrix should be upper
      triangular (`True`) or lower triangular (`False`, default).
    name: Python `str`. The name to give this op.

  Returns:
    flat_tril: (Batch of) vector-shaped `Tensor` representing vectorized lower
      (or upper) triangular elements from `x`.
  """

    with tf.name_scope(name or 'fill_triangular_inverse'):
        x = tf.convert_to_tensor(x, name='x')
        n = tf.compat.dimension_value(
            tensorshape_util.with_rank_at_least(x.shape, 2)[-1])
        if n is not None:
            n = np.int32(n)
            m = np.int32((n * (n + 1)) // 2)
            static_final_shape = tensorshape_util.concatenate(
                x.shape[:-2], [m])
        else:
            n = tf.shape(x)[-1]
            m = (n * (n + 1)) // 2
            static_final_shape = tensorshape_util.concatenate(
                tensorshape_util.with_rank_at_least(x.shape, 2)[:-2], [None])
        ndims = prefer_static.rank(x)
        if upper:
            initial_elements = x[..., 0, :]
            triangular_portion = x[..., 1:, :]
        else:
            initial_elements = tf.reverse(x[..., -1, :], axis=[ndims - 2])
            triangular_portion = x[..., :-1, :]
        rotated_triangular_portion = tf.reverse(tf.reverse(triangular_portion,
                                                           axis=[ndims - 1]),
                                                axis=[ndims - 2])
        consolidated_matrix = triangular_portion + rotated_triangular_portion
        end_sequence = tf.reshape(
            consolidated_matrix,
            tf.concat([tf.shape(x)[:-2], [n * (n - 1)]], axis=0))
        y = tf.concat([initial_elements, end_sequence[..., :m - n]], axis=-1)
        tensorshape_util.set_shape(y, static_final_shape)
        return y
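
A hedged round-trip sketch: `fill_triangular` and `fill_triangular_inverse` are exposed as `tfp.math.fill_triangular` and `tfp.math.fill_triangular_inverse` in recent TensorFlow Probability releases (treat the exact import path as an assumption).

import tensorflow as tf
import tensorflow_probability as tfp

x = tf.constant([1., 2., 3., 4., 5., 6.])
tril = tfp.math.fill_triangular(x)             # [[4, 0, 0], [6, 5, 0], [3, 2, 1]]
back = tfp.math.fill_triangular_inverse(tril)  # [1, 2, 3, 4, 5, 6]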
Example #8
 def f(x, y):  # [4, 2, 3], [4, 2, 1, 3] -> [4, 3, 2]
     return tf.transpose(
         tf.cast(tf.math.cumsum(w1 * x, axis=-1), dtype=tf.float32) +
         tf.square(tf.reverse(w2 * y, axis=[-3]))[..., 0, :],
         perm=[0, 2, 1])
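
A hypothetical shape check for `f`, materializing the closed-over weights as scalars `w1 = w2 = 1.0` (these bindings are assumptions made for the sketch):

import tensorflow as tf

w1 = w2 = tf.constant(1.0)

def f(x, y):  # [4, 2, 3], [4, 2, 1, 3] -> [4, 3, 2]
    return tf.transpose(
        tf.cast(tf.math.cumsum(w1 * x, axis=-1), dtype=tf.float32) +
        tf.square(tf.reverse(w2 * y, axis=[-3]))[..., 0, :],
        perm=[0, 2, 1])

print(f(tf.zeros([4, 2, 3]), tf.zeros([4, 2, 1, 3])).shape)  # (4, 3, 2)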
Example #9
def extract_points_from_range_image(laser, calibration, frame_pose):
    """Decode points from lidar."""
    if laser.name != calibration.name:
        raise ValueError('Laser and calibration do not match')
    if laser.name == dataset_pb2.LaserName.TOP:
        frame_pose = tf.convert_to_tensor(
            np.reshape(np.array(frame_pose.transform), [4, 4]))
        range_image_top_pose = dataset_pb2.MatrixFloat.FromString(
            zlib.decompress(laser.ri_return1.range_image_pose_compressed))
        # [H, W, 6]
        range_image_top_pose_tensor = tf.reshape(
            tf.convert_to_tensor(range_image_top_pose.data),
            range_image_top_pose.shape.dims)
        # [H, W, 3, 3]
        range_image_top_pose_tensor_rotation = transform_utils.get_rotation_matrix(
            range_image_top_pose_tensor[..., 0],
            range_image_top_pose_tensor[..., 1],
            range_image_top_pose_tensor[..., 2])
        range_image_top_pose_tensor_translation = range_image_top_pose_tensor[
            ..., 3:]
        range_image_top_pose_tensor = transform_utils.get_transform(
            range_image_top_pose_tensor_rotation,
            range_image_top_pose_tensor_translation)
        frame_pose = tf.expand_dims(frame_pose, axis=0)
        pixel_pose = tf.expand_dims(range_image_top_pose_tensor, axis=0)
    else:
        pixel_pose = None
        frame_pose = None
    first_return = zlib.decompress(laser.ri_return1.range_image_compressed)
    second_return = zlib.decompress(laser.ri_return2.range_image_compressed)
    points_list = []
    for range_image_str in [first_return, second_return]:
        range_image = dataset_pb2.MatrixFloat.FromString(range_image_str)
        if not calibration.beam_inclinations:
            beam_inclinations = range_image_utils.compute_inclination(
                tf.constant([
                    calibration.beam_inclination_min,
                    calibration.beam_inclination_max
                ]),
                height=range_image.shape.dims[0])
        else:
            beam_inclinations = tf.constant(calibration.beam_inclinations)
        beam_inclinations = tf.reverse(beam_inclinations, axis=[-1])
        extrinsic = np.reshape(np.array(calibration.extrinsic.transform),
                               [4, 4])
        range_image_tensor = tf.reshape(tf.convert_to_tensor(range_image.data),
                                        range_image.shape.dims)
        range_image_mask = range_image_tensor[..., 0] > 0
        range_image_cartesian = (
            range_image_utils.extract_point_cloud_from_range_image(
                tf.expand_dims(range_image_tensor[..., 0], axis=0),
                tf.expand_dims(extrinsic, axis=0),
                tf.expand_dims(tf.convert_to_tensor(beam_inclinations),
                               axis=0),
                pixel_pose=pixel_pose,
                frame_pose=frame_pose))
        range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0)
        points_tensor = tf.gather_nd(
            tf.concat([range_image_cartesian, range_image_tensor[..., 1:4]],
                      axis=-1), tf.where(range_image_mask))
        points_list.append(points_tensor.numpy())
    return points_list
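
A minimal sketch of the inclination handling above (the values are hypothetical stand-ins): `compute_inclination` produces angles from min to max, and the `tf.reverse` presumably reorders them so the first range-image row corresponds to the top beam.

import tensorflow as tf

beam_inclinations = tf.linspace(-0.3, 0.1, 5)  # stand-in for compute_inclination
beam_inclinations = tf.reverse(beam_inclinations, axis=[-1])
print(beam_inclinations)  # approximately [0.1, 0.0, -0.1, -0.2, -0.3]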
Example #10
 def flip():
   if dim < 0 or dim >= (len(tensor.get_shape().as_list())):
     raise ValueError('dim must represent a valid dimension.')
   return tf.reverse(tensor, [dim])
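
A hypothetical usage of the guarded `flip` above, with `tensor` and `dim` (normally closed-over variables) bound explicitly for the sketch:

import tensorflow as tf

tensor = tf.constant([[1, 2], [3, 4]])
dim = 1

def flip():
    if dim < 0 or dim >= len(tensor.get_shape().as_list()):
        raise ValueError('dim must represent a valid dimension.')
    return tf.reverse(tensor, [dim])

print(flip())  # [[2, 1], [4, 3]]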
Example #11
        def collater_fn(batch: Dict[Text, tf.Tensor]) -> Dict[Text, tf.Tensor]:
            """Collater function for mention classification task. See BaseTask."""

            new_batch = {}

            # Sample mentions uniformly across batch
            mention_mask = tf.reshape(batch['mention_mask'],
                                      [n_candidate_mentions])
            sample_scores = tf.random.uniform(
                shape=[n_candidate_mentions]) * tf.cast(
                    mention_mask, tf.float32)

            mention_target_indices = tf.reshape(
                batch['mention_target_indices'], [bsz])

            # We want to make sure that the target mentions always have a priority
            # when we sample `max_batch_mentions` out of all available mentions.
            # Additionally, we want these target mentions to be in the same order as
            # their samples. In other words, we want the first sampled mention to be
            # the target mention from the first sample, the second sampled mention
            # to be the target mention from the second sample, etc.

            # Positions of target mentions in the flat array
            mention_target_indices_flat = (tf.cast(
                tf.range(bsz) * max_mentions_per_sample,
                mention_target_indices.dtype) + mention_target_indices)
            # These extra scores make sure that target mentions have priority and
            # will be sampled in the correct order.
            mention_target_extra_score_flat = tf.cast(
                tf.reverse(tf.range(bsz) + 1, axis=[0]), tf.float32)
            # The model assumes that there is only ONE target mention per sample.
            # Moreover, we want to select them according to the order of samples:
            # target mention from sample 0, target mention from sample 1, ..., etc.
            sample_scores = tf.tensor_scatter_nd_add(
                sample_scores, tf.expand_dims(mention_target_indices_flat, 1),
                mention_target_extra_score_flat)

            sampled_indices = tf.math.top_k(sample_scores,
                                            max_batch_mentions,
                                            sorted=True).indices

            # Double-check target mentions were selected correctly.
            assert_op = tf.assert_equal(
                sampled_indices[:bsz],
                tf.cast(mention_target_indices_flat, sampled_indices.dtype))

            with tf.control_dependencies([assert_op]):
                mention_mask = tf.gather(mention_mask, sampled_indices)
            dtype = batch['mention_start_positions'].dtype
            mention_start_positions = tf.gather(
                tf.reshape(batch['mention_start_positions'],
                           [n_candidate_mentions]), sampled_indices)
            mention_end_positions = tf.gather(
                tf.reshape(batch['mention_end_positions'],
                           [n_candidate_mentions]), sampled_indices)

            mention_batch_positions = tf.gather(
                tf.repeat(tf.range(bsz, dtype=dtype), max_mentions_per_sample),
                sampled_indices)

            new_batch['text_ids'] = batch['text_ids']
            new_batch['text_mask'] = batch['text_mask']
            new_batch['classifier_target'] = tf.reshape(
                batch['target'], [bsz, config.max_num_labels_per_sample])
            new_batch['classifier_target_mask'] = tf.reshape(
                batch['target_mask'], [bsz, config.max_num_labels_per_sample])

            new_batch['mention_mask'] = mention_mask
            new_batch['mention_start_positions'] = mention_start_positions
            new_batch['mention_end_positions'] = mention_end_positions
            new_batch['mention_batch_positions'] = mention_batch_positions
            new_batch['mention_target_indices'] = tf.range(bsz, dtype=dtype)

            if config.get('max_length_with_entity_tokens') is not None:
                batch_with_entity_tokens = mention_preprocess_utils.add_entity_tokens(
                    text_ids=new_batch['text_ids'],
                    text_mask=new_batch['text_mask'],
                    mention_mask=new_batch['mention_mask'],
                    mention_batch_positions=new_batch[
                        'mention_batch_positions'],
                    mention_start_positions=new_batch[
                        'mention_start_positions'],
                    mention_end_positions=new_batch['mention_end_positions'],
                    new_length=config.max_length_with_entity_tokens,
                )
                # Update `text_ids`, `text_mask`, `mention_mask`, `mention_*_positions`
                new_batch.update(batch_with_entity_tokens)
                # Update `max_length`
                max_length = config.max_length_with_entity_tokens
            else:
                max_length = encoder_config.max_length

            new_batch['mention_target_batch_positions'] = tf.gather(
                new_batch['mention_batch_positions'],
                new_batch['mention_target_indices'])
            new_batch['mention_target_start_positions'] = tf.gather(
                new_batch['mention_start_positions'],
                new_batch['mention_target_indices'])
            new_batch['mention_target_end_positions'] = tf.gather(
                new_batch['mention_end_positions'],
                new_batch['mention_target_indices'])
            new_batch['mention_target_weights'] = tf.ones(bsz)

            # Fake IDs -- some encoders (ReadTwice) need them
            new_batch['mention_target_ids'] = tf.zeros(bsz)

            new_batch['segment_ids'] = tf.zeros_like(new_batch['text_ids'])

            position_ids = tf.expand_dims(tf.range(max_length, dtype=dtype),
                                          axis=0)
            new_batch['position_ids'] = tf.tile(position_ids, (bsz, 1))

            return new_batch
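
A small sketch of the priority-score trick above: the reversed range gives strictly decreasing extra scores of at least 1, while the uniform base scores stay below 1, so `top_k` returns the target mentions first and in sample order.

import tensorflow as tf

bsz = 4
extra = tf.cast(tf.reverse(tf.range(bsz) + 1, axis=[0]), tf.float32)
print(extra)  # [4., 3., 2., 1.]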
Example #12
def gpu_gru(
    inputs,
    init_h,
    kernel,
    recurrent_kernel,
    bias,
    mask,
    time_major,
    go_backwards,
    sequence_lengths,
    return_sequences,
):
    """GRU with cuDNN implementation which is only available for GPU."""
    if mask is not None:
        sequence_lengths = gru_lstm_utils.calculate_sequence_by_mask(
            mask, time_major)

    if not time_major and sequence_lengths is None:
        inputs = tf.transpose(inputs, perm=(1, 0, 2))
        seq_axis, batch_axis = (0, 1)
    else:
        seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
    # For init_h, cuDNN expects one more dim of num_layers before or after batch
    # dim for time major or batch major inputs respectively
    init_h = tf.expand_dims(init_h, axis=seq_axis)

    weights = tf.split(kernel, 3, axis=1)
    weights += tf.split(recurrent_kernel, 3, axis=1)
    # Note that the bias was initialized as shape (2, 3 * units); flatten it
    # into (6 * units).
    bias = tf.split(backend.flatten(bias), 6)

    if tf.sysconfig.get_build_info()["is_cuda_build"]:
        # Note that the gate order for cuDNN is different from the canonical format.
        # The canonical format is [z, r, h], whereas cuDNN's is [r, z, h]. The
        # swap needs to be done for kernel, recurrent_kernel, input_bias and
        # recurrent_bias.
        # z is update gate weights.
        # r is reset gate weights.
        # h is output gate weights.
        weights[0], weights[1] = weights[1], weights[0]
        weights[3], weights[4] = weights[4], weights[3]
        bias[0], bias[1] = bias[1], bias[0]
        bias[3], bias[4] = bias[4], bias[3]

    params = gru_lstm_utils.canonical_to_params(
        weights=weights,
        biases=bias,
        shape=tf.constant([-1]),
        transpose_weights=True,
    )

    if sequence_lengths is not None:
        if go_backwards:
            # Three reversals are required. E.g.,
            # normal input = [1, 2, 3, 0, 0]  # where 0 need to be masked
            # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
            # output_from_cudnn = [6, 5, 4, 0, 0]
            # expected_output = [0, 0, 6, 5 ,4]
            inputs = tf.reverse_sequence(
                inputs,
                sequence_lengths,
                seq_axis=seq_axis,
                batch_axis=batch_axis,
            )
        outputs, h, _, _, _ = tf.raw_ops.CudnnRNNV3(
            input=inputs,
            input_h=init_h,
            input_c=0,
            params=params,
            is_training=True,
            rnn_mode="gru",
            sequence_lengths=sequence_lengths,
            time_major=time_major,
        )
        if go_backwards:
            outputs = tf.reverse_sequence(
                outputs,
                sequence_lengths,
                seq_axis=seq_axis,
                batch_axis=batch_axis,
            )
            outputs = tf.reverse(outputs, axis=[seq_axis])
    else:
        if go_backwards:
            # Reverse axis 0 since the input has already been converted to time major.
            inputs = tf.reverse(inputs, axis=[0])
        outputs, h, _, _ = tf.raw_ops.CudnnRNN(
            input=inputs,
            input_h=init_h,
            input_c=0,
            params=params,
            is_training=True,
            rnn_mode="gru",
        )

    last_output = outputs[-1]
    if not time_major and sequence_lengths is None and return_sequences:
        outputs = tf.transpose(outputs, perm=[1, 0, 2])
    h = tf.squeeze(h, axis=seq_axis)

    # In the case of variable-length input, the cuDNN kernel fills zeros for
    # the output, whereas the default Keras behavior is to carry over the
    # output from t-1, so that in the return_sequences=False case the user
    # gets the final effective output instead of just 0s at the last timestep.
    # To mimic the default Keras behavior, we copy the final h state as
    # last_output, since it is numerically the same as the output.
    if sequence_lengths is not None:
        last_output = h

    # Match CPU return format
    if not return_sequences:
        outputs = tf.expand_dims(last_output, axis=0 if time_major else 1)

    return (
        last_output,
        outputs,
        h,
        gru_lstm_utils.runtime(gru_lstm_utils.RUNTIME_GPU),
    )
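
A standalone sketch of the "three reversals" comment above (TensorFlow 2.x assumed): `tf.reverse_sequence` flips only the valid prefix of each row, while `tf.reverse` flips the whole axis, which is why both are needed around the cuDNN call.

import tensorflow as tf

x = tf.constant([[1, 2, 3, 0, 0]])  # trailing 0s are padding
lengths = tf.constant([3])
print(tf.reverse_sequence(x, lengths, seq_axis=1, batch_axis=0))  # [[3, 2, 1, 0, 0]]
print(tf.reverse(x, axis=[1]))                                    # [[0, 0, 3, 2, 1]]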
Example #13
def gpu_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, mask,
             time_major, go_backwards, sequence_lengths):
  """LSTM with either cuDNN or ROCm implementation which is only available for GPU.

  Note that currently only right-padded data is supported; otherwise the result
  will be polluted by the unmasked data, which should have been filtered out.

  Args:
    inputs: Input tensor of LSTM layer.
    init_h: Initial state tensor for the cell output.
    init_c: Initial state tensor for the cell hidden state.
    kernel: Weights for cell kernel.
    recurrent_kernel: Weights for cell recurrent kernel.
    bias: Weights for cell kernel bias and recurrent bias. Only recurrent bias
      is used in this case.
    mask: Boolean tensor used to mask out steps within the sequence. An individual
      `True` entry indicates that the corresponding timestep should be utilized,
      while a `False` entry indicates that the corresponding timestep should be
      ignored.
    time_major: Boolean, whether the inputs are in the format of [time, batch,
      feature] or [batch, time, feature].
    go_backwards: Boolean (default False). If True, process the input sequence
      backwards and return the reversed sequence.
    sequence_lengths: The lengths of all sequences coming from a variable length
      input, such as ragged tensors. If the input has a fixed timestep size,
      this should be None.

  Returns:
    last_output: Output tensor for the last timestep, which has shape
      [batch, units].
    outputs: Output tensor for all timesteps, which has shape
      [batch, time, units].
    state_0: The cell output, which has same shape as init_h.
    state_1: The cell hidden state, which has same shape as init_c.
    runtime: Constant string tensor which indicate real runtime hardware. This
      value is for testing purpose and should not be used by user.
  """
  if mask is not None:
    sequence_lengths = gru_lstm_utils.calculate_sequence_by_mask(
        mask, time_major)

  if not time_major and sequence_lengths is None:
    inputs = tf.transpose(inputs, perm=(1, 0, 2))
    seq_axis, batch_axis = (0, 1)
  else:
    seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
  # For init_h and init_c, cuDNN expects one more dim of num_layers before or
  # after batch dim for time major or batch major inputs respectively
  init_h = tf.expand_dims(init_h, axis=seq_axis)
  init_c = tf.expand_dims(init_c, axis=seq_axis)

  weights = tf.split(kernel, 4, axis=1)
  weights += tf.split(recurrent_kernel, 4, axis=1)
  # cuDNN has an extra set of biases for inputs; we disable them (setting them
  # to 0) so that mathematically it is the same as the canonical LSTM
  # implementation.
  full_bias = tf.concat((tf.zeros_like(bias), bias), 0)

  if tf.sysconfig.get_build_info()['is_rocm_build']:
    # ROCm MIOpen's weight sequence for LSTM is different from both canonical
    # and Cudnn format
    # MIOpen: [i, f, o, c] Cudnn/Canonical: [i, f, c, o]
    # i is input gate weights.
    # f is forget gate weights.
    # o is output gate weights.
    # c is cell gate weights.
    weights = [weights[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)]
    # full_bias is a tensor of shape (8*n,)
    full_bias = tf.split(full_bias, 8, axis=0)
    full_bias = [full_bias[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)]

  params = gru_lstm_utils.canonical_to_params(
      weights=weights,
      biases=tf.split(full_bias, 8),
      shape=tf.constant([-1]),
      transpose_weights=True)

  if sequence_lengths is not None:
    if go_backwards:
      # Three reversals are required. E.g.,
      # normal input = [1, 2, 3, 0, 0]  # where 0 need to be masked
      # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
      # output_from_cudnn = [6, 5, 4, 0, 0]
      # expected_output = [0, 0, 6, 5 ,4]
      inputs = tf.reverse_sequence(
          inputs, sequence_lengths, seq_axis=seq_axis, batch_axis=batch_axis)
    outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3(
        input=inputs,
        input_h=init_h,
        input_c=init_c,
        params=params,
        is_training=True,
        rnn_mode='lstm',
        sequence_lengths=sequence_lengths,
        time_major=time_major)
    if go_backwards:
      outputs = tf.reverse_sequence(
          outputs, sequence_lengths, seq_axis=seq_axis, batch_axis=batch_axis)
      outputs = tf.reverse(outputs, axis=[seq_axis])
  else:
    # # Fill the array with shape [batch] with value of max timesteps.
    # sequence_length = array_ops.fill([array_ops.shape(inputs)[1]],
    #                                  array_ops.shape(inputs)[0])
    if go_backwards:
      # Reverse axis 0 since the input has already been converted to time major.
      inputs = tf.reverse(inputs, axis=[0])
    outputs, h, c, _ = tf.raw_ops.CudnnRNN(
        input=inputs, input_h=init_h, input_c=init_c, params=params,
        is_training=True, rnn_mode='lstm')

  last_output = outputs[-1]
  if not time_major and sequence_lengths is None:
    outputs = tf.transpose(outputs, perm=[1, 0, 2])
  h = tf.squeeze(h, axis=seq_axis)
  c = tf.squeeze(c, axis=seq_axis)

  # In the case of variable-length input, the cuDNN kernel fills zeros for the
  # output, whereas the default Keras behavior is to carry over the output from
  # t-1, so that in the return_sequences=False case the user gets the final
  # effective output instead of just 0s at the last timestep. To mimic the
  # default Keras behavior, we copy the final h state as last_output, since it
  # is numerically the same as the output.
  if sequence_lengths is not None:
    last_output = h
  return last_output, outputs, h, c, gru_lstm_utils.runtime(
      gru_lstm_utils.RUNTIME_GPU)
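
A pure-Python sketch of the MIOpen reordering above: applying the index permutation (0, 1, 3, 2, 4, 5, 7, 6) to the canonical gate order swaps the cell and output gate blocks in both the kernel and recurrent halves (the labels are illustrative).

canonical = ['i', 'f', 'c', 'o', 'ri', 'rf', 'rc', 'ro']
miopen = [canonical[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)]
print(miopen)  # ['i', 'f', 'o', 'c', 'ri', 'rf', 'ro', 'rc']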
Example #14
 def corr_matrix(t):
     one = tf.ones_like(t)
     row1 = tf.stack([one, 0.5 * t], axis=-1)
     row2 = tf.reverse(row1, [0])
     corr_matrix = tf.stack([row1, row2], axis=-1)
     return corr_matrix
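
A numeric sketch of `corr_matrix` for a scalar `t` (the value 0.4 is hypothetical): reversing `row1` yields the mirrored second row of a 2x2 correlation matrix.

import tensorflow as tf

t = tf.constant(0.4)
row1 = tf.stack([tf.ones_like(t), 0.5 * t], axis=-1)  # [1.0, 0.2]
row2 = tf.reverse(row1, [0])                          # [0.2, 1.0]
print(tf.stack([row1, row2], axis=-1))                # [[1.0, 0.2], [0.2, 1.0]]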
Example #15
def _mvnormal_quasi(sample_shape,
                    mean,
                    random_type,
                    seed,
                    covariance_matrix=None,
                    scale_matrix=None,
                    validate_args=False,
                    dtype=None,
                    **kwargs):
    """Returns normal draws using low-discrepancy sequences."""
    (mean, scale_matrix, batch_shape, dim,
     dtype) = _process_mean_scale(mean, scale_matrix, covariance_matrix, dtype)
    # Reverse elements of the batch shape
    batch_shape_reverse = tf.reverse(batch_shape, [0])
    # Transposed shape of the output
    output_shape_t = tf.concat([batch_shape_reverse, sample_shape], -1)
    # Number of quasi random samples
    num_samples = tf.reduce_prod(output_shape_t) // dim
    # Number of initial low discrepancy sequence numbers to skip
    if 'skip' in kwargs:
        skip = kwargs['skip']
    else:
        skip = 0
    if random_type == RandomType.SOBOL:
        # TODO(b/182621549): For Sobol sequences, dimension should be known at graph
        # construction time.
        dim = tf.get_static_value(dim)
        if dim is None:
            raise ValueError(
                'For Sobol sequences, dimension should be known at graph'
                ' construction time.')
        # Shape [num_samples, dim] of the Sobol samples
        low_discrepancy_seq = sobol.sample(dim=dim,
                                           num_results=num_samples,
                                           skip=skip,
                                           dtype=dtype)
    else:  # HALTON or HALTON_RANDOMIZED random_type
        if 'randomization_params' in kwargs:
            randomization_params = kwargs['randomization_params']
        else:
            randomization_params = None
        randomized = random_type == RandomType.HALTON_RANDOMIZED
        # Shape [num_samples, dim] of the Halton samples
        low_discrepancy_seq, _ = halton.sample(
            dim=dim,
            sequence_indices=tf.range(skip, skip + num_samples),
            randomized=randomized,
            randomization_params=randomization_params,
            seed=seed,
            validate_args=validate_args,
            dtype=dtype)

    # Transpose to the shape [dim, num_samples]
    low_discrepancy_seq = tf.transpose(low_discrepancy_seq)
    size_sample = tf.size(sample_shape)
    size_batch = tf.size(batch_shape)
    # Permutation for `output_shape_t` to the output shape
    permutation = tf.concat([
        tf.range(size_batch, size_batch + size_sample),
        tf.range(size_batch - 1, -1, -1)
    ], -1)
    # Reshape Sobol samples to the correct output shape
    low_discrepancy_seq = tf.transpose(
        tf.reshape(low_discrepancy_seq, output_shape_t), permutation)
    # Apply inverse Normal CDF to Sobol samples to obtain the corresponding
    # Normal samples
    samples = tf.math.erfinv((low_discrepancy_seq - 0.5) * 2) * _SQRT_2
    if scale_matrix is None:
        return mean + samples
    else:
        return mean + tf.linalg.matvec(scale_matrix, samples)
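
A minimal sketch of the inverse-CDF step at the end of `_mvnormal_quasi`: `erfinv((u - 0.5) * 2) * sqrt(2)` is the standard normal quantile function applied to a uniform draw `u`.

import numpy as np
import tensorflow as tf

_SQRT_2 = np.sqrt(2.0)
u = tf.constant([0.1, 0.5, 0.9], dtype=tf.float64)
print(tf.math.erfinv((u - 0.5) * 2) * _SQRT_2)  # approximately [-1.2816, 0.0, 1.2816]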
Example #16
def bracket_root(objective_fn,
                 dtype=tf.float32,
                 num_points=512,
                 name='bracket_root'):
    """Finds bounds that bracket a root of the objective function.

  This method attempts to return an interval bracketing a root of the objective
  function. It evaluates the objective in parallel at `num_points`
  locations, at exponentially increasing distance from the origin, and returns
  the first pair of adjacent points `[low, high]` such that the objective is
  finite and has a different sign at the two points. If no such pair was
  observed, it returns the trivial interval
  `[np.finfo(dtype).min, np.finfo(dtype).max]` containing all float values of
  the specified `dtype`. If the objective has multiple
  roots, the returned interval will contain at least one (but perhaps not all)
  of the roots.

  Args:
    objective_fn: Python callable for which roots are searched. It must be a
      continuous function that accepts a scalar `Tensor` of type `dtype` and
      returns a `Tensor` of shape `batch_shape`.
    dtype: Optional float `dtype` of inputs to `objective_fn`.
      Default value: `tf.float32`.
    num_points: Optional Python `int` number of points at which to evaluate
      the objective.
      Default value: `512`.
    name: Python `str` name given to ops created by this method.
  Returns:
    low: Float `Tensor` of shape `batch_shape` and dtype `dtype`. Lower bound
      on a root of `objective_fn`.
    high: Float `Tensor` of shape `batch_shape` and dtype `dtype`. Upper bound
      on a root of `objective_fn`.
  """
    with tf.name_scope(name):
        # Build a logarithmic sequence of `num_points` values from -inf to inf.
        dtype_info = np.finfo(dtype_util.as_numpy_dtype(dtype))
        xs_positive = tf.exp(
            tf.linspace(tf.cast(-10., dtype), tf.math.log(dtype_info.max),
                        num_points // 2))
        xs = tf.concat([tf.reverse(-xs_positive, axis=[0]), xs_positive],
                       axis=0)

        # Evaluate the objective at all points. The objective function may return
        # a batch of values (e.g., `objective(x) = x - batch_of_roots`).
        if NUMPY_MODE:
            objective_output_spec = objective_fn(tf.zeros([], dtype=dtype))
        else:
            objective_output_spec = callable_util.get_output_spec(
                objective_fn, tf.convert_to_tensor(0., dtype=dtype))
        batch_ndims = tensorshape_util.rank(objective_output_spec.shape)
        if batch_ndims is None:
            raise ValueError('Cannot infer tensor rank of objective values.')
        xs_pad_shape = ps.pad([num_points],
                              paddings=[[0, batch_ndims]],
                              constant_values=1)
        ys = objective_fn(tf.reshape(xs, xs_pad_shape))

        # Find the smallest point where the objective is finite.
        is_finite = tf.math.is_finite(ys)
        ys_transposed = distribution_util.move_dimension(  # For batch gather.
            ys, 0, -1)
        first_finite_value = tf.gather(
            ys_transposed,
            tf.argmax(is_finite, axis=0),  # Index of smallest finite point.
            batch_dims=batch_ndims,
            axis=-1)
        # Select the next point where the objective has a different sign.
        sign_change_idx = tf.argmax(
            tf.not_equal(tf.math.sign(ys), tf.math.sign(first_finite_value))
            & is_finite,
            axis=0)
        # If the sign never changes, we can't bracket a root.
        bracketing_failed = tf.equal(sign_change_idx, 0)
        # If the objective's sign is zero, we've found an actual root.
        root_found = tf.equal(
            tf.gather(tf.math.sign(ys_transposed),
                      sign_change_idx,
                      batch_dims=batch_ndims,
                      axis=-1), 0.)
        return _structure_broadcasting_where(
            bracketing_failed,
            # If we didn't detect a sign change, fall back to the trivial interval.
            (dtype_info.min, dtype_info.max),
            # Otherwise, return the points around the sign change, unless we
            # actually evaluated a root, in which case, return the zero-width
            # bracket at that root.
            (tf.gather(
                xs,
                tf.where(bracketing_failed | root_found, sign_change_idx,
                         sign_change_idx - 1)), tf.gather(xs, sign_change_idx)
             ))
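
A hedged usage sketch: a `bracket_root` with this signature is exposed as `tfp.math.bracket_root` in recent TensorFlow Probability releases (treat the import path as an assumption).

import tensorflow_probability as tfp

# Objective with a single root at x = 1.
low, high = tfp.math.bracket_root(lambda x: x - 1.)
# low <= 1 <= high; the objective changes sign between low and high.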