Example #1
    def _compute_auxiliary_structure(self, contents_and_mask):
        """Compute segment and position metadata."""
        contents = contents_and_mask[:, : self._num_sequences]
        start_mask = tf.cast(
            contents_and_mask[:, self._num_sequences :], dtype=INDEX_DTYPE
        )

        segment = tf.cumsum(start_mask, axis=0)
        uniform_count = tf.ones_like(segment[:, 0])
        position = []
        for i in range(self._num_sequences):
            segment_slice = segment[:, i]
            counts = tf.math.segment_sum(uniform_count, segment[:, i])
            position.append(
                tf.range(self._packed_length)
                - tf.cumsum(tf.gather(counts, segment_slice - 1) * start_mask[:, i])
            )
        position = tf.concat([i[:, tf.newaxis] for i in position], axis=1)

        # Correct for padding tokens.
        pad_mask = tf.cast(tf.not_equal(contents, 0), dtype=INDEX_DTYPE)
        segment *= pad_mask
        position *= pad_mask

        return segment, position
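A minimal sketch (not from the snippet above; assumes TF2 eager execution) of the core idea: a cumulative sum over a start mask assigns a segment id to every token of a packed example.

import tensorflow as tf

start_mask = tf.constant([1, 0, 0, 1, 0, 1, 0, 0])  # 1 marks the first token of each packed sequence
segment = tf.cumsum(start_mask)
print(segment.numpy())  # [1 1 1 2 2 3 3 3]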
Example #2
def get_random_walk_noise_for_position_sequence(vel_sequence,
                                                noise_std_last_step):
    """Returns random-walk noise in the velocity applied to the position."""

    acc_sequence = learned_simulator.time_diff(vel_sequence)

    # We want the noise scale in the acceleration at the last step to be fixed.
    # Because we are going to compose noise at each step using a random walk:
    # std_last_step**2 = num_acc * std_each_step**2
    # so to keep `std_last_step` fixed, we apply at each step:
    # std_each_step = std_last_step / np.sqrt(num_acc)
    num_acc = acc_sequence.shape.as_list()[1]
    acc_sequence_noise = tf.random.normal(tf.shape(acc_sequence),
                                          stddev=noise_std_last_step /
                                          num_acc**0.5,
                                          dtype=vel_sequence.dtype)

    # Apply the random walk.
    acc_sequence_noise = tf.cumsum(acc_sequence_noise, axis=1)

    # Integrate the noise in the acceleration to the velocities, assuming
    # an Euler integrator and a dt = 1, and adding no noise to the very first
    # velocity (since that will only be used to calculate the first velocity
    # change).
    vel_sequence_noise = tf.concat([
        tf.zeros_like(acc_sequence_noise[:, 0:1]),
        tf.cumsum(acc_sequence_noise, axis=1)
    ],
                                   axis=1)

    return vel_sequence_noise
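A quick sanity check of the comment above (my own sketch, in plain numpy): composing `num` independent steps of std `s / sqrt(num)` as a random walk leaves the last step with std close to `s`.

import numpy as np

num, s = 20, 0.1
steps = np.random.normal(scale=s / np.sqrt(num), size=(100000, num))
walk = np.cumsum(steps, axis=1)
print(walk[:, -1].std())  # ~= 0.1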
Example #3
def get_random_walk_noise_for_position_sequence(position_sequence,
                                                noise_std_last_step):
    """Returns random-walk noise in the velocity applied to the position."""

    velocity_sequence = learned_simulator.time_diff(position_sequence)
    # input_sequence[:, 1:] - input_sequence[:, :-1]

    # We want the noise scale in the velocity at the last step to be fixed.
    # Because we are going to compose noise at each step using a random_walk:
    # std_last_step**2 = num_velocities * std_each_step**2
    # so to keep `std_last_step` fixed, we apply at each step:
    # std_each_step = std_last_step / np.sqrt(num_velocities)
    # TODO(alvarosg): Make sure this is consistent with the value and
    # description provided in the paper.
    num_velocities = velocity_sequence.shape.as_list()[1]
    velocity_sequence_noise = tf.random.normal(tf.shape(velocity_sequence),
                                               stddev=noise_std_last_step /
                                               num_velocities**0.5,
                                               dtype=position_sequence.dtype)

    # Apply the random walk.
    velocity_sequence_noise = tf.cumsum(velocity_sequence_noise, axis=1)

    # Integrate the noise in the velocity to the positions, assuming
    # an Euler integrator and a dt = 1, and adding no noise to the very first
    # position (since that will only be used to calculate the first position
    # change).
    position_sequence_noise = tf.concat([
        tf.zeros_like(velocity_sequence_noise[:, 0:1]),
        tf.cumsum(velocity_sequence_noise, axis=1)
    ],
                                        axis=1)

    return position_sequence_noise
Example #4
 def strategy(self, outputs, X, y, nump=False, cumulative=False):
     price_changes = X[:, 1:, :] - X[:, :-1, :]
     helper0 = 0.05 * X[:, 1:2, :]
     prices = X[:, 1:, :]
     strategychangeshelper = outputs[:, 1:, :] - outputs[:, :-1, :]
     strategychangeshelper = strategychangeshelper[:, 1:, :]
     helper1 = outputs[:, 1:2, :]
     helper2 = outputs[:, self.ttm - 2:self.ttm - 1, :]
     if cumulative:
         if nump:
             price_changes[:, 0, :] = 0.05 * price_changes[:, 0, :]
             strategychanges = np.concatenate(
                 [helper1, strategychangeshelper, helper2], axis=1)
             gains_of_trade = np.cumsum(np.sum(price_changes * outputs,
                                               axis=2),
                                        axis=1)
             transaction_costs = np.cumsum(np.sum(np.abs(prices) *
                                                  np.abs(strategychanges),
                                                  axis=2),
                                           axis=1)
         else:
             price_changes = price_changes[:, 1:, :]
             price_changes = tf.concat([helper0, price_changes], axis=1)
             strategychanges = tf.concat(
                 [helper1, strategychangeshelper, helper2], axis=1)
             gains_of_trade = tf.cumsum(tf.reduce_sum(price_changes *
                                                      outputs,
                                                      axis=2),
                                        axis=1)
             transaction_costs = tf.cumsum(tf.reduce_sum(
                 tf.abs(prices) * tf.abs(strategychanges), axis=2),
                                           axis=1)
     else:
         if nump:
             price_changes[:, 0, :] = 0.05 * price_changes[:, 0, :]
             strategychanges = np.concatenate(
                 [helper1, strategychangeshelper, helper2], axis=1)
             gains_of_trade = np.sum(np.sum(price_changes * outputs,
                                            axis=1),
                                     axis=1)
             transaction_costs = np.sum(np.sum(np.abs(prices) *
                                               np.abs(strategychanges),
                                               axis=1),
                                        axis=1)
         else:
             price_changes = price_changes[:, 1:, :]
             price_changes = tf.concat([helper0, price_changes], axis=1)
             strategychanges = tf.concat(
                 [helper1, strategychangeshelper, helper2], axis=1)
             gains_of_trade = tf.reduce_sum(tf.reduce_sum(tf.multiply(
                 price_changes, outputs),
                                                          axis=1),
                                            axis=1)
             transaction_costs = tf.reduce_sum(tf.reduce_sum(tf.multiply(
                 tf.abs(prices), tf.abs(strategychanges)),
                                                             axis=1),
                                               axis=1)
     return gains_of_trade, transaction_costs
Example #5
        def infer_step(result, length):
            """Inference step."""
            def print_info(result, length, new_length):
                vocab = self.problem_hparams.vocabulary["targets"]
                tf.logging.info(
                    "length=%s new_length=%s length_diff=%s new_suffix=%s",
                    length,
                    new_length,
                    new_length - length,
                    str([
                        vocab._subtoken_id_to_subtoken_string(index)  # pylint: disable=protected-access
                        for index in result[0, -block_size:, 0,
                                            0][:new_length - length]
                    ]).decode("unicode-escape"),
                )

            features["targets"] = tf.pad(result,
                                         [[0, 0], [0, 1], [0, 0], [0, 0]])
            samples, logits, losses = self.sample(features)  # pylint: disable=unused-variable

            _, top_k_indices = tf.nn.top_k(
                logits[:, :-1, :1, :, :],
                k=self._decode_hparams.guess_and_check_top_k)
            in_top_k = tf.reduce_any(tf.equal(tf.to_int64(top_k_indices),
                                              tf.expand_dims(result, 4)),
                                     axis=4)

            eos_cumsum = tf.cumsum(tf.to_int32(
                tf.equal(result, text_encoder.EOS_ID)),
                                   axis=1)
            after_eos = tf.greater(common_layers.shift_right(eos_cumsum), 0)

            correct = tf.logical_and(in_top_k, tf.logical_not(after_eos))
            correct_cumsum = tf.cumsum(tf.to_int32(correct), axis=1)
            perfect_cumsum = 1 + tf.range(tf.shape(correct)[1])
            for axis in [0, 2, 3]:
                perfect_cumsum = tf.expand_dims(perfect_cumsum, axis=axis)

            new_length = tf.reduce_sum(tf.to_int32(
                tf.equal(correct_cumsum, perfect_cumsum)),
                                       axis=1)
            new_length = tf.squeeze(new_length, axis=[0, 1, 2])
            new_length = tf.minimum(new_length, decode_length)

            new_result = tf.concat([
                result[:, :new_length, :, :],
                tf.reshape(samples[:, new_length, :block_size, :],
                           [1, block_size, 1, 1])
            ],
                                   axis=1)

            with tf.control_dependencies(
                [tf.py_func(print_info, [result, length, new_length], [])]):
                new_result = tf.identity(new_result)

            return new_result, new_length
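A standalone sketch (mine; assumes TF2 eager execution) of the prefix-length trick used above: comparing the cumulative sum of correctness flags against the counts 1..n measures the longest all-correct prefix.

import tensorflow as tf

correct = tf.constant([1, 1, 0, 1, 1])
prefix_len = tf.reduce_sum(
    tf.cast(tf.equal(tf.cumsum(correct), tf.range(1, 6)), tf.int32))
print(prefix_len.numpy())  # 2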
Example #6
def _lovasz_grad(gt_sorted):
    """
    Computes gradient of the Lovasz extension w.r.t sorted errors
    See Alg. 1 in paper
    """
    gts = tf.reduce_sum(gt_sorted)
    intersection = gts - tf.cumsum(gt_sorted)
    union = gts + tf.cumsum(1. - gt_sorted)
    jaccard = 1. - intersection / union
    jaccard = tf.concat((jaccard[0:1], jaccard[1:] - jaccard[:-1]), 0)
    return jaccard
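Hedged usage sketch (mine; assumes eager execution): calling `_lovasz_grad` on ground-truth labels sorted by decreasing error yields the per-position weights of the Lovasz extension.

import tensorflow as tf

grad = _lovasz_grad(tf.constant([1., 1., 0., 1., 0.]))
print(grad.numpy())  # per-position Jaccard increments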
Example #7
def mean_average_precision(labels,
                           predictions,
                           weights=None,
                           topn=None,
                           name=None):
    """Computes mean average precision (MAP).
    The implementation of MAP is based on Equation (1.7) in the following:
    Liu, T-Y "Learning to Rank for Information Retrieval" found at
    https://www.nowpublishers.com/article/DownloadSummary/INR-016

    Args:
      labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means a
        relevant example.
      predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
        the ranking score of the corresponding example.
      weights: A `Tensor` of the same shape of predictions or [batch_size, 1]. The
        former case is per-example and the latter case is per-list.
      topn: A cutoff for how many examples to consider for this metric.
      name: A string used as the name for this metric.

    Returns:
      A metric for the mean average precision.
    """
    with tf.compat.v1.name_scope(name, 'mean_average_precision',
                                 (labels, predictions, weights)):
        labels, predictions, weights, topn = _prepare_and_validate_params(
            labels, predictions, weights, topn)
        sorted_labels, sorted_weights = utils.sort_by_scores(predictions,
                                                             [labels, weights],
                                                             topn=topn)
        # Relevance = 1.0 when labels >= 1.0.
        sorted_relevance = tf.cast(tf.greater_equal(sorted_labels, 1.0),
                                   dtype=tf.float32)
        per_list_relevant_counts = tf.cumsum(sorted_relevance, axis=1)
        per_list_cutoffs = tf.cumsum(tf.ones_like(sorted_relevance), axis=1)
        per_list_precisions = tf.math.divide_no_nan(per_list_relevant_counts,
                                                    per_list_cutoffs)
        total_precision = tf.reduce_sum(input_tensor=per_list_precisions *
                                        sorted_weights * sorted_relevance,
                                        axis=1,
                                        keepdims=True)
        total_relevance = tf.reduce_sum(input_tensor=sorted_weights *
                                        sorted_relevance,
                                        axis=1,
                                        keepdims=True)
        per_list_map = tf.math.divide_no_nan(total_precision, total_relevance)
        # per_list_weights are computed from the whole list to avoid the problem of
        # 0 when there is no relevant example in topn.
        per_list_weights = _per_example_weights_to_per_list_weights(
            weights, tf.cast(tf.greater_equal(labels, 1.0), dtype=tf.float32))
        return tf.compat.v1.metrics.mean(per_list_map, per_list_weights)
Example #8
def _word_span_mask(inputs, tgt_len, num_predict, boundary, stride=1):
    """Sample whole word spans as prediction targets."""
    # Note: 1.2 is roughly the token-to-word ratio
    non_pad_len = tgt_len + 1 - stride
    chunk_len_fp = non_pad_len / num_predict / 1.2
    round_to_int = lambda x: tf.cast(tf.round(x), tf.int64)

    # Sample span lengths from a zipf distribution
    span_len_seq = np.arange(FLAGS.min_word, FLAGS.max_word + 1)
    probs = np.array([1.0 / (i + 1) for i in span_len_seq])
    probs /= np.sum(probs)
    logits = tf.constant(np.log(probs), dtype=tf.float32)

    # Sample `num_predict` words here: note that this is over-sampling
    span_lens = tf.random.categorical(
        logits=logits[None],
        num_samples=num_predict,
        dtype=tf.int64,
    )[0] + FLAGS.min_word

    # Sample the ratio [0.0, 1.0) of left context lengths
    span_lens_fp = tf.cast(span_lens, tf.float32)
    left_ratio = tf.random.uniform(shape=[num_predict], minval=0.0, maxval=1.0)
    left_ctx_len = left_ratio * span_lens_fp * (chunk_len_fp - 1)

    left_ctx_len = round_to_int(left_ctx_len)
    right_offset = round_to_int(span_lens_fp * chunk_len_fp) - left_ctx_len

    beg_indices = (tf.cumsum(left_ctx_len) +
                   tf.cumsum(right_offset, exclusive=True))
    end_indices = beg_indices + span_lens

    # Remove out of range `boundary` indices
    max_boundary_index = tf.cast(tf.shape(boundary)[0] - 1, tf.int64)
    valid_idx_mask = end_indices < max_boundary_index
    beg_indices = tf.boolean_mask(beg_indices, valid_idx_mask)
    end_indices = tf.boolean_mask(end_indices, valid_idx_mask)

    beg_indices = tf.gather(boundary, beg_indices)
    end_indices = tf.gather(boundary, end_indices)

    # Shuffle valid `position` indices
    num_valid = tf.cast(tf.shape(beg_indices)[0], tf.int64)
    order = tf.random.shuffle(tf.range(num_valid, dtype=tf.int64))
    beg_indices = tf.gather(beg_indices, order)
    end_indices = tf.gather(end_indices, order)

    return _idx_pair_to_mask(beg_indices, end_indices, inputs, tgt_len,
                             num_predict)
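A small sketch of the index layout above (my own numbers): the cumulative sum of left-context lengths plus the exclusive cumulative sum of right offsets places consecutive, non-overlapping chunks, so sampled spans cannot collide.

import tensorflow as tf

left_ctx_len = tf.constant([2, 1, 3], dtype=tf.int64)
right_offset = tf.constant([5, 4, 6], dtype=tf.int64)
beg_indices = tf.cumsum(left_ctx_len) + tf.cumsum(right_offset, exclusive=True)
print(beg_indices.numpy())  # [ 2  8 15]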
Example #9
def make_att_mask_from_breakpoints(att_breakpoints: tf.Tensor,
                                   use_starting_breakpoints: bool = False,
                                   name: Optional[Text] = None) -> tf.Tensor:
    """Makes self-attention mask from attention breakpoints.

  Each attention breakpoint marks the end of a segment by default (or the
  start if `use_starting_breakpoints` is True), and the resulting
  mask prevents attention across different segments.

  Args:
    att_breakpoints: <int32>[batch_size, seq_len] Tensor containing only 0 and 1
      values, where each "1" marks the end of a segment (or the start, depending
      on `use_starting_breakpoints`).
    use_starting_breakpoints: If True, breakpoints represent starts of segments
      rather than ends of segments. Default False.
    name: A name for the operation (optional).

  Returns:
    <int32>[batch_size, seq_len, seq_len] attention mask.
  """
    with tf.name_scope(name or 'make_att_mask_from_breakpoints'):
        att_breakpoints = tf.convert_to_tensor(att_breakpoints)

        if att_breakpoints.shape.rank != 2:
            raise ValueError('`att_breakpoints` must be a 2-D tensor.')

        if not use_starting_breakpoints:
            att_breakpoints = tensor_utils.shift_elements_right(
                att_breakpoints, axis=-1, amount=1)

        segment_ids = tf.cumsum(att_breakpoints, axis=1)
        return make_segmented_att_mask(segment_ids)
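A minimal sketch of the segment-id step above (mine; `tf.pad` stands in for the repo's `tensor_utils.shift_elements_right`): shifting end-of-segment markers right by one and taking a cumulative sum gives each segment its own id.

import tensorflow as tf

ends = tf.constant([[0, 0, 1, 0, 1, 0]])         # "1" marks the last token of a segment
starts = tf.pad(ends, [[0, 0], [1, 0]])[:, :-1]  # shift right by one position
print(tf.cumsum(starts, axis=1).numpy())         # [[0 0 0 1 1 2]]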
Example #10
def graves_attn(
    src: tf.Tensor, tgt: tf.Tensor, nk: int, scope: str, w_stddev: float = 0.02
) -> tf.Tensor:
    """Compute context vector and attention matrix.

    See also: Eq. (46-51) A. Graves https://arxiv.org/pdf/1308.0850.pdf

    Args:
        src: float tensor [nb, nsrc, nh]
        tgt: float tensor [nb, ntgt, ng]
        nk: number of Gaussians in attention
        scope: variable scope name
        w_stddev: stddev of the weight initializer

    Returns:
        w: float tensor [nb, ntgt, nh], context vectors
        attn: float tensor [nb, ntgt, nsrc], attention weights

    """
    with custom_variable_scope(scope):
        abk = tf.exp(linear(tgt, nk * 3, "linear_abk", w_stddev))
        # [nb, ntgt, 1, nk]
        b = abk[:, :, tf.newaxis, nk : 2 * nk]
        # TODO: be careful for one step version
        k = tf.cumsum(abk[:, :, tf.newaxis, 2 * nk :], axis=1)

        nsrc = shape(src)[1]
        u = tf.reshape(tf.range(nsrc, dtype=k.dtype), (1, 1, nsrc, 1))
        # [nb, ntgt, nsrc, nk]
        e = tf.exp(-b * (k - u))
        # [nb, ntgt, nk, 1]
        a = abk[:, :, :nk, tf.newaxis]
        # [nb, ntgt, nsrc]
        attn = tf.squeeze(tf.matmul(e, a, name="mm_attn"), axis=-1)
        # [nb, ntgt, nh]
        w = tf.matmul(attn, src, name="mm_context")
        return w, attn
Example #11
def aggregate(tensor):
    """Aggregate a tensor across distributed replicas.

  If not running in a distributed context, this just returns the input tensor.

  Args:
    tensor: tensor aggregate.

  Returns:
    output: A single tensor with all values across different replicas
      concatenated along the first axis.  The output is in order of gpu index.
  """

    replica_ctx = tf.distribute.get_replica_context()
    if not replica_ctx:
        return tensor
    num = tf.shape(tensor)[0:1]
    padded_num = _pad(num, replica_ctx.num_replicas_in_sync,
                      replica_ctx.replica_id_in_sync_group)
    all_num = replica_ctx.all_reduce('sum', padded_num)
    index_in_output = tf.gather(tf.cumsum(tf.concat([[0], all_num], axis=0)),
                                replica_ctx.replica_id_in_sync_group)
    total_num = tf.reduce_sum(all_num)
    padded_tensor = _pad(tensor, total_num, index_in_output)
    return replica_ctx.all_reduce('sum', padded_tensor)
Example #12
  def specgrams_to_melspecgrams(self, specgrams):
    """Converts specgrams to melspecgrams.

    Args:
      specgrams: Tensor of log magnitudes and instantaneous frequencies,
        shape [batch, time, freq, 2].

    Returns:
      melspecgrams: Tensor of log magnitudes and instantaneous frequencies,
        shape [batch, time, freq, 2], mel scaling of frequencies.
    """
    if self._mel_downscale is None:
      return specgrams

    logmag = specgrams[:, :, :, 0]
    p = specgrams[:, :, :, 1]

    mag2 = tf.exp(2.0 * logmag)
    phase_angle = tf.cumsum(p * np.pi, axis=-2)

    l2mel = tf.to_float(self._linear_to_mel_matrix())
    logmelmag2 = self._safe_log(tf.tensordot(mag2, l2mel, 1))
    mel_phase_angle = tf.tensordot(phase_angle, l2mel, 1)
    mel_p = spectral_util.instantaneous_frequency(mel_phase_angle)

    return tf.concat(
        [logmelmag2[:, :, :, tf.newaxis], mel_p[:, :, :, tf.newaxis]], axis=-1)
Example #13
def boolean_mask(boxlist, indicator, fields=None, scope=None,
                 use_static_shapes=False, indicator_sum=None):
  """Select boxes from BoxList according to indicator and return new BoxList.

  `boolean_mask` returns the subset of boxes that are marked as "True" by the
  indicator tensor. By default, `boolean_mask` returns boxes corresponding to
  the input index list, as well as all additional fields stored in the boxlist
  (indexing into the first dimension).  However one can optionally only draw
  from a subset of fields.

  Args:
    boxlist: BoxList holding N boxes
    indicator: a rank-1 boolean tensor
    fields: (optional) list of fields to also gather from.  If None (default),
      all fields are gathered from.  Pass an empty fields list to only gather
      the box coordinates.
    scope: name scope.
    use_static_shapes: Whether to use an implementation with static shape
      guarantees.
    indicator_sum: An integer containing the sum of the `indicator` vector. Only
      required if `use_static_shapes` is True.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
      specified by indicator
  Raises:
    ValueError: if `indicator` is not a rank-1 boolean tensor.
  """
  with tf.name_scope(scope, 'BooleanMask'):
    if indicator.shape.ndims != 1:
      raise ValueError('indicator should have rank 1')
    if indicator.dtype != tf.bool:
      raise ValueError('indicator should be a boolean tensor')
    if use_static_shapes:
      if not (indicator_sum and isinstance(indicator_sum, int)):
        raise ValueError('`indicator_sum` must be of type int')
      selected_positions = tf.cast(indicator, dtype=tf.float32)
      indexed_positions = tf.cast(
          tf.multiply(
              tf.cumsum(selected_positions), selected_positions),
          dtype=tf.int32)
      one_hot_selector = tf.one_hot(
          indexed_positions - 1, indicator_sum, dtype=tf.float32)
      sampled_indices = tf.cast(
          tf.tensordot(
              tf.cast(tf.range(tf.shape(indicator)[0]), dtype=tf.float32),
              one_hot_selector,
              axes=[0, 0]),
          dtype=tf.int32)
      return gather(boxlist, sampled_indices, use_static_shapes=True)
    else:
      subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator))
      if fields is None:
        fields = boxlist.get_extra_fields()
      for field in fields:
        if not boxlist.has_field(field):
          raise ValueError('boxlist must contain all specified fields')
        subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator)
        subboxlist.add_field(field, subfieldlist)
      return subboxlist
Example #14
  def melspecgrams_to_specgrams(self, melspecgrams):
    """Converts melspecgrams to specgrams.

    Args:
      melspecgrams: Tensor of log magnitudes and instantaneous frequencies,
        shape [batch, time, freq, 2], mel scaling of frequencies.

    Returns:
      specgrams: Tensor of log magnitudes and instantaneous frequencies,
        shape [batch, time, freq, 2].
    """
    if self._mel_downscale is None:
      return melspecgrams

    logmelmag2 = melspecgrams[:, :, :, 0]
    mel_p = melspecgrams[:, :, :, 1]

    mel2l = tf.to_float(self._mel_to_linear_matrix())
    mag2 = tf.tensordot(tf.exp(logmelmag2), mel2l, 1)
    logmag = 0.5 * self._safe_log(mag2)
    mel_phase_angle = tf.cumsum(mel_p * np.pi, axis=-2)
    phase_angle = tf.tensordot(mel_phase_angle, mel2l, 1)
    p = spectral_util.instantaneous_frequency(phase_angle)

    return tf.concat(
        [logmag[:, :, :, tf.newaxis], p[:, :, :, tf.newaxis]], axis=-1)
Example #15
def offsets_to_segment_ids(offsets):
  '''Transforms offsets (per-segment counts) to segment_ids;
  the segment_ids can be used in tf.segment_sum/segment_mean.
  [3, 0, 1, 2] -> [0, 0, 0, 2, 3, 3].
  '''
  c = tf.cumsum(offsets)
  return tf.searchsorted(c, tf.range(c[-1]), side='right')
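Hedged usage sketch (mine; assumes TF2 eager execution): turning per-segment counts into segment ids, e.g. for `tf.math.segment_sum`.

import tensorflow as tf

seg_ids = offsets_to_segment_ids(tf.constant([3, 0, 1, 2]))
print(seg_ids.numpy())  # [0 0 0 2 3 3]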
Example #16
def unwrap(p, discont=np.pi, axis=-1):
    """Unwrap a cyclical phase tensor.

  Args:
    p: Phase tensor.
    discont: Float, size of the cyclic discontinuity.
    axis: Axis of which to unwrap.

  Returns:
    unwrapped: Unwrapped tensor of same size as input.
  """
    dd = diff(p, axis=axis)
    ddmod = tf.mod(dd + np.pi, 2.0 * np.pi) - np.pi
    idx = tf.logical_and(tf.equal(ddmod, -np.pi), tf.greater(dd, 0))
    ddmod = tf.where(idx, tf.ones_like(ddmod) * np.pi, ddmod)
    ph_correct = ddmod - dd
    idx = tf.less(tf.abs(dd), discont)
    ddmod = tf.where(idx, tf.zeros_like(ddmod), dd)
    ph_cumsum = tf.cumsum(ph_correct, axis=axis)

    shape = p.get_shape().as_list()
    shape[axis] = 1
    ph_cumsum = tf.concat([tf.zeros(shape, dtype=p.dtype), ph_cumsum],
                          axis=axis)
    unwrapped = p + ph_cumsum
    return unwrapped
Example #17
    def _get_values_from_start_and_end(self, input_tensor, num_start_samples,
                                       num_end_samples, total_num_samples):
        """slices num_start_samples and last num_end_samples from input_tensor.

    Args:
      input_tensor: An int32 tensor of shape [N] to be sliced.
      num_start_samples: Number of examples to be sliced from the beginning
        of the input tensor.
      num_end_samples: Number of examples to be sliced from the end of the
        input tensor.
      total_num_samples: Sum of num_start_samples and num_end_samples. This
        should be a scalar.

    Returns:
      A tensor containing the first num_start_samples and last num_end_samples
      from input_tensor.

    """
        input_length = tf.shape(input_tensor)[0]
        start_positions = tf.less(tf.range(input_length), num_start_samples)
        end_positions = tf.greater_equal(tf.range(input_length),
                                         input_length - num_end_samples)
        selected_positions = tf.logical_or(start_positions, end_positions)
        selected_positions = tf.cast(selected_positions, tf.float32)
        indexed_positions = tf.multiply(tf.cumsum(selected_positions),
                                        selected_positions)
        one_hot_selector = tf.one_hot(tf.cast(indexed_positions, tf.int32) - 1,
                                      total_num_samples,
                                      dtype=tf.float32)
        return tf.cast(
            tf.tensordot(tf.cast(input_tensor, tf.float32),
                         one_hot_selector,
                         axes=[0, 0]), tf.int32)
Example #18
def clip_eta(eta, ord, eps):
    """
  Helper function to clip the perturbation to epsilon norm ball.
  :param eta: A tensor with the current perturbation.
  :param ord: Order of the norm (mimics Numpy).
              Possible values: np.inf, 1 or 2.
  :param eps: Epsilon, bound of the perturbation.
  """

    # Clipping perturbation eta to the `ord` norm ball
    if ord not in [np.inf, 1, 2]:
        raise ValueError('ord must be np.inf, 1, or 2.')
    reduc_ind = list(xrange(1, len(eta.get_shape())))
    avoid_zero_div = 1e-12
    if ord == np.inf:
        eta = clip_by_value(eta, -eps, eps)
    elif ord == 1:
        # Implements a projection algorithm onto the l1-ball from
        # (Duchi et al. 2008) that runs in time O(d*log(d)) where d is the
        # input dimension.
        # Paper link (Duchi et al. 2008): https://dl.acm.org/citation.cfm?id=1390191

        eps = tf.cast(eps, eta.dtype)

        dim = tf.reduce_prod(tf.shape(eta)[1:])
        eta_flat = tf.reshape(eta, (-1, dim))
        abs_eta = tf.abs(eta_flat)

        if 'sort' in dir(tf):
            mu = -tf.sort(-abs_eta, axis=-1)
        else:
            # `tf.sort` is only available in TF 1.13 onwards
            mu = tf.nn.top_k(abs_eta, k=dim, sorted=True)[0]
        cumsums = tf.cumsum(mu, axis=-1)
        js = tf.cast(tf.divide(1, tf.range(1, dim + 1)), eta.dtype)
        t = tf.cast(tf.greater(mu - js * (cumsums - eps), 0), eta.dtype)

        rho = tf.argmax(t * cumsums, axis=-1)
        rho_val = tf.reduce_max(t * cumsums, axis=-1)
        theta = tf.divide(rho_val - eps, tf.cast(1 + rho, eta.dtype))

        eta_sgn = tf.sign(eta_flat)
        eta_proj = eta_sgn * tf.maximum(abs_eta - theta[:, tf.newaxis], 0)
        eta_proj = tf.reshape(eta_proj, tf.shape(eta))

        norm = tf.reduce_sum(tf.abs(eta), reduc_ind)
        eta = tf.where(tf.greater(norm, eps), eta_proj, eta)

    elif ord == 2:
        # avoid_zero_div must go inside sqrt to avoid a divide by zero
        # in the gradient through this operation
        norm = tf.sqrt(
            tf.maximum(avoid_zero_div,
                       reduce_sum(tf.square(eta), reduc_ind, keepdims=True)))
        # We must *clip* to within the norm ball, not *normalize* onto the
        # surface of the ball
        factor = tf.minimum(1., div(eps, norm))
        eta = eta * factor
    return eta
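For reference, a plain-numpy sketch of the same Duchi et al. (2008) projection for a single vector (my own code, not part of the library above).

import numpy as np

def project_l1_ball(v, eps):
    # Project v onto the l1 ball of radius eps (Duchi et al. 2008).
    if np.abs(v).sum() <= eps:
        return v
    mu = np.sort(np.abs(v))[::-1]
    cumsums = np.cumsum(mu)
    js = np.arange(1, v.size + 1)
    rho = np.nonzero(mu - (cumsums - eps) / js > 0)[0][-1]
    theta = (cumsums[rho] - eps) / (rho + 1)
    return np.sign(v) * np.maximum(np.abs(v) - theta, 0)

print(project_l1_ball(np.array([0.5, -0.4, 0.1]), 0.7))  # [ 0.4 -0.3  0. ]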
Example #19
    def _subsample_selection_to_desired_neg_pos_ratio(
            self,
            indices,
            match,
            max_negatives_per_positive,
            min_negatives_per_image=0):
        """Subsample a collection of selected indices to a desired neg:pos ratio.

    This function takes a subset of M indices (indexing into a large anchor
    collection of N anchors where M<N) which are labeled as positive/negative
    via a Match object (matched indices are positive, unmatched indices
    are negative).  It returns a subset of the provided indices retaining all
    positives as well as up to the first K negatives, where:
      K=floor(num_negative_per_positive * num_positives).

    For example, if indices=[2, 4, 5, 7, 9, 10] (indexing into 12 anchors),
    with positives=[2, 5] and negatives=[4, 7, 9, 10] and
    num_negatives_per_positive=1, then the returned subset of indices
    is [2, 4, 5, 7].

    Args:
      indices: An integer tensor of shape [M] representing a collection
        of selected anchor indices
      match: A matcher.Match object encoding the match between anchors and
        groundtruth boxes for a given image, with rows of the Match objects
        corresponding to groundtruth boxes and columns corresponding to anchors.
      max_negatives_per_positive: (float) maximum number of negatives for
        each positive anchor.
      min_negatives_per_image: minimum number of negative anchors for a given
        image. Allow sampling negatives in image without any positive anchors.

    Returns:
      selected_indices: An integer tensor of shape [M'] representing a
        collection of selected anchor indices with M' <= M.
      num_positives: An integer tensor representing the number of positive
        examples in selected set of indices.
      num_negatives: An integer tensor representing the number of negative
        examples in selected set of indices.
    """
        positives_indicator = tf.gather(match.matched_column_indicator(),
                                        indices)
        negatives_indicator = tf.gather(match.unmatched_column_indicator(),
                                        indices)
        num_positives = tf.reduce_sum(
            tf.cast(positives_indicator, dtype=tf.int32))
        max_negatives = tf.maximum(
            min_negatives_per_image,
            tf.cast(max_negatives_per_positive *
                    tf.cast(num_positives, dtype=tf.float32),
                    dtype=tf.int32))
        topk_negatives_indicator = tf.less_equal(
            tf.cumsum(tf.cast(negatives_indicator, dtype=tf.int32)),
            max_negatives)
        subsampled_selection_indices = tf.where(
            tf.logical_or(positives_indicator, topk_negatives_indicator))
        num_negatives = tf.size(subsampled_selection_indices) - num_positives
        return (tf.reshape(tf.gather(indices, subsampled_selection_indices),
                           [-1]), num_positives, num_negatives)
Example #20
def _token_span_mask(inputs, tgt_len, num_predict, stride=1):
    """Sample token spans as prediction targets."""
    non_pad_len = tgt_len + 1 - stride
    chunk_len_fp = non_pad_len / num_predict
    round_to_int = lambda x: tf.cast(tf.round(x), tf.int64)

    # Sample span lengths from a zipf distribution
    span_len_seq = np.arange(FLAGS.min_tok, FLAGS.max_tok + 1)
    probs = np.array([1.0 / (i + 1) for i in span_len_seq])

    probs /= np.sum(probs)
    logits = tf.constant(np.log(probs), dtype=tf.float32)
    span_lens = tf.random.categorical(
        logits=logits[None],
        num_samples=num_predict,
        dtype=tf.int64,
    )[0] + FLAGS.min_tok

    # Sample the ratio [0.0, 1.0) of left context lengths
    span_lens_fp = tf.cast(span_lens, tf.float32)
    left_ratio = tf.random.uniform(shape=[num_predict], minval=0.0, maxval=1.0)
    left_ctx_len = left_ratio * span_lens_fp * (chunk_len_fp - 1)
    left_ctx_len = round_to_int(left_ctx_len)

    # Compute the offset from left start to the right end
    right_offset = round_to_int(span_lens_fp * chunk_len_fp) - left_ctx_len

    # Get the actual begin and end indices
    beg_indices = (tf.cumsum(left_ctx_len) +
                   tf.cumsum(right_offset, exclusive=True))
    end_indices = beg_indices + span_lens

    # Remove out of range indices
    valid_idx_mask = end_indices < non_pad_len
    beg_indices = tf.boolean_mask(beg_indices, valid_idx_mask)
    end_indices = tf.boolean_mask(end_indices, valid_idx_mask)

    # Shuffle valid indices
    num_valid = tf.cast(tf.shape(beg_indices)[0], tf.int64)
    order = tf.random.shuffle(tf.range(num_valid, dtype=tf.int64))
    beg_indices = tf.gather(beg_indices, order)
    end_indices = tf.gather(end_indices, order)

    return _idx_pair_to_mask(beg_indices, end_indices, inputs, tgt_len,
                             num_predict)
Example #21
 def build_lut(histo, step):
   # Compute the cumulative sum, shifting by step // 2
   # and then normalization by step.
   lut = (tf.cumsum(histo) + (step // 2)) // step
   # Shift lut, prepending with 0.
   lut = tf.concat([[0], lut[:-1]], 0)
   # Clip the counts to be in range.  This is done
   # in the C code for image.point.
   return tf.clip_by_value(lut, 0, 255)
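A hedged end-to-end sketch (my own wiring, mirroring the usual histogram-equalization step that surrounds this helper; it assumes `build_lut` is accessible as a standalone function and that `step` is non-zero): build an 8-bit histogram and remap the channel through the LUT.

import tensorflow as tf

channel = tf.random.uniform([64, 64], 0, 256, dtype=tf.int32)
histo = tf.histogram_fixed_width(channel, [0, 255], nbins=256)
step = (tf.reduce_sum(histo) - histo[-1]) // 255
equalized = tf.gather(build_lut(histo, step), channel)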
Example #22
def top_p_logits(logits, p):
    with tf.variable_scope('top_p_logits'):
        logits_sort = tf.sort(logits, direction='DESCENDING')
        probs_sort = tf.nn.softmax(logits_sort)
        probs_sums = tf.cumsum(probs_sort, axis=1, exclusive=True)
        logits_masked = tf.where(probs_sums < p, logits_sort, tf.ones_like(logits_sort)*1000) # [batchsize, vocab]
        min_logits = tf.reduce_min(logits_masked, axis=1, keepdims=True) # [batchsize, 1]
        return tf.where(
            logits < min_logits,
            tf.ones_like(logits, dtype=logits.dtype) * -1e10,
            logits,
        )
Example #23
def verb_refs_to_lengths(task, verb_refs, include_eos=True):
  """Computes the length of a sequence."""
  eos_positions = tf.to_int32(tf.expand_dims(
      tf.where(tf.equal(task, 1))[:, 1], 1))
  seq_mask = tf.logical_not(tf.cast(tf.cumsum(tf.to_int32(
      tf.logical_and(
          tf.equal(verb_refs[:, :, 0], eos_positions),
          tf.equal(verb_refs[:, :, 1], eos_positions + 1))), axis=-1), tf.bool))
  lengths = tf.reduce_sum(tf.to_float(seq_mask), axis=-1)
  if include_eos:
    lengths = lengths + 1
  return lengths
Example #24
def sequence_accuracy(gt_seqs,
                      decode_seqs,
                      gt_seq_lengths,
                      pr_seq_lengths,
                      debug=False,
                      name=""):
    """Computes the complete and the partial sequence accuracy."""
    gt_shape = common_layers.shape_list(gt_seqs)
    pr_shape = common_layers.shape_list(decode_seqs)
    batch_size = gt_shape[0]
    depth = gt_shape[-1]
    gt_len = gt_shape[1]
    pr_len = pr_shape[1]
    max_len = tf.maximum(gt_len, pr_len)
    gt_seqs = tf.pad(gt_seqs, [[0, 0], [0, max_len - gt_len], [0, 0]])
    decode_seqs = tf.pad(decode_seqs, [[0, 0], [0, max_len - pr_len], [0, 0]])
    gt_seqs = tf.where(
        tf.tile(
            tf.expand_dims(tf.sequence_mask(gt_seq_lengths, maxlen=max_len),
                           2), [1, 1, depth]), gt_seqs,
        tf.fill(tf.shape(gt_seqs), -1))
    decode_seqs = tf.where(
        tf.tile(
            tf.expand_dims(tf.sequence_mask(pr_seq_lengths, maxlen=max_len),
                           2), [1, 1, depth]), decode_seqs,
        tf.fill(tf.shape(decode_seqs), -1))
    # [batch_size, decode_length]
    corrects = tf.reduce_all(tf.equal(gt_seqs, decode_seqs), -1)
    correct_mask = tf.reduce_all(corrects, -1)
    # [batch_size]
    if debug:
        incorrect_mask = tf.logical_not(correct_mask)
        incorrect_gt = tf.boolean_mask(gt_seqs, incorrect_mask)
        incorrect_pr = tf.boolean_mask(decode_seqs, incorrect_mask)
        with tf.control_dependencies([
                tf.print(name + "_mismatch",
                         incorrect_gt,
                         incorrect_pr,
                         summarize=1000)
        ]):
            correct_mask = tf.identity(correct_mask)
    correct_seqs = tf.to_float(correct_mask)
    total_correct_seqs = tf.reduce_sum(correct_seqs)
    mean_complete_accuracy = total_correct_seqs / tf.to_float(batch_size)
    # Compute partial accuracy
    errors = tf.logical_not(corrects)
    errors = tf.cast(tf.cumsum(tf.to_float(errors), axis=-1), tf.bool)
    # [batch_size]
    correct_steps = tf.reduce_sum(tf.to_float(tf.logical_not(errors)), axis=-1)
    mean_partial_accuracy = tf.reduce_mean(
        tf.div(tf.minimum(correct_steps, gt_seq_lengths), gt_seq_lengths))
    return mean_complete_accuracy, mean_partial_accuracy
Example #25
def generate_action_mask(features):
    """Computes the decode mask from "task" and "verb_refs"."""
    eos_positions = tf.to_int32(
        tf.expand_dims(tf.where(tf.equal(features["task"], 1))[:, 1], 1))
    decode_mask = tf.cumsum(tf.to_int32(
        tf.logical_and(
            tf.equal(features["verb_refs"][:, :, 0], eos_positions),
            tf.equal(features["verb_refs"][:, :, 1], eos_positions + 1))),
                            axis=-1)
    decode_mask = tf.sequence_mask(tf.reduce_sum(
        tf.to_int32(tf.less(decode_mask, 1)), -1),
                                   maxlen=tf.shape(decode_mask)[1])
    return decode_mask
Example #26
def ComputeChainStats(chain, target_mean, num_leapfrog_steps):
    # Chain is [num_steps, batch, num_dims]
    num_steps = tf.shape(chain)[0]
    counts = tf.to_float(tf.range(1, num_steps + 1))
    chain_mean = tf.cumsum(chain, 0) / counts[:, tf.newaxis, tf.newaxis]

    bias = target_mean - tf.reduce_mean(chain_mean, 1)
    variance = tf.reduce_mean(
        tf.square(chain_mean - tf.reduce_mean(chain_mean, 1, keep_dims=True)),
        1)
    inst_bias = target_mean - tf.reduce_mean(chain, 1)
    inst_variance = tf.reduce_mean(tf.square(target_mean - chain), 1)

    def reducer(_, idx):
        chain_mean = tf.reduce_mean(chain[idx // 2:idx], 0)
        bias = tf.reduce_mean(target_mean - chain_mean, 0)
        variance = tf.reduce_mean(
            tf.square(chain_mean - tf.reduce_mean(chain_mean, 0)), 0)
        return bias, variance

    indices = 1 + tf.range(num_steps)
    warmupped_bias, warmupped_variance = tf.scan(reducer,
                                                 indices,
                                                 initializer=(chain[0, 0],
                                                              chain[0, 0]))

    half_steps = num_steps // 2
    half_chain = chain[half_steps:]

    error_sq = tf.reduce_mean(
        tf.square(tf.reduce_mean(half_chain, 0) - target_mean), 0)

    ess = utils.EffectiveSampleSize(half_chain) / tf.to_float(half_steps)
    ess_per_grad = ess / tf.to_float(num_leapfrog_steps)
    rhat = tfp.mcmc.potential_scale_reduction(half_chain)
    autocorr = tf.reduce_mean(
        utils.SanitizedAutoCorrelation(half_chain, 0, max_lags=300), 1)

    return ChainStats(bias=bias,
                      variance=variance,
                      error_sq=error_sq,
                      inst_bias=inst_bias,
                      inst_variance=inst_variance,
                      ess=ess,
                      ess_per_grad=ess_per_grad,
                      rhat=rhat,
                      warmupped_bias=warmupped_bias,
                      warmupped_variance=warmupped_variance,
                      autocorr=autocorr)
Example #27
def top_p_logits(logits, p):
    """Nucleus sampling"""
    batch, _ = logits.shape.as_list()
    sorted_logits = tf.sort(logits, direction='DESCENDING', axis=-1)
    cumulative_probs = tf.cumsum(tf.nn.softmax(sorted_logits, axis=-1), axis=-1)
    indices = tf.stack([
        tf.range(0, batch),
        # number of indices to include
        tf.maximum(tf.reduce_sum(tf.cast(cumulative_probs <= p, tf.int32), axis=-1) - 1, 0),
    ], axis=-1)
    min_values = tf.gather_nd(sorted_logits, indices)
    return tf.where(
        logits < min_values,
        tf.ones_like(logits) * -1e10,
        logits,
    )
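A tiny demo (mine; assumes TF2 eager execution and the `top_p_logits` defined just above): with token probabilities [0.5, 0.3, 0.15, 0.05] and p=0.85, only the two most likely tokens survive the cutoff.

import tensorflow as tf

logits = tf.math.log(tf.constant([[0.5, 0.3, 0.15, 0.05]]))
filtered = top_p_logits(logits, p=0.85)
print(tf.nn.softmax(filtered).numpy())  # ~[[0.625 0.375 0. 0.]]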
Example #28
def _distributional_to_value(value_d, size, subscale, threshold):
  """Get a scalar value out of a value distribution in distributional RL."""
  half = size // 2
  value_range = (tf.to_float(tf.range(-half, half)) + 0.5) * subscale
  probs = tf.nn.softmax(value_d)

  if threshold == 0.0:
    return tf.reduce_sum(probs * value_range, axis=-1)

  # accumulated_probs[..., i] is the sum of probabilities in buckets upto i
  # so it is the probability that value <= i'th bucket value
  accumulated_probs = tf.cumsum(probs, axis=-1)
  # New probs are 0 on all lower buckets, until the threshold
  probs = tf.where(accumulated_probs < threshold, tf.zeros_like(probs), probs)
  probs /= tf.reduce_sum(probs, axis=-1, keepdims=True)  # Re-normalize.
  return tf.reduce_sum(probs * value_range, axis=-1)
Example #29
    def test_readme_example(self):
        data = tf.random.uniform((128, 128), 0, 10, dtype=tf.int32)
        histogram = tf.bincount(data, minlength=10, maxlength=10)
        cdf = tf.cumsum(histogram, exclusive=False)
        cdf = tf.pad(cdf, [[1, 0]])
        cdf = tf.reshape(cdf, [1, 1, -1])

        data = tf.cast(data, tf.int16)
        encoded = range_coding_ops.range_encode(data, cdf, precision=14)
        decoded = range_coding_ops.range_decode(encoded,
                                                tf.shape(data),
                                                cdf,
                                                precision=14)

        with self.cached_session() as sess:
            self.assertAllEqual(*sess.run((data, decoded)))
Example #30
def categorical_case(pmf, fns, rand=None):
  """Returns the outputs of fns[i] with probability pmf[i].

  Args:
    pmf: A 1-D tensor of probabilities, the probability mass function.
    fns: A list of callables that return tensors, same length as pmf.
    rand: An optional scalar between 0.0 and 1.0, the output of an RNG.

  Returns:
    A tensor, the output of fns[i] with probability pmf[i].
  """
  rand = tf.random_uniform([]) if rand is None else rand
  cmf = tf.pad(tf.cumsum(pmf), [(1, 0)])
  cmf = [cmf[i] for i in range(len(fns) + 1)]
  preds = [(rand >= a) & (rand < b) for a, b in zip(cmf[:-1], cmf[1:])]
  return tf.case(list(zip(preds, fns)), exclusive=True)
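Hedged usage sketch (mine; assumes the TF1-style `tf.random_uniform`/`tf.case` APIs used above and that `tf` is imported): pick one of three constant branches with probabilities 0.2 / 0.3 / 0.5.

out = categorical_case(
    tf.constant([0.2, 0.3, 0.5]),
    [lambda: tf.constant(0), lambda: tf.constant(1), lambda: tf.constant(2)])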