def boolean_mask(boxlist, indicator, fields=None, scope=None,
                 use_static_shapes=False, indicator_sum=None):
  """Select boxes from BoxList according to indicator and return new BoxList.

  `boolean_mask` returns the subset of boxes that are marked as "True" by the
  indicator tensor. By default, `boolean_mask` returns boxes corresponding to
  the input index list, as well as all additional fields stored in the boxlist
  (indexing into the first dimension).  However one can optionally only draw
  from a subset of fields.

  Args:
    boxlist: BoxList holding N boxes
    indicator: a rank-1 boolean tensor
    fields: (optional) list of fields to also gather from.  If None (default),
      all fields are gathered from.  Pass an empty fields list to only gather
      the box coordinates.
    scope: name scope.
    use_static_shapes: Whether to use an implementation with static shape
      guarantees.
    indicator_sum: An integer containing the sum of `indicator` vector. Only
      required if `use_static_shapes` is True.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
      specified by indicator
  Raises:
    ValueError: if `indicator` is not a rank-1 boolean tensor, if
      `use_static_shapes` is True and `indicator_sum` is not a nonzero int, or
      (dynamic-shape path) if a requested field is missing from `boxlist`.
  """
  with tf.name_scope(scope, 'BooleanMask'):
    if indicator.shape.ndims != 1:
      raise ValueError('indicator should have rank 1')
    if indicator.dtype != tf.bool:
      raise ValueError('indicator should be a boolean tensor')
    if use_static_shapes:
      # The truthiness test also rejects None and 0.
      if not (indicator_sum and isinstance(indicator_sum, int)):
        raise ValueError('`indicator_sum` must be a nonzero int')
      selected_positions = tf.cast(indicator, dtype=tf.float32)
      # 1-based rank of every selected position; 0 for unselected positions.
      indexed_positions = tf.cast(
          tf.multiply(
              tf.cumsum(selected_positions), selected_positions),
          dtype=tf.int32)
      # Row i is one-hot at column (rank - 1) when position i is selected.
      # Unselected positions map to index -1, which one_hot encodes as zeros.
      one_hot_selector = tf.one_hot(
          indexed_positions - 1, indicator_sum, dtype=tf.float32)
      # Contract the position index against the selector: output slot k holds
      # the index of the k-th selected position.
      sampled_indices = tf.cast(
          tf.tensordot(
              tf.cast(tf.range(tf.shape(indicator)[0]), dtype=tf.float32),
              one_hot_selector,
              axes=[0, 0]),
          dtype=tf.int32)
      # Forward `fields` so both code paths honour the same field selection.
      return gather(boxlist, sampled_indices, fields=fields,
                    use_static_shapes=True)
    else:
      subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator))
      if fields is None:
        fields = boxlist.get_extra_fields()
      for field in fields:
        if not boxlist.has_field(field):
          raise ValueError('boxlist must contain all specified fields')
        subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator)
        subboxlist.add_field(field, subfieldlist)
      return subboxlist
Esempio n. 2
0
 def testMaskingForwardBias(self):
     """Checks that pruned bias entries are zero after each forward call."""
     dense = tf.keras.layers.Dense(12)
     masked = layers.MaskedLayer(dense, name='test')
     inputs = tf.random.uniform((3, 5))
     # The first call builds the layer and initializes its parameters.
     masked(inputs)
     bias = masked.layer.weights[1]
     bias_mask = tf.random.uniform(bias.shape, maxval=2, dtype=tf.int32)
     # True exactly where the mask is zero, i.e. at the pruned positions.
     pruned = tf.logical_not(tf.cast(bias_mask, tf.bool))

     def pruned_nonzero_count():
         # Number of non-zero bias entries at the pruned positions.
         return tf.math.count_nonzero(tf.boolean_mask(bias, pruned))

     masked.set_mask(bias_mask, is_bias=True)
     with tf.GradientTape() as tape:
         outputs = masked(inputs)
         # The forward call is expected to have zeroed the pruned entries.
         self.assertEqual(pruned_nonzero_count(), 0)
         loss = tf.reduce_sum(outputs)
     grads = tape.gradient(loss, dense.variables)
     optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
     optimizer.apply_gradients(list(zip(grads, dense.variables)))
     # The update touches every entry, so pruned ones need not be zero anymore.
     self.assertNotEqual(pruned_nonzero_count(), 0)
     # Another forward call (return value unused) must zero them again.
     masked(inputs)
     self.assertEqual(pruned_nonzero_count(), 0)
Esempio n. 3
0
def get_discriminator_loss(learner_agent_output, env_output,
                           actor_agent_output, actor_action, reward_clipping,
                           discounting, baseline_cost, entropy_cost,
                           num_steps):
    """Computes the weighted cross-entropy discriminator loss.

    Logits and labels are both filtered with the mask returned by
    `utils.get_first_true_column` for `observation['disc_mask']`. Several
    histogram/scalar summaries are written at step `num_steps`.

    Returns:
      The element-wise `tf.nn.weighted_cross_entropy_with_logits` result
      (`pos_weight=5`) for the selected labels and logits.
    """
    # These arguments are not used by this loss.
    del (actor_agent_output, actor_action, reward_clipping, discounting,
         baseline_cost, entropy_cost)

    observation = env_output.observation
    selection = utils.get_first_true_column(observation['disc_mask'])
    logits = tf.boolean_mask(
        tf.squeeze(learner_agent_output.policy_logits, axis=1), selection)
    affine_a, affine_b = learner_agent_output.baseline

    # Keep only the labels at the selected positions.
    targets = tf.boolean_mask(
        tf.cast(observation['label'], tf.float32), selection)

    is_positive = tf.equal(targets, tf.constant(1.0))
    logits_for_positive = tf.boolean_mask(logits, is_positive)
    tf.summary.histogram('distribution/sigmoid_positive_logits',
                         tf.sigmoid(logits_for_positive),
                         step=num_steps)
    tf.summary.histogram('distribution/positive_logits',
                         logits_for_positive,
                         step=num_steps)

    is_negative = tf.equal(targets, tf.constant(0.0))
    logits_for_negative = tf.boolean_mask(logits, is_negative)
    tf.summary.histogram('distribution/sigmoid_negative_logits',
                         tf.sigmoid(logits_for_negative),
                         step=num_steps)
    tf.summary.histogram('distribution/negative_logits',
                         logits_for_negative,
                         step=num_steps)

    positive_fraction = tf.reduce_mean(tf.cast(is_positive, tf.float32))
    tf.summary.scalar('labels/positive_label', positive_fraction,
                      step=num_steps)
    tf.summary.scalar('labels/labels', tf.reduce_mean(targets), step=num_steps)
    tf.summary.scalar('affine_transform/a', tf.reduce_mean(affine_a),
                      step=num_steps)
    tf.summary.scalar('affine_transform/b', tf.reduce_mean(affine_b),
                      step=num_steps)

    return tf.nn.weighted_cross_entropy_with_logits(
        labels=targets, logits=logits, pos_weight=5)
Esempio n. 4
0
def _get_discriminator_logits(learner_agent_output, env_output,
                              actor_agent_output, actor_action,
                              reward_clipping, discounting, baseline_cost,
                              entropy_cost, num_steps):
    """Selects discriminator labels and logits and logs diagnostics.

    Logits and labels are both filtered with the mask returned by
    `utils.get_first_true_column` for `observation['disc_mask']`. Several
    histogram/scalar summaries are written at step `num_steps`.

    Returns:
      A `(labels, output_logits)` tuple of the selected float labels and the
      matching logits.
    """
    # These arguments are not used here.
    del (actor_agent_output, actor_action, reward_clipping, discounting,
         baseline_cost, entropy_cost)

    observation = env_output.observation
    selection = utils.get_first_true_column(observation['disc_mask'])
    # Original author's note: policy_logits is [time, batch] and the masked
    # result is [batch] -- not verifiable from this function alone.
    logits = tf.boolean_mask(learner_agent_output.policy_logits, selection)
    affine_a, affine_b = learner_agent_output.baseline

    all_labels = tf.cast(observation['label'], tf.float32)
    tf.summary.scalar('labels/mean_labels before masking',
                      tf.reduce_mean(all_labels),
                      step=num_steps)
    # Keep only the labels at the selected positions.
    targets = tf.boolean_mask(all_labels, selection)

    is_positive = tf.equal(targets, tf.constant(1.0))
    logits_for_positive = tf.boolean_mask(logits, is_positive)
    tf.summary.histogram('distribution/sigmoid_positive_logits',
                         tf.sigmoid(logits_for_positive),
                         step=num_steps)
    tf.summary.histogram('distribution/positive_logits',
                         logits_for_positive,
                         step=num_steps)

    is_negative = tf.equal(targets, tf.constant(0.0))
    logits_for_negative = tf.boolean_mask(logits, is_negative)
    tf.summary.histogram('distribution/sigmoid_negative_logits',
                         tf.sigmoid(logits_for_negative),
                         step=num_steps)
    tf.summary.histogram('distribution/negative_logits',
                         logits_for_negative,
                         step=num_steps)

    positive_fraction = tf.reduce_mean(tf.cast(is_positive, tf.float32))
    tf.summary.scalar('labels/positive_label_ratio', positive_fraction,
                      step=num_steps)
    tf.summary.scalar('affine_transform/a', tf.reduce_mean(affine_a),
                      step=num_steps)
    tf.summary.scalar('affine_transform/b', tf.reduce_mean(affine_b),
                      step=num_steps)
    return targets, logits
Esempio n. 5
0
 def result(self):
     """Computes the expected calibration error.

     Entries whose count is zero are dropped. The result is the count-weighted
     sum of |accuracy - confidence| over the remaining entries.
     """
     occupied = tf.math.not_equal(self.counts, 0)

     def keep_occupied(values):
         # Drop the entries that received no samples.
         return tf.boolean_mask(values, occupied)

     kept_correct = keep_occupied(self.correct_sums)
     kept_prob = keep_occupied(self.prob_sums)
     kept_counts = keep_occupied(self.counts)
     accuracy = kept_correct / kept_counts
     confidence = kept_prob / kept_counts
     weight = kept_counts / tf.reduce_sum(kept_counts)
     return tf.reduce_sum(weight * tf.abs(accuracy - confidence))
  def test_sampled_weights_follow_correct_distribution(self):
    """Compares `_resample_weights` draws with the analytic weight posterior.

    Builds a random linear regression problem with some timesteps marked as
    missing, computes the posterior over weights with `linear_gaussian_update`
    using only the non-missing timesteps, and checks that the empirical mean
    and covariance of 10000 sampled weights are close to it.
    """
    seed = test_util.test_seed(sampler_type='stateless')
    # One seed per random quantity used below.
    design_seed, true_weights_seed, sampled_weights_seed = samplers.split_seed(
        seed, 3, 'test_sampled_weights_follow_correct_distribution')
    num_timesteps = 10
    num_features = 2
    batch_shape = [3, 1]
    design_matrix = samplers.normal(
        batch_shape + [num_timesteps, num_features], seed=design_seed)
    true_weights = samplers.normal(
        batch_shape + [num_features, 1], seed=true_weights_seed) * 10.0
    # Targets are an exact linear function of the design matrix.
    targets = tf.matmul(design_matrix, true_weights)
    # Timesteps 3, 4 and 7 are flagged as missing.
    is_missing = tf.convert_to_tensor([False, False, False, True, True,
                                       False, False, True, False, False],
                                      dtype=tf.bool)
    prior_scale = tf.convert_to_tensor(5.)
    likelihood_scale = tf.convert_to_tensor(0.1)

    # Analytically compute the true posterior distribution on weights.
    # Missing timesteps are dropped along the time axis (-2) first.
    valid_design_matrix = tf.boolean_mask(design_matrix, ~is_missing, axis=-2)
    valid_targets = tf.boolean_mask(targets, ~is_missing, axis=-2)
    num_valid_observations = tf.shape(valid_design_matrix)[-2]
    weights_posterior_mean, weights_posterior_cov, _ = linear_gaussian_update(
        prior_mean=tf.zeros([num_features, 1]),
        prior_cov=tf.eye(num_features) * prior_scale**2,
        observation_matrix=tfl.LinearOperatorFullMatrix(valid_design_matrix),
        observation_noise=tfd.MultivariateNormalDiag(
            loc=tf.zeros([num_valid_observations]),
            scale_diag=likelihood_scale * tf.ones([num_valid_observations])),
        x_observed=valid_targets)

    # Check that the empirical moments of sampled weights match the true values.
    # The lambda ignores the loop index `i`; the sampler receives the full
    # design matrix together with `is_missing`.
    # NOTE(review): every iteration is given the same `sampled_weights_seed`;
    # presumably pfor still yields distinct draws -- confirm.
    sampled_weights = parallel_for.pfor(
        lambda i: gibbs_sampler._resample_weights(  # pylint: disable=g-long-lambda
            design_matrix=design_matrix,
            target_residuals=targets[..., 0],
            observation_noise_scale=likelihood_scale,
            weights_prior_scale=prior_scale,
            is_missing=is_missing,
            seed=sampled_weights_seed),
        10000)
    sampled_weights_mean = tf.reduce_mean(sampled_weights, axis=0)
    # Center on the analytic mean, then average the outer products over the
    # sample axis to estimate the covariance.
    centered_weights = sampled_weights - weights_posterior_mean[..., 0]
    sampled_weights_cov = tf.reduce_mean(centered_weights[..., :, tf.newaxis] *
                                         centered_weights[..., tf.newaxis, :],
                                         axis=0)

    (sampled_weights_mean_, weights_posterior_mean_,
     sampled_weights_cov_, weights_posterior_cov_) = self.evaluate((
         sampled_weights_mean, weights_posterior_mean[..., 0],
         sampled_weights_cov, weights_posterior_cov))
    self.assertAllClose(sampled_weights_mean_, weights_posterior_mean_,
                        atol=0.01, rtol=0.05)
    self.assertAllClose(sampled_weights_cov_, weights_posterior_cov_,
                        atol=0.01, rtol=0.05)
Esempio n. 7
0
    def map_fn(filename):
      """Builds a one-element dataset with a video/audio clip around an event.

      Reads `<filename>_times.txt`, picks one event time uniformly at random
      (restricted to rows whose third field equals `event_filter` when that is
      set), then cuts `video_length` frames and `audio_length` samples starting
      half of `example_secs` before the event.

      Args:
        filename: path prefix shared by the `_times.txt`,
          `_denoised_thumb.mp4` and `_denoised.wav` files.

      Returns:
        A `tf.data.Dataset` over `{'frames': ..., 'audio': ...}`, each with a
        leading dimension of size 1.
      """
      metadata = tf.io.read_file(filename + '_times.txt')
      metadata = tf.strings.split(tf.strings.split(metadata, sep='\n'), sep=' ')
      # The last row is dropped (presumably the empty line after a trailing
      # newline -- confirm against the file format).
      event_times = tf.strings.to_number(metadata[:-1, :1], out_type=tf.float32)
      # Flatten with reshape rather than an axis-less squeeze, so that a file
      # with a single event still gives a rank-1 tensor for tf.shape(...)[0].
      event_times = tf.reshape(event_times.to_tensor(), [-1])
      if event_filter:
        # Use the same rows as `event_times` so mask and values line up.
        mask = tf.equal(metadata[:-1, 2:3], event_filter).to_tensor()
        mask = tf.reshape(mask, [-1])
        event_times = tf.boolean_mask(event_times, mask)
      event_indices = tf.random.uniform(
          [], maxval=tf.shape(event_times)[0], dtype=tf.int32)
      event_times = tf.gather(event_times, event_indices)

      video = tf.io.read_file(filename + '_denoised_thumb.mp4')
      decoded_video = tfio.experimental.ffmpeg.decode_video(video)
      video_start_index = tf.cast(
          (event_times - (example_secs / 2.0)) * video_frame_rate, tf.int32)
      clipped_video = decoded_video[
          video_start_index:(video_start_index + video_length), ...]
      clipped_video = tf.expand_dims(clipped_video, axis=0)

      audio = tf.io.read_file(filename + '_denoised.wav')
      decoded_audio, _ = tf.audio.decode_wav(audio, desired_channels=1)
      # decode_wav was asked for one channel; drop only that channel axis.
      decoded_audio = tf.expand_dims(tf.squeeze(decoded_audio, axis=-1), axis=0)
      # Convert the video start frame to the matching audio sample index.
      audio_start_index = tf.cast(
          video_start_index / video_frame_rate * audio_sample_rate, tf.int32)
      clipped_audio = decoded_audio[
          :, audio_start_index:(audio_start_index + audio_length)]
      return tf.data.Dataset.from_tensor_slices(
          {'frames': clipped_video, 'audio': clipped_audio})
Esempio n. 8
0
 def testSetBiasMasking(self):
     """Checks validation, storage and re-assignment of a bias mask."""
     l = tf.keras.layers.Dense(12, bias_initializer='glorot_uniform')
     ml = layers.MaskedLayer(l, name='test')
     # A size-10 mask on the not-yet-built layer is expected to be rejected.
     with self.assertRaises(AssertionError):
         ml.set_mask(tf.zeros(10), is_bias=True)
     x = tf.random.uniform((3, 5))
     # Building the layer and initializing the parameters.
     ml(x)
     with self.assertRaises(AssertionError):
         # Wrong mask shape. The shape must be passed as a tuple:
         # `tf.zeros(5, 12)` would treat 12 as the dtype argument.
         ml.set_mask(tf.zeros((5, 12)), is_bias=True)
     l_bias = ml.layer.weights[1]
     b_mask = tf.random.uniform(l_bias.shape, maxval=2, dtype=tf.int32)
     ml.set_mask(b_mask, is_bias=True)
     self.assertIsInstance(ml.mask_bias, tf.Variable)
     self.assertAllEqual(b_mask, ml.mask_bias.numpy())
     self.assertEqual(l_bias.dtype, ml.mask_bias.dtype)
     # Check the assign works.
     b_mask = tf.random.uniform(l_bias.shape, maxval=2, dtype=tf.int32)
     ml.set_mask(b_mask, is_bias=True)
     self.assertAllEqual(b_mask, ml.mask_bias.numpy())
     # Setting the bias mask must leave the weight mask at all ones.
     self.assertAllEqual(ml.mask_weight.numpy(),
                         tf.ones_like(ml.mask_weight))
     # Pruned positions of the mask that is currently set (not the earlier,
     # superseded one).
     b_mask_not_bool = tf.logical_not(tf.cast(b_mask, tf.bool))
     # No forward call has happened since the mask was set, so the bias is
     # not masked yet.
     self.assertNotEqual(
         tf.math.count_nonzero(tf.boolean_mask(l_bias, b_mask_not_bool)), 0)
Esempio n. 9
0
    def test_compare_ragged_with_masks(self, layer):
        """Checks that masked dense input and ragged input give equal outputs.

        Args:
          layer: recurrent layer class; instantiated here with
            `return_sequences=True`.
        """
        vocab_size, timestep, units = 100, 20, 32
        embedder = keras.layers.Embedding(input_dim=vocab_size,
                                          output_dim=units)
        layer = layer(units, return_sequences=True)
        rng = np.random.RandomState(0)
        data = tf.constant(rng.randint(0, vocab_size, [timestep, timestep]))
        # Row i keeps its first i + 1 entries.
        mask = tf.sequence_mask(tf.range(1, timestep + 1))
        data_ragged = tf.ragged.boolean_mask(data, mask)

        devices = [test_utils.device(should_use_gpu=False)]
        if tf.test.is_gpu_available():
            devices.append(test_utils.device(should_use_gpu=True))

        outputs = []
        for device in devices:
            with device:
                dense_result = layer(embedder(data), mask=mask)
                outputs.append(tf.boolean_mask(dense_result, mask))
                ragged_result = layer(embedder(data_ragged))
                outputs.append(ragged_result.values)

        # Each result must agree with the one collected right after it.
        for current, following in zip(outputs, outputs[1:]):
            self.assertAllClose(current, following, atol=1e-4)
Esempio n. 10
0
 def calculate_inliers(self, dot_bounding_boxes_sizes_cv):
     """Marks which projected/found point pairs count as inliers.

     With `self.calculate_inliers_within_bounding_box` set, a pair is an inlier
     when every coordinate of its offset is smaller in magnitude than the found
     dot's bounding-box size along that coordinate. Otherwise the Euclidean
     offset must be below `inlier_cutoff_px`. The surviving index pairs are
     stored in `self.comparison_inlier_indices` and
     `self.projected_inlier_indices`.
     """
     projected = self.projected_points[self.projected_indices, :]
     found = self.comparison_points_cv[self.comparison_indices, :]
     offsets = projected - found
     if not self.calculate_inliers_within_bounding_box:
         inliers = np.linalg.norm(offsets, axis=-1) < inlier_cutoff_px
     else:
         bb_sizes = dot_bounding_boxes_sizes_cv[self.comparison_indices, :]
         # Offset as a fraction of the box size; the worst coordinate decides.
         worst_fraction = np.max(np.abs(offsets / bb_sizes), axis=-1)
         inliers = worst_fraction < 1.0
     self.comparison_inlier_indices = tf.boolean_mask(self.comparison_indices, inliers)
     self.projected_inlier_indices = tf.boolean_mask(self.projected_indices, inliers)
    def _sample_paths(self, times, time_step, num_samples, random_type, skip,
                      seed):
        """Returns a sample of paths from the process.

        Simulation runs on the grid built by `utils.prepare_grid`; results are
        then restricted to the grid points flagged by its mask.

        Returns:
          A tuple `(rate_paths, discount_factor_paths, x_paths, y_paths)`.
        """
        start_state = tf.zeros((self._dim, ), dtype=self._dtype)
        # A finer simulation grid (controlled by `time_step`) is needed to
        # compute discount factors accurately; the requested `times` alone
        # might not be granular enough.
        times, keep_mask, _ = utils.prepare_grid(times=times,
                                                 time_step=time_step,
                                                 dtype=self._dtype)
        step_sizes = times[1:] - times[:-1]

        # Per the original notes the sampled state has shape
        # (num_samples, num_times, nfactors + nfactors^2).
        state_paths = euler_sampling.sample(self._dim,
                                            self._drift_fn,
                                            self._volatility_fn,
                                            times,
                                            num_samples=num_samples,
                                            initial_state=start_state,
                                            random_type=random_type,
                                            seed=seed,
                                            time_step=time_step,
                                            skip=skip)

        # The leading `self._factors` components are x, the remainder is y.
        x_paths = state_paths[..., :self._factors]
        y_paths = state_paths[..., self._factors:]

        forward_rates = self._instant_forward_rate_fn(times)
        rate_paths = tf.math.reduce_sum(x_paths, axis=-1) + forward_rates

        # One-step discount factors, with a leading column of ones, turned
        # into cumulative products along the time axis.
        step_discounts = tf.math.exp(-rate_paths[:, :-1] * step_sizes)
        leading_ones = tf.ones((num_samples, 1), dtype=self._dtype)
        discount_factor_paths = utils.cumprod_using_matvec(
            tf.concat([leading_ones, step_discounts], axis=1))

        return tuple(
            tf.boolean_mask(paths, keep_mask, axis=1)
            for paths in (rate_paths, discount_factor_paths, x_paths, y_paths))
Esempio n. 12
0
 def _apply_mask(x):
     """Returns the entries of `x` selected by `mask`, or column 0 if none are."""

     def select_masked():
         # Keep the elements where the enclosing `mask` is True.
         return tf.boolean_mask(x, mask)

     def select_first_column():
         # Fallback when `mask` is all False: index 0 along axis 1.
         return tf.gather(x, 0, axis=1)

     return tf.cond(tf.math.reduce_any(mask), select_masked,
                    select_first_column)
Esempio n. 13
0
    def test_posterior_on_nonzero_subset_matches_bayesian_regression(self):
        """Compares the sampler's initial posterior with a restricted regression."""
        num_outputs = 20
        # Generate a synthetic regression task.
        design_matrix, _, targets = self.evaluate(
            self._random_regression_task(num_features=5,
                                         num_outputs=num_outputs,
                                         batch_shape=[2],
                                         seed=test_util.test_seed()))

        # Features 0, 2 and 4 are the ones with nonzero weight.
        nonzeros = tf.convert_to_tensor([True, False, True, False, True])

        def nonzero_subvector(x):
            # Keep the selected entries along the last axis.
            return tf.boolean_mask(x, nonzeros, axis=ps.rank(x) - 1)

        def nonzero_submatrix(x):
            # Keep the selected rows, then the selected columns.
            selected_rows = tf.boolean_mask(x, nonzeros, axis=ps.rank(x) - 2)
            return tf.boolean_mask(selected_rows, nonzeros,
                                   axis=ps.rank(x) - 1)

        # Compute the weight posterior mean and precision for these nonzeros.
        sampler = spike_and_slab.SpikeSlabSampler(design_matrix)
        initial_state = sampler._initialize_sampler_state(targets=targets,
                                                          nonzeros=nonzeros)

        # Analytic posterior for the regression problem restricted to the
        # selected features. Slicing a submatrix of the prior *precision*
        # implicitly conditions on the other weights being zero (sensible
        # here); slicing the covariance instead would give the marginal
        # (unconditional) prior on the selected weights.
        restricted_prior_scale = tf.linalg.cholesky(
            tf.linalg.inv(nonzero_submatrix(sampler.weights_prior_precision)))
        (restricted_weights_posterior_mean,
         restricted_weights_posterior_prec) = tfd.mvn_conjugate_linear_update(
             prior_scale=restricted_prior_scale,
             linear_transformation=nonzero_subvector(design_matrix),
             likelihood_scale=tf.eye(num_outputs),
             observation=targets)

        # The sampler's posterior should match the posterior from the
        # restricted problem.
        self.assertAllClose(
            nonzero_subvector(initial_state.conditional_weights_mean),
            restricted_weights_posterior_mean)
        self.assertAllClose(
            nonzero_submatrix(
                initial_state.conditional_posterior_precision_chol),
            tf.linalg.cholesky(restricted_weights_posterior_prec.to_dense()))
Esempio n. 14
0
    def compute_loss_and_acc(
        self,
        rnn_output_logits: tf.Tensor,
        batch_features: Dict[str, tf.Tensor],
        batch_labels: Dict[str, tf.Tensor],
    ) -> LanguageModelLoss:
        """Computes the summed token loss and prediction counts.

        Args:
            rnn_output_logits: logits as computed by the language model, with
                the vocabulary on axis 2.
            batch_features: batch feature dictionary; not used here.
            batch_labels: dictionary whose "target_value" entry holds the
                target token sequence (cast to tf.int32).

        Returns:
            LanguageModelLoss tuple, containing the cross-entropy loss summed
            over all non-padding tokens, the number of non-padding token
            predictions, and how many of those were correct.

        Note:
            The first target token is skipped: logits are compared against
            target_token_seq[:, 1:].
            NOTE(review): the logits are used without slicing, so their time
            axis must already line up with target_token_seq[:, 1:] -- confirm
            with the caller.
        """
        del batch_features  # Not needed for the loss or the counts.

        target_token_seq = tf.cast(batch_labels["target_value"], tf.int32)
        shifted_targets = target_token_seq[:, 1:]

        # True for every real (non-padding) target token.
        pad_id = self.vocab_target.get_id_or_unk(self.vocab_target.get_pad())
        mask = tf.math.not_equal(shifted_targets, pad_id)
        num_tokens = tf.math.count_nonzero(mask)

        prediction = tf.cast(tf.argmax(rnn_output_logits, 2), tf.int32)
        # A prediction only counts when it is right and not on padding.
        is_correct = tf.math.logical_and(
            tf.math.equal(shifted_targets, prediction), mask)
        num_correct_tokens = tf.math.count_nonzero(is_correct)

        # Mask out CE loss for padding tokens, then sum the remainder.
        token_ce_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=tf.boolean_mask(rnn_output_logits, mask),
            labels=tf.boolean_mask(shifted_targets, mask))
        token_ce_loss = tf.reduce_sum(token_ce_loss)

        return LanguageModelLoss(token_ce_loss, num_tokens, num_correct_tokens)
    def compute_loss_and_acc(self, rnn_output_logits: tf.Tensor,
                             target_token_seq: tf.Tensor) -> LanguageModelLoss:
        """Computes the mean token loss and prediction counts.

        Args:
            rnn_output_logits: tf.float32 Tensor of shape [B, T, V], representing
                logits as computed by the language model.
            target_token_seq: tf.int32 Tensor of shape [B, T], representing
                the target token sequence.

        Returns:
            LanguageModelLoss tuple, containing both the average per-token loss
            as well as the number of (non-padding) token predictions and how many
            of those were correct.

        Note:
            We assume that the two inputs are shifted by one from each other, i.e.,
            that rnn_output_logits[i, t, :] are the logits for sample i after consuming
            input t; hence its target output is assumed to be target_token_seq[i, t+1].
        """
        # Align the two sequences: drop the first target and the last logit.
        shifted_targets = target_token_seq[:, 1:]
        aligned_logits = rnn_output_logits[:, :-1, :]

        token_ce_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=shifted_targets, logits=aligned_logits)

        # True for every real (non-padding) target token.
        pad_id = self.vocab.get_id_or_unk(self.vocab.get_pad())
        mask = tf.logical_not(tf.equal(target_token_seq, pad_id))[:, 1:]

        # tf.argmax yields int64 while the targets are documented as int32;
        # tf.equal needs matching dtypes, so cast the predictions first.
        predicted_tokens = tf.cast(
            tf.argmax(aligned_logits, axis=2), shifted_targets.dtype)
        # Per-token correctness with the padding positions dropped.
        predictions_status = tf.boolean_mask(
            tf.equal(shifted_targets, predicted_tokens), mask)

        # NOTE(review): `len` on a tensor needs a known leading dimension;
        # kept unchanged so the returned type stays the same for callers.
        num_tokens = len(predictions_status)
        num_correct_tokens = tf.math.count_nonzero(predictions_status,
                                                   dtype=tf.float32)

        # Mask out CE loss for padding tokens and average what is left.
        token_ce_loss = tf.boolean_mask(token_ce_loss, mask)
        token_ce_loss = tf.reduce_mean(token_ce_loss)

        return LanguageModelLoss(token_ce_loss, num_tokens, num_correct_tokens)
 def get_implied_vol(self, time, strike, paths, spot, r, dtype):
   """Returns the implied volatility of a call payoff averaged over `paths`.

   NaN entries of `paths` are discarded. The mean of `relu(paths - strike)`
   is discounted with `exp(-r * time)` and passed to `bs.implied_vol`.
   """
   rate = tf.convert_to_tensor(r, dtype=dtype)
   discount_factor = tf.math.exp(-rate * time)
   is_valid = tf.math.logical_not(tf.math.is_nan(paths))
   valid_paths = tf.boolean_mask(paths, is_valid)
   payoffs = tf.nn.relu(valid_paths - strike)
   option_value = tf.math.reduce_mean(payoffs)
   return bs.implied_vol(
       prices=discount_factor * option_value,
       strikes=strike,
       expiries=time,
       spots=spot,
       discount_factors=discount_factor,
       dtype=dtype)
Esempio n. 17
0
def _create_pde_time_grid(exercise_times, time_step_fd, dtype):
    """Creates the PDE time grid.

    The grid is the sorted union of the distinct exercise times and a uniform
    grid from 0 up to the longest exercise time, with points closer than
    `_PDE_TIME_GRID_TOL` to their predecessor removed.

    Args:
      exercise_times: tensor of exercise times (any shape; it is flattened).
      time_step_fd: spacing of the uniform grid. If None, one hundredth of
        the longest exercise time is used.
      dtype: dtype of the uniform grid.

    Returns:
      A tuple `(pde_time_grid, pde_time_grid_dt)` of the grid and the distance
      from each retained point to its predecessor in the sorted grid (the
      first entry is a sentinel of 100.0).
    """
    unique_exercise_times, _ = tf.unique(tf.reshape(exercise_times,
                                                    shape=[-1]))
    # `tf.unique` keeps first-occurrence order, so the last element is not
    # necessarily the largest (e.g. for batched inputs); take the maximum.
    longest_exercise_time = tf.reduce_max(unique_exercise_times)
    if time_step_fd is None:
        time_step_fd = longest_exercise_time / 100.0

    pde_time_grid = tf.concat([
        unique_exercise_times,
        tf.range(0.0, longest_exercise_time, time_step_fd, dtype=dtype)
    ],
                              axis=0)
    # This time grid is now sorted and contains the Bermudan exercise times
    pde_time_grid = tf.sort(pde_time_grid, name='sort_pde_time_grid')
    pde_time_grid_dt = pde_time_grid[1:] - pde_time_grid[:-1]
    # The leading sentinel is large, so the first grid point is always kept.
    pde_time_grid_dt = tf.concat([[100.0], pde_time_grid_dt], axis=-1)
    # Remove duplicates.
    mask = tf.math.greater(pde_time_grid_dt, _PDE_TIME_GRID_TOL)
    pde_time_grid = tf.boolean_mask(pde_time_grid, mask)
    pde_time_grid_dt = tf.boolean_mask(pde_time_grid_dt, mask)

    return pde_time_grid, pde_time_grid_dt
Esempio n. 18
0
def prepare_grid(*, times, time_step, dtype):
    """Prepares grid of times for path generation.

    Args:
      times:  Rank 1 `Tensor` of increasing positive real values. The times at
        which the path points are to be evaluated.
      time_step: Rank 0 real `Tensor`. Maximal distance between points in
        resulting grid.
      dtype: `tf.Dtype` of the input and output `Tensor`s.

    Returns:
      Tuple `(all_times, mask, time_indices)`.
      `all_times` is a 1-D real `Tensor` containing the points from `times` and
      the uniform grid `tf.range(0, times[-1], time_step)`, sorted in ascending
      order. Points that lie within the duplicate tolerance (1e-10 for
      float64, 1e-6 otherwise) of their predecessor are removed.
      `mask` is a boolean 1-D `Tensor` of the same shape as `all_times`, True
      at the positions listed in `time_indices`.
      `time_indices` is an integer `Tensor` of the same shape as `times` with
      the `tf.searchsorted` position of each element of `times` in
      `all_times`.

      NOTE(review): earlier documentation promised `mask[0]=False`; the code
      that prepended an explicit leading entry is commented out below, so that
      only holds when no element of `times` maps to index 0 -- confirm with
      callers.
    """
    grid = tf.range(0.0, times[-1], time_step, dtype=dtype)
    all_times = tf.concat([times, grid], axis=0)
    # Remove duplicate points
    # all_times = tf.unique(all_times).y
    # Sort sequence. Identify the time indices of interest
    # TODO(b/169400743): use tf.sort instead of argsort and casting when XLA
    # float64 support is extended for tf.sort
    # The sort order comes from a float32 cast; the gathered values keep the
    # original dtype.
    args = tf.argsort(tf.cast(all_times, dtype=tf.float32))
    all_times = tf.gather(all_times, args)
    # Remove duplicate points
    duplicate_tol = 1e-10 if dtype == tf.float64 else 1e-6
    dt = all_times[1:] - all_times[:-1]
    # The leading 1.0 exceeds the tolerance, so the first point is always kept.
    dt = tf.concat([[1.0], dt], axis=-1)
    duplicate_mask = tf.math.greater(dt, duplicate_tol)
    all_times = tf.boolean_mask(all_times, duplicate_mask)

    # NOTE(review): if a requested time was dropped above in favour of a
    # slightly smaller neighbour, this search points at the next larger
    # element instead; `_grid_from_time_step` shifts such indices left --
    # confirm whether the same correction is needed here.
    time_indices = tf.searchsorted(all_times, times, out_type=tf.int32)
    # Create a boolean mask to identify the iterations that have to be recorded.
    mask_sparse = tf.sparse.SparseTensor(
        indices=tf.expand_dims(tf.cast(time_indices, dtype=tf.int64), axis=1),
        values=tf.fill(tf.shape(times), True),
        dense_shape=tf.shape(all_times, out_type=tf.int64))
    mask = tf.sparse.to_dense(mask_sparse)
    # Disabled: prepending an explicit zero start time.
    # all_times = tf.concat([[0.0], all_times], axis=0)
    # mask = tf.concat([[False], mask], axis=0)
    # time_indices = time_indices + 1
    return all_times, mask, time_indices
Esempio n. 19
0
def compress(condition, a, axis=None):
  """Selects the slices of `a` along `axis` where `condition` is true.

  Implemented with `tf.boolean_mask`.

  Args:
    condition: 1-d array of bools. When shorter than the selected axis of `a`
      (or than the flattened array if `axis` is None), it is padded with
      False.
    a: array_like. Could be an ndarray, a Tensor or any object that can
      be converted to a Tensor using `tf.convert_to_tensor`.
    axis: Optional. Axis along which to select elements. If None, `condition`
      is applied on the flattened array.

  Returns:
    An ndarray.

  Raises:
    ValueError: if `condition` is not of rank 1.
  """
  condition = array_creation.asarray(condition, dtype=bool)
  a = array_creation.asarray(a)

  if condition.ndim != 1:
    raise ValueError('condition must be a 1-d array.')

  # Scalars are handled as 1-d arrays, as `np.compress` does.
  if a.ndim == 0:
    a = ravel(a)

  # Without an axis the selection runs over the flattened array.
  if axis is None:
    a = ravel(a)
    axis = 0

  # Normalize a negative axis to its non-negative equivalent.
  if axis < 0:
    axis += a.ndim

  assert 0 <= axis < a.ndim

  # `tf.boolean_mask` needs the mask length to equal the size of the masked
  # axis, while `np.compress` accepts a shorter condition: pad with False.
  mask_t = condition.data
  values_t = a.data
  if condition.shape[0] < a.shape[axis]:
    missing = a.shape[axis] - condition.shape[0]
    mask_t = tf.concat([mask_t, tf.fill([missing], False)], axis=0)
  selected = tf.boolean_mask(tensor=values_t, mask=mask_t, axis=axis)
  return utils.tensor_to_ndarray(selected)
Esempio n. 20
0
def _grid_from_time_step(*, times, time_step, dtype):
    """Creates a time grid from an input time step.

    The grid is the sorted union of `times` and
    `tf.range(0.0, times[-1], time_step)`, with points that follow their
    predecessor by no more than a small tolerance removed.

    Args:
      times: Rank-1 tensor of requested times; `times[-1]` is used as the upper
        limit of the regular grid.
      time_step: Spacing of the regular grid.
      dtype: Dtype of the generated grid. The deduplication tolerance is
        `1e-10` for `tf.float64` and `1e-6` otherwise.

    Returns:
      A tuple `(all_times, time_indices)` where `all_times` is the deduplicated
      sorted grid and `time_indices` (int32) holds, for every entry of `times`,
      the position of the matching point in `all_times`.
    """
    grid = tf.range(0.0, times[-1], time_step, dtype=dtype)
    all_times = tf.concat([times, grid], axis=0)
    all_times = tf.sort(all_times)
    # Remove duplicate points
    duplicate_tol = 1e-10 if dtype == tf.float64 else 1e-6
    dt = all_times[1:] - all_times[:-1]
    # The prepended 1.0 exceeds the tolerance, so the first point is always
    # kept.
    dt = tf.concat([[1.0], dt], axis=-1)
    duplicate_mask = tf.math.greater(dt, duplicate_tol)
    all_times = tf.boolean_mask(all_times, duplicate_mask)
    time_indices = tf.searchsorted(all_times, times, out_type=tf.int32)
    # Move `time_indices` to the left, if the requested `times` are removed from
    # `all_times` during deduplication. When the removed time was the largest
    # point, `searchsorted` returns `len(all_times)`; that index must not be
    # fed to `tf.gather` (out of bounds), so the lookup is clamped and such
    # indices are shifted left as well.
    num_points = tf.shape(all_times)[0]
    past_end = tf.math.greater_equal(time_indices, num_points)
    safe_indices = tf.math.minimum(time_indices, num_points - 1)
    overshoots = tf.math.greater(
        tf.gather(all_times, safe_indices) - times, duplicate_tol)
    time_indices = tf.where(
        tf.math.logical_or(past_end, overshoots),
        time_indices - 1, time_indices)
    return all_times, time_indices
Esempio n. 21
0
 def boolean_mask(self, mask, axis=None):
   """Applies `tf.boolean_mask` with `mask` and `axis` through `_apply_op`."""
   def _mask_tensor(tensor):
     return tf.boolean_mask(tensor, mask, axis=axis)

   return self._apply_op(_mask_tensor)
Esempio n. 22
0
def _replace_event_shape_in_shape_tensor(
    input_shape, event_shape_in, event_shape_out, validate_args):
  """Replaces the rightmost dims in a `Tensor` representing a shape.

  A static replacement is attempted first via
  `_replace_event_shape_in_tensorshape`. If its result is fully defined and no
  further runtime validation is needed, that result is converted to a tensor
  and returned. Otherwise the output shape is assembled dynamically by
  splitting `input_shape` and concatenating the non-event part with
  `event_shape_out`.

  Args:
    input_shape: a rank-1 `Tensor` of integers
    event_shape_in: the event shape expected to be present in rightmost dims
      of `input_shape`.
    event_shape_out: the event shape with which to replace `event_shape_in` in
      the rightmost dims of `input_shape`.
    validate_args: Python `bool` indicating whether arguments should
      be checked for correctness.

  Returns:
    output_shape: A rank-1 integer `Tensor` with the same contents as
      `input_shape` except for the event dims, which are replaced with
      `event_shape_out`.
    output_tensorshape: The static shape produced by
      `_replace_event_shape_in_tensorshape` for the same replacement.
  """
  # `is_validated` is the helper's second return value; it is used below to
  # decide whether runtime assertions are still required (presumably it means
  # the static check already covered them -- confirm against the helper).
  output_tensorshape, is_validated = _replace_event_shape_in_tensorshape(
      tensorshape_util.constant_value_as_shape(input_shape),
      event_shape_in,
      event_shape_out)

  # TODO(b/124240153): Remove map(tf.identity, deps) once tf.function
  # correctly supports control_dependencies.
  # NOTE: `map` is lazy, so this iterable can be consumed only once. That is
  # fine here because the two `control_dependencies` uses below are on
  # mutually exclusive paths (the first one returns).
  validation_dependencies = (
      map(tf.identity, (event_shape_in, event_shape_out))
      if validate_args else ())

  # Fast path: the static result is complete and nothing is left to validate.
  if (tensorshape_util.is_fully_defined(output_tensorshape) and
      (is_validated or not validate_args)):
    with tf.control_dependencies(validation_dependencies):
      output_shape = tf.convert_to_tensor(
          output_tensorshape, name='output_shape', dtype_hint=tf.int32)
    return output_shape, output_tensorshape

  with tf.control_dependencies(validation_dependencies):
    # Use the static element count of `event_shape_in` when it is known and
    # fall back to the dynamic `tf.size` otherwise.
    event_shape_in_ndims = (
        tf.size(event_shape_in)
        if tensorshape_util.num_elements(event_shape_in.shape) is None else
        tensorshape_util.num_elements(event_shape_in.shape))
    # The `-1` entry lets `tf.split` infer the length of the leading
    # (non-event) part.
    input_non_event_shape, input_event_shape = tf.split(
        input_shape, num_or_size_splits=[-1, event_shape_in_ndims])

  additional_assertions = []
  if is_validated:
    pass
  elif validate_args:
    # Check that `input_event_shape` and `event_shape_in` are compatible in the
    # sense that they have equal entries in any position that isn't a `-1` in
    # `event_shape_in`. Note that our validations at construction time ensure
    # there is at most one such entry in `event_shape_in`.
    mask = event_shape_in >= 0
    explicit_input_event_shape = tf.boolean_mask(input_event_shape, mask=mask)
    explicit_event_shape_in = tf.boolean_mask(event_shape_in, mask=mask)
    additional_assertions.append(
        assert_util.assert_equal(
            explicit_input_event_shape,
            explicit_event_shape_in,
            message='Input `event_shape` does not match `event_shape_in`.'))
    # We don't explicitly additionally verify
    # `tf.size(input_shape) > tf.size(event_shape_in)` since `tf.split`
    # already makes this assertion.

  # Dynamic path: keep the non-event dims and append the new event shape.
  with tf.control_dependencies(additional_assertions):
    output_shape = tf.concat([input_non_event_shape, event_shape_out], axis=0,
                             name='output_shape')

  return output_shape, output_tensorshape
Esempio n. 23
0
def lower_bound_info_nce(logu,
                         joint_sample_mask=None,
                         validate_args=False,
                         name=None):
    """InfoNCE lower bound on mutual information.

    InfoNCE lower bound is proposed in [van den Oord et al. (2018)][1]
    based on noise contrastive estimation (NCE).
    ```none
    I(X; Y) >= 1/K sum(i=1:K, log( p_joint[i] / p_marginal[i])),
    ```
    where the numerator and the denominator are, respectively,
    ```none
    p_joint[i] = p(x[i] | y[i]) = exp( f(x[i], y[i]) ),
    p_marginal[i] = 1/K sum(j=1:K, p(x[i] | y[j]) )
                  = 1/K sum(j=1:K, exp( f(x[i], y[j]) ) ),
    ```
    and `(x[i], y[i]), i=1:K` are samples from joint distribution `p(x, y)`.
    Pairs of points (x, y) are scored using a critic function `f`.

    Example:

    `X`, `Y` are samples from a joint Gaussian distribution, with
    correlation `0.8` and both of dimension `1`.

    ```python
    batch_size, rho, dim = 10000, 0.8, 1
    y, eps = tf.split(
        value=tf.random.normal(shape=(2 * batch_size, dim), seed=7),
        num_or_size_splits=2, axis=0)
    mean, conditional_stddev = rho * y, tf.sqrt(1. - tf.square(rho))
    x = mean + conditional_stddev * eps

    # Conditional distribution of p(x|y)
    conditional_dist = tfd.MultivariateNormalDiag(
        mean, scale_identity_multiplier=conditional_stddev)

    # Scores/unnormalized likelihood of pairs of samples `x[i], y[j]`
    # (The scores have shape [x_batch_size, distribution_batch_size]
    # as `lower_bound_info_nce` requires `scores[i, j] = f(x[i], y[j])
    # = log p(x[i] | y[j])`.)
    scores = conditional_dist.log_prob(x[:, tf.newaxis, :])

    # Mask for joint samples
    joint_sample_mask = tf.eye(batch_size, dtype=bool)

    # InfoNCE lower bound on mutual information
    lower_bound_info_nce(logu=scores, joint_sample_mask=joint_sample_mask)
    ```

    Args:
      logu: `float`-like `Tensor` of size `[batch_size_1, batch_size_2]`
        representing critic scores (scores) for pairs of points (x, y) with
        `logu[i, j] = f(x[i], y[j])`. The size of the last dimension may be
        statically unknown; it is then read at run time.
      joint_sample_mask: `bool`-like `Tensor` of the same size as `logu`
        masking the positive samples by `True`, i.e. samples from joint
        distribution `p(x, y)`.
        Default value: `None`. By default, the diagonal entries of `logu` are
        used as the positive samples.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      name: Python `str` name prefixed to Ops created by this function.
        Default value: `None` (i.e., 'lower_bound_info_nce').

    Returns:
      lower_bound: `float`-like `scalar` for lower bound on mutual information.

    #### References

    [1]: Aaron van den Oord, Yazhe Li, Oriol Vinyals. Representation
         Learning with Contrastive Predictive Coding. _arXiv preprint
         arXiv:1807.03748_, 2018. https://arxiv.org/abs/1807.03748.
    """

    with tf.name_scope(name or 'lower_bound_info_nce'):
        # Follow the notation of eq.(12) of Poole et al. (2019)
        # On Variational Bounds of Mutual Information,
        # https://arxiv.org/abs/1905.06922, where the expectation is taken by
        # sampling.
        # The first term is `1/K * sum(i=1:K, f(x[i], y[i])`, where `K` is the
        # `batch_size` and `(x[i], y[i])` is the joint sample.
        # The second term is
        # `1/K * sum(i=1:K, log(sum(j=1:K, exp(f(x[i], y[j]))))`, where the
        # joint samples are when `i=j`, and the marginal ones are `i!=j`.

        with tf.control_dependencies(
                _maybe_assert_float_matrix(logu, validate_args)):
            if joint_sample_mask is None:
                logu = tf.convert_to_tensor(logu,
                                            dtype_hint=tf.float32,
                                            name='logu')
                joint_term = tf.reduce_mean(tf.linalg.diag_part(logu),
                                            axis=[-1])

            else:
                logu, joint_sample_mask = _check_and_get_mask(
                    logu, joint_sample_mask, validate_args=validate_args)
                joint_term = tf.reduce_mean(tf.boolean_mask(
                    logu, joint_sample_mask),
                                            axis=[-1])

            # Prefer the static size of the last dimension; when it is not
            # known at graph-construction time (e.g. a `tf.function` traced
            # with an unspecified batch size) fall back to the dynamic shape
            # instead of failing on `tf.cast(None, ...)`.
            num_cols = tf.compat.dimension_value(logu.shape[-1])
            if num_cols is None:
                num_cols = tf.shape(logu)[-1]
            log_n = tf.math.log(tf.cast(num_cols, logu.dtype))
            marginal_term = (tf.reduce_mean(
                tf.reduce_logsumexp(logu, axis=[-1]), axis=[-1]) - log_n)
            return joint_term - marginal_term
Esempio n. 24
0
def softquantiles(x,
                  quantiles,
                  quantile_width=None,
                  axis=-1,
                  may_squeeze=True,
                  **kwargs):
  """Computes soft quantiles via optimal transport.

  Recovering a few quantiles does not need a full soft sort. The values of `x`
  are transported onto a short vector of weighted targets instead: narrow
  buckets of mass `quantile_width` sit at the requested quantiles and wider
  "filler" buckets cover the gaps between consecutive quantiles (and between
  the extreme quantiles and the ends of [0, 1]). The values landing in the
  narrow buckets are the soft quantiles.

  Args:
   x: Tensor<float> of any shape.
   quantiles: list<float> the quantiles to be returned. It can also be a single
     float. Entries outside the open interval (0, 1) are discarded.
   quantile_width: (float) mass given to each bucket that attracts the points
     around a requested quantile. A bigger width lets the soft quantile mix in
     points further away from the quantile. If None, the smaller of the
     narrowest gap between quantiles and 1/n is used, where n is the size of
     `x` along `axis`.
   axis: (int) the axis along which to compute the quantile.
   may_squeeze: (bool) whether to squeeze the output tensor when it holds a
     single quantile.
   **kwargs: forwarded to `softsort`; see SoftQuantilizer for possible extra
     parameters.

  Returns:
    A Tensor<float> similar to the input tensor, with the `axis` dimension
    replaced by the number of quantiles kept. With a single quantile and
    `may_squeeze` set, that dimension is removed.

  Raises:
    tf.errors.InvalidArgumentError when the quantiles and quantile width are not
    correct, namely quantiles are either not in sorted order or the
    quantile_width is too large.
  """
  requested = [quantiles] if isinstance(quantiles, float) else quantiles
  requested = tf.constant(requested, tf.float32)

  # Keep only the quantiles lying strictly inside (0, 1).
  inside_unit_interval = tf.logical_and(requested > 0.0, requested < 1.0)
  kept_quantiles = tf.boolean_mask(requested, inside_unit_interval)
  num_kept = tf.shape(kept_quantiles)[0]

  # Gaps between consecutive quantiles once 0 and 1 are added at the ends.
  boundaries = tf.concat([[0.0], kept_quantiles, [1.0]], axis=0)
  gaps = boundaries[1:] - boundaries[:-1]
  if quantile_width is None:
    inverse_size = 1.0 / tf.cast(tf.shape(x)[axis], dtype=x.dtype)
    quantile_width = tf.reduce_min(tf.concat([gaps, [inverse_size]], axis=0))

  # Each gap gives up a share of `quantile_width`: a full width for inner
  # gaps, half a width for the first and the last one.
  end_bonus = 0.5 * (
      tf.one_hot(0, num_kept + 1) + tf.one_hot(num_kept, num_kept + 1))
  shift = -tf.ones(tf.shape(gaps), dtype=x.dtype)
  shift = shift + end_bonus
  fillers = gaps + quantile_width * shift

  nonnegative_check = tf.Assert(tf.reduce_all(fillers >= 0.0), [fillers])
  with tf.control_dependencies([nonnegative_check]):
    # One extra bucket weight makes both vectors the same length so they can
    # be interleaved; the surplus entry is dropped right after.
    bucket_weights = tf.ones(num_kept + 1) * quantile_width
    interleaved = tf.stack([fillers, bucket_weights], axis=1)
    weights = tf.reshape(interleaved, (-1,))[:-1]

    # Only strictly positive weights are handed to the soft sort.
    positive_weights = tf.boolean_mask(weights, weights > 0.0)
    all_quantiles = softsort(
        x,
        direction='ASCENDING',
        axis=axis,
        target_weights=positive_weights,
        **kwargs)

    # Quantile buckets sit at the odd positions of `weights`; the cumulative
    # count of positive weights gives their 1-based position in the output.
    parity = tf.math.floormod(
        tf.range(weights.shape[0], dtype=tf.float32), 2)
    is_positive = tf.cast(weights > 0.0, tf.float32)
    indices = tf.cast(tf.math.cumsum(is_positive) * parity, dtype=tf.int32)
    indices = tf.boolean_mask(indices, indices > 0) - 1
    result = tf.gather(all_quantiles, indices, axis=axis)

    # Drop the quantile dimension when exactly one quantile is returned and
    # the caller allows it.
    can_squeeze = tf.equal(tf.shape(result)[axis], 1)
    if tf.math.logical_and(can_squeeze, may_squeeze):
      result = tf.squeeze(result, axis=axis)
    return result
Esempio n. 25
0
def lower_bound_jensen_shannon(logu,
                               joint_sample_mask=None,
                               validate_args=False,
                               name=None):
    """Lower bound on Jensen-Shannon (JS) divergence.

    This lower bound on JS divergence is proposed in
    [Goodfellow et al. (2014)][1] and [Nowozin et al. (2016)][2].
    When estimating lower bounds on mutual information, one can also use
    different approaches for training the critic w.r.t. estimating
    mutual information [(Poole et al., 2018)][3]. The JS lower bound is
    used to train the critic with the standard lower bound on the
    Jensen-Shannon divergence as used in GANs, and then evaluates the
    critic using the NWJ lower bound on KL divergence, i.e. mutual information.
    As Eq.7 and Eq.8 of [Nowozin et al. (2016)][2], the bound is given by
    ```none
    I_JS = E_p(x,y)[log( D(x,y) )] + E_p(x)p(y)[log( 1 - D(x,y) )]
    ```
    where the first term is the expectation over the samples from joint
    distribution (positive samples), and the second is for the samples
    from marginal distributions (negative samples), with
    ```none
    D(x, y) = sigmoid(f(x, y)),
    log(D(x, y)) = -softplus(-f(x, y)),
    log(1 - D(x, y)) = -softplus(f(x, y)).
    ```
    `f(x, y)` is a critic function that scores all pairs of samples.

    Example:

    `X`, `Y` are samples from a joint Gaussian distribution, with
    correlation `0.8` and both of dimension `1`.

    ```python
    batch_size, rho, dim = 10000, 0.8, 1
    y, eps = tf.split(
        value=tf.random.normal(shape=(2 * batch_size, dim), seed=7),
        num_or_size_splits=2, axis=0)
    mean, conditional_stddev = rho * y, tf.sqrt(1. - tf.square(rho))
    x = mean + conditional_stddev * eps

    # Scores/unnormalized likelihood of pairs of samples `x[i], y[j]`
    # (For JS lower bound, the optimal critic is of the form `f(x, y) = 1 +
    # log(p(x | y) / p(x))` [(Poole et al., 2018)][3].)
    conditional_dist = tfd.MultivariateNormalDiag(
        mean, scale_identity_multiplier=conditional_stddev)
    conditional_scores = conditional_dist.log_prob(y[:, tf.newaxis, :])
    marginal_dist = tfd.MultivariateNormalDiag(tf.zeros(dim), tf.ones(dim))
    marginal_scores = marginal_dist.log_prob(y)[:, tf.newaxis]
    scores = 1 + conditional_scores - marginal_scores

    # Mask for joint samples in the score tensor
    # (The `scores` has its shape [x_batch_size, y_batch_size], i.e.
    # `scores[i, j] = f(x[i], y[j]) = log p(x[i] | y[j])`.)
    joint_sample_mask = tf.eye(batch_size, dtype=bool)

    # Lower bound on Jensen Shannon divergence
    lower_bound_jensen_shannon(logu=scores, joint_sample_mask=joint_sample_mask)
    ```

    Args:
      logu: `float`-like `Tensor` of size `[batch_size_1, batch_size_2]`
        representing critic scores (scores) for pairs of points (x, y) with
        `logu[i, j] = f(x[i], y[j])`.
      joint_sample_mask: `bool`-like `Tensor` of the same size as `logu`
        masking the positive samples by `True`, i.e. samples from joint
        distribution `p(x, y)`.
        Default value: `None`. By default, the diagonal entries of `logu` are
        the positive samples and all off-diagonal entries are the negative
        ones, which matches an identity-matrix mask.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      name: Python `str` name prefixed to Ops created by this function.
        Default value: `None` (i.e., 'lower_bound_jensen_shannon').

    Returns:
      lower_bound: `float`-like `scalar` for lower bound on JS divergence.

    #### References:

    [1]: Ian J. Goodfellow, et al. Generative Adversarial Nets. In
         _Conference on Neural Information Processing Systems_, 2014.
         https://arxiv.org/abs/1406.2661.
    [2]: Sebastian Nowozin, Botond Cseke, Ryota Tomioka. f-GAN: Training
         Generative Neural Samplers using Variational Divergence Minimization.
         In _Conference on Neural Information Processing Systems_, 2016.
         https://arxiv.org/abs/1606.00709.
    [3]: Ben Poole, Sherjil Ozair, Aaron van den Oord, Alexander A. Alemi,
         George Tucker. On Variational Bounds of Mutual Information. In
         _International Conference on Machine Learning_, 2019.
         https://arxiv.org/abs/1905.06922.
    """

    with tf.name_scope(name or 'lower_bound_jensen_shannon'):
        with tf.control_dependencies(
                _maybe_assert_float_matrix(logu, validate_args)):
            if joint_sample_mask is None:
                # Default mask: avoid materializing an identity matrix by
                # working with the diagonal directly.
                logu = tf.convert_to_tensor(logu,
                                            dtype_hint=tf.float32,
                                            name='logu')
                logu_diag = tf.linalg.diag_part(logu)
                joint_samples_nll = -tf.reduce_mean(tf.nn.softplus(-logu_diag),
                                                    axis=[-1])
                n, m = tf.unstack(
                    tf.cast(tf.shape(logu)[-2:], dtype=logu.dtype))
                # An `n x m` matrix has `min(n, m)` diagonal entries, so the
                # number of off-diagonal (marginal) samples is
                # `n * m - min(n, m)`. This equals `n * (m - 1)` whenever
                # `n <= m` and stays correct for `n > m` as well.
                num_marginal_samples = n * m - tf.math.minimum(n, m)
                marginal_samples_nll = (
                    (tf.reduce_sum(tf.nn.softplus(logu), axis=[-2, -1]) -
                     tf.reduce_sum(tf.nn.softplus(logu_diag), axis=[-1])) /
                    num_marginal_samples)
                return joint_samples_nll - marginal_samples_nll

            logu, joint_sample_mask = _check_and_get_mask(
                logu, joint_sample_mask, validate_args=validate_args)

            # E_p(x,y)[log D] = -mean(softplus(-f)) over the positive samples.
            joint_samples = tf.boolean_mask(logu, joint_sample_mask)
            lower_bound = -tf.reduce_mean(tf.math.softplus(-joint_samples),
                                          axis=[-1])

            # E_p(x)p(y)[log(1 - D)] = -mean(softplus(f)) over the negatives.
            marginal_samples = tf.boolean_mask(logu, ~joint_sample_mask)  # pylint: disable=invalid-unary-operand-type
            lower_bound -= tf.reduce_mean(tf.math.softplus(marginal_samples),
                                          axis=[-1])
            return lower_bound
Esempio n. 26
0
def lower_bound_nguyen_wainwright_jordan(logu,
                                         joint_sample_mask=None,
                                         validate_args=False,
                                         name=None):
    """Lower bound on Kullback-Leibler (KL) divergence from Nguyen et al.

    The lower bound was introduced by Nguyen, Wainwright, Jordan (NWJ) in
    [Nguyen et al. (2010)][1], and also known as `f-GAN KL` [(Nowozin et al.,
    2016)][2] and `MINE-f` [(Belghazi et al., 2018)][3].
    ```none
    I_NWJ = E_p(x,y)[f(x, y)] - 1/e * E_p(y)[Z(y)],
    ```
    where `f(x, y)` is a critic function that scores pairs of samples `(x, y)`,
    and `Z(y)` is the corresponding partition function:
    ```none
    Z(y) = E_p(x)[ exp(f(x, y)) ].
    ```

    Example:

    `X`, `Y` are samples from a joint Gaussian distribution, with correlation
    `0.8` and both of dimension `1`.

    ```python
    batch_size, rho, dim = 10000, 0.8, 1
    y, eps = tf.split(
        value=tf.random.normal(shape=(2 * batch_size, dim), seed=7),
        num_or_size_splits=2, axis=0)
    mean, conditional_stddev = rho * y, tf.sqrt(1. - tf.square(rho))
    x = mean + conditional_stddev * eps

    # Scores/unnormalized likelihood of pairs of samples `x[i], y[j]`
    # (For NWJ lower bound, the optimal critic is of the form `f(x, y) = 1 +
    # log(p(x | y) / p(x))` [(Poole et al., 2018)][4]. )
    conditional_dist = tfd.MultivariateNormalDiag(
        mean, scale_identity_multiplier=conditional_stddev)
    conditional_scores = conditional_dist.log_prob(y[:, tf.newaxis, :])
    marginal_dist = tfd.MultivariateNormalDiag(tf.zeros(dim), tf.ones(dim))
    marginal_scores = marginal_dist.log_prob(y)[:, tf.newaxis]
    scores = 1 + conditional_scores - marginal_scores

    # Mask for joint samples in score tensor
    # (The `scores` has its shape [x_batch_size, y_batch_size], i.e.
    # `scores[i, j] = f(x[i], y[j]) = log p(x[i] | y[j])`.)
    joint_sample_mask = tf.eye(batch_size, dtype=bool)

    # Lower bound on KL divergence between p(x,y) and p(x)p(y),
    # i.e. the mutual information between `X` and `Y`.
    lower_bound_nguyen_wainwright_jordan(
        logu=scores, joint_sample_mask=joint_sample_mask)
    ```

    Args:
      logu: `float`-like `Tensor` of size `[batch_size_1, batch_size_2]`
        representing critic scores (scores) for pairs of points (x, y) with
        `logu[i, j] = f(x[i], y[j])`.
      joint_sample_mask: `bool`-like `Tensor` of the same size as `logu`
        masking the positive samples by `True`, i.e. samples from joint
        distribution `p(x, y)`.
        Default value: `None`. By default, an identity matrix is constructed as
        the mask.
      validate_args: Python `bool`, default `False`. Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      name: Python `str` name prefixed to Ops created by this function.
        Default value: `None` (i.e., 'lower_bound_nguyen_wainwright_jordan').

    Returns:
      lower_bound: `float`-like `scalar` for lower bound on KL divergence
        between joint and marginal distributions.

    #### References:

    [1]: XuanLong Nguyen, Martin J. Wainwright, Michael I. Jordan.
         Estimating Divergence Functionals and the Likelihood Ratio
         by Convex Risk Minimization. _IEEE Transactions on Information
         Theory_, 56(11):5847-5861, 2010. https://arxiv.org/abs/0809.0853.
    [2]: Sebastian Nowozin, Botond Cseke, Ryota Tomioka. f-GAN: Training
         Generative Neural Samplers using Variational Divergence Minimization.
         In _Conference on Neural Information Processing Systems_, 2016.
         https://arxiv.org/abs/1606.00709.
    [3]: Mohamed Ishmael Belghazi, et al. MINE: Mutual Information Neural
         Estimation. In _International Conference on Machine Learning_, 2018.
         https://arxiv.org/abs/1801.04062.
    [4]: Ben Poole, Sherjil Ozair, Aaron van den Oord, Alexander A. Alemi,
         George Tucker. On Variational Bounds of Mutual Information. In
         _International Conference on Machine Learning_, 2019.
         https://arxiv.org/abs/1905.06922.
    """

    with tf.name_scope(name or 'lower_bound_nguyen_wainwright_jordan'):
        with tf.control_dependencies(
                _maybe_assert_float_matrix(logu, validate_args)):
            if joint_sample_mask is not None:
                # Explicit mask: positives are the masked entries, negatives
                # are everything else.
                logu, joint_sample_mask = _check_and_get_mask(
                    logu, joint_sample_mask, validate_args=validate_args)
                positive_scores = tf.boolean_mask(logu, joint_sample_mask)
                joint_term = tf.reduce_mean(positive_scores, axis=[-1])
                marginal_sample_mask = ~joint_sample_mask  # pylint: disable=invalid-unary-operand-type
            else:
                # Default mask: positives are the diagonal, negatives are the
                # off-diagonal entries.
                logu = tf.convert_to_tensor(logu,
                                            dtype_hint=tf.float32,
                                            name='logu')
                diagonal_scores = tf.linalg.diag_part(logu)
                joint_term = tf.reduce_mean(diagonal_scores, axis=[-1])
                matrix_dims = tf.shape(logu)[-2:]
                num_rows, num_cols = tf.unstack(matrix_dims)
                marginal_sample_mask = ~tf.eye(
                    num_rows, num_cols, dtype=tf.bool)

            # log E[exp(f)] over the negative samples; subtracting 1 in log
            # space applies the 1/e factor of the bound.
            log_partition = _masked_logmeanexp(logu,
                                               marginal_sample_mask,
                                               axis=[-2, -1])
            return joint_term - tf.math.exp(log_partition - 1.)
    def call(self, x, u):
        """Runs the forward pass of the command-branched network.

        A shared tanh layer is applied to every observation. Each sample is
        then routed through the two-layer head (sigmoid hidden layer followed
        by a linear output) that belongs to its command, and the per-branch
        outputs are put back into the original row order.

        Per the task description, `x` is a (? x |O|) batch of observations and
        `u` is a (? x 1) vector of high-level commands, where for the
        intersection scenario 0 means turn left, 1 straight and 2 right.

        Args:
          x: batch of observations; cast to float32.
          u: per-sample commands; cast to int8 and compared against 0, 1, 2.

        Returns:
          The network output with one row per input sample. Rows whose command
          is none of 0, 1, 2 receive no branch contribution.
        """
        x = tf.cast(x, dtype=tf.float32)
        u = tf.cast(u, dtype=tf.int8)

        num_samples = len(x)

        # Shared trunk, evaluated once for the whole batch.
        trunk = tf.math.tanh(tf.matmul(x, self.w1) - self.b1)

        # (command, hidden weights, hidden bias, output weights, output bias)
        branches = (
            (0, self.w2, self.b2, self.w3, self.b3),
            (1, self.w4, self.b4, self.w5, self.b5),
            (2, self.w6, self.b6, self.w7, self.b7),
        )

        y_est = None
        for command, w_hidden, b_hidden, w_out, b_out in branches:
            # Boolean vector marking the samples that use this branch.
            selected = tf.reshape(tf.math.equal(u, command), [num_samples])

            # Evaluate the branch only on its own samples.
            branch_input = tf.boolean_mask(trunk, selected)
            hidden = tf.math.sigmoid(
                tf.matmul(branch_input, w_hidden) - b_hidden)
            branch_output = tf.matmul(hidden, w_out) - b_out

            # Scatter matrix: the columns of diag(selected) that belong to the
            # selected samples. Multiplying by it places each branch output
            # row at the position of its sample and leaves zeros elsewhere.
            diagonal = tf1.linalg.tensor_diag(
                tf.cast(selected, dtype=tf.float32))
            scatter = tf.transpose(tf.boolean_mask(diagonal, selected))
            scatter = tf.cast(scatter, tf.float32)

            placed = tf.matmul(scatter, branch_output)
            y_est = placed if y_est is None else y_est + placed

        return y_est
Esempio n. 28
0
    def _head(self, neck_outputs):
        """Scores visual/text alignment and returns it as an `AgentOutput`.

        Per-step visual features are compared with the text features picked
        at the first true entry of the discriminator mask. The resulting
        per-step scores are combined with masked softmin weights into one
        similarity per batch element, which is tiled over time and passed
        through the affine transform `affine_a * s + affine_b`.

        Args:
          neck_outputs: mapping that provides `constants.PATH_ID`,
            `constants.DISC_MASK`, 'visual_feature' and 'text_feature',
            each flattened over [time * batch] in its leading dimension.

        Returns:
          `common.AgentOutput` whose `policy_logits` are the transformed
          similarities and whose `baseline` is the pair of tiled affine
          parameters, all of shape [time * batch].
        """
        num_steps = self._current_num_timesteps
        batch = self._current_batch_size
        time_major = [num_steps, batch]

        def _select_or_first(values, selector):
            # Rows picked by `selector` when it has any true entry, otherwise
            # the slice at time index 0.
            return tf.cond(
                tf.keras.backend.any(selector),
                lambda: tf.boolean_mask(values, selector),
                lambda: values[:, 0])

        # [time * batch] -> [batch, time]
        path_ids = tf.transpose(
            tf.reshape(neck_outputs[constants.PATH_ID], time_major))

        # <tf.float32>[time * batch_size, num_tokens, hidden_dim]
        flat_text = neck_outputs['text_feature']
        # -> [batch_size, time, num_tokens, hidden_dim]
        raw_text_feature = tf.transpose(
            tf.reshape(flat_text, time_major + flat_text.shape[1:].as_list()),
            perm=[1, 0, 2, 3])

        # <tf.float32>[time * batch_size, 1, hidden_dim]
        flat_visual = neck_outputs['visual_feature']
        # -> [time, batch_size, 1, hidden_dim]
        visual_feature = tf.reshape(
            flat_visual, time_major + flat_visual.shape[1:].as_list())
        # -> [batch_size, time, hidden_dim]
        visual_feature = tf.transpose(
            tf.squeeze(visual_feature, axis=2), [1, 0, 2])

        # Discriminator mask laid out as [time, batch].
        disc_mask = tf.reshape(neck_outputs[constants.DISC_MASK], time_major)
        first_true = tf.transpose(utils.get_first_true_column(disc_mask))
        last_true = tf.transpose(utils.get_last_true_column(disc_mask))

        # Sanity Check: path_ids are consistent for first_true and last_true.
        tf.debugging.assert_equal(
            _select_or_first(path_ids, first_true),
            _select_or_first(path_ids, last_true))

        # <tf.float32>[batch_size, num_tokens, hidden_dim]
        text_feature = _select_or_first(raw_text_feature, first_true)
        # Same consistency check for the text features.
        tf.debugging.assert_equal(
            text_feature, _select_or_first(raw_text_feature, last_true))

        # <tf.float32>[batch_size, time, num_tokens]
        alpha_i_j = tf.matmul(visual_feature,
                              tf.transpose(text_feature, perm=[0, 2, 1]))
        # <tf.float32>[batch, time, num_tokens]
        c_i_j = tf.nn.softmax(alpha_i_j)
        # <tf.float32>[batch, time]
        mask = tf.cast(tf.transpose(disc_mask, perm=[1, 0]), tf.float32)

        # <tf.float32>[batch, time]
        score = tf.reduce_sum(c_i_j * alpha_i_j, 2)

        # softmin(x) = softmax(-x). The global maximum of -x is subtracted
        # before exponentiation for numerical stability, which leaves the
        # softmax unchanged.
        negative_score = -1.0 * score
        escore = tf.exp(negative_score - tf.reduce_max(negative_score)) * mask
        row_totals = tf.expand_dims(tf.reduce_sum(escore, 1), 1)
        sum_escore = tf.tile(row_totals, [1, tf.shape(escore)[1]])
        score_weight = tf.divide(escore, sum_escore)

        # One similarity per batch element, repeated for every time step.
        similarities = tf.reduce_sum(mask * score * score_weight, 1)
        similarities = tf.expand_dims(similarities, axis=0)
        # shape: [time * batch_size]
        similarities = tf.reshape(
            tf.tile(similarities, [num_steps, 1]), [-1])

        # Apply an affine transform.
        similarities = similarities * self.affine_a + self.affine_b

        # Tile the affine parameters to shape [time * batch].
        output_a = tf.reshape(tf.convert_to_tensor(self.affine_a), [1, 1])
        output_b = tf.reshape(tf.convert_to_tensor(self.affine_b), [1, 1])
        output_a = tf.reshape(tf.tile(output_a, time_major), [-1])
        output_b = tf.reshape(tf.tile(output_b, time_major), [-1])

        return common.AgentOutput(policy_logits=similarities,
                                  baseline=(output_a, output_b))
Esempio n. 29
0
  def _sample_paths(self,
                    times,
                    time_step,
                    num_samples,
                    random_type,
                    skip,
                    seed):
    """Draws sample paths of the process on a simulation time grid.

    The simulation grid and a mask selecting the grid columns to report are
    obtained from `_prepare_grid(times, time_step)`. The state variables `x`
    and `y` start at zero and are advanced one grid interval at a time inside
    a `tf.while_loop`.

    Args:
      times: Forwarded, together with `time_step`, to `_prepare_grid`.
      time_step: Forwarded to `_prepare_grid`.
      num_samples: Number of paths to simulate (leading dimension of every
        returned tensor).
      random_type: Random number generator type, forwarded to the normal draw
        generators.
      skip: Forwarded to `utils.generate_mc_normal_draws`.
      seed: Seed forwarded to the random number generators.

    Returns:
      A tuple `(rate_paths, discount_factor_paths, x_paths, y_paths)` where
      each element has been restricted along axis 1 to the grid points selected
      by the keep mask.
    """
    # Note: all the notations below are the same as in [2].
    times, keep_mask = _prepare_grid(times, time_step)
    # Lengths of the intervals between consecutive grid points.
    dt = times[1:] - times[:-1]
    # Use the static number of steps when it is known, a dynamic one otherwise.
    steps_num = (dt.shape.as_list()[-1] if dt.shape.is_fully_defined()
                 else tf.shape(dt)[-1])

    # Low-discrepancy random types require the whole sequence of independent
    # normals to be generated upfront. The stateless types are precomputed as
    # well so that calls with different seeds yield independent samples.
    pregenerated_types = (
        random.RandomType.SOBOL,
        random.RandomType.HALTON,
        random.RandomType.HALTON_RANDOMIZED,
        random.RandomType.STATELESS,
        random.RandomType.STATELESS_ANTITHETIC,
    )
    normal_draws = None
    if random_type in pregenerated_types:
      normal_draws = utils.generate_mc_normal_draws(
          num_normal_draws=self._dim,
          num_time_steps=steps_num,
          num_sample_paths=num_samples,
          random_type=random_type,
          seed=seed,
          dtype=self._dtype,
          skip=skip)

    def keep_stepping(i, *unused_state):
      """Loop condition: continue while there are grid intervals left."""
      return i < tf.size(dt)

    def advance(i, written_count, x_now, y_now, x_hist, y_hist):
      """Simulate qG-HJM process to the next time point."""
      if normal_draws is not None:
        normals = normal_draws[i]
      else:
        # Nothing was precomputed: draw the normals for this step on the fly.
        normals = random.mv_normal_sample(
            (num_samples,),
            mean=tf.zeros((self._dim,), dtype=self._dtype),
            random_type=random_type, seed=seed)

      if self._sqrt_rho is not None:
        normals = tf.linalg.matvec(self._sqrt_rho, normals)

      vol = self._volatility(times[i + 1], x_now)

      x_drift = (y_now - self._mean_reversion * x_now) * dt[i]
      x_shock = vol * normals * tf.math.sqrt(dt[i])
      x_next = x_now + x_drift + x_shock
      y_next = y_now + (vol**2 - 2.0 * self._mean_reversion * y_now) * dt[i]

      def store(paths, values):
        """Writes `values` into the next free column (axis 1) of `paths`."""
        return utils.maybe_update_along_axis(
            tensor=paths,
            do_update=True,
            ind=written_count + 1,
            axis=1,
            new_tensor=tf.expand_dims(values, axis=1))

      # Update `x_paths` and `y_paths`
      x_hist = store(x_hist, x_next)
      y_hist = store(y_hist, y_next)

      return (i + 1, written_count + 1, x_next, y_next, x_hist, y_hist)

    # Column 0 of the path buffers is never written by the loop, so it keeps
    # the zero initial state.
    num_grid_points = times.shape.as_list()[0]
    paths_shape = (num_samples, num_grid_points, self._factors)
    state_shape = (num_samples, self._factors)
    x_paths = tf.zeros(paths_shape, dtype=self._dtype)
    y_paths = tf.zeros(paths_shape, dtype=self._dtype)
    initial_x = tf.zeros(state_shape, dtype=self._dtype)
    initial_y = tf.zeros(state_shape, dtype=self._dtype)

    *_, x_paths, y_paths = tf.while_loop(
        keep_stepping,
        advance,
        (0, 0, initial_x, initial_y, x_paths, y_paths))

    f_0_t = self._instant_forward_rate_fn(times)  # shape=(num_times,)
    # shape=(num_samples, num_times)
    rate_paths = tf.math.reduce_sum(x_paths, axis=-1) + f_0_t

    # Per-interval discount factors, prefixed with a column of ones and then
    # accumulated along the time axis. shape=(num_samples, num_times)
    step_discounts = tf.math.exp(-rate_paths[:, :-1] * dt)
    unit_column = tf.ones((num_samples, 1), dtype=self._dtype)
    discount_factor_paths = utils.cumprod_using_matvec(
        tf.concat([unit_column, step_discounts], axis=1))

    # Report only the grid points selected by `keep_mask`.
    return tuple(
        tf.boolean_mask(paths, keep_mask, axis=1)
        for paths in (rate_paths, discount_factor_paths, x_paths, y_paths))
Esempio n. 30
0
def segment_diff(x,
                 segment_ids,
                 order=1,
                 exclusive=False,
                 dtype=None,
                 name=None):
    """Computes difference of successive elements in a segment.

    For a complete description of segment_* ops see documentation of
    `tf.segment_max`. This op extends the `diff` functionality to segmented
    inputs.

    The behaviour of this op is the same as that of the op `diff` within each
    segment. The result is effectively a concatenation of the results of `diff`
    applied to each segment.

    ## Example

    ```python
      x = tf.constant([2, 5, 1, 7, 9] + [32, 10, 12, 3] + [4, 8, 5])
      segments = tf.constant([0, 0, 0, 0, 0] + [1, 1, 1, 1] + [2, 2, 2])
      # First order diff. Expected result: [3, -4, 6, 2, -22, 2, -9, 4, -3]
      dx1 = segment_diff(
          x, segment_ids=segments, order=1, exclusive=True)
      # Non-exclusive, second order diff.
      # Expected result: [2, 5, -1, 2, 8, 32, 10, -20, -7, 4, 8, 1]
      dx2 = segment_diff(
          x, segment_ids=segments, order=2, exclusive=False)
    ```

    Args:
      x: A rank 1 `Tensor` of any dtype for which arithmetic operations are
        permitted.
      segment_ids: A rank 1 integer `Tensor` (int32 or int64), or anything
        convertible to one (e.g. a Python list), whose size is equal to the
        size of `x`. Values should be sorted and can be repeated. If `None`,
        `x` is treated as a single segment and the plain `diff` of `x` is
        returned.
      order: Positive Python int. The order of the difference to compute.
        `order = 1` corresponds to the difference between successive elements.
        Default value: 1
      exclusive: Python bool. If `True`, the leading (up to `order`) elements
        of every segment, for which no difference within the segment can be
        formed, are dropped from the result. If `False`, the result has the
        same size as `x` and those positions hold the corresponding values of
        `x` (see the example above).
        Default value: False
      dtype: Optional `tf.Dtype`. If supplied, the dtype for `x` to use when
        converting to `Tensor`.
        Default value: None which maps to the default dtype inferred by TF.
      name: Python `str` name prefixed to Ops created by this class.
        Default value: None which is mapped to the default name
        'segment_diff'.

    Returns:
      diffs: A `Tensor` of the same dtype as `x`. Assuming that each segment is
        of length greater than or equal to order, if `exclusive` is True,
        then the size is `n-order*k` where `n` is the size of x,
        `k` is the number of different segment ids supplied if `segment_ids`
        is not None or 1 if `segment_ids` is None. If any of the segments is
        of length less than the order, then the size is:
        `n-sum(min(order, length(segment_j)), j)` where the sum is over
        segments. If `exclusive` is False, then the size is `n`.
    """
    with tf.compat.v1.name_scope(name, default_name='segment_diff',
                                 values=[x]):
        x = tf.convert_to_tensor(x, dtype=dtype)
        raw_diffs = diff_ops.diff(x, order=order, exclusive=exclusive)
        if segment_ids is None:
            return raw_diffs
        # Accept array-likes such as Python lists: the slicing and subtraction
        # below require tensor semantics. This is a no-op for `Tensor` inputs.
        segment_ids = tf.convert_to_tensor(segment_ids)
        # If segment ids are supplied, raw_diffs are incorrect at locations:
        # p, p+1, ... min(p+order-1, m_p-1) where p is the index of the first
        # element of a segment other than the very first segment (which is
        # already correct). m_p is the segment length.
        # Find positions where the segments begin.
        has_segment_changed = tf.concat(
            [[False],
             tf.not_equal(segment_ids[1:] - segment_ids[:-1], 0)],
            axis=0)
        # Shape [k, 1]
        segment_start_index = tf.cast(tf.where(has_segment_changed),
                                      dtype=tf.int32)
        # A segment ends where the next one starts; the last segment ends at
        # the end of the input.
        segment_end_index = tf.concat(
            [tf.reshape(segment_start_index, [-1])[1:],
             [tf.size(segment_ids)]],
            axis=0)
        segment_end_index = tf.reshape(segment_end_index, [-1, 1])
        # The indices of locations that need to be adjusted. This needs to be
        # constructed in steps. First we generate p, p+1, ... p+order-1.
        # Shape [num_segments-1, order]
        fix_indices = (segment_start_index +
                       tf.range(order, dtype=segment_start_index.dtype))
        in_bounds = tf.where(fix_indices < segment_end_index)
        # Keep only the ones in bounds, i.e. those that do not spill over into
        # the following segment.
        fix_indices = tf.reshape(tf.gather_nd(fix_indices, in_bounds), [-1, 1])

        needs_fix = tf.scatter_nd(
            fix_indices,
            # Unfortunately, scatter_nd doesn't support bool on GPUs so we need
            # to do ints here and then convert to bool.
            tf.reshape(tf.ones_like(fix_indices, dtype=tf.int32), [-1]),
            shape=tf.shape(x))
        # If exclusive is False, then needs_fix means we need to replace the
        # values in raw_diffs at those locations with the values in x.
        needs_fix = tf.cast(needs_fix, dtype=tf.bool)
        if not exclusive:
            return tf.where(needs_fix, x, raw_diffs)

        # If exclusive is True, we have to be more careful. The raw_diffs
        # computation has removed the first 'order' elements. After removing
        # the corresponding elements from needs_fix, we use it to remove the
        # elements from raw_diffs.
        return tf.boolean_mask(raw_diffs, tf.logical_not(needs_fix[order:]))