Example #1
    def _scale_mle(self, samples, scale_candidates):
        """Max log-likelihood estimate for scale.

    Args:
      samples: Observed data points.
      scale_candidates: A simple grid of candiates for
        scale, with shape original_batch_shape + [num_candidates],
        where different candidates for a single scalar parameter are at the
        inner most dimension (axis -1).
    Returns:
      scale_mle: max log-likelihood estimate for scale.
    """
        dist = tfd.Horseshoe(scale=scale_candidates)
        dims = tf.shape(scale_candidates)
        num_candidates = dims[-1]
        original_batch_shape = dims[:-1]
        # log_likelihood has same shape as scale_candidates
        # i.e. original_batch_shape + [num_candidates]
        log_likelihood = tf.reduce_sum(
            # dist.log_prob here returns a tensor with shape
            # [num_samples] + original_batch_shape + [num_candidates]
            dist.log_prob(
                tf.reshape(
                    samples,
                    tf.concat([[-1], original_batch_shape, [1]], axis=0))),
            axis=0)
        # max log-likelihood candidate location mask
        mask = tf.one_hot(tf.argmax(log_likelihood, axis=-1),
                          depth=num_candidates,
                          dtype=self.dtype)
        return tf.reduce_sum(scale_candidates * mask, axis=-1)
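A minimal, self-contained sketch of the same grid-search pattern (made-up data; tfd.Normal stands in for tfd.Horseshoe, and TensorFlow Probability is assumed to be installed):

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

samples = tf.constant([0.5, -1.2, 0.8, 2.0])
scale_candidates = tf.constant([0.1, 0.5, 1.0, 2.0])
dist = tfd.Normal(loc=0., scale=scale_candidates)
# Broadcast samples against candidates and sum log-probs over the sample axis.
log_likelihood = tf.reduce_sum(dist.log_prob(samples[:, tf.newaxis]), axis=0)
# Select the best candidate with an argmax-driven one-hot mask, as above.
mask = tf.one_hot(tf.argmax(log_likelihood, axis=-1), depth=4)
scale_mle = tf.reduce_sum(scale_candidates * mask, axis=-1)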
Example #2
 def accuracy(y_true, y_pred):
     """Accuracy."""
     del y_pred  # unused; logits are taken from `model` in the enclosing scope
     y_true = tf.squeeze(y_true)
     return tf.equal(
         tf.argmax(input=model.output.distribution.logits, axis=1),
         tf.cast(y_true, tf.int64))
Example #3
 def predict_single_comment(self, token_seq: List[int]):
     self.hyperparameters["batch_size"] = 1
     output_logits = self.compute_logits(np.array([token_seq],
                                                  dtype=np.int32),
                                         training=False)
     next_tok_logits = output_logits[0, :, :]  # drop the batch dimension
     next_tok_ids = tf.argmax(next_tok_logits, 1).numpy()  # greedy token id per position
     return next_tok_ids
Example #4
        def body(m, pchol, perm, matrix_diag):
            """Body of a single `tf.while_loop` iteration."""
            # Here is roughly a numpy, non-batched version of what's going to happen.
            # (See also Algorithm 1 of Harbrecht et al.)
            # 1: maxi = np.argmax(matrix_diag[perm[m:]]) + m
            # 2: maxval = matrix_diag[perm][maxi]
            # 3: perm[m], perm[maxi] = perm[maxi], perm[m]
            # 4: row = matrix[perm[m]][perm[m + 1:]]
            # 5: row -= np.sum(pchol[:m][perm[m + 1:]] * pchol[:m][perm[m]], axis=-2)
            # 6: pivot = np.sqrt(maxval); row /= pivot
            # 7: row = np.concatenate([[[pivot]], row], -1)
            # 8: matrix_diag[perm[m:]] -= row**2
            # 9: pchol[m, perm[m:]] = row

            # Find the maximal position of the (remaining) permuted diagonal.
            # Steps 1, 2 above.
            permuted_diag = batch_gather(matrix_diag, perm[..., m:])
            maxi = tf.argmax(permuted_diag, axis=-1,
                             output_type=tf.int64)[..., tf.newaxis]
            maxval = batch_gather(permuted_diag, maxi)
            maxi = maxi + m
            maxval = maxval[..., 0]
            # Update perm: Swap perm[...,m] with perm[...,maxi]. Step 3 above.
            perm = _swap_m_with_i(perm, m, maxi)
            # Step 4.
            row = batch_gather(matrix, perm[..., m:m + 1], axis=-2)
            row = batch_gather(row, perm[..., m + 1:])
            # Step 5.
            prev_rows = pchol[..., :m, :]
            prev_rows_perm_m_onward = batch_gather(prev_rows, perm[...,
                                                                   m + 1:])
            prev_rows_pivot_col = batch_gather(prev_rows, perm[..., m:m + 1])
            row -= tf.reduce_sum(input_tensor=prev_rows_perm_m_onward *
                                 prev_rows_pivot_col,
                                 axis=-2)[..., tf.newaxis, :]
            # Step 6.
            pivot = tf.sqrt(maxval)[..., tf.newaxis, tf.newaxis]
            # Step 7.
            row = tf.concat([pivot, row / pivot], axis=-1)
            # TODO(b/130899118): Pad grad fails with int64 paddings.
            # Step 8.
            paddings = tf.concat([
                tf.zeros([prefer_static.rank(pchol) - 1, 2], dtype=tf.int32),
                [[tf.cast(m, tf.int32), 0]]
            ],
                                 axis=0)
            diag_update = tf.pad(tensor=row**2, paddings=paddings)[..., 0, :]
            reverse_perm = _invert_permutation(perm)
            matrix_diag -= batch_gather(diag_update, reverse_perm)
            # Step 9.
            row = tf.pad(tensor=row, paddings=paddings)
            # TODO(bjp): Defer the reverse permutation all-at-once at the end?
            row = batch_gather(row, reverse_perm)
            pchol_shape = pchol.shape
            pchol = tf.concat([pchol[..., :m, :], row, pchol[..., m + 1:, :]],
                              axis=-2)
            tensorshape_util.set_shape(pchol, pchol_shape)
            return m + 1, pchol, perm, matrix_diag
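For reference, a non-batched NumPy rendering of the commented algorithm above (an illustrative sketch only, not the library's implementation):

import numpy as np

def pivoted_cholesky_np(matrix, max_rank):
    """Rank-`max_rank` pivoted Cholesky so that matrix ~= pchol.T @ pchol."""
    n = matrix.shape[0]
    perm = np.arange(n)
    matrix_diag = np.diag(matrix).astype(float)
    pchol = np.zeros((max_rank, n))
    for m in range(max_rank):
        # Steps 1-3: find the largest remaining permuted diagonal entry and
        # swap it into position m.
        maxi = np.argmax(matrix_diag[perm[m:]]) + m
        perm[m], perm[maxi] = perm[maxi], perm[m]
        maxval = matrix_diag[perm[m]]
        # Steps 4-5: gather the pivot row and subtract earlier contributions.
        row = matrix[perm[m], perm[m + 1:]].astype(float)
        row -= np.sum(pchol[:m, perm[m + 1:]] * pchol[:m, perm[m], None], axis=0)
        # Steps 6-7: normalize by the pivot and prepend it.
        pivot = np.sqrt(maxval)
        row = np.concatenate([[pivot], row / pivot])
        # Steps 8-9: update the residual diagonal and store the new factor row.
        matrix_diag[perm[m:]] -= row ** 2
        pchol[m, perm[m:]] = row
    return pchol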
Example #5
def argmax(a, axis=None):
    a = array_creation.asarray(a)
    a = atleast_1d(a)
    if axis is None:
        # When axis is None numpy flattens the array.
        a_t = tf.reshape(a.data, [-1])
    else:
        a_t = a.data
    return utils.tensor_to_ndarray(tf.argmax(input=a_t, axis=axis))
Example #6
    def accuracy_function(real, pred):
        accuracies = tf.equal(real, tf.argmax(pred, axis=2))

        mask = tf.math.logical_not(tf.math.equal(real, 0))
        accuracies = tf.math.logical_and(mask, accuracies)

        accuracies = tf.cast(accuracies, dtype=tf.float32)
        mask = tf.cast(mask, dtype=tf.float32)
        return tf.reduce_sum(accuracies) / tf.reduce_sum(mask)
Example #7
 def forward_step(previous_step_pair, log_prob_observation):
   log_prob_previous = previous_step_pair[0]
   log_prob = (log_prob_previous[..., tf.newaxis] +
               log_trans +
               log_prob_observation[..., tf.newaxis, :])
   most_likely_given_successor = tf.argmax(log_prob, axis=-2)
   max_log_p_given_successor = tf.reduce_max(log_prob,
                                             axis=-2)
   return (max_log_p_given_successor, most_likely_given_successor)
Example #8
def one_hot_argmax(inputs, temperature, axis=-1):
    """Returns one-hot of argmax with backward pass set to softmax-temperature."""
    vocab_size = inputs.shape[-1]
    hard = tf.one_hot(tf.argmax(inputs, axis=axis),
                      depth=vocab_size,
                      axis=axis,
                      dtype=inputs.dtype)
    soft = tf.nn.softmax(inputs / temperature, axis=axis)
    outputs = soft + tf.stop_gradient(hard - soft)
    return outputs
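A short usage sketch (hypothetical values) of the straight-through behaviour: the forward value is a hard one-hot, while gradients flow through the tempered softmax.

import tensorflow as tf

logits = tf.Variable([[2.0, 1.0, 0.1]])
with tf.GradientTape() as tape:
    hard = one_hot_argmax(logits, temperature=0.5)
    loss = tf.reduce_sum(hard * tf.constant([[0.0, 1.0, 2.0]]))
grad = tape.gradient(loss, logits)  # non-zero, via the softmax surrogate
print(hard.numpy())  # [[1. 0. 0.]]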
Example #9
    def test_unbatched_rank_one_raise(self):
        with self.assertRaises(ValueError):
            input_tensor = tf.constant([-0.6, -0.5, 0.5])
            dim = len(input_tensor)
            n = 10000000

            argmax = lambda t: tf.one_hot(tf.argmax(t, 1), dim)
            soft_argmax = perturbations.perturbed(argmax,
                                                  sigma=0.5,
                                                  num_samples=n)
            _ = soft_argmax(input_tensor)
Example #10
def _compute_calibration_bin_statistics(num_bins,
                                        logits=None,
                                        labels_true=None,
                                        labels_predicted=None):
    """Compute binning statistics required for calibration measures.

  Args:
    num_bins: int, number of probability bins, e.g. 10.
    logits: Tensor, (n, nlabels), with logits for n instances and nlabels
      classes.
    labels_true: Tensor, (n,), with tf.int32 or tf.int64 elements containing
      ground truth class labels in the range [0, nlabels).
    labels_predicted: Tensor, (n,), with tf.int32 or tf.int64 elements
      containing decisions of the predictive system.  If `None`, we will use
      the argmax decision using the `logits`.

  Returns:
    bz: Tensor, shape (2,num_bins), tf.int32, counts of incorrect (row 0) and
      correct (row 1) predictions in each of the `num_bins` probability bins.
    pmean_observed: Tensor, shape (num_bins,), tf.float32, the mean predictive
      probabilities in each probability bin.
  """

    if labels_predicted is None:
        # If no labels are provided, we take the label with the maximum probability
        # decision.  This corresponds to the optimal expected minimum loss decision
        # under 0/1 loss.
        pred_y = tf.argmax(logits, axis=1, output_type=labels_true.dtype)
    else:
        pred_y = labels_predicted

    correct = tf.cast(tf.equal(pred_y, labels_true), tf.int32)

    # Collect predicted probabilities of decisions
    pred = tf.nn.softmax(logits, axis=1)
    prob_y = tf.gather(pred, pred_y[:, tf.newaxis],
                       batch_dims=1)  # p(pred_y | x)
    prob_y = tf.reshape(prob_y, (ps.size(prob_y), ))

    # Compute b/z histogram statistics:
    # bz[0,bin] contains counts of incorrect predictions in the probability bin.
    # bz[1,bin] contains counts of correct predictions in the probability bin.
    bins = tf.histogram_fixed_width_bins(prob_y, [0.0, 1.0], nbins=num_bins)
    event_bin_counts = tf.math.bincount(correct * num_bins + bins,
                                        minlength=2 * num_bins,
                                        maxlength=2 * num_bins)
    event_bin_counts = tf.reshape(event_bin_counts, (2, num_bins))

    # Compute mean predicted probability value in each of the `num_bins` bins
    pmean_observed = tf.math.unsorted_segment_sum(prob_y, bins, num_bins)
    tiny = np.finfo(dtype_util.as_numpy_dtype(logits.dtype)).tiny
    pmean_observed = pmean_observed / (
        tf.cast(tf.reduce_sum(event_bin_counts, axis=0), logits.dtype) + tiny)

    return event_bin_counts, pmean_observed
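A hedged follow-up sketch showing how these bin statistics could be turned into an expected calibration error; the helper name here is illustrative, not part of the library:

import tensorflow as tf

def expected_calibration_error(event_bin_counts, pmean_observed):
    counts = tf.cast(event_bin_counts, tf.float32)
    per_bin_total = tf.reduce_sum(counts, axis=0)                  # (num_bins,)
    per_bin_acc = tf.math.divide_no_nan(counts[1], per_bin_total)  # correct / total
    weights = tf.math.divide_no_nan(per_bin_total, tf.reduce_sum(per_bin_total))
    return tf.reduce_sum(weights * tf.abs(per_bin_acc - pmean_observed))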
Example #11
def get_score(period_score, within_period_score):
    """Combine the period and periodicity scores."""
    within_period_score = tf.nn.sigmoid(within_period_score)[:, 0]
    per_frame_periods = tf.argmax(period_score, axis=-1) + 1
    pred_period_conf = tf.reduce_max(tf.nn.softmax(period_score, axis=-1),
                                     axis=-1)
    pred_period_conf = tf.where(tf.math.less(per_frame_periods, 3), 0.0,
                                pred_period_conf)
    within_period_score *= pred_period_conf
    within_period_score = np.sqrt(within_period_score)
    pred_score = tf.reduce_mean(within_period_score)
    return pred_score, within_period_score
Example #12
def _compute_accuracy(labels: tf.Tensor, logits: tf.Tensor) -> tf.Tensor:
  """Computes classification accuracy given logits and dense labels.

  Args:
    labels: Integer Tensor of dense labels, shape [batch_size].
    logits: Tensor of shape [batch_size, num_classes].
  Returns:
    A scalar for the classification accuracy.
  """
  correct_prediction = tf.equal(
      tf.argmax(logits, 1, output_type=tf.int32), labels)
  return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
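A quick sanity check with made-up values:

import tensorflow as tf

labels = tf.constant([0, 2], dtype=tf.int32)
logits = tf.constant([[5.0, 1.0, 0.0], [0.0, 1.0, 3.0]])
print(_compute_accuracy(labels, logits).numpy())  # 1.0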
Example #13
    def _decode(self,
                target_ids,
                target_mask,
                start_token_ids,
                encoder_output,
                encoder_mask,
                training=None):
        """Compute likelihood of target tokens under the model.

    Args:
      target_ids: tensor with shape [batch_size, target_length, hidden_size]
      target_mask: self-attention bias for decoder attention layer. [batch_size,
        input_length]
      start_token_ids: int32 tensor of shape [batch_size] for first decoder
        input.
      encoder_output: Continuous representation of input sequence. Float tensor
        with shape [batch_size, input_length, hidden_size].
      encoder_mask: Float tensor with shape [batch_size, input_length].
      training: Boolean indicating whether the call is training or inference.

    Returns:
      A tuple of the output log-probs, the output logits, and the output ids.
    """

        # Prepare inputs to decoder layers by shifting targets, embedding ids,
        # adding positional encoding and applying dropout.
        input_ids = self.get_inputs_from_targets(target_ids, start_token_ids)

        input_embs = self.embeder(input_ids,
                                  self.params["max_decoder_length"],
                                  training=training)

        outputs = self.decoder(input_embs,
                               target_mask,
                               encoder_output,
                               encoder_mask,
                               training=training)

        logits = self.embeder.linear(outputs)
        output_ids = tf.cast(tf.argmax(logits, axis=-1), tf.int32)

        log_probs = -tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=target_ids, logits=logits)
        log_probs = tf.where(target_ids > 0, log_probs,
                             tf.zeros_like(log_probs, tf.float32))

        return (
            tf.identity(log_probs, name="log_probs"),
            tf.identity(logits, name="logits"),
            tf.cast(output_ids, tf.int32, name="pred_ids"),
        )
Example #14
                    def _reduce_multiple_steps():
                        """Perform `reduce_max` operation when `num_steps` > 1."""
                        def forward_step(previous_step_pair,
                                         log_prob_observation):
                            log_prob_previous = previous_step_pair[0]
                            log_prob = (
                                log_prob_previous[..., tf.newaxis] +
                                self._log_trans +
                                log_prob_observation[..., tf.newaxis, :])
                            most_likely_given_successor = tf.argmax(log_prob,
                                                                    axis=-2)
                            max_log_p_given_successor = tf.reduce_max(log_prob,
                                                                      axis=-2)
                            return (max_log_p_given_successor,
                                    most_likely_given_successor)

                        forward_log_probs, all_most_likely_given_successor = tf.scan(
                            forward_step,
                            observation_log_probs[1:],
                            initializer=(log_prob,
                                         tf.zeros(tf.shape(log_prob),
                                                  dtype=tf.int64)),
                            name="forward_log_probs")

                        most_likely_end = tf.argmax(forward_log_probs[-1],
                                                    axis=-1)

                        # We require the operation that gives C from A and B where
                        # C[i...j] = A[i...j, B[i...j]]
                        # and A = most_likely_given_successor
                        #     B = most_likely_successor.
                        # tf.gather requires indices of known shape so instead we use
                        # reduction with tf.one_hot(B) to pick out elements from B
                        def backward_step(most_likely_successor,
                                          most_likely_given_successor):
                            return tf.reduce_sum(
                                (most_likely_given_successor *
                                 tf.one_hot(most_likely_successor,
                                            self._num_states,
                                            dtype=tf.int64)),
                                axis=-1)

                        backward_scan = tf.scan(
                            backward_step,
                            all_most_likely_given_successor,
                            most_likely_end,
                            reverse=True)
                        most_likely_sequences = tf.concat(
                            [backward_scan, [most_likely_end]], axis=0)
                        return distribution_util.move_dimension(
                            most_likely_sequences, 0, -1)
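A small standalone demo of the one-hot reduction described in the comment above, i.e. picking C[i, j] = A[i, j, B[i, j]] without tf.gather (toy values):

import tensorflow as tf

A = tf.reshape(tf.range(24, dtype=tf.int64), [2, 3, 4])
B = tf.constant([[0, 3, 1], [2, 2, 0]], dtype=tf.int64)
C = tf.reduce_sum(A * tf.one_hot(B, depth=4, dtype=tf.int64), axis=-1)
# Same result as tf.gather(A, B, batch_dims=2).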
Example #15
    def testMultiplicativeInverse(self):
        batch_size = 3
        vocab_size = 79
        length = 5
        inputs = np.random.randint(0,
                                   vocab_size - 1,
                                   size=(batch_size, length))
        one_hot_inputs = tf.one_hot(inputs, depth=vocab_size)

        one_hot_inv = ed.layers.utils.multiplicative_inverse(
            one_hot_inputs, vocab_size)
        inv_inputs = tf.argmax(one_hot_inv, axis=-1)
        inputs_inv_inputs = tf.math.floormod(inputs * inv_inputs, vocab_size)
        self.assertAllEqual(inputs_inv_inputs, np.ones((batch_size, length)))
Example #16
    def update_state(self, labels, probabilities, **kwargs):
        """Updates this metric.

    Args:
      labels: Tensor of shape (N,) of class labels, one per example.
      probabilities: Tensor of shape (N,) or (N, k) of normalized probabilities
        associated with the True class in the binary case or with each of k
        classes in the multiclass case.
      **kwargs: Other potential keywords, which will be ignored by this method.
    """
        del kwargs  # unused
        labels = tf.squeeze(tf.convert_to_tensor(labels))
        probabilities = tf.convert_to_tensor(probabilities, self.dtype)

        if self.num_classes == 2:
            # Explicitly ensure probs have shape [n, 2] instead of [n, 1] or [n,].
            n = tf.shape(probabilities)[0]
            k = tf.size(probabilities) // n
            probabilities = tf.reshape(probabilities, [n, k])
            probabilities = tf.cond(
                k < 2,
                lambda: tf.concat([1. - probabilities, probabilities], axis=1),
                lambda: probabilities)

        pred_labels = tf.argmax(probabilities, axis=1)
        pred_probs = tf.reduce_max(probabilities, axis=1)
        correct_preds = tf.equal(pred_labels, tf.cast(labels,
                                                      pred_labels.dtype))
        correct_preds = tf.cast(correct_preds, self.dtype)

        bin_indices = tf.histogram_fixed_width_bins(pred_probs,
                                                    tf.constant([0., 1.],
                                                                self.dtype),
                                                    nbins=self.num_bins)
        batch_correct_sums = tf.math.unsorted_segment_sum(
            data=tf.cast(correct_preds, self.dtype),
            segment_ids=bin_indices,
            num_segments=self.num_bins)
        batch_prob_sums = tf.math.unsorted_segment_sum(
            data=pred_probs,
            segment_ids=bin_indices,
            num_segments=self.num_bins)
        batch_counts = tf.math.unsorted_segment_sum(
            data=tf.ones_like(bin_indices),
            segment_ids=bin_indices,
            num_segments=self.num_bins)
        batch_counts = tf.cast(batch_counts, self.dtype)
        self.correct_sums.assign_add(batch_correct_sums)
        self.prob_sums.assign_add(batch_prob_sums)
        self.counts.assign_add(batch_counts)
Example #17
    def sample(self, gray_cond, mode='argmax'):
        output = {}

        z_gray = self.encoder(gray_cond, training=False)
        if self.is_parallel_loss:
            z_logits = self.parallel_dense(z_gray)
            parallel_image = tf.argmax(z_logits, axis=-1, output_type=tf.int32)
            parallel_image = self.post_process_image(parallel_image)

            output['parallel'] = parallel_image

        image, proba = self.autoregressive_sample(z_gray=z_gray, mode=mode)
        output['auto_%s' % mode] = image
        output['proba'] = proba
        return output
Example #18
 def testDefaultAccuracy(self):
   # Model returns logits for 8 samples and 10 classes.
   n_sample, n_out = 8, 10
   model = self._create_mock_model(n_out=n_out)
   x = tf.ones((n_sample, 2))
   y = tf.ones((n_sample,), dtype=tf.int32)
   _, acc, total_samples = train_utils.cross_entropy_loss(
       model, (x, y), calculate_accuracy=True)
   self.assertEqual(total_samples, n_sample)
   logits = self.get_logits(n_sample, n_out)
   predictions = tf.cast(tf.argmax(logits, 1), y.dtype)
   acc_obj = tf.keras.metrics.Accuracy()
   acc_obj.update_state(tf.squeeze(y), predictions)
   true_acc = acc_obj.result().numpy()
   self.assertAllClose(acc, true_acc)
Example #19
    def __call__(self, x, y):
        h1_output = tf.argmax(self.h1(x), axis=1)
        h2_output = self.h2(x)
        h1_diff = h1_output - y
        h1_correct = (h1_diff == 0)
        _, x_support = tf.dynamic_partition(
            x, tf.dtypes.cast(h1_correct, tf.int32), 2)
        _, y_support = tf.dynamic_partition(
            y, tf.dtypes.cast(h1_correct, tf.int32), 2)
        h2_support_output = self.h2(x_support)
        dissonance = self.dissonance(h2_support_output, y_support)
        new_error_loss = self.nll_loss(y,
                                       h2_output) + self.lambda_c * dissonance

        return new_error_loss
Example #20
def _tf_example_to_step_ds(tf_example: tf.train.Example,
                           episode_length: int) -> Dict[str, Any]:
    """Create an episode from a TF example."""

    # Parse tf.Example.
    def sequence_feature(shape, dtype=tf.float32):
        return tf.io.FixedLenFeature(shape=[episode_length] + shape,
                                     dtype=dtype)

    feature_description = {
        'episode_id': tf.io.FixedLenFeature([], tf.int64),
        'start_idx': tf.io.FixedLenFeature([], tf.int64),
        'episode_return': tf.io.FixedLenFeature([], tf.float32),
        'observations_pixels': sequence_feature([], tf.string),
        'observations_reward': sequence_feature([]),
        # actions are one-hot arrays.
        'observations_action': sequence_feature([15]),
        'actions': sequence_feature([], tf.int64),
        'rewards': sequence_feature([]),
        'discounted_rewards': sequence_feature([]),
        'discounts': sequence_feature([]),
    }

    data = tf.io.parse_single_example(tf_example, feature_description)

    episode = {
        # Episode Metadata
        'episode_id': data['episode_id'],
        'episode_return': data['episode_return'],
        'steps': {
            'observation': {
                'pixels':
                data['observations_pixels'],
                'last_action':
                tf.argmax(data['observations_action'],
                          axis=1,
                          output_type=tf.int64),
                'last_reward':
                data['observations_reward'],
            },
            'action': data['actions'],
            'reward': data['rewards'],
            'discount': data['discounts'],
            'is_first': [True] + [False] * (episode_length - 1),
            'is_terminal': [False] * (episode_length)
        }
    }
    return episode
Example #21
def test(model, dataset, step_counter):
    """Perform an evaluation of `model` on the examples from `dataset`."""
    avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
    accuracy = tf.keras.metrics.Accuracy('accuracy', dtype=tf.float32)

    for features in dataset:
        images, labels = get_image_labels(features, FLAGS.shuffled_labels)
        logits = model(images, labels, training=False, step=step_counter)
        loss_value, _ = loss(logits, labels)
        avg_loss(loss_value)
        accuracy(tf.argmax(logits, axis=1, output_type=tf.int64),
                 tf.cast(labels, tf.int64))
    print('Test set: Average loss: %.4f, Accuracy: %4f%%\n' %
          (avg_loss.result(), 100 * accuracy.result()))
    with tf.summary.always_record_summaries():
        tf.summary.scalar('loss', avg_loss.result(), step=step_counter)
        tf.summary.scalar('accuracy', accuracy.result(), step=step_counter)
Example #22
    def accuracy_function(real, pred):
        '''Custom accuracy function that masks out padding tokens.

        Args:
            real: the real tokens.
            pred: the predicted token logits.

        Returns:
            Tensor with the calculated accuracy.
        '''

        accuracies = tf.equal(real, tf.argmax(pred, axis=2))

        mask = tf.math.logical_not(tf.math.equal(real, 0))
        accuracies = tf.math.logical_and(mask, accuracies)

        accuracies = tf.cast(accuracies, dtype=tf.float32)
        mask = tf.cast(mask, dtype=tf.float32)

        return tf.reduce_sum(accuracies) / tf.reduce_sum(mask)
Example #23
  def sample(self, gray_cond, inputs, mode='argmax'):
    output = dict()
    output['low_res_cond'] = tf.cast(inputs, dtype=tf.uint8)
    logits = self.upsampler(inputs, gray_cond, training=False)

    if mode == 'argmax':
      samples = tf.argmax(logits, axis=-1)
    elif mode == 'sample':
      batch_size, height, width, channels = logits.shape[:-1]
      logits = tf.reshape(logits, (batch_size*height*width*channels, -1))
      samples = tf.random.categorical(logits, num_samples=1,
                                      dtype=tf.int32)[:, 0]
      samples = tf.reshape(samples, (batch_size, height, width, channels))

    samples = tf.cast(samples, dtype=tf.uint8)
    output[f'high_res_{mode}'] = samples
    return output
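A toy illustration (made-up logits) of the two modes above: deterministic argmax decoding versus stochastic sampling with tf.random.categorical.

import tensorflow as tf

toy_logits = tf.math.log([[0.1, 0.9], [0.8, 0.2]])
print(tf.argmax(toy_logits, axis=-1).numpy())                    # [1 0]
print(tf.random.categorical(toy_logits, num_samples=1).numpy())  # stochastic, shape (2, 1)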
Example #24
    def decode_tf(self, ids: tf.Tensor) -> tf.Tensor:
        """Detokenizes int32 Tensor to a string Scalar, up to EOS."""
        valid_ids = tf.constant(ids)

        if self.unk_id is not None:
            valid_ids = tf.where(tf.less(valid_ids, self._base_vocab_size),
                                 valid_ids, self.unk_id)

        if self.eos_id is not None:
            # Argmax always returns the first occurrence.
            first_eos = tf.argmax(tf.equal(valid_ids, self.eos_id))
            valid_ids = tf.cond(
                tf.logical_and(tf.equal(first_eos, 0),
                               tf.not_equal(valid_ids[0], self.eos_id)),
                lambda: valid_ids, lambda: valid_ids[:first_eos])

        return self._decode_tf(valid_ids)
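A toy check of the first-EOS trick in the branch above (eos_id assumed to be 1 for illustration):

import tensorflow as tf

ids = tf.constant([5, 8, 1, 3, 1])
first_eos = tf.argmax(tf.cast(tf.equal(ids, 1), tf.int32))  # -> 2, the first occurrence
print(ids[:first_eos].numpy())  # [5 8]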
Example #25
    def compute_loss_and_acc(
        self,
        rnn_output_logits: tf.Tensor,
        batch_features: Dict[str, tf.Tensor],
        batch_labels: Dict[str, tf.Tensor],
    ) -> LanguageModelLoss:
        """
        Args:
            rnn_output_logits: tf.float32 Tensor of shape [B, T, V], representing
                logits as computed by the language model.
            batch_features: Dict of batch features; "num_graphs_in_batch" is read
                here.
            batch_labels: Dict of batch labels; "target_value" holds the tf.int32
                target token sequence of shape [B, T].

        Returns:
            LanguageModelLoss tuple, containing both the average per-token loss
            as well as the number of (non-padding) token predictions and how many
            of those were correct.
        
        Note:
            We assume that the two inputs are shifted by one from each other, i.e.,
            that rnn_output_logits[i, t, :] are the logits for sample i after consuming
            input t; hence its target output is assumed to be target_token_seq[i, t+1].
        """

        target_token_seq = tf.cast(batch_labels["target_value"], tf.int32)
        num_graphs = tf.cast(batch_features["num_graphs_in_batch"], tf.float32)

        mask = tf.math.not_equal(
            target_token_seq[:, 1:],
            self.vocab_target.get_id_or_unk(self.vocab_target.get_pad()))

        num_tokens = tf.math.count_nonzero(mask)
        prediction = tf.cast(tf.argmax(rnn_output_logits, 2), tf.int32)
        compared = tf.cast(tf.math.equal(target_token_seq[:, 1:], prediction),
                           tf.int32) * tf.cast(mask, tf.int32)
        num_correct_tokens = tf.math.count_nonzero(compared)

        # 7# Mask out CE loss for padding tokens
        token_ce_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=tf.boolean_mask(rnn_output_logits, mask),
            labels=tf.boolean_mask(target_token_seq[:, 1:], mask))
        token_ce_loss = tf.reduce_sum(token_ce_loss)

        return LanguageModelLoss(token_ce_loss, num_tokens, num_correct_tokens)
Example #26
  def sample(self, gray_cond, bit_cond, mode='argmax'):
    output = dict()
    bit_cond_viz = base_utils.convert_bits(bit_cond, n_bits_in=3, n_bits_out=8)
    output['bit_cond'] = tf.cast(bit_cond_viz, dtype=tf.uint8)

    logits = self.upsampler(bit_cond, gray_cond, training=False)

    if mode == 'argmax':
      samples = tf.argmax(logits, axis=-1)
    elif mode == 'sample':
      batch_size, height, width, channels = logits.shape[:-1]
      logits = tf.reshape(logits, (batch_size*height*width*channels, -1))
      samples = tf.random.categorical(logits, num_samples=1,
                                      dtype=tf.int32)[:, 0]
      samples = tf.reshape(samples, (batch_size, height, width, channels))

    samples = tf.cast(samples, dtype=tf.uint8)
    output[f'bit_up_{mode}'] = samples
    return output
Example #27
    def compute_loss_and_acc(self, rnn_output_logits: tf.Tensor,
                             target_token_seq: tf.Tensor) -> LanguageModelLoss:
        """
        Args:
            rnn_output_logits: tf.float32 Tensor of shape [B, T, V], representing
                logits as computed by the language model.
            target_token_seq: tf.int32 Tensor of shape [B, T], representing
                the target token sequence.

        Returns:
            LanguageModelLoss tuple, containing both the average per-token loss
            as well as the number of (non-padding) token predictions and how many
            of those were correct.
        
        Note:
            We assume that the two inputs are shifted by one from each other, i.e.,
            that rnn_output_logits[i, t, :] are the logits for sample i after consuming
            input t; hence its target output is assumed to be target_token_seq[i, t+1].
        """
        # TODO 5# 4) Compute CE loss for all but the last timestep:
        token_ce_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=target_token_seq[:, 1:],
            logits=rnn_output_logits[:, :-1, :])
        # tf.reduce_mean(token_ce_loss) is deferred until after masking (see TODO 7# below).

        # TODO 6# Compute number of (correct) predictions
        pad_id = self.vocab.get_id_or_unk(self.vocab.get_pad())
        mask = tf.logical_not(tf.equal(target_token_seq, pad_id))[:, 1:]

        # compute predictions correctness and drop the padding by applying the mask
        predictions_status = tf.boolean_mask(
            tf.equal(target_token_seq[:, 1:],
                     tf.argmax(rnn_output_logits[:, :-1], axis=2)), mask)

        num_tokens = len(predictions_status)
        num_correct_tokens = tf.math.count_nonzero(predictions_status,
                                                   dtype=tf.float32)

        # TODO 7# Mask out CE loss for padding tokens
        token_ce_loss = tf.boolean_mask(token_ce_loss, mask)
        token_ce_loss = tf.reduce_mean(token_ce_loss)

        return LanguageModelLoss(token_ce_loss, num_tokens, num_correct_tokens)
Example #28
def argmax(a, axis=None):
    """Returns the indices of the maximum values along an array axis.

  Args:
    a: array_like. Could be an ndarray, a Tensor or any object that can
      be converted to a Tensor using `tf.convert_to_tensor`.
    axis: Optional. The axis along which to compute argmax. If None, index of
      the max element in the flattened array is returned.
  Returns:
    An ndarray with the same shape as `a` with `axis` removed if not None.
    If `axis` is None, a scalar array is returned.
  """
    a = array_creation.asarray(a)
    if axis is None or utils.isscalar(a):
        # When axis is None or the array is a scalar, numpy flattens the array.
        a_t = tf.reshape(a.data, [-1])
    else:
        a_t = a.data
    return utils.tensor_to_ndarray(tf.argmax(input=a_t, axis=axis))
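Illustrative use, assuming array_creation.asarray wraps nested lists into the shim's ndarray type (values are made up):

x = array_creation.asarray([[1.0, 5.0, 2.0], [7.0, 0.0, 3.0]])
print(argmax(x))          # index into the flattened array -> 3
print(argmax(x, axis=1))  # per-row indices -> [1, 0]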
Example #29
        def _reshape_tensors(data):
            data['note_active_frame_indices'] = tf.reshape(
                data['note_active_frame_indices'], (-1, 128))
            data['note_active_velocities'] = tf.reshape(
                data['note_active_velocities'], (-1, 128))
            data['instrument_id'] = inst_vocab.lookup(data['instrument_id'])
            data['midi'] = tf.argmax(data['note_active_frame_indices'],
                                     axis=-1)
            data['f0_hz'] = data['f0_hz'][..., tf.newaxis]
            data['loudness_db'] = data['loudness_db'][..., tf.newaxis]
            onsets = tf.reduce_sum(tf.reshape(data['note_onsets'], (-1, 128)),
                                   axis=-1)
            data['onsets'] = tf.cast(onsets > 0, tf.int64)
            offsets = tf.reduce_sum(tf.reshape(data['note_offsets'],
                                               (-1, 128)),
                                    axis=-1)
            data['offsets'] = tf.cast(offsets > 0, tf.int64)

            return data
Example #30
    def select_actor_action(self, env_output, agent_output):
        assert self._mode, 'mode must be set for selecting action in actor.'
        oracle_next_action = env_output.observation[
            streetview_constants.ORACLE_NEXT_ACTION]
        if self._mode == 'train':
            if self._loss_type == common.CE_LOSS:
                # This is teacher-forcing mode, so choose action same as oracle action.
                action_idx = oracle_next_action
            elif self._loss_type == common.AC_LOSS:
                action_idx = tfp.distributions.Categorical(
                    logits=agent_output.policy_logits).sample()
        else:
            # In non-train modes, choose greedily.
            action_idx = tf.argmax(agent_output.policy_logits, axis=-1)

        # Return ActorAction and the action to be passed to the env step function.
        return common.ActorAction(
            chosen_action_idx=int(action_idx.numpy()),
            oracle_next_action_idx=int(
                oracle_next_action.numpy())), action_idx.numpy()