def train(self, sentences):
    """Run one next-word-prediction training step and return its loss.

    The sentences are tokenized, mapped to vocabulary ids, embedded, and fed
    through the RNN layer; every position is trained to predict the token
    that follows it. Positions whose input token is padding ("") or the
    end marker ("<E>") are excluded from the loss.

    Every variable watched by the gradient tape is updated in place with
    `variable.assign_sub(gradient)`, i.e. the raw gradient is subtracted
    with no separate learning-rate factor.

    Args:
      sentences: Batch of sentences, in whatever form `self._tokenize`
        accepts.

    Returns:
      The scalar masked mean cross-entropy loss for this batch.
    """
    indices, values, dense_shape = self._tokenize(sentences)
    sparse_tokens = tf.sparse.SparseTensor(
        indices=indices, values=values, dense_shape=dense_shape)
    # Pad ragged sentences with "" so padding can be recognised below.
    dense_tokens = tf.sparse.to_dense(sparse_tokens, default_value="")

    sparse_ids = tf.sparse.SparseTensor(
        indices=sparse_tokens.indices,
        values=self._words_to_indices(sparse_tokens.values),
        dense_shape=sparse_tokens.dense_shape)
    dense_ids = tf.sparse.to_dense(sparse_ids, default_value=0)

    # Inputs drop the last column, targets drop the first: each input
    # position is paired with the token that follows it.
    input_ids = dense_ids[:, :-1]
    target_ids = dense_ids[:, 1:]
    input_tokens = dense_tokens[:, :-1]

    # Only positions holding a real, non-terminal token contribute to the
    # loss.
    not_padding = tf.logical_not(tf.equal(input_tokens, ""))
    not_end_marker = tf.logical_not(tf.equal(input_tokens, "<E>"))
    input_mask = tf.cast(tf.logical_and(not_padding, not_end_marker), tf.int32)

    with tf.GradientTape() as tape:
        embedded = tf.nn.embedding_lookup(self._embeddings, input_ids)
        initial_state = self._lstm_cell.get_initial_state(embedded)
        rnn_output = self._rnn_layer(
            inputs=embedded, initial_state=initial_state)

        # Collapse all leading axes so each row is one position's output.
        flat_output = tf.reshape(
            rnn_output, [-1, self._lstm_cell.output_size])
        logits = self._logit_layer(flat_output)

        flat_targets = tf.reshape(target_ids, [-1])
        flat_weights = tf.cast(tf.reshape(input_mask, [-1]), tf.float32)

        per_token_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=flat_targets, logits=logits)

        # Mean of the per-token losses over the unmasked positions.
        final_loss = tf.math.divide(
            tf.reduce_sum(tf.multiply(per_token_loss, flat_weights)),
            tf.reduce_sum(flat_weights),
            name="final_loss")

    variables = tape.watched_variables()
    for variable, grad in zip(variables,
                              tape.gradient(final_loss, variables)):
        variable.assign_sub(grad)

    return final_loss
Beispiel #2
0
 def _multiply(self, x, y):
     """Multiply `x` by `y`.

     Uses the `*` operator when `self._use_operator` is truthy, and
     `tf.multiply` otherwise.
     """
     if not self._use_operator:
         return tf.multiply(x, y)
     return x * y
Beispiel #3
0
def _train_op_fn(loss,
                 optimizer_fn,
                 l2_regularization=-1,
                 gradient_max_norm=-1,
                 use_synchronous_optimizer=False):
    """Build the op that optimizes `loss`, plus any session hooks it needs.

    Optionally adds L2 weight regularization over all trainable variables,
    clips gradients by global norm, and wraps the optimizer in a
    `SyncReplicasOptimizer`.

    Args:
      loss: The training loss before regularization.
      optimizer_fn: Zero-argument callable returning the optimizer to use.
      l2_regularization: Float multiplier for the L2 norm of every trainable
        variable; the penalty is added only when this is > 0.
      gradient_max_norm: Float global-norm clipping threshold; clipping is
        applied only when this is > 0.
      use_synchronous_optimizer: Bool, whether to use synchronous
        optimization.

    Returns:
      A `(train_op, train_hooks)` tuple: the grouped apply-gradients op
      (created under a control dependency on the graph's UPDATE_OPS
      collection) and a list of session run hooks. The list is empty unless
      the synchronous optimizer is used.
    """
    objective = loss
    if l2_regularization > 0:
        l2_terms = []
        for variable in tf.compat.v1.trainable_variables():
            l2_terms.append(
                tf.multiply(tf.nn.l2_loss(variable),
                            l2_regularization,
                            name="l2_weight_loss"))
        objective = tf.add_n(l2_terms + [loss], name="total_loss")

    global_step = tf.compat.v1.train.get_or_create_global_step()

    optimizer = optimizer_fn()

    hooks = []
    if use_synchronous_optimizer:
        run_config = tf.estimator.RunConfig()
        num_workers = run_config.num_worker_replicas + 1
        tolerance = _compute_tolerance(num_workers)
        optimizer = tf.compat.v1.train.SyncReplicasOptimizer(
            optimizer,
            replicas_to_aggregate=num_workers - tolerance,
            total_num_replicas=num_workers)
        hooks.append(optimizer.make_session_run_hook(run_config.is_chief))

    gradient_pairs = optimizer.compute_gradients(
        loss=objective, var_list=tf.compat.v1.trainable_variables())
    # TODO(b/172564129): switch to tf.contrib.estimator.clip_gradients_by_norm
    if gradient_max_norm > 0.0:
        raw_grads = [grad for grad, _ in gradient_pairs]
        variables = [var for _, var in gradient_pairs]
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads,
                                                  gradient_max_norm)
        gradient_pairs = list(zip(clipped_grads, variables))

    # Only the synchronous path hands the global step to apply_gradients.
    if use_synchronous_optimizer:
        apply_args = (gradient_pairs, global_step)
    else:
        apply_args = (gradient_pairs,)
    apply_op = optimizer.apply_gradients(*apply_args)

    pending_updates = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        return tf.group(apply_op), hooks
Beispiel #4
0
def fft_convolve(audio: tf.Tensor,
                 impulse_response: tf.Tensor,
                 padding: Text = 'same',
                 delay_compensation: int = -1) -> tf.Tensor:
    """Filter audio with (optionally time-varying) impulse responses via FFT.

    The audio is cut into non-overlapping frames, one per impulse-response
    frame. Each frame is convolved with its impulse response by multiplying
    their real FFTs, and the filtered frames are overlap-added back into a
    single signal.

    Args:
      audio: Input audio. Tensor of shape [batch, audio_timesteps].
      impulse_response: Finite impulse response to convolve. Either a 2-D
        Tensor of shape [batch, ir_size] (a single time-invariant filter) or
        a 3-D Tensor of shape [batch, ir_frames, ir_size] (a time-varying
        filter, one response per audio frame).
      padding: Either 'valid' or 'same'. For 'same' the final output is the
        same size as the input audio (audio_timesteps). For 'valid' the
        audio is extended to include the tail of the impulse response
        (audio_timesteps + ir_timesteps - 1).
      delay_compensation: Samples to crop from the start of the output audio
        to compensate for group delay of the impulse response. If less than
        0 it defaults to automatically calculating a constant group delay of
        the windowed linear phase filter from frequency_impulse_response().

    Returns:
      audio_out: Convolved audio. Tensor of shape
        [batch, audio_timesteps + ir_timesteps - 1] ('valid' padding) or
        [batch, audio_timesteps] ('same' padding).

    Raises:
      ValueError: If audio and impulse response have different batch sizes.
      ValueError: If framing the audio with
        frame size = ceil(audio_size / ir_frames) does not produce exactly
        ir_frames frames.
    """
    audio = tf_float32(audio)
    impulse_response = tf_float32(impulse_response)

    # A 2-D impulse response is a single filter: give it a frame axis.
    if len(impulse_response.shape.as_list()) == 2:
        impulse_response = impulse_response[:, tf.newaxis, :]

    ir_batch, ir_frame_count, ir_len = impulse_response.shape.as_list()
    audio_batch, audio_len = audio.shape.as_list()

    if audio_batch != ir_batch:
        raise ValueError(
            'Batch size of audio ({}) and impulse response ({}) must '
            'be the same.'.format(audio_batch, ir_batch))

    # Non-overlapping framing: the hop equals the frame length.
    frame_len = int(np.ceil(audio_len / ir_frame_count))
    framed_audio = tf.signal.frame(audio, frame_len, frame_len, pad_end=True)

    audio_frame_count = int(framed_audio.shape[1])
    if audio_frame_count != ir_frame_count:
        raise ValueError(
            'Number of Audio frames ({}) and impulse response frames ({}) do not '
            'match. For small hop size = ceil(audio_size / n_ir_frames), '
            'number of impulse response frames must be a multiple of the audio '
            'size.'.format(audio_frame_count, ir_frame_count))

    # Multiplying spectra is convolution in time; both inputs are
    # transformed at the same FFT length.
    fft_len = get_fft_size(frame_len, ir_len, power_of_2=True)
    audio_spectrum = tf.signal.rfft(framed_audio, [fft_len])
    ir_spectrum = tf.signal.rfft(impulse_response, [fft_len])
    filtered_frames = tf.signal.irfft(
        tf.multiply(audio_spectrum, ir_spectrum))

    # Stitch the filtered frames back together at the original hop.
    stitched = tf.signal.overlap_and_add(filtered_frames, frame_len)

    return crop_and_compensate_delay(stitched, audio_len, ir_len, padding,
                                     delay_compensation)
Beispiel #5
0
 def mul_or_and(x1, x2):
     """Combine `x1` and `x2`: logical AND for booleans, product otherwise.

     When `x1` is boolean, `x2` is asserted to be boolean as well.
     """
     if x1.dtype != tf.bool:
         return tf.multiply(x1, x2)
     assert x2.dtype == tf.bool
     return tf.logical_and(x1, x2)
    def draw_samples(self, alpha, scale):
        r"""Draw one sample per element from the robust distribution.

        This function implements Algorithm 1 of the paper. Rather than
        drawing N samples from a single distribution, it draws one sample
        from each of N distributions, each parametrized by its own alpha and
        scale. It does so by running rejection sampling for all N
        distributions at once and repeating until every distribution has had
        at least one proposal accepted. All samples assume a zero mean --- to
        get non-zero mean samples, just add each mean to each sample.

        Args:
          alpha: A TF tensor/scalar or numpy array/scalar of floats where
            each element is the shape parameter of that element's
            distribution.
          scale: A TF tensor/scalar or numpy array/scalar of floats where
            each element is the scale parameter of that element's
            distribution. Must have the same shape and dtype as `alpha`.

        Returns:
          A TF tensor with the same shape and precision as `alpha` and
          `scale`, where each element is a sample drawn from the zero-mean
          distribution specified for that element by `alpha` and `scale`.
        """
        float_dtype = alpha.dtype
        # `scale` must share `alpha`'s dtype.
        tf.debugging.assert_type(scale, float_dtype)
        preconditions = [
            # Every scale must be strictly positive.
            tf.Assert(tf.reduce_all(scale > 0.), [scale]),
            # Every alpha must be non-negative.
            tf.Assert(tf.reduce_all(alpha >= 0.), [alpha]),
            # The two parameter tensors must agree in shape.
            tf.Assert(
                tf.reduce_all(tf.equal(tf.shape(alpha), tf.shape(scale))),
                [tf.shape(alpha), tf.shape(scale)]),
        ]

        with tf.control_dependencies(preconditions):
            sample_shape = tf.shape(alpha)

            # Proposal and acceptance distributions for rejection sampling.
            # The sqrt(2) Cauchy scale corrects for our differing
            # conventions for standardization.
            proposal = tfp.distributions.Cauchy(loc=0., scale=tf.sqrt(2.))
            unit_uniform = tfp.distributions.Uniform(low=0., high=1.)

            def keep_going(_, done):
                """Continue while any element is still without a sample."""
                return tf.logical_not(tf.reduce_all(done))

            def propose_and_filter(current, done):
                """Draw one proposal per element and accept or reject it."""
                candidate = tf.cast(
                    proposal.sample(sample_shape), float_dtype)
                unit = tf.cast(1, float_dtype)

                # NLL of each proposal under its own target distribution.
                target_nll = self.nllfun(candidate, alpha, unit)
                # Bound on the NLL. The approximate loss is avoided because
                # it may cause unpredictable behavior when sampling.
                bound_nll = general.lossfun(
                    candidate,
                    tf.cast(0, float_dtype),
                    unit,
                    approximate=False) + self.log_base_partition_function(
                        alpha)

                # One uniform draw per element decides acceptance.
                coin = tf.cast(
                    unit_uniform.sample(sample_shape), float_dtype)
                passed = coin <= tf.math.exp(bound_nll - target_nll)

                # Accepted proposals overwrite the stored sample and mark
                # their element as done.
                return (tf.where(passed, candidate, current),
                        tf.logical_or(passed, done))

            # The initial samples are placeholders that get overwritten; the
            # initial `done` flags must all be False.
            initial_state = (tf.zeros(sample_shape, float_dtype),
                             tf.zeros(sample_shape, dtype=bool))

            unit_samples, _ = tf.while_loop(cond=keep_going,
                                            body=propose_and_filter,
                                            loop_vars=initial_state)

            # The distribution is a location-scale family: sample from
            # p(x | 0, \alpha, 1), then multiply each sample by `scale`.
            return tf.multiply(unit_samples, scale)
Beispiel #7
0
def update_confusion_matrix_variables(
    variables_to_update,
    y_true,
    y_pred,
    thresholds,
    top_k=None,
    class_id=None,
    sample_weight=None,
    multi_label=False,
    label_weights=None,
    thresholds_distributed_evenly=False,
):
    """Returns op to update the given confusion matrix variables.

    For every pair of values in y_true and y_pred:

    true_positive: y_true == True and y_pred > thresholds
    false_negatives: y_true == True and y_pred <= thresholds
    true_negatives: y_true == False and y_pred <= thresholds
    false_positive: y_true == False and y_pred > thresholds

    The results will be weighted and added together. When multiple thresholds are
    provided, we will repeat the same for every threshold.

    For estimation of these metrics over a stream of data, the function creates an
    `update_op` operation that updates the given variables.

    If `sample_weight` is `None`, weights default to 1.
    Use weights of 0 to mask values.

    Args:
      variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
        and corresponding variables to update as values.
      y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
      y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
        the range `[0, 1]`.
      thresholds: A float value, float tensor, python list, or tuple of float
        thresholds in `[0, 1]`, or NEG_INF (used when top_k is set).
      top_k: Optional int, indicates that the positive labels should be limited to
        the top k predictions.
      class_id: Optional int, limits the prediction and labels to the class
        specified by this argument.
      sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
        `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must
        be either `1`, or the same as the corresponding `y_true` dimension).
      multi_label: Optional boolean indicating whether multidimensional
        prediction/labels should be treated as multilabel responses, or flattened
        into a single label. When True, the values of `variables_to_update` must
        have a second dimension equal to the number of labels in y_true and
        y_pred, and those tensors must not be RaggedTensors.
      label_weights: (optional) tensor of non-negative weights for multilabel
        data. The weights are applied when calculating TP, FP, FN, and TN without
        explicit multilabel handling (i.e. when the data is to be flattened).
      thresholds_distributed_evenly: Boolean, whether the thresholds are evenly
        distributed within the list. An optimized method will be used if this is
        the case. See _update_confusion_matrix_variables_optimized() for more
        details.

    Returns:
      Update op, or `None` when `variables_to_update` is `None`.

    Raises:
      ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
        `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if
        `variables_to_update` contains invalid keys, or if `label_weights` is
        given together with `multi_label=True`.
    """
    if multi_label and label_weights is not None:
        raise ValueError(
            "`label_weights` for multilabel data should be handled "
            "outside of `update_confusion_matrix_variables` when "
            "`multi_label` is True.")
    if variables_to_update is None:
        return
    # At least one key must name a known confusion matrix entry; keys that
    # are not known entries are rejected further down.
    if not any(key
               for key in variables_to_update if key in list(ConfusionMatrix)):
        raise ValueError(
            "Please provide at least one valid confusion matrix "
            "variable to update. Valid variable key options are: "
            f'"{list(ConfusionMatrix)}". Received: "{variables_to_update.keys()}"'
        )

    # The dtype of the first variable is used for every cast below.
    variable_dtype = list(variables_to_update.values())[0].dtype

    y_true = tf.cast(y_true, dtype=variable_dtype)
    y_pred = tf.cast(y_pred, dtype=variable_dtype)

    if thresholds_distributed_evenly:
        # Check whether the thresholds have a leading or trailing epsilon added
        # for floating point imprecision. The leading and trailing thresholds
        # are handled a bit differently, as corner cases.
        # At this point, thresholds should be a list/array with more than 2 items,
        # and ranged between [0, 1]. See is_evenly_distributed_thresholds() for more
        # details.
        # Note: `thresholds_with_epsilon` is only defined on this path and is
        # only read under the same condition below.
        thresholds_with_epsilon = thresholds[0] < 0.0 or thresholds[-1] > 1.0

    thresholds = tf.convert_to_tensor(thresholds, dtype=variable_dtype)
    num_thresholds = thresholds.shape.as_list()[0]

    if multi_label:
        # True when `thresholds` has rank 1, i.e. a single set of thresholds.
        one_thresh = tf.equal(
            tf.cast(1, dtype=tf.int32),
            tf.rank(thresholds),
            name="one_set_of_thresholds_cond",
        )
    else:
        [y_pred, y_true
         ], _ = ragged_assert_compatible_and_get_flat_values([y_pred, y_true],
                                                             sample_weight)
        one_thresh = tf.cast(True, dtype=tf.bool)

    invalid_keys = [
        key for key in variables_to_update if key not in list(ConfusionMatrix)
    ]
    if invalid_keys:
        raise ValueError(
            f'Invalid keys: "{invalid_keys}". '
            f'Valid variable key options are: "{list(ConfusionMatrix)}"')

    if sample_weight is None:
        y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(
            y_pred, y_true)
    else:
        sample_weight = tf.cast(sample_weight, dtype=variable_dtype)
        (
            y_pred,
            y_true,
            sample_weight,
        ) = losses_utils.squeeze_or_expand_dimensions(
            y_pred, y_true, sample_weight=sample_weight)
    y_pred.shape.assert_is_compatible_with(y_true.shape)

    if top_k is not None:
        y_pred = _filter_top_k(y_pred, top_k)
    if class_id is not None:
        # Keep only the requested class along the last axis.
        y_true = y_true[..., class_id]
        y_pred = y_pred[..., class_id]

    if thresholds_distributed_evenly:
        # Evenly spaced thresholds are delegated to the optimized
        # implementation; nothing below this point runs in that case.
        return _update_confusion_matrix_variables_optimized(
            variables_to_update,
            y_true,
            y_pred,
            thresholds,
            multi_label=multi_label,
            sample_weights=sample_weight,
            label_weights=label_weights,
            thresholds_with_epsilon=thresholds_with_epsilon,
        )

    pred_shape = tf.shape(y_pred)
    num_predictions = pred_shape[0]
    if y_pred.shape.ndims == 1:
        num_labels = 1
    else:
        num_labels = tf.math.reduce_prod(pred_shape[1:], axis=0)
    # Tile the thresholds across labels only when there is a single set of
    # thresholds; otherwise tile by 1.
    thresh_label_tile = tf.where(one_thresh, num_labels,
                                 tf.ones([], dtype=tf.int32))

    # Reshape predictions and labels, adding a dim for thresholding.
    if multi_label:
        predictions_extra_dim = tf.expand_dims(y_pred, 0)
        labels_extra_dim = tf.expand_dims(tf.cast(y_true, dtype=tf.bool), 0)
    else:
        # Flatten predictions and labels when not multilabel.
        predictions_extra_dim = tf.reshape(y_pred, [1, -1])
        labels_extra_dim = tf.reshape(tf.cast(y_true, dtype=tf.bool), [1, -1])

    # Tile the thresholds for every prediction.
    if multi_label:
        thresh_pretile_shape = [num_thresholds, 1, -1]
        thresh_tiles = [1, num_predictions, thresh_label_tile]
        data_tiles = [num_thresholds, 1, 1]
    else:
        thresh_pretile_shape = [num_thresholds, -1]
        thresh_tiles = [1, num_predictions * num_labels]
        data_tiles = [num_thresholds, 1]

    thresh_tiled = tf.tile(tf.reshape(thresholds, thresh_pretile_shape),
                           tf.stack(thresh_tiles))

    # Tile the predictions for every threshold.
    preds_tiled = tf.tile(predictions_extra_dim, data_tiles)

    # Compare predictions and threshold.
    pred_is_pos = tf.greater(preds_tiled, thresh_tiled)

    # Tile labels by number of thresholds
    label_is_pos = tf.tile(labels_extra_dim, data_tiles)

    if sample_weight is not None:
        sample_weight = tf.__internal__.ops.broadcast_weights(
            tf.cast(sample_weight, dtype=variable_dtype), y_pred)
        weights_tiled = tf.tile(tf.reshape(sample_weight, thresh_tiles),
                                data_tiles)
    else:
        weights_tiled = None

    if label_weights is not None and not multi_label:
        label_weights = tf.expand_dims(label_weights, 0)
        label_weights = tf.__internal__.ops.broadcast_weights(
            label_weights, y_pred)
        label_weights_tiled = tf.tile(tf.reshape(label_weights, thresh_tiles),
                                      data_tiles)
        # Label weights multiply the sample weights when both are present.
        if weights_tiled is None:
            weights_tiled = label_weights_tiled
        else:
            weights_tiled = tf.multiply(weights_tiled, label_weights_tiled)

    update_ops = []

    def weighted_assign_add(label, pred, weights, var):
        """Adds to `var` the (weighted) sum over axis 1 of `label AND pred`."""
        label_and_pred = tf.cast(tf.logical_and(label, pred), dtype=var.dtype)
        if weights is not None:
            label_and_pred *= tf.cast(weights, dtype=var.dtype)
        return var.assign_add(tf.reduce_sum(label_and_pred, 1))

    # Maps each confusion matrix entry to the (label, prediction) condition
    # pair that defines it. Negated tensors are only built when an entry that
    # needs them was requested.
    loop_vars = {
        ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
    }
    update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
    update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
    update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

    if update_fn or update_tn:
        pred_is_neg = tf.logical_not(pred_is_pos)
        loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos,
                                                      pred_is_neg)

    if update_fp or update_tn:
        label_is_neg = tf.logical_not(label_is_pos)
        loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg,
                                                      pred_is_pos)
        if update_tn:
            # `pred_is_neg` exists here because `update_tn` also satisfied
            # the previous condition.
            loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (
                label_is_neg,
                pred_is_neg,
            )

    for matrix_cond, (label, pred) in loop_vars.items():

        # Entries may be in `loop_vars` without having been requested, so
        # only the requested ones get an update op.
        if matrix_cond in variables_to_update:
            update_ops.append(
                weighted_assign_add(label, pred, weights_tiled,
                                    variables_to_update[matrix_cond]))

    return tf.group(update_ops)
Beispiel #8
0
 def test_false(self):
     """`ps.cond` with a False predicate yields the second branch (3 * 3)."""
     four = tf.constant(4)
     three = tf.constant(3)
     result = ps.cond(
         False,
         lambda: tf.multiply(four, 16),
         lambda: tf.multiply(three, 3))
     self.assertEqual(self.evaluate(result), 9)
Beispiel #9
0
def compute_weighted_loss(
    losses,
    sample_weight=None,
    reduction=ReductionV2.SUM_OVER_BATCH_SIZE,
    name=None,
):
    """Weights the given losses and reduces them.

    Args:
      losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
      sample_weight: Optional `Tensor` whose rank is either 0, or the same rank
        as `losses`, or be broadcastable to `losses`. `None` is treated as a
        weight of 1.0.
      reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `SUM_OVER_BATCH_SIZE`; `AUTO` is treated as
        `SUM_OVER_BATCH_SIZE`.
      name: Optional name for the op; defaults to "weighted_loss".

    Raises:
      ValueError: If the shape of `sample_weight` is not compatible with
        `losses`.

    Returns:
      Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
      `NONE`, this has the same shape as `losses`; otherwise, it is scalar.
    """
    ReductionV2.validate(reduction)

    # When called directly (e.g. canned estimator use cases) 'AUTO' simply
    # means 'SUM_OVER_BATCH_SIZE'.
    if reduction == ReductionV2.AUTO:
        reduction = ReductionV2.SUM_OVER_BATCH_SIZE
    if sample_weight is None:
        sample_weight = 1.0

    with backend.name_scope(name or "weighted_loss"):
        # Record the reduction on the graph so the loss can be normalized
        # when distributing to multiple replicas. Used only for the
        # estimator + v1 optimizer flow.
        tf.compat.v1.get_default_graph()._last_loss_reduction = reduction

        # Keras and ragged tensors are passed through unconverted.
        passthrough_types = (keras_tensor.KerasTensor, tf.RaggedTensor)
        if not isinstance(losses, passthrough_types):
            losses = tf.convert_to_tensor(losses)
        if not isinstance(sample_weight, passthrough_types):
            sample_weight = tf.convert_to_tensor(sample_weight)

        # Non-float losses (e.g. int or bool) are computed in float32 to
        # avoid losing precision, then cast back at the end.
        original_dtype = losses.dtype
        cast_back = not original_dtype.is_floating
        if cast_back:
            losses = tf.cast(losses, "float32")
        sample_weight = tf.cast(sample_weight, losses.dtype)

        # Align the dimensions of `sample_weight` with `losses` if possible.
        losses, _, sample_weight = squeeze_or_expand_dimensions(
            losses, None, sample_weight)

        # Reduce the individually weighted losses.
        reduced = reduce_weighted_loss(
            tf.multiply(losses, sample_weight), reduction)
        if cast_back:
            return tf.cast(reduced, original_dtype)
        return reduced
def selective_crop_and_resize(features,
                              boxes,
                              box_levels,
                              boundaries,
                              output_size=7,
                              sample_offset=0.5,
                              use_einsum_gather=False):
    """Crop and resize boxes on a set of feature maps.

    Given multiple feature maps indexed by level, and a set of boxes each
    mapped to one level, selectively crops and resizes the boxes from the
    corresponding feature maps to produce per-box features.

    This follows the ROIAlign technique (see
    https://arxiv.org/pdf/1703.06870.pdf, figure 3 for reference): for each
    box an (output_size, output_size) grid of sample locations is chosen and
    the feature value at each location is obtained by bilinear interpolation.

    For performance, the gather and interpolation are done for all levels in
    a single operation:
      1. The multi-level features are gathered into a
         [batch_size, num_boxes, output_size*2, output_size*2, num_filters]
         Tensor holding the four neighboring feature points of every vertex
         in the output grid.
      2. An interpolation kernel of shape
         [batch_size, num_boxes, output_size*2, output_size*2] is computed;
         its last two axes can be seen as stacked 2x2 interpolation kernels
         for all vertices in the output grid.
      3. The gathered features are multiplied element-wise by the kernel and
         2x2 average pooling reduces the spatial size to output_size.

    Args:
      features: a 5-D tensor of shape [batch_size, num_levels, max_height,
        max_width, num_filters] where cropping and resizing are based.
      boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the
        information of each box w.r.t. the corresponding feature map.
        boxes[:, :, 0:2] are the grid position in (y, x) (float) of the
        top-left corner of each box. boxes[:, :, 2:4] are the box sizes in
        (h, w) (float) in terms of the number of pixels of the corresponding
        feature map size.
      box_levels: a 3-D tensor of shape [batch_size, num_boxes, 1]
        representing the 0-based corresponding feature level index of each
        box.
      boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2]
        representing the boundary (in (y, x)) of the corresponding feature
        map for each box. Any resampled grid points that go beyond the
        boundary will be clipped.
      output_size: a scalar indicating the output crop size.
      sample_offset: a float number in [0, 1] indicating the subpixel sample
        offset from grid point.
      use_einsum_gather: if True, select features with one-hot weights and
        `tf.einsum` instead of `tf.gather`. Per the original author's notes,
        einsum can perform better when the feature size is not large and is
        friendlier to model partitioning, while gather performs better when
        the feature size is very large and there are multiple box levels.

    Returns:
      features_per_box: a 5-D tensor of shape
        [batch_size, num_boxes, output_size, output_size, num_filters]
        representing the cropped features.
    """
    (batch_size, num_levels, max_feature_height, max_feature_width,
     num_filters) = features.get_shape().as_list()
    _, num_boxes, _ = boxes.get_shape().as_list()
    # Each output vertex has two neighbors per axis.
    grid_len = output_size * 2

    kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = compute_grid_positions(
        boxes, boundaries, output_size, sample_offset)
    flat_grid_shape = [batch_size, num_boxes, grid_len]
    x_indices = tf.cast(tf.reshape(box_gridx0x1, flat_grid_shape),
                        dtype=tf.int32)
    y_indices = tf.cast(tf.reshape(box_gridy0y1, flat_grid_shape),
                        dtype=tf.int32)

    if not use_einsum_gather:
        # Strides of the (batch, level, y, x) axes once `features` is
        # flattened to [-1, num_filters].
        row_stride = max_feature_width
        level_stride = max_feature_height * row_stride
        batch_stride = num_levels * level_stride

        batch_term = tf.tile(
            tf.reshape(
                tf.range(batch_size) * batch_stride,
                [batch_size, 1, 1, 1]),
            [1, num_boxes, grid_len, grid_len])
        level_term = tf.tile(
            tf.reshape(box_levels * level_stride,
                       [batch_size, num_boxes, 1, 1]),
            [1, 1, grid_len, grid_len])
        row_term = tf.tile(
            tf.reshape(y_indices * row_stride,
                       [batch_size, num_boxes, grid_len, 1]),
            [1, 1, 1, grid_len])
        column_term = tf.tile(
            tf.reshape(x_indices, [batch_size, num_boxes, 1, grid_len]),
            [1, 1, grid_len, 1])

        flat_indices = tf.reshape(
            batch_term + level_term + row_term + column_term, [-1])

        flat_features = tf.reshape(features, [-1, num_filters])
        # TODO(wangtao): replace tf.gather with tf.gather_nd and try to get similar
        # performance.
        gathered = tf.reshape(
            tf.gather(flat_features, flat_indices),
            [batch_size, num_boxes, grid_len, grid_len, num_filters])
        return feature_bilinear_interpolation(gathered, kernel_y, kernel_x)

    # Einsum path: bilinear interpolation happens during the two
    # contractions below:
    #        f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
    #                              [f10, f11]]
    #        [[f00, f01],
    #         [f10, f11]] = tf.einsum(tf.einsum(features, y_one_hot), x_one_hot)
    #       where [hy, ly] and [hx, lx] are the bilinear interpolation kernel.
    grid_y_one_hot, grid_x_one_hot = get_grid_one_hot(
        box_gridy0y1, box_gridx0x1, max_feature_height, max_feature_width)

    # Fold the kernels into the one-hot selectors. Original annotations give
    # the results as [batch_size, num_boxes, output_size, height] and
    # [batch_size, num_boxes, output_size, width] respectively.
    grid_y_weight = tf.reduce_sum(tf.multiply(grid_y_one_hot, kernel_y),
                                  axis=-2)
    grid_x_weight = tf.reduce_sum(tf.multiply(grid_x_one_hot, kernel_x),
                                  axis=-2)

    # Contract the height axis, then the width axis.
    rows_selected = tf.einsum('bmhwf,bmoh->bmowf', features,
                              tf.cast(grid_y_weight, features.dtype))
    return tf.einsum('bmhwf,bmow->bmhof', rows_selected,
                     tf.cast(grid_x_weight, features.dtype))
Beispiel #11
0
def bigbird_block_sparse_attention(query_layer,
                                   key_layer,
                                   value_layer,
                                   band_mask,
                                   from_mask,
                                   to_mask,
                                   from_blocked_mask,
                                   to_blocked_mask,
                                   num_attention_heads,
                                   num_rand_blocks,
                                   size_per_head,
                                   batch_size,
                                   from_seq_length,
                                   to_seq_length,
                                   from_block_size,
                                   to_block_size,
                                   seed=None,
                                   plan_from_length=None,
                                   plan_num_rand_blocks=None):
    """BigBird attention sparse calculation using blocks in linear time.

    Assumes from_seq_length//from_block_size == to_seq_length//to_block_size.

    The query sequence is split into blocks that are handled in five groups:
    the first block and the last block attend to every key; the second and
    the second-to-last blocks attend to four fixed key blocks plus their
    random blocks; every block in between attends to a window of three
    neighbouring key blocks, the first key block, the last key block and its
    random blocks.

    Side effect: calls `np.random.seed(seed)`, which reseeds NumPy's global
    random state.

    NOTE(review): the mask arguments are described as optional below, but all
    five are passed to `tf.cast` unconditionally, so passing None presumably
    fails -- confirm before relying on it.

    Args:
      query_layer: float Tensor of shape [batch_size, num_attention_heads,
        from_seq_length, size_per_head]
      key_layer: float Tensor of shape [batch_size, num_attention_heads,
        to_seq_length, size_per_head]
      value_layer: float Tensor of shape [batch_size, num_attention_heads,
        to_seq_length, size_per_head]
      band_mask: (optional) int32 Tensor of shape [batch_size, 1,
        from_seq_length//from_block_size-4, from_block_size, 3*to_block_size].
        The values should be 1 or 0. The attention scores will effectively be
        set to -infinity for any positions in the mask that are 0, and will be
        unchanged for positions that are 1.
      from_mask: (optional) int32 Tensor of shape [batch_size, 1,
        from_seq_length, 1]. The values should be 1 or 0. The
        attention scores will effectively be set to -infinity for any
        positions in the mask that are 0, and will be unchanged for positions
        that are 1.
      to_mask: (optional) int32 Tensor of shape [batch_size, 1, 1,
        to_seq_length]. The values should be 1 or 0. The
        attention scores will effectively be set to -infinity for any
        positions in the mask that are 0, and will be unchanged for positions
        that are 1.
      from_blocked_mask: (optional) int32 Tensor of shape [batch_size,
        from_seq_length//from_block_size, from_block_size].
        Same as from_mask, just reshaped.
      to_blocked_mask: (optional) int32 Tensor of shape [batch_size,
        to_seq_length//to_block_size, to_block_size].
        Same as to_mask, just reshaped.
      num_attention_heads: int. Number of attention heads.
      num_rand_blocks: int. Number of random chunks per row.
      size_per_head: int. Size of each attention head.
      batch_size: int. Batch size for computation.
      from_seq_length: int. Length of from sequence.
      to_seq_length: int. Length of to sequence.
      from_block_size: int. Size of block in from sequence.
      to_block_size: int. Size of block in to sequence.
      seed: (Optional) int. Random seed for generating random mask.
      plan_from_length: (Optional) list. Plan of where to put random attn. It
        divides the block matrix into chunks, where each chunk will have
        some random attn. Not used when from_seq_length is 1024, 3072 or
        4096; computed with `get_rand_attn_plan` when None.
      plan_num_rand_blocks: (Optional) list. Number of random blocks per chunk
        given by plan_from_length. Overwritten together with
        plan_from_length when that argument is None.

    Returns:
      float Tensor of shape [batch_size, from_seq_length, num_attention_heads,
        size_per_head].

    Raises:
      AssertionError: if the number of from-blocks differs from the number of
        to-blocks (only when assertions are enabled).
    """
    assert from_seq_length // from_block_size == to_seq_length // to_block_size

    # cast masks to float
    from_mask = tf.cast(from_mask, tf.float32)
    to_mask = tf.cast(to_mask, tf.float32)
    band_mask = tf.cast(band_mask, tf.float32)
    from_blocked_mask = tf.cast(from_blocked_mask, tf.float32)
    to_blocked_mask = tf.cast(to_blocked_mask, tf.float32)

    # generate random attention and corresponding masks
    # The random block indices are drawn with NumPy, so they are fixed Python
    # values by the time they are wrapped in tf.constant below.
    np.random.seed(seed)
    if from_seq_length in [1024, 3072, 4096]:  # old plans used in paper
        # One independently drawn mask per head, truncated to the number of
        # from-blocks minus two.
        rand_attn = [
            bigbird_block_rand_mask(  # pylint: disable=g-complex-comprehension
                MAX_SEQ_LEN,
                MAX_SEQ_LEN,
                from_block_size,
                to_block_size,
                num_rand_blocks,
                last_idx=1024)[:(from_seq_length // from_block_size - 2)]
            for _ in range(num_attention_heads)
        ]
    else:
        if plan_from_length is None:
            plan_from_length, plan_num_rand_blocks = get_rand_attn_plan(
                from_seq_length, from_block_size, num_rand_blocks)

        rand_attn = bigbird_block_rand_mask_with_head(
            from_seq_length=from_seq_length,
            to_seq_length=to_seq_length,
            from_block_size=from_block_size,
            to_block_size=to_block_size,
            num_heads=num_attention_heads,
            plan_from_length=plan_from_length,
            plan_num_rand_blocks=plan_num_rand_blocks)
    # Stack the per-head masks, then add and repeat a leading batch axis so
    # every example in the batch shares the same random block indices.
    rand_attn = np.stack(rand_attn, axis=0)
    rand_attn = tf.constant(rand_attn, dtype=tf.int32)
    rand_attn = tf.expand_dims(rand_attn, 0)
    rand_attn = tf.repeat(rand_attn, batch_size, 0)

    rand_mask = create_rand_mask_from_inputs(
        from_blocked_mask,
        to_blocked_mask,
        rand_attn,
        num_attention_heads,
        num_rand_blocks,
        batch_size,
        from_seq_length,
        from_block_size,
    )

    # Define shorthands
    h = num_attention_heads
    r = num_rand_blocks
    d = size_per_head
    b = batch_size
    m = from_seq_length
    n = to_seq_length
    wm = from_block_size
    wn = to_block_size

    # Split the sequence axis into (number of blocks, block size).
    blocked_query_matrix = tf.reshape(query_layer, (b, h, m // wm, wm, -1))
    blocked_key_matrix = tf.reshape(key_layer, (b, h, n // wn, wn, -1))
    blocked_value_matrix = tf.reshape(value_layer, (b, h, n // wn, wn, -1))
    # Pick the randomly selected key/value blocks for each query block and
    # flatten the (r, wn) axes into one axis of length r * wn.
    gathered_key = tf.reshape(
        tf.gather(blocked_key_matrix,
                  rand_attn,
                  batch_dims=2,
                  name="gather_key"),
        (b, h, m // wm - 2, r * wn, -1))  # [b, h, n//wn-2, r, wn, -1]
    gathered_value = tf.reshape(
        tf.gather(blocked_value_matrix,
                  rand_attn,
                  batch_dims=2,
                  name="gather_value"),
        (b, h, m // wm - 2, r * wn, -1))  # [b, h, n//wn-2, r, wn, -1]

    # --- First query block: attends to the whole key sequence. ---
    # Throughout, scores are scaled by 1/sqrt(d) and masked positions get a
    # large negative bias (-10000) before the softmax.
    first_product = tf.einsum(
        "BHQD,BHKD->BHQK", blocked_query_matrix[:, :, 0],
        key_layer)  # [b, h, wm, -1] x [b, h, n, -1] ==> [b, h, wm, n]
    first_product = tf.multiply(first_product, 1.0 / np.sqrt(d))
    first_product += (1.0 - to_mask) * -10000.0
    first_attn_weights = tf.nn.softmax(first_product)  # [b, h, wm, n]
    first_context_layer = tf.einsum(
        "BHQK,BHKD->BHQD", first_attn_weights,
        value_layer)  # [b, h, wm, n] x [b, h, n, -1] ==> [b, h, wm, -1]
    first_context_layer = tf.expand_dims(first_context_layer, 2)

    # --- Second query block: key blocks 0, 1, 2, the last block, and the
    # random blocks gathered for it. ---
    second_key_mat = tf.concat([
        blocked_key_matrix[:, :, 0], blocked_key_matrix[:, :, 1],
        blocked_key_matrix[:, :, 2], blocked_key_matrix[:, :, -1],
        gathered_key[:, :, 0]
    ], 2)  # [b, h, (4+r)*wn, -1]
    second_value_mat = tf.concat([
        blocked_value_matrix[:, :, 0], blocked_value_matrix[:, :, 1],
        blocked_value_matrix[:, :, 2], blocked_value_matrix[:, :, -1],
        gathered_value[:, :, 0]
    ], 2)  # [b, h, (4+r)*wn, -1]
    second_product = tf.einsum(
        "BHQD,BHKD->BHQK", blocked_query_matrix[:, :, 1], second_key_mat
    )  # [b, h, wm, -1] x [b, h, (4+r)*wn, -1] ==> [b, h, wm, (4+r)*wn]
    # Two masks laid out like second_key_mat: to_mask covers the four fixed
    # blocks (ones for the random part), rand_mask covers the random part
    # (ones for the fixed part). Their element-wise minimum is applied.
    second_seq_pad = tf.concat([
        to_mask[:, :, :, :3 * wn], to_mask[:, :, :, -wn:],
        tf.ones([b, 1, 1, r * wn], dtype=tf.float32)
    ], 3)
    second_rand_pad = tf.concat(
        [tf.ones([b, h, wm, 4 * wn], dtype=tf.float32), rand_mask[:, :, 0]], 3)
    second_product = tf.multiply(second_product, 1.0 / np.sqrt(d))

    second_product += (1.0 -
                       tf.minimum(second_seq_pad, second_rand_pad)) * -10000.0
    second_attn_weights = tf.nn.softmax(
        second_product)  # [b , h, wm, (4+r)*wn]
    second_context_layer = tf.einsum(
        "BHQK,BHKD->BHQD", second_attn_weights, second_value_mat
    )  # [b, h, wm, (4+r)*wn] x [b, h, (4+r)*wn, -1] ==> [b, h, wm, -1]
    second_context_layer = tf.expand_dims(second_context_layer, 2)

    # --- Middle query blocks (indices 2 .. -3). ---
    # For each middle block, concatenate three shifted views of the key/value
    # blocks along the within-block axis to form a 3-block window.
    exp_blocked_key_matrix = tf.concat([
        blocked_key_matrix[:, :, 1:-3], blocked_key_matrix[:, :, 2:-2],
        blocked_key_matrix[:, :, 3:-1]
    ], 3)  # [b, h, m//wm-4, 3*wn, -1]
    exp_blocked_value_matrix = tf.concat([
        blocked_value_matrix[:, :, 1:-3], blocked_value_matrix[:, :, 2:-2],
        blocked_value_matrix[:, :, 3:-1]
    ], 3)  # [b, h, m//wm-4, 3*wn, -1]
    middle_query_matrix = blocked_query_matrix[:, :, 2:-2]
    inner_band_product = tf.einsum(
        "BHLQD,BHLKD->BHLQK", middle_query_matrix, exp_blocked_key_matrix
    )  # [b, h, m//wm-4, wm, -1] x [b, h, m//wm-4, 3*wn, -1]
    #     ==> [b, h, m//wm-4, wm, 3*wn]
    inner_band_product = tf.multiply(inner_band_product, 1.0 / np.sqrt(d))
    rand_band_product = tf.einsum(
        "BHLQD,BHLKD->BHLQK", middle_query_matrix, gathered_key[:, :, 1:-1]
    )  # [b, h, m//wm-4, wm, -1] x [b, h, m//wm-4, r*wn, -1]
    #     ==> [b, h, m//wm-4, wm, r*wn]
    rand_band_product = tf.multiply(rand_band_product, 1.0 / np.sqrt(d))
    first_band_product = tf.einsum(
        "BHLQD,BHKD->BHLQK", middle_query_matrix, blocked_key_matrix[:, :, 0]
    )  # [b, h, m//wm-4, wm, -1] x [b, h, wn, -1] ==> [b, h, m//wm-4, wm, wn]
    first_band_product = tf.multiply(first_band_product, 1.0 / np.sqrt(d))
    last_band_product = tf.einsum(
        "BHLQD,BHKD->BHLQK", middle_query_matrix, blocked_key_matrix[:, :, -1]
    )  # [b, h, m//wm-4, wm, -1] x [b, h, wn, -1] ==> [b, h, m//wm-4, wm, wn]
    last_band_product = tf.multiply(last_band_product, 1.0 / np.sqrt(d))
    inner_band_product += (1.0 - band_mask) * -10000.0
    first_band_product += (1.0 -
                           tf.expand_dims(to_mask[:, :, :, :wn], 3)) * -10000.0
    last_band_product += (1.0 -
                          tf.expand_dims(to_mask[:, :, :, -wn:], 3)) * -10000.0
    rand_band_product += (1.0 - rand_mask[:, :, 1:-1]) * -10000.0
    # A single softmax over all four score groups; the last axis is laid out
    # as [first (wn) | window (3*wn) | random (r*wn) | last (wn)], which is
    # the layout the slices below rely on.
    band_product = tf.concat([
        first_band_product, inner_band_product, rand_band_product,
        last_band_product
    ], -1)  # [b, h, m//wm-4, wm, (5+r)*wn]
    attn_weights = tf.nn.softmax(band_product)  # [b, h, m//wm-4, wm, (5+r)*wn]
    context_layer = tf.einsum(
        "BHLQK,BHLKD->BHLQD", attn_weights[:, :, :, :,
                                           wn:4 * wn], exp_blocked_value_matrix
    )  # [b, h, m//wm-4, wm, 3*wn] x [b, h, m//wm-4, 3*wn, -1]
    #     ==> [b, h, m//wm-4, wm, -1]
    context_layer += tf.einsum(
        "BHLQK,BHLKD->BHLQD", attn_weights[:, :, :, :,
                                           4 * wn:-wn], gathered_value[:, :,
                                                                       1:-1]
    )  # [b, h, m//wm-4, wm, r*wn] x [b, h, m//wm-4, r*wn, -1]
    #     ==> [b, h, m//wm-4, wm, -1]
    context_layer += tf.einsum(
        "BHLQK,BHKD->BHLQD", attn_weights[:, :, :, :, :wn],
        blocked_value_matrix[:, :, 0]
    )  # [b, h, m//wm-4, wm, wn] x [b, h, wn, -1] ==> [b, h, m//wm-4, wm, -1]
    context_layer += tf.einsum(
        "BHLQK,BHKD->BHLQD", attn_weights[:, :, :, :,
                                          -wn:], blocked_value_matrix[:, :, -1]
    )  # [b, h, m//wm-4, wm, wn] x [b, h, wn, -1] ==> [b, h, m//wm-4, wm, -1]

    # --- Second-to-last query block: key blocks 0, -3, -2, -1, and the
    # random blocks gathered for it. ---
    second_last_key_mat = tf.concat([
        blocked_key_matrix[:, :, 0], blocked_key_matrix[:, :, -3],
        blocked_key_matrix[:, :, -2], blocked_key_matrix[:, :, -1],
        gathered_key[:, :, -1]
    ], 2)  # [b, h, (4+r)*wn, -1]
    second_last_value_mat = tf.concat([
        blocked_value_matrix[:, :, 0], blocked_value_matrix[:, :, -3],
        blocked_value_matrix[:, :, -2], blocked_value_matrix[:, :, -1],
        gathered_value[:, :, -1]
    ], 2)  # [b, h, (4+r)*wn, -1]
    second_last_product = tf.einsum(
        "BHQD,BHKD->BHQK", blocked_query_matrix[:, :, -2], second_last_key_mat
    )  # [b, h, wm, -1] x [b, h, (4+r)*wn, -1] ==> [b, h, wm, (4+r)*wn]
    second_last_seq_pad = tf.concat([
        to_mask[:, :, :, :wn], to_mask[:, :, :, -3 * wn:],
        tf.ones([b, 1, 1, r * wn], dtype=tf.float32)
    ], 3)
    second_last_rand_pad = tf.concat(
        [tf.ones([b, h, wm, 4 * wn], dtype=tf.float32), rand_mask[:, :, -1]],
        3)
    second_last_product = tf.multiply(second_last_product, 1.0 / np.sqrt(d))
    second_last_product += (
        1.0 - tf.minimum(second_last_seq_pad, second_last_rand_pad)) * -10000.0
    second_last_attn_weights = tf.nn.softmax(
        second_last_product)  # [b, h, wm, (4+r)*wn]
    second_last_context_layer = tf.einsum(
        "BHQK,BHKD->BHQD", second_last_attn_weights, second_last_value_mat
    )  # [b, h, wm, (4+r)*wn] x [b, h, (4+r)*wn, -1] ==> [b, h, wm, -1]
    second_last_context_layer = tf.expand_dims(second_last_context_layer, 2)

    # --- Last query block: attends to the whole key sequence. ---
    last_product = tf.einsum(
        "BHQD,BHKD->BHQK", blocked_query_matrix[:, :, -1],
        key_layer)  # [b, h, wm, -1] x [b, h, n, -1] ==> [b, h, wm, n]
    last_product = tf.multiply(last_product, 1.0 / np.sqrt(d))
    last_product += (1.0 - to_mask) * -10000.0
    last_attn_weights = tf.nn.softmax(last_product)  # [b, h, wm, n]
    last_context_layer = tf.einsum(
        "BHQK,BHKD->BHQD", last_attn_weights,
        value_layer)  # [b, h, wm, n] x [b, h, n, -1] ==> [b, h, wm, -1]
    last_context_layer = tf.expand_dims(last_context_layer, 2)

    # Reassemble the five groups along the block axis, merge blocks back into
    # one sequence axis, zero out rows via from_mask, and swap the head and
    # sequence axes for the returned layout.
    context_layer = tf.concat([
        first_context_layer, second_context_layer, context_layer,
        second_last_context_layer, last_context_layer
    ], 2)
    context_layer = tf.reshape(context_layer, (b, h, m, -1)) * from_mask
    context_layer = tf.transpose(context_layer, (0, 2, 1, 3))
    return context_layer
Beispiel #12
0
 def positive_fcn():
     # Column-wise sum of x_m * y_m, divided by sqrt(x_p) * sqrt(y_p).
     # All four tensors are captured from the enclosing scope.
     numerator = tf.math.reduce_sum(tf.multiply(x_m, y_m), axis=0)
     denominator = tf.multiply(tf.math.sqrt(x_p), tf.math.sqrt(y_p))
     return tf.divide(numerator, denominator)
Beispiel #13
0
 def call(self, y_true, y_pred):
   """Returns the unreduced losses scaled by their weights.

   See tf.keras.losses.Loss. Both the losses and the weights come from
   `self._loss.compute_unreduced_loss`; no reduction is applied here.
   """
   unreduced, item_weights = self._loss.compute_unreduced_loss(
       labels=y_true, logits=y_pred)
   weighted = tf.multiply(unreduced, item_weights)
   return weighted
def boolean_mask(boxlist,
                 indicator,
                 fields=None,
                 scope=None,
                 use_static_shapes=False,
                 indicator_sum=None):
    """Return a new BoxList holding only the boxes flagged by `indicator`.

    In the default (dynamic-shape) mode the box coordinates and the requested
    extra fields are filtered with `tf.boolean_mask`. In static-shape mode the
    indices of the selected boxes are computed with fixed-size ops and handed
    to `gather`.

    NOTE(review): on the static-shape path `fields` is not forwarded to
    `gather`; confirm whether that is intended.

    Args:
      boxlist: BoxList holding N boxes.
      indicator: a rank-1 boolean tensor; True marks a box to keep.
      fields: (optional) list of extra fields to filter as well. If None
        (default), every extra field of `boxlist` is used. Pass an empty list
        to keep only the box coordinates.
      scope: name scope.
      use_static_shapes: whether to use the implementation with static shape
        guarantees.
      indicator_sum: Python int, the number of True entries in `indicator`.
        Only required when `use_static_shapes` is True; it must be a non-zero
        int in that case.

    Returns:
      subboxlist: a BoxList corresponding to the subset of the input BoxList
        specified by indicator.

    Raises:
      ValueError: if `indicator` is not a rank-1 boolean tensor, if
        `indicator_sum` is missing, zero or not an int in static-shape mode,
        or if a requested field is not present in `boxlist`.
    """
    with tf.name_scope(scope, 'BooleanMask'):
        if indicator.shape.ndims != 1:
            raise ValueError('indicator should have rank 1')
        if indicator.dtype != tf.bool:
            raise ValueError('indicator should be a boolean tensor')

        if not use_static_shapes:
            # Dynamic path: mask the coordinates, then each requested field.
            masked_boxes = box_list.BoxList(
                tf.boolean_mask(boxlist.get(), indicator))
            if fields is None:
                fields = boxlist.get_extra_fields()
            for field_name in fields:
                if not boxlist.has_field(field_name):
                    raise ValueError(
                        'boxlist must contain all specified fields')
                masked_boxes.add_field(
                    field_name,
                    tf.boolean_mask(boxlist.get_field(field_name), indicator))
            return masked_boxes

        # Static path: needs the number of selected boxes as a Python int.
        if not indicator_sum or not isinstance(indicator_sum, int):
            raise ValueError('`indicator_sum` must be a of type int')

        keep = tf.cast(indicator, dtype=tf.float32)
        # 1-based rank of every selected entry among the selected ones;
        # 0 for entries that are not selected.
        selected_rank = tf.cast(tf.multiply(tf.cumsum(keep), keep),
                                dtype=tf.int32)
        # Row i is a one-hot vector at column rank-1; unselected rows map to
        # index -1 and are therefore all zeros.
        rank_one_hot = tf.one_hot(selected_rank - 1,
                                  indicator_sum,
                                  dtype=tf.float32)
        positions = tf.cast(tf.range(tf.shape(indicator)[0]),
                            dtype=tf.float32)
        # Contracting positions with the one-hot rows yields, for each output
        # slot, the input position of the corresponding selected box.
        sampled_indices = tf.cast(
            tf.tensordot(positions, rank_one_hot, axes=[0, 0]),
            dtype=tf.int32)
        return gather(boxlist, sampled_indices, use_static_shapes=True)
    def _static_subsample(self, indicator, batch_size, labels):
        """Returns subsampled minibatch.

    Args:
      indicator: boolean tensor of shape [N] whose True entries can be sampled.
        N should be a complie time constant.
      batch_size: desired batch size. This scalar cannot be None.
      labels: boolean tensor of shape [N] denoting positive(=True) and negative
        (=False) examples. N should be a complie time constant.

    Returns:
      sampled_idx_indicator: boolean tensor of shape [N], True for entries which
        are sampled. It ensures the length of output of the subsample is always
        batch_size, even when number of examples set to True in indicator is
        less than batch_size.

    Raises:
      ValueError: if labels and indicator are not 1D boolean tensors.
    """
        # Check if indicator and labels have a static size.
        if not indicator.shape.is_fully_defined():
            raise ValueError(
                'indicator must be static in shape when is_static is'
                'True')
        if not labels.shape.is_fully_defined():
            raise ValueError('labels must be static in shape when is_static is'
                             'True')
        if not isinstance(batch_size, int):
            raise ValueError(
                'batch_size has to be an integer when is_static is'
                'True.')

        input_length = tf.shape(input=indicator)[0]

        # Set the number of examples set True in indicator to be at least
        # batch_size.
        num_true_sampled = tf.reduce_sum(
            input_tensor=tf.cast(indicator, tf.float32))
        additional_false_sample = tf.less_equal(
            tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)),
            batch_size - num_true_sampled)
        indicator = tf.logical_or(indicator, additional_false_sample)

        # Shuffle indicator and label. Need to store the permutation to restore the
        # order post sampling.
        permutation = tf.random.shuffle(tf.range(input_length))
        indicator = ops.matmul_gather_on_zeroth_axis(
            tf.cast(indicator, tf.float32), permutation)
        labels = ops.matmul_gather_on_zeroth_axis(tf.cast(labels, tf.float32),
                                                  permutation)

        # index (starting from 1) when indicator is True, 0 when False
        indicator_idx = tf.where(tf.cast(indicator, tf.bool),
                                 tf.range(1, input_length + 1),
                                 tf.zeros(input_length, tf.int32))

        # Replace -1 for negative, +1 for positive labels
        signed_label = tf.where(
            tf.cast(labels, tf.bool), tf.ones(input_length, tf.int32),
            tf.scalar_mul(-1, tf.ones(input_length, tf.int32)))
        # negative of index for negative label, positive index for positive label,
        # 0 when indicator is False.
        signed_indicator_idx = tf.multiply(indicator_idx, signed_label)
        sorted_signed_indicator_idx = tf.nn.top_k(signed_indicator_idx,
                                                  input_length,
                                                  sorted=True).values

        [num_positive_samples, num_negative_samples
         ] = self._get_num_pos_neg_samples(sorted_signed_indicator_idx,
                                           batch_size)

        sampled_idx = self._get_values_from_start_and_end(
            sorted_signed_indicator_idx, num_positive_samples,
            num_negative_samples, batch_size)

        # Shift the indices to start from 0 and remove any samples that are set as
        # False.
        sampled_idx = tf.abs(sampled_idx) - tf.ones(batch_size, tf.int32)
        sampled_idx = tf.multiply(
            tf.cast(tf.greater_equal(sampled_idx, tf.constant(0)), tf.int32),
            sampled_idx)

        sampled_idx_indicator = tf.cast(
            tf.reduce_sum(input_tensor=tf.one_hot(sampled_idx,
                                                  depth=input_length),
                          axis=0), tf.bool)

        # project back the order based on stored permutations
        reprojections = tf.one_hot(permutation,
                                   depth=input_length,
                                   dtype=tf.float32)
        return tf.cast(
            tf.tensordot(tf.cast(sampled_idx_indicator, tf.float32),
                         reprojections,
                         axes=[0, 0]), tf.bool)
Beispiel #16
0
 def test_true(self):
     """With a Python `True` predicate, `ps.cond` yields the first branch's value."""
     two = tf.constant(2)
     five = tf.constant(5)

     def true_branch():
         return tf.multiply(two, 16)

     def false_branch():
         return tf.multiply(five, 5)

     result = ps.cond(True, true_branch, false_branch)
     # 2 * 16 == 32; the false branch would have produced 25.
     self.assertEqual(self.evaluate(result), 32)
Beispiel #17
0
    def train(self, sentences):
        """Runs one next-token training step and returns the loss.

        The sentences are tokenized, every token is mapped to a vocabulary id,
        and the LSTM is trained to predict token i+1 from tokens 0..i. The
        update is plain gradient descent with an implicit step size of 1.0:
        each watched variable has its gradient subtracted in place.

        Args:
          sentences: input passed to `self._tokenize`, which must return the
            indices, values and dense shape of a sparse string tensor of
            tokens.

        Returns:
          Scalar tensor with the mean cross-entropy over all positions whose
          input token is neither padding ("") nor the terminal token "<E>".
          The loss is 0 when no position qualifies.
        """
        token_ids, token_values, token_dense_shape = self._tokenize(sentences)
        tokens_sparse = tf.sparse.SparseTensor(indices=token_ids,
                                               values=token_values,
                                               dense_shape=token_dense_shape)
        # Padding positions become the empty string.
        tokens = tf.sparse.to_dense(tokens_sparse, default_value="")

        sparse_lookup_ids = tf.sparse.SparseTensor(
            indices=tokens_sparse.indices,
            values=self._words_to_indices(tokens_sparse.values),
            dense_shape=tokens_sparse.dense_shape)
        # Padding positions become id 0.
        lookup_ids = tf.sparse.to_dense(sparse_lookup_ids, default_value=0)

        # Targets are the next word for each word of the sentence.
        tokens_ids_seq = lookup_ids[:, 0:-1]
        tokens_ids_target = lookup_ids[:, 1:]

        tokens_prefix = tokens[:, 0:-1]

        # Mask determining which positions we care about for a loss: all
        # positions that have a valid non-terminal token.
        mask = tf.logical_and(tf.logical_not(tf.equal(tokens_prefix, "")),
                              tf.logical_not(tf.equal(tokens_prefix, "<E>")))

        with tf.GradientTape() as t:
            sentence_embeddings = tf.nn.embedding_lookup(
                self._embeddings, tokens_ids_seq)

            lstm_initial_state = self._lstm_cell.get_initial_state(
                sentence_embeddings)

            lstm_output = self._rnn_layer(inputs=sentence_embeddings,
                                          initial_state=lstm_initial_state)

            # Stack LSTM outputs into a batch instead of a 2D array.
            lstm_output = tf.reshape(lstm_output,
                                     [-1, self._lstm_cell.output_size])

            logits = self._logit_layer(lstm_output)

            targets = tf.reshape(tokens_ids_target, [-1])
            # 1.0 for positions that count towards the loss, 0.0 otherwise.
            weights = tf.cast(tf.reshape(mask, [-1]), tf.float32)

            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=targets, logits=logits)

            # Final loss is the mean loss for all token losses. divide_no_nan
            # returns 0 instead of NaN when every position is masked out; a
            # NaN loss would otherwise be written into every variable below.
            final_loss = tf.math.divide_no_nan(
                tf.reduce_sum(tf.multiply(losses, weights)),
                tf.reduce_sum(weights),
                name="final_loss")

        watched = t.watched_variables()
        gradients = t.gradient(final_loss, watched)

        for w, g in zip(watched, gradients):
            # A watched variable that does not influence the loss has a None
            # gradient; there is nothing to apply for it.
            if g is not None:
                w.assign_sub(g)

        return final_loss
Beispiel #18
0
def _update_confusion_matrix_variables_optimized(
    variables_to_update,
    y_true,
    y_pred,
    thresholds,
    multi_label=False,
    sample_weights=None,
    label_weights=None,
    thresholds_with_epsilon=False,
):
    """Update confusion matrix variables with memory efficient alternative.

    Note that the thresholds need to be evenly distributed within the list, eg,
    the diff between consecutive elements are the same.

    To compute TP/FP/TN/FN, we are measuring a binary classifier
      C(t) = (predictions >= t)
    at each threshold 't'. So we have
      TP(t) = sum( C(t) * true_labels )
      FP(t) = sum( C(t) * false_labels )

    But, computing C(t) requires computation for each t. To make it fast,
    observe that C(t) is a cumulative integral, and so if we have
      thresholds = [t_0, ..., t_{n-1}];  t_0 < ... < t_{n-1}
    where n = num_thresholds, and if we can compute the bucket function
      B(i) = Sum( (predictions == t), t_i <= t < t_{i+1} )
    then we get
      C(t_i) = sum( B(j), j >= i )
    which is the reversed cumulative sum in tf.cumsum().

    We can compute B(i) efficiently by taking advantage of the fact that
    our thresholds are evenly distributed, in that
      width = 1.0 / (num_thresholds - 1)
      thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0]
    Given a prediction value p, we can map it to its bucket by
      bucket_index(p) = floor( p * (num_thresholds - 1) )
    so we can use tf.math.unsorted_segment_sum() to update the buckets in one
    pass.

    Consider following example:
    y_true = [0, 0, 1, 1]
    y_pred = [0.1, 0.5, 0.3, 0.9]
    thresholds = [0.0, 0.5, 1.0]
    num_buckets = 2   # [0.0, 1.0], (1.0, 2.0]
    bucket_index(y_pred) = tf.math.floor(y_pred * num_buckets)
                         = tf.math.floor([0.2, 1.0, 0.6, 1.8])
                         = [0, 0, 0, 1]
    # The meaning of this bucket is that if any of the label is true,
    # then 1 will be added to the corresponding bucket with the index.
    # Eg, if the label for 0.2 is true, then 1 will be added to bucket 0. If the
    # label for 1.8 is true, then 1 will be added to bucket 1.
    #
    # Note the second item "1.0" is floored to 0, since the value need to be
    # strictly larger than the bucket lower bound.
    # In the implementation, we use tf.math.ceil() - 1 to achieve this.
    tp_bucket_value = tf.math.unsorted_segment_sum(true_labels, bucket_indices,
                                                   num_segments=num_thresholds)
                    = [1, 1, 0]
    # For [1, 1, 0] here, it means there is 1 true value contributed by bucket 0,
    # and 1 value contributed by bucket 1. When we aggregate them to together,
    # the result become [a + b + c, b + c, c], since large thresholds will always
    # contribute to the value for smaller thresholds.
    true_positive = tf.math.cumsum(tp_bucket_value, reverse=True)
                  = [2, 1, 0]

    This implementation exhibits a run time and space complexity of O(T + N),
    where T is the number of thresholds and N is the size of predictions.
    Metrics that rely on standard implementation instead exhibit a complexity of
    O(T * N).

    Args:
      variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
        and corresponding variables to update as values.
      y_true: A floating point `Tensor` whose shape matches `y_pred`. Will be cast
        to `bool`.
      y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
        the range `[0, 1]`.
      thresholds: A sorted floating point `Tensor` with value in `[0, 1]`.
        It needs to be evenly distributed (the diff between each element needs
        to be the same).
      multi_label: Optional boolean indicating whether multidimensional
        prediction/labels should be treated as multilabel responses, or flattened
        into a single label. When True, the values of `variables_to_update` must
        have a second dimension equal to the number of labels in y_true and
        y_pred, and those tensors must not be RaggedTensors.
      sample_weights: Optional `Tensor` whose rank is either 0, or the same rank
        as `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions
        must be either `1`, or the same as the corresponding `y_true` dimension).
      label_weights: Optional tensor of non-negative weights for multilabel
        data. The weights are applied when calculating TP, FP, FN, and TN without
        explicit multilabel handling (i.e. when the data is to be flattened).
      thresholds_with_epsilon: Optional boolean indicating whether the leading and
        trailing thresholds have any epsilon added for floating point
        imprecisions. It will change how we handle the leading and trailing
        bucket.

    Returns:
      Update op.
    """
    num_thresholds = thresholds.shape.as_list()[0]

    # Missing weights default to the scalar 1.0, so the product below is a
    # no-op for them. Supplied weights are broadcast to y_pred and, in the
    # flattened (non multi-label) case, reshaped to 1-D like the labels.
    if sample_weights is None:
        sample_weights = 1.0
    else:
        sample_weights = tf.__internal__.ops.broadcast_weights(
            tf.cast(sample_weights, dtype=y_pred.dtype), y_pred)
        if not multi_label:
            sample_weights = tf.reshape(sample_weights, [-1])
    if label_weights is None:
        label_weights = 1.0
    else:
        label_weights = tf.expand_dims(label_weights, 0)
        label_weights = tf.__internal__.ops.broadcast_weights(
            label_weights, y_pred)
        if not multi_label:
            label_weights = tf.reshape(label_weights, [-1])
    weights = tf.multiply(sample_weights, label_weights)

    # We shouldn't need this, but in case there are predict value that is out of
    # the range of [0.0, 1.0]
    y_pred = tf.clip_by_value(y_pred, clip_value_min=0.0, clip_value_max=1.0)

    # Round-trip through bool so every non-zero label becomes exactly 1.
    y_true = tf.cast(tf.cast(y_true, tf.bool), y_true.dtype)
    if not multi_label:
        y_true = tf.reshape(y_true, [-1])
        y_pred = tf.reshape(y_pred, [-1])

    # Weighted indicators of positive and negative examples.
    true_labels = tf.multiply(y_true, weights)
    false_labels = tf.multiply((1.0 - y_true), weights)

    # Compute the bucket indices for each prediction value.
    # Since the predict value has to be strictly greater than the thresholds,
    # eg, buckets like [0, 0.5], (0.5, 1], and 0.5 belongs to first bucket.
    # We have to use math.ceil(val) - 1 for the bucket.
    # A prediction of exactly 0 therefore maps to bucket -1.
    bucket_indices = tf.math.ceil(y_pred * (num_thresholds - 1)) - 1

    if thresholds_with_epsilon:
        # In this case, the first bucket should actually take into account since
        # the any prediction between [0.0, 1.0] should be larger than the first
        # threshold. We change the bucket value from -1 to 0.
        bucket_indices = tf.nn.relu(bucket_indices)

    # NOTE(review): without the relu above, index -1 reaches
    # unsorted_segment_sum; presumably entries with a negative segment id are
    # dropped -- confirm against the TensorFlow documentation.
    bucket_indices = tf.cast(bucket_indices, tf.int32)

    if multi_label:
        # We need to run bucket segment sum for each of the label class. In the
        # multi_label case, the rank of the label is 2. We first transpose it so
        # that the label dim becomes the first and we can parallel run though them.
        true_labels = tf.transpose(true_labels)
        false_labels = tf.transpose(false_labels)
        bucket_indices = tf.transpose(bucket_indices)

        def gather_bucket(label_and_bucket_index):
            # Per-class bucket sums; receives one (labels, indices) pair.
            label, bucket_index = (
                label_and_bucket_index[0],
                label_and_bucket_index[1],
            )
            return tf.math.unsorted_segment_sum(
                data=label,
                segment_ids=bucket_index,
                num_segments=num_thresholds,
            )

        tp_bucket_v = tf.vectorized_map(gather_bucket,
                                        (true_labels, bucket_indices))
        fp_bucket_v = tf.vectorized_map(gather_bucket,
                                        (false_labels, bucket_indices))
        # Reverse cumulative sum along the threshold axis, then transpose back
        # so the threshold axis comes first.
        tp = tf.transpose(tf.cumsum(tp_bucket_v, reverse=True, axis=1))
        fp = tf.transpose(tf.cumsum(fp_bucket_v, reverse=True, axis=1))
    else:
        tp_bucket_v = tf.math.unsorted_segment_sum(
            data=true_labels,
            segment_ids=bucket_indices,
            num_segments=num_thresholds,
        )
        fp_bucket_v = tf.math.unsorted_segment_sum(
            data=false_labels,
            segment_ids=bucket_indices,
            num_segments=num_thresholds,
        )
        tp = tf.cumsum(tp_bucket_v, reverse=True)
        fp = tf.cumsum(fp_bucket_v, reverse=True)

    # fn = sum(true_labels) - tp
    # tn = sum(false_labels) - fp
    # The totals are only computed when TN or FN was requested; the two
    # branches further down that read them are guarded by the same keys.
    if (ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
            or ConfusionMatrix.FALSE_NEGATIVES in variables_to_update):
        if multi_label:
            total_true_labels = tf.reduce_sum(true_labels, axis=1)
            total_false_labels = tf.reduce_sum(false_labels, axis=1)
        else:
            total_true_labels = tf.reduce_sum(true_labels)
            total_false_labels = tf.reduce_sum(false_labels)

    # Accumulate into whichever of the four variables the caller supplied.
    update_ops = []
    if ConfusionMatrix.TRUE_POSITIVES in variables_to_update:
        variable = variables_to_update[ConfusionMatrix.TRUE_POSITIVES]
        update_ops.append(variable.assign_add(tp))
    if ConfusionMatrix.FALSE_POSITIVES in variables_to_update:
        variable = variables_to_update[ConfusionMatrix.FALSE_POSITIVES]
        update_ops.append(variable.assign_add(fp))
    if ConfusionMatrix.TRUE_NEGATIVES in variables_to_update:
        variable = variables_to_update[ConfusionMatrix.TRUE_NEGATIVES]
        tn = total_false_labels - fp
        update_ops.append(variable.assign_add(tn))
    if ConfusionMatrix.FALSE_NEGATIVES in variables_to_update:
        variable = variables_to_update[ConfusionMatrix.FALSE_NEGATIVES]
        fn = total_true_labels - tp
        update_ops.append(variable.assign_add(fn))
    return tf.group(update_ops)
Beispiel #19
0
 def _scale_one_loss(l):  # Separate def avoids lambda capture of loop var.
   # Wrap the loss callable so that it returns `multiplier * l()`;
   # `multiplier` comes from the enclosing scope.
   scaled = tf.function(lambda: tf.multiply(multiplier, l()))
   # Request the concrete function right away; the result is not needed.
   scaled.get_concrete_function()
   return scaled
def lattice_rule_sample(generating_vectors: types.IntTensor,
                        dim: types.IntTensor,
                        num_results: types.IntTensor,
                        sequence_indices: types.IntTensor = None,
                        additive_shift: types.FloatTensor = None,
                        apply_tent_transform: bool = False,
                        validate_args: bool = False,
                        dtype: tf.DType = None,
                        name: str = None) -> types.RealTensor:
    r"""Samples points of a lattice rule built from a generating vector.

    With `z = generating_vectors[:dim]`, `N = num_results` and `frac(t)` the
    floor-modulo of `t` by one, the point for sequence index `i` is

        frac(i * frac(z / N) + additive_shift[:dim])

    (the shift term is omitted when `additive_shift` is `None`). The tent
    transform, when requested, is applied to these points afterwards.

    #### Examples

    ```python
    import tensorflow as tf
    import tf_quant_finance as tff

    # Example: Sampling 1,000 points from 2D generating vectors.

    generating_vectors = tf.constant([1, 387275, 314993, 50301], dtype=tf.int32)

    dim = 2
    num_results = 1000

    tff.math.qmc.lattice_rule_sample(generating_vectors, dim, num_results)
    # ==> tf.Tensor([
    #             [0.,         0.        ],
    #             [0.001,      0.2749939 ],
    #             [0.002,      0.5499878 ],
    #             ...
    #             [0.99700004, 0.1689148 ],
    #             [0.998,      0.4439087 ],
    #             [0.9990001,  0.7189026 ],
    #         ], shape=(1000, 2), dtype=float32)
    ```

    Args:
      generating_vectors: `Tensor` of positive integers with rank 1. The
        vector from which to sample points; only its first `dim` entries are
        used. Its dtype is also used for `dim`, `num_results` and
        `sequence_indices`.
      dim: Positive scalar `Tensor` of integers with rank 0. The event size of
        the sampled points. Must not exceed the size of `generating_vectors`.
      num_results: Positive scalar `Tensor` of integers with rank 0. The
        divisor applied to the generating vector, and the number of points
        produced when `sequence_indices` is `None`.
      sequence_indices: Optional `Tensor` of non-negative integers with rank
        1. The elements of the sequence to return, specified by their position
        in the sequence.
        Default value: `None` which corresponds to the `[0, num_results)`
        range.
      additive_shift: Optional `Tensor` of real values, expected to have the
        same `shape` as `generating_vectors`. Its first `dim` entries are
        added to every point (modulo 1) before the tent transform is applied.
        Default value: `None`.
      apply_tent_transform: Python `bool` indicating whether to apply a tent
        transform to the sampled points.
        Default value: `False`.
      validate_args: Python `bool` indicating whether to validate arguments.
        When `True`, asserts that `generating_vectors` has rank 1, that `dim`
        does not exceed its size, and that `num_results` is positive.
        Default value: `False`.
      dtype: Optional `dtype`. The `dtype` of the output `Tensor` (either
        `float32` or `float64`).
        Default value: `None` which maps to `float32`.
      name: Python `str` name prefixed to ops created by this function.
        Default value: `None` which maps to `sample_lattice_rule`.

    Returns:
      A `Tensor` of lattice rule samples with `shape` `(num_samples, dim)`.
      One row is produced per entry of `sequence_indices`, or per index in
      `[0, num_results)` when `sequence_indices` is `None`.
    """

    with tf.name_scope(name or 'sample_lattice_rule'):
        # shape: (?,)
        generating_vectors = tf.convert_to_tensor(
            generating_vectors, name='generating_vectors')

        # Integer inputs follow the generating vector's dtype; real-valued
        # intermediates and the output use the requested dtype.
        int_dtype = generating_vectors.dtype
        real_dtype = dtype or tf.float32

        dim = tf.convert_to_tensor(dim, dtype=int_dtype, name='dim')
        num_results = tf.convert_to_tensor(
            num_results, dtype=int_dtype, name='num_results')

        assertions = []
        if validate_args:
            assertions = [
                tf.debugging.assert_equal(
                    tf.rank(generating_vectors),
                    1,
                    message='generating_vectors must have rank 1'),
                tf.debugging.assert_less_equal(
                    dim,
                    tf.size(generating_vectors, out_type=int_dtype),
                    message='dim must not exceed the size of generating_vectors'
                ),
                tf.debugging.assert_positive(
                    num_results, message='num_results must be positive'),
            ]

        with tf.control_dependencies(assertions):
            # shape: (num_samples,)
            if sequence_indices is None:
                sequence_indices = tf.range(0, num_results)
            sequence_indices = tf.cast(
                sequence_indices, int_dtype, name='sequence_indices')

            one = tf.ones(shape=(), dtype=real_dtype)

            # shape: (dim,)
            numerators = tf.cast(generating_vectors[:dim], real_dtype)
            # shape: ()
            denominator = tf.cast(num_results, real_dtype)
            # shape: (dim,)
            scaled_vector = tf.divide(numerators, denominator)

            # shape: (num_samples, 1)
            index_column = tf.expand_dims(
                tf.cast(sequence_indices, real_dtype), axis=1)
            # shape: (1, dim)
            vector_row = tf.expand_dims(
                tf.math.floormod(scaled_vector, one), axis=0)

            # shape: (num_samples, dim)
            points = tf.multiply(index_column, vector_row)

            if additive_shift is not None:
                # shape: same as the provided additive_shift
                additive_shift = tf.cast(
                    additive_shift, real_dtype, name='additive_shift')
                # shape: (num_samples, dim)
                points = points + additive_shift[:dim]

            # Wrap every coordinate back into [0, 1).
            # shape: (num_samples, dim)
            points = tf.math.floormod(points, one)

            # shape: (num_samples, dim)
            if apply_tent_transform:
                return utils.tent_transform(points)
            return points
Beispiel #21
0
def volume_coefficient(basis):
  """Returns half the log-determinant of `basis @ basis^T`.

  This equals `log(sqrt(det(basis @ basis^T)))`.

  Args:
    basis: Tensor used as both operands of the matrix product (the second
      operand is transposed).

  Returns:
    `0.5 * logdet(basis @ basis^T)`.
  """
  gram = tf.linalg.matmul(basis, basis, transpose_b=True)
  log_det = tf.linalg.logdet(gram)
  return tf.multiply(log_det, 0.5)
Beispiel #22
0
 def _scale_one_loss(l):
     """Returns a `tf.function` computing `regularization_loss_multiplier * l()`.

     Kept as a separate `def` so that each call binds its own `l`; a lambda
     written directly inside a loop would capture the loop variable instead.

     Args:
       l: Zero-argument callable returning a loss value.
     """
     scaled_loss_fn = tf.function(
         lambda: tf.multiply(regularization_loss_multiplier, l()))
     # The concrete function is requested immediately and then discarded;
     # only the side effect of requesting it is wanted here.
     scaled_loss_fn.get_concrete_function()
     return scaled_loss_fn
Beispiel #23
0
 def call(self, inputs):
   """Multiplies `inputs` by `self.my_var` in an op named 'my_op'."""
   product = tf.multiply(inputs, self.my_var, name='my_op')
   return product
Beispiel #24
0
 def call(self, text, features):
     """Scales the transformed `features` by a mask derived from `text`.

     The mask is `self.fc_film(self.text_encoder(text))`; the features go
     through `self.fc1` and then `self.fc2`. The two results are multiplied
     with `tf.multiply`.

     Args:
       text: Input passed to `self.text_encoder`.
       features: Input passed to `self.fc1`.

     Returns:
       The product of the transformed features and the mask.
     """
     # The text branch is evaluated first, then the feature branch.
     mask = self.fc_film(self.text_encoder(text))
     hidden = self.fc2(self.fc1(features))
     return tf.multiply(hidden, mask)
Beispiel #25
0
    def update_state(self, values, sample_weight=None):
        """Adds a batch of values to the running `total` and, if needed, `count`.

        Args:
          values: Per-example value.
          sample_weight: Optional weighting of each example. Defaults to 1.

        Returns:
          Update op. For the SUM reduction this is the `total` assign-add op;
          for the other supported reductions it is the `count` assign-add op,
          which is created under a control dependency on the `total` update.

        Raises:
          RuntimeError: If `values` cannot be cast to the metric dtype.
          NotImplementedError: If `self.reduction` is not a supported
            reduction.
        """
        [values], sample_weight = (
            metrics_utils.ragged_assert_compatible_and_get_flat_values(
                [values], sample_weight))

        try:
            values = tf.cast(values, self._dtype)
        except (ValueError, TypeError):
            error_text = (
                'The output of a metric function can only be a single Tensor. '
                f'Received: {values}. ')
            if isinstance(values, dict):
                error_text += (
                    'To return a dict of values, implement a custom Metric '
                    'subclass.')
            raise RuntimeError(error_text)

        if sample_weight is not None:
            sample_weight = tf.cast(sample_weight, self._dtype)
            # Align the rank of the weights with the values where possible.
            values, _, sample_weight = (
                losses_utils.squeeze_or_expand_dimensions(
                    values, sample_weight=sample_weight))
            try:
                # Preferred path: broadcast the weights up to the values.
                sample_weight = tf.__internal__.ops.broadcast_weights(
                    sample_weight, values)
            except ValueError:
                # Fallback: collapse the trailing axes of `values` so that it
                # has the same number of dimensions as the weights.
                values_rank = backend.ndim(values)
                weights_rank = backend.ndim(sample_weight)
                trailing_axes = list(range(weights_rank, values_rank))
                collapse = (
                    tf.reduce_sum
                    if self.reduction == metrics_utils.Reduction.SUM
                    else tf.reduce_mean)
                values = collapse(values, axis=trailing_axes)
            values = tf.multiply(values, sample_weight)

        batch_total = tf.reduce_sum(values)
        with tf.control_dependencies([batch_total]):
            total_update = self.total.assign_add(batch_total)

        # A plain sum has no denominator, so `count` is left untouched.
        if self.reduction == metrics_utils.Reduction.SUM:
            return total_update

        # Work out the increment of `count` for the averaging reductions.
        is_batch_mean = (
            self.reduction == metrics_utils.Reduction.SUM_OVER_BATCH_SIZE)
        is_weighted_mean = (
            self.reduction == metrics_utils.Reduction.WEIGHTED_MEAN)
        if is_batch_mean or (is_weighted_mean and sample_weight is None):
            count_increment = tf.cast(tf.size(values), self._dtype)
        elif is_weighted_mean:
            count_increment = tf.reduce_sum(sample_weight)
        else:
            raise NotImplementedError(
                f'Reduction "{self.reduction}" not implemented. Expected '
                '"sum", "weighted_mean", or "sum_over_batch_size".')

        with tf.control_dependencies([total_update]):
            return self.count.assign_add(count_increment)
Beispiel #26
0
def cosine_distance(x, y):
  """Returns one minus the dot product of the L2-normalized inputs.

  Each tensor is normalized by `tf.math.l2_normalize` with its default axis;
  the normalized tensors are multiplied and the product is summed over all
  elements.

  Args:
    x: First tensor.
    y: Second tensor, of the same shape as `x`.

  Returns:
    `1 - sum(normalize(x) * normalize(y))`.
  """
  unit_x = tf.math.l2_normalize(x)
  unit_y = tf.math.l2_normalize(y)
  similarity = tf.reduce_sum(tf.multiply(unit_x, unit_y))
  return 1. - similarity
Beispiel #27
0
 def multiply_tt(y):
     """Returns the mean of the weighted `self.compute_log_pdf(X, y)` values.

     The weighting factor is `weights[:, y, tf.newaxis]` multiplied by
     `responsabilities[y]`; `weights`, `responsabilities` and `X` come from
     the enclosing scope.

     Args:
       y: Index selecting the slice of `weights`, the entry of
         `responsabilities`, and the second argument of `compute_log_pdf`.
     """
     factor = tf.multiply(weights[:, y, tf.newaxis], responsabilities[y])
     log_pdf = self.compute_log_pdf(X, y)
     return tf.reduce_mean(tf.multiply(factor, log_pdf))