Python reduce_mean Beispiele, tapas.models.segmented_tensor.reduce_mean Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: table_pruning.py Projekt: google-research/tapas

 def _select_columns(self, mode, features):
     """Scores each table column against the question using input embeddings.

     Input embeddings are averaged per column segment, L2-normalized, and the
     normalized average of every column is multiplied element-wise with the
     normalized average of segment 0 (the question, per the indexing comment
     in the body) and summed over the last axis.

     Args:
       mode: Not read by this method.
       features: Mapping from feature name to tensor. Reads "input_mask",
         "column_ids", "input_ids" and, when positional embeddings are
         enabled, the token type features listed in the body.

     Returns:
       The tensor named "column_scores" holding one score per column segment;
       presumably shaped [batch_size, max_num_columns] -- TODO confirm.
     """
     mask = features["input_mask"]
     col_ids = features["column_ids"]
     with tf.variable_scope("bert"):
         with tf.variable_scope("embeddings",
                                reuse=tf.compat.v1.AUTO_REUSE):
             embeddings, _ = modeling.embedding_lookup(
                 input_ids=features["input_ids"],
                 vocab_size=self._vocab_size,
                 embedding_size=self._hidden_size,
                 initializer_range=self._initializer_range,
                 word_embedding_name="word_embeddings")
             # NOTE(review): this guard reads `_use_positional_embeddings`
             # while the call below passes `_use_position_embeddings`;
             # confirm that both attributes are defined on the class.
             if self._use_positional_embeddings:
                 type_feature_names = (
                     "segment_ids", "column_ids", "row_ids",
                     "prev_label_ids", "column_ranks", "inv_column_ranks",
                     "numeric_relations",
                 )
                 disabled = self._disabled_features
                 # Disabled features contribute an all-zero id tensor instead
                 # of their real ids.
                 token_type_ids = [
                     tf.zeros_like(features[name])
                     if disabled is not None and name in disabled
                     else features[name]
                     for name in type_feature_names
                 ]
                 embeddings = modeling.embedding_postprocessor(
                     input_tensor=embeddings,
                     use_token_type=True,
                     token_type_ids=token_type_ids,
                     token_type_vocab_size=self._type_vocab_size,
                     token_type_embedding_name="token_type_embeddings",
                     use_position_embeddings=self._use_position_embeddings,
                     position_embedding_name="position_embeddings",
                     initializer_range=self._initializer_range,
                     max_position_embeddings=self._max_position_embeddings,
                     extra_embeddings=None,
                     dropout_prob=0.0)
             # Positions where input_mask is zero are re-indexed to
             # (max_num_columns + 1). Index 0 is the question and indices
             # 1..max_num_columns (inclusive) are the columns.
             masked_col_ids = col_ids * mask + (1 - mask) * (
                 self._max_num_columns + 1)
             column_index = segmented_tensor.IndexMap(
                 indices=masked_col_ids,
                 num_segments=self._max_num_columns + 2,
                 batch_dims=1)
             segment_means, _ = segmented_tensor.reduce_mean(
                 embeddings, column_index)
             # The last segment only averages the masked-out positions, so it
             # is dropped before normalizing.
             unit_means = tf.math.l2_normalize(segment_means[:, :-1], axis=2)
             question_unit = unit_means[:, :1]
             column_units = unit_means[:, 1:]
             products = column_units * question_unit
             # Replace NaN products with zeros before summing.
             products = tf.where(tf.is_nan(products),
                                 tf.zeros_like(products), products)
             return tf.math.reduce_sum(products,
                                       axis=-1,
                                       name="column_scores")

Beispiel #2

0

Datei anzeigen

Datei: segmented_tensor_test.py Projekt: sparshbhawsar/Tapas

 def test_reduce_mean(self):
     """Checks reduce_mean over the row, column and cell (row x column) indices."""
     values, row_index, col_index = self._prepare_tables()
     cell_index = segmented_tensor.ProductIndexMap(row_index, col_index)
     row_mean, _ = segmented_tensor.reduce_mean(values, row_index)
     col_mean, _ = segmented_tensor.reduce_mean(values, col_index)
     cell_mean, _ = segmented_tensor.reduce_mean(values, cell_index)

     # Expected per-segment means for the two tables in the fixture.
     expected_row = [
         [6.0 / 3.0, 3.0 / 3.0, 8.0 / 3.0],
         [6.0 / 3.0, 3.0 / 3.0, 8.0 / 3.0],
     ]
     expected_col = [
         [9.0 / 6.0, 8.0 / 3.0, 0.0],
         [4.0 / 3.0, 5.0 / 3.0, 8.0 / 3.0],
     ]
     expected_cell = [
         [3.0 / 2.0, 3.0, 0.0, 2.0 / 2.0, 1.0, 0.0, 4.0 / 2.0, 4.0, 0.0],
         [1.0, 2.0, 3.0, 2.0, 0.0, 1.0, 1.0, 3.0, 4.0],
     ]
     checks = (
         (row_mean, expected_row),
         (col_mean, expected_col),
         (cell_mean, expected_cell),
     )
     with self.session() as sess:
         for tensor, expected in checks:
             self.assertAllClose(sess.run(tensor), expected)

Beispiel #3

0

Datei anzeigen

Datei: tapas_classifier_model_utils.py Projekt: google-research/tapas

def compute_column_logits(output_layer, cell_index, cell_mask,
                          init_cell_selection_weights_to_zero,
                          allow_empty_column_selection):
    """Computes one logit per table column.

    Token logits are obtained by a linear projection of `output_layer`,
    averaged per cell, and then averaged over the existing cells of each
    column.

    Args:
      output_layer: <float>[batch_size, seq_length, hidden_dim] Output of the
        encoder layer.
      cell_index: segmented_tensor.IndexMap [batch_size, seq_length] Index that
        groups tokens into cells.
      cell_mask: <float>[batch_size, max_num_rows * max_num_cols] Input mask
        per cell, 1 for cells that exist in the example and 0 for padding.
      init_cell_selection_weights_to_zero: Whether the projection weights
        start at 0. This is also applied to column logits, as they are used to
        select the cells, so that all columns share the same prior
        probability.
      allow_empty_column_selection: Whether selecting no column is allowed.

    Returns:
      <float>[batch_size, max_num_cols] Logits per column. Columns that do not
      appear in the table get a very low logit (such that the probability is
      0); the special id 0 (which means "outside the table") gets the same
      treatment unless `allow_empty_column_selection` is set.
    """
    hidden_dim = output_layer.shape.as_list()[-1]
    if init_cell_selection_weights_to_zero:
        weights_initializer = tf.zeros_initializer()
    else:
        weights_initializer = classification_initializer()
    column_output_weights = tf.get_variable(
        "column_output_weights", [hidden_dim],
        initializer=weights_initializer)
    column_output_bias = tf.get_variable(
        "column_output_bias", shape=(), initializer=tf.zeros_initializer())

    projected = tf.einsum("bsj,j->bs", output_layer, column_output_weights)
    token_logits = projected + column_output_bias

    # Average the logits per cell first and per column afterwards. The
    # projection is linear, so averaging logits equals averaging embeddings;
    # projecting first is cheaper.
    # [batch_size, max_num_cols * max_num_rows]
    cell_logits, cell_logits_index = segmented_tensor.reduce_mean(
        token_logits, cell_index)

    column_index = cell_index.project_inner(cell_logits_index)
    # [batch_size, max_num_cols]
    masked_logit_sum, out_index = segmented_tensor.reduce_sum(
        cell_logits * cell_mask, column_index)
    cell_count, _ = segmented_tensor.reduce_sum(cell_mask, column_index)
    column_logits = masked_logit_sum / (cell_count + EPSILON_ZERO_DIVISION)

    # Columns without any existing cell are padding, except for id 0.
    has_no_cells = cell_count < 0.5
    is_real_column_id = tf.not_equal(out_index.indices, 0)
    is_padding = tf.logical_and(has_no_cells, is_real_column_id)
    padding_penalty = CLOSE_ENOUGH_TO_LOG_ZERO * tf.cast(
        is_padding, tf.float32)
    column_logits = column_logits + padding_penalty

    if allow_empty_column_selection:
        return column_logits

    # Selecting nothing is not allowed: push down the special column id 0.
    empty_penalty = CLOSE_ENOUGH_TO_LOG_ZERO * tf.cast(
        tf.equal(out_index.indices, 0), tf.float32)
    return column_logits + empty_penalty

Beispiel #4

0

Datei anzeigen

Datei: table_pruning.py Projekt: google-research/tapas

    def _compute_column_scores_from_token_scores(self, mode, output_layer,
                                                 features):
        """Derives column scores by averaging token scores.

        Args:
          mode: Estimator mode key; dropout is applied to `output_layer` only
            when it equals `tf_estimator.ModeKeys.TRAIN`.
          output_layer: Encoder output passed on to
            `utils.compute_column_logits`.
          features: Mapping from feature name to tensor. Reads "input_mask",
            "row_ids" and "column_ids".

        Returns:
          Column scores with the first column entry sliced off, checked to be
          finite.
        """
        with tf.variable_scope(PRUNING_SCOPE, reuse=tf.AUTO_REUSE):
            is_training = mode == tf_estimator.ModeKeys.TRAIN
            if is_training:
                output_layer = tf.nn.dropout(
                    output_layer, keep_prob=_SEQUENCE_OUTPUT_KEEP_PROB)
            mask = features["input_mask"]
            rows = features["row_ids"]
            columns = features["column_ids"]

            # Table indices; ids are clipped to the last valid segment.
            clipped_rows = tf.minimum(rows, self._max_num_rows - 1)
            row_index = segmented_tensor.IndexMap(
                indices=clipped_rows,
                num_segments=self._max_num_rows,
                batch_dims=1)
            clipped_columns = tf.minimum(columns, self._max_num_columns)
            col_index = segmented_tensor.IndexMap(
                indices=clipped_columns,
                num_segments=self._max_num_columns + 1,
                batch_dims=1)
            cell_index = segmented_tensor.ProductIndexMap(row_index, col_index)

            # <float32>[batch_size, seq_length]
            mask_float = tf.cast(mask, tf.float32)
            # Per-cell mask: marks the cells that exist in the table (i.e.
            # that are not padding).
            cell_mask, _ = segmented_tensor.reduce_mean(mask_float, cell_index)

            # Column logits that can be used to select a column; the entry
            # at index 0 is dropped.
            # <float32>[batch_size, max_num_columns]
            all_column_logits = utils.compute_column_logits(
                output_layer=output_layer,
                cell_index=cell_index,
                cell_mask=cell_mask,
                init_cell_selection_weights_to_zero=False,
                allow_empty_column_selection=False)
            column_scores = all_column_logits[:, 1:]
            return tf.debugging.assert_all_finite(
                column_scores, "column_scores contains nan values.")

Beispiel #5

0

Datei anzeigen

    def call(self, inputs, cell_index, cell_mask):
        """Computes one logit per table column.

        Args:
          inputs: <float>[batch_size, seq_length, hidden_dim] Output of the
            encoder layer.
          cell_index: segmented_tensor.IndexMap [batch_size, seq_length] Index
            that groups tokens into cells.
          cell_mask: <float>[batch_size, max_num_rows * max_num_cols] Input
            mask per cell, 1 for cells that exist in the example and 0 for
            padding.

        Returns:
          [batch_size, max_num_cols] column logits. Columns without existing
          cells are offset by CLOSE_ENOUGH_TO_LOG_ZERO, and so is column id 0
          unless `self.allow_empty_column_selection` is set.
        """
        projected = tf.einsum("bsj,j->bs", inputs, self.column_output_weights)
        token_logits = projected + self.column_output_bias

        # Average the logits per cell first and per column afterwards. The
        # projection is linear, so averaging logits equals averaging
        # embeddings; projecting first is cheaper.
        # [batch_size, max_num_cols * max_num_rows]
        cell_logits, cell_logits_index = segmented_tensor.reduce_mean(
            token_logits, cell_index)

        column_index = cell_index.project_inner(cell_logits_index)
        # [batch_size, max_num_cols]
        masked_logit_sum, out_index = segmented_tensor.reduce_sum(
            cell_logits * cell_mask, column_index)
        cell_count, _ = segmented_tensor.reduce_sum(cell_mask, column_index)
        column_logits = masked_logit_sum / (
            cell_count + EPSILON_ZERO_DIVISION)

        # Columns without any existing cell are padding, except for id 0.
        has_no_cells = cell_count < 0.5
        is_real_column_id = tf.not_equal(out_index.indices, 0)
        is_padding = tf.logical_and(has_no_cells, is_real_column_id)
        padding_penalty = CLOSE_ENOUGH_TO_LOG_ZERO * tf.cast(
            is_padding, tf.float32)
        column_logits = column_logits + padding_penalty

        if self.allow_empty_column_selection:
            return column_logits

        # Selecting nothing is not allowed: push down the special column id 0.
        empty_penalty = CLOSE_ENOUGH_TO_LOG_ZERO * tf.cast(
            tf.equal(out_index.indices, 0), tf.float32)
        return column_logits + empty_penalty

Beispiel #6

0

Datei anzeigen

    def call(self, input_token_ids, input_mask, segment_ids, column_ids,
             row_ids, prev_label_ids, column_ranks, inv_column_ranks,
             numeric_relations, label_ids, **kwargs):
        """Runs the encoder and computes cell selection outputs.

        Args:
          input_token_ids: Token ids passed to the encoder.
          input_mask: Input mask; cast to float32 to mask the probabilities
            and to derive the per-cell mask.
          segment_ids: Token type feature passed to the encoder.
          column_ids: Column id per token; also used to build the column
            index.
          row_ids: Row id per token; also used to build the row index.
          prev_label_ids: Token type feature passed to the encoder.
          column_ranks: Token type feature passed to the encoder.
          inv_column_ranks: Token type feature passed to the encoder.
          numeric_relations: Token type feature passed to the encoder.
          label_ids: Labels per token, forwarded to
            `single_column_cell_selection` when a single column is selected.
          **kwargs: Forwarded to the encoder call.

        Returns:
          A tuple `(logits, probs, logits_aggregation, logits_cls)`.
          `logits_aggregation` is None unless `self.do_model_aggregation` is
          set and `logits_cls` is None unless `self.do_model_classification`
          is set.
        """
        config = self.tapas_classifier_config

        # Construct indices for the table.
        row_index = segmented_tensor.IndexMap(
            indices=tf.minimum(tf.cast(row_ids, tf.int32),
                               config.max_num_rows - 1),
            num_segments=config.max_num_rows,
            batch_dims=1)
        col_index = segmented_tensor.IndexMap(
            indices=tf.minimum(tf.cast(column_ids, tf.int32),
                               config.max_num_columns - 1),
            num_segments=config.max_num_columns,
            batch_dims=1)
        cell_index = segmented_tensor.ProductIndexMap(row_index, col_index)

        # Masks.
        # <float32>[batch_size, seq_length]
        input_mask_float = tf.cast(input_mask, tf.float32)

        # Mask for cells that exist in the table (i.e. that are not padding).
        cell_mask, _ = segmented_tensor.reduce_mean(input_mask_float,
                                                    cell_index)

        pooled_output, sequence_output = self.bert([
            input_token_ids, input_mask, segment_ids, column_ids, row_ids,
            prev_label_ids, column_ranks, inv_column_ranks, numeric_relations
        ], **kwargs)
        # Compute logits per token. These are used to select individual cells.
        logits = self.compute_token_logits(sequence_output)
        # Compute logits per column. These are used to select a column.
        column_logits = None
        if config.select_one_column:
            column_logits = self.compute_column_logits(sequence_output,
                                                       cell_index, cell_mask)

        logits_cls = None
        if self.do_model_classification:
            logits_cls = self.compute_classification_logits(pooled_output)

        if config.average_logits_per_cell:
            # Replace each token logit by the mean logit of its cell.
            logits_per_cell, _ = segmented_tensor.reduce_mean(
                logits, cell_index)
            logits = segmented_tensor.gather(logits_per_cell, cell_index)
        dist_per_token = tfp.distributions.Bernoulli(logits=logits)

        if config.select_one_column:
            logits = single_column_cell_selection(logits, column_logits,
                                                  label_ids, cell_index,
                                                  col_index, cell_mask)
            dist_per_token = tfp.distributions.Bernoulli(logits=logits)

        logits_aggregation = None
        if self.do_model_aggregation:
            logits_aggregation = self.calculate_aggregation_logits(
                pooled_output)

        probs = _get_probs(dist_per_token) * input_mask_float

        return logits, probs, logits_aggregation, logits_cls

Beispiel #7

0

Datei anzeigen

def single_column_cell_selection(token_logits, column_logits, label_ids,
                                 cell_index, col_index, cell_mask):
    """Restricts cell selection logits to the single most likely column.

    The column with the highest `column_logits` entry is selected. Logits of
    cells outside that column, of padding cells, and of cells with the special
    column id 0 are pushed to a very low value so they are never selected.
    This function does not compute a loss.

    Args:
      token_logits: <float>[batch_size, seq_length] Logits per token.
      column_logits: <float>[batch_size, max_num_cols] Logits per column.
      label_ids: <int32>[batch_size, seq_length] Labels per token. Only reduced
        per cell to obtain the per-cell index; the label values themselves do
        not influence the result.
      cell_index: segmented_tensor.IndexMap [batch_size, seq_length] Index that
        groups tokens into cells.
      col_index: segmented_tensor.IndexMap [batch_size, seq_length] Index that
        groups tokens into columns. Not read by this function; kept so the
        signature stays compatible with existing callers.
      cell_mask: <float>[batch_size, max_num_rows * max_num_cols] Input mask per
        cell, 1 for cells that exist in the example and 0 for padding.

    Returns:
      logits: <float>[batch_size, seq_length] New logits which are only allowed
        to select cells in a single column. Logits outside of the most likely
        column according to `column_logits` will be set to a very low value
        (such that the probabilities are 0).
    """
    del col_index  # Unused; see the docstring.

    # Reduce the logits to per-cell from per-token. The label reduction is
    # only needed for the index it returns.
    logits_per_cell, _ = segmented_tensor.reduce_mean(token_logits, cell_index)
    _, labels_index = segmented_tensor.reduce_max(tf.cast(label_ids, tf.int32),
                                                  cell_index)

    # Column id of every cell.
    column_id_for_cells = cell_index.project_inner(labels_index).indices

    # Set the probs outside the selected column (selected by the *model*)
    # to 0. This ensures backwards compatibility with models that select
    # cells from multiple columns.
    selected_column_id = tf.argmax(column_logits,
                                   axis=-1,
                                   output_type=tf.int32)
    selected_column_mask = tf.cast(
        tf.equal(column_id_for_cells,
                 tf.expand_dims(selected_column_id, axis=-1)), tf.float32)
    # Never select cells with the special column id 0.
    selected_column_mask = tf.where(tf.equal(column_id_for_cells, 0),
                                    tf.zeros_like(selected_column_mask),
                                    selected_column_mask)
    logits_per_cell += CLOSE_ENOUGH_TO_LOG_ZERO * (
        1.0 - cell_mask * selected_column_mask)
    # Broadcast the per-cell logits back to the tokens of each cell.
    logits = segmented_tensor.gather(logits_per_cell, cell_index)

    return logits

Beispiel #8

0

Datei anzeigen

def _get_classification_outputs(
    config,
    is_training,
    output_layer,
    output_layer_aggregation,
    label_ids,
    input_mask,
    table_mask,
    aggregation_function_id,
    answer,
    numeric_values,
    numeric_values_scale,
    row_ids,
    column_ids,
    classification_class_index,
):
    """Creates a classification model.

    Builds the cell selection logits and, depending on `config`, the column
    selection, aggregation, classification and span prediction heads, and
    sums their losses into a single total loss.

    Args:
      config: Configuration for Tapas model.
      is_training: Whether the model is training.
      output_layer: <float32>[batch_size, seq_length, hidden_size]
      output_layer_aggregation: <float32>[batch_size, hidden_size]
      label_ids: <int32>[batch_size, seq_length]
      input_mask: <int32>[batch_size, seq_length]
      table_mask: <int32>[batch_size, seq_length]
      aggregation_function_id: <int32>[batch_size]
      answer: <float32>[batch_size]
      numeric_values: <float32>[batch_size, seq_length]
      numeric_values_scale: <float32>[batch_size, seq_length]
      row_ids: <int32>[batch_size, seq_length]
      column_ids: <int32>[batch_size, seq_length]
      classification_class_index: <int32>[batch]

    Returns:
      Outputs with the fields `total_loss`, `logits`, `probs`,
      `logits_aggregation`, `logits_cls`, `span_indexes` and `span_logits`.
      The last four are None when the corresponding head is disabled.
    """
    if is_training:
        # I.e., 0.1 dropout
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    # Construct indices for the table.
    row_index = segmented_tensor.IndexMap(
        indices=tf.minimum(row_ids, config.max_num_rows - 1),
        num_segments=config.max_num_rows,
        batch_dims=1)
    col_index = segmented_tensor.IndexMap(
        indices=tf.minimum(column_ids, config.max_num_columns - 1),
        num_segments=config.max_num_columns,
        batch_dims=1)
    cell_index = segmented_tensor.ProductIndexMap(row_index, col_index)

    # Masks.
    # <float32>[batch_size, seq_length]
    input_mask_float = tf.cast(input_mask, tf.float32)
    table_mask_float = tf.cast(table_mask, tf.float32)
    # Mask for cells that exist in the table (i.e. that are not padding).
    cell_mask, _ = segmented_tensor.reduce_mean(input_mask_float, cell_index)

    # Compute logits per token. These are used to select individual cells.
    logits = utils.compute_token_logits(
        output_layer=output_layer,
        temperature=config.temperature,
        init_cell_selection_weights_to_zero=(
            config.init_cell_selection_weights_to_zero))

    # Compute logits per column. These are used to select a column.
    if config.select_one_column:
        column_logits = utils.compute_column_logits(
            output_layer=output_layer,
            cell_index=cell_index,
            cell_mask=cell_mask,
            init_cell_selection_weights_to_zero=(
                config.init_cell_selection_weights_to_zero),
            allow_empty_column_selection=config.allow_empty_column_selection)

    # TODO(pawelnow): Extract this into a function.
    # Compute aggregation function logits.
    do_model_aggregation = config.num_aggregation_labels > 0
    if do_model_aggregation:
        # `as_list()` yields plain ints regardless of whether the static
        # shape holds Dimension objects or ints.
        hidden_size_agg = output_layer_aggregation.shape.as_list()[-1]
        output_weights_agg = tf.get_variable(
            "output_weights_agg",
            shape=[config.num_aggregation_labels, hidden_size_agg],
            initializer=_classification_initializer())
        output_bias_agg = tf.get_variable(
            "output_bias_agg",
            shape=[config.num_aggregation_labels],
            initializer=tf.zeros_initializer())

    do_model_classification = config.num_classification_labels > 0
    logits_cls = None
    if do_model_classification:
        logits_cls = compute_classification_logits(
            config.num_classification_labels, output_layer_aggregation)

    with tf.variable_scope("loss"):
        total_loss = 0.0
        is_supervised = (not do_model_aggregation
                         or not config.use_answer_as_supervision)

        ### Semi-supervised cell selection in case of no aggregation
        #############################################################

        # If the answer (the denotation) appears directly in the table we might
        # select the answer without applying any aggregation function. There are
        # some ambiguous cases, see _calculate_aggregate_mask for more info.
        # `aggregate_mask` is 1 for examples where we chose to aggregate and 0
        #  for examples where we chose to select the answer directly.
        # `label_ids` encodes the positions of the answer appearing in the table.
        if is_supervised:
            aggregate_mask = None
        else:
            # <float32>[batch_size]
            aggregate_mask = _calculate_aggregate_mask(
                answer=answer,
                output_layer_aggregation=output_layer_aggregation,
                output_bias_agg=output_bias_agg,
                output_weights_agg=output_weights_agg,
                cell_select_pref=config.cell_select_pref,
                label_ids=label_ids)

        ### Cell selection log-likelihood
        ###################################

        if config.average_logits_per_cell:
            # Replace each token logit by the mean logit of its cell.
            logits_per_cell, _ = segmented_tensor.reduce_mean(
                logits, cell_index)
            logits = segmented_tensor.gather(logits_per_cell, cell_index)
        dist_per_token = tfp.distributions.Bernoulli(logits=logits)

        selection_loss_per_example = None
        if config.select_one_column:
            selection_loss_per_example, logits = _single_column_cell_selection_loss(
                token_logits=logits,
                column_logits=column_logits,
                label_ids=label_ids,
                cell_index=cell_index,
                col_index=col_index,
                cell_mask=cell_mask)
            dist_per_token = tfp.distributions.Bernoulli(logits=logits)
        else:
            # Use an explicit element-wise comparison: `label_ids == 0` is
            # only element-wise when Tensor equality overloading is enabled.
            weight = tf.where(
                tf.equal(label_ids, 0),
                tf.ones_like(label_ids, dtype=tf.float32),
                config.positive_weight *
                tf.ones_like(label_ids, dtype=tf.float32))
            selection_loss_per_token = -dist_per_token.log_prob(
                label_ids) * weight
            selection_loss_per_example = (
                tf.reduce_sum(selection_loss_per_token * input_mask_float,
                              axis=1) /
                (tf.reduce_sum(input_mask_float, axis=1) +
                 _EPSILON_ZERO_DIVISION))

        ### Logits for the aggregation function
        #########################################

        logits_aggregation = None
        if do_model_aggregation:
            logits_aggregation = _calculate_aggregation_logits(
                output_layer_aggregation, output_weights_agg, output_bias_agg)

        ### Classification loss
        ###############################
        if do_model_classification:
            one_hot_labels = tf.one_hot(classification_class_index,
                                        depth=config.num_classification_labels,
                                        dtype=tf.float32)
            if config.classification_label_weight:
                # Per-class weights; classes without an entry default to 1.0.
                label_weights = [
                    config.classification_label_weight.get(i, 1.0)
                    for i in range(config.num_classification_labels)
                ]
                one_hot_labels *= tf.constant(label_weights, dtype=tf.float32)
            log_probs = tf.nn.log_softmax(logits_cls, axis=-1)
            # <float32>[batch_size]
            per_example_classification_intermediate = -tf.reduce_sum(
                one_hot_labels * log_probs, axis=-1)

            cls_loss = tf.reduce_mean(per_example_classification_intermediate)
            total_loss += cls_loss

        ### Supervised cell selection
        ###############################

        span_indexes = None
        span_logits = None
        if config.span_prediction != SpanPredictionMode.NONE:
            (
                span_indexes,
                span_logits,
                span_loss,
            ) = span_prediction_utils.get_span_logits_by_mode(
                config.span_prediction,
                output_layer,
                label_ids,
                column_ids,
                row_ids,
                max_span_length=10,
            )
            total_loss += span_loss
        elif config.disable_per_token_loss:
            pass
        elif config.mask_examples_without_labels:
            total_loss += tf.reduce_mean(
                span_prediction_utils.compute_masked_example_loss(
                    label_ids,
                    selection_loss_per_example,
                ))
        elif is_supervised:
            total_loss += tf.reduce_mean(selection_loss_per_example)
        else:
            # For the not supervised case, do not assign loss for cell
            # selection on examples where we chose to aggregate.
            total_loss += tf.reduce_mean(selection_loss_per_example *
                                         (1.0 - aggregate_mask))

        ### Semi-supervised regression loss and supervised loss for aggregations
        #########################################################################

        if do_model_aggregation:
            # Note that `aggregate_mask` is None if the setting is supervised.
            per_example_additional_loss = _calculate_aggregation_loss(
                logits_aggregation, aggregate_mask, aggregation_function_id,
                config)

            if config.use_answer_as_supervision:
                # Add regression loss for numeric answers which require aggregation.
                answer_loss, large_answer_loss_mask = _calculate_regression_loss(
                    answer, aggregate_mask, dist_per_token, numeric_values,
                    numeric_values_scale, table_mask_float, logits_aggregation,
                    config)
                per_example_additional_loss += answer_loss
                # Zero loss for examples with answer_loss > cutoff.
                per_example_additional_loss *= large_answer_loss_mask

            total_loss += tf.reduce_mean(per_example_additional_loss)

        return Outputs(
            total_loss=total_loss,
            logits=logits,
            probs=_get_probs(dist_per_token) * input_mask_float,
            logits_aggregation=logits_aggregation,
            logits_cls=logits_cls,
            span_indexes=span_indexes,
            span_logits=span_logits,
        )