Exemple #1
0
def unique_sequences(
    src: _k2.RaggedInt,
    need_num_repeats: bool = True,
    need_new2old_indexes: bool = False,
) -> Tuple[_k2.RaggedInt, Optional[_k2.RaggedInt], Optional[torch.Tensor]]:  # noqa
    '''Drop duplicated sub-lists from a ragged tensor.

    With a 2-axis `src`, every distinct sub-list appears once in the result
    (the order of the sub-lists may differ from the input).  With a 3-axis
    `src`, the same de-duplication is carried out independently for each
    index on axis 0.  With more than 3 axes, the earliest axes are ignored.

    Caution:
      Uniqueness is decided through a hash and collisions are ignored.
      Sequences that share a hash are collapsed into a single one even when
      their actual contents differ, so it is not completely guaranteed that
      every unique sequence is present in the output.

    Caution:
      The output may differ from `src` even when `src` holds no repeated
      sequences.  In particular, `new2old_indexes` may NOT be an identity
      map even if nothing was removed.

    Args:
      src:
        The ragged tensor to process. Must satisfy `src.num_axes() == 2`
        or `src.num_axes() == 3`.
      need_num_repeats:
        When True, the number of repeats of each returned sequence is
        returned as well.
      need_new2old_indexes:
        When True, an extra 1-D tensor `new2old_indexes` is returned.
        For a 2-axis `src` it contains `src_idx0`; for a 3-axis `src` it
        contains `src_idx01`.

        Caution:
          Only one of several repeated sub-lists is kept.  Which one is
          kept is **deterministic**, but it is an implementation detail.

    Returns:
      A tuple `(ans, num_repeats, new2old_indexes)`:

        - ans: A ragged tensor with as many axes as `src`, possibly with
          fewer elements because repeated sequences on the last axis were
          removed (the last-but-one indexes may be in a different order).

        - num_repeats: The number of repeats of each returned sequence when
          `need_num_repeats` is True, None otherwise.  When not None,
          `num_repeats.num_axes()` is always 2.
          If `ans.num_axes()` is 2: `num_repeats.dim0() == 1` and
          `num_repeats.num_elements() == ans.dim0()`.
          If `ans.num_axes()` is 3: `num_repeats.dim0() == ans.dim0()` and
          `num_repeats.num_elements() == ans.tot_size(1)`.

        - new2old_indexes: A 1-D tensor whose i-th entry identifies the
          input sub-list that the i-th output sub-list corresponds to
          (annotated as optional; presumably None when
          `need_new2old_indexes` is False -- confirm against `_k2`).
    '''
    # All the work is done by the `_k2` implementation; the flags are
    # forwarded by keyword, unchanged.
    result = _k2.unique_sequences(
        src,
        need_num_repeats=need_num_repeats,
        need_new2old_indexes=need_new2old_indexes,
    )
    return result
Exemple #2
0
def unique_sequences(
    src: _k2.RaggedInt,
    need_num_repeats: bool = True
) -> Tuple[_k2.RaggedInt, Optional[_k2.RaggedInt]]:  # noqa
    '''Drop duplicated sub-lists from a ragged tensor.

    With a 2-axis `src`, every distinct sub-list appears once in the result
    (the order of the sub-lists may differ from the input).  With a 3-axis
    `src`, the same de-duplication is carried out independently for each
    index on axis 0.  With more than 3 axes, the earliest axes are ignored.

    Caution:
      Uniqueness is decided through a hash and collisions are ignored.
      Sequences that share a hash are collapsed into a single one even when
      their actual contents differ, so it is not completely guaranteed that
      every unique sequence is present in the output.

    Args:
      src:
        The ragged tensor to process. Must satisfy `src.num_axes() == 2`
        or `src.num_axes() == 3`.
      need_num_repeats:
        When True, the number of repeats of each returned sequence is
        returned as well.

    Returns:
      A tuple `(ans, num_repeats)`:

        - ans: A ragged tensor with as many axes as `src`, possibly with
          fewer elements because repeated sequences on the last axis were
          removed (the last-but-one indexes may be in a different order).

        - num_repeats: The number of repeats of each returned sequence when
          `need_num_repeats` is True, None otherwise.  When not None,
          `num_repeats.num_axes()` is always 2.
          If `ans.num_axes()` is 2: `num_repeats.dim0() == 1` and
          `num_repeats.num_elements() == ans.dim0()`.
          If `ans.num_axes()` is 3: `num_repeats.dim0() == ans.dim0()` and
          `num_repeats.num_elements() == ans.tot_size(1)`.
    '''
    # All the work is done by the `_k2` implementation; the flag is
    # forwarded by keyword, unchanged.
    result = _k2.unique_sequences(
        src,
        need_num_repeats=need_num_repeats,
    )
    return result