Example No. 1
    def __call__(self, tokens: torch.LongTensor, prefix_mask: torch.LongTensor):
        padding_mask = tokens.new_ones(tokens.size(), dtype=torch.bool)
        for pad in self.excludes:
            padding_mask &= (tokens != pad)
        padding_mask &= prefix_mask  # Only mask prefixes since the others won't be attended
        # Create a uniformly random mask selecting either the original words or OOV tokens
        dropout_mask = (tokens.new_empty(tokens.size(), dtype=torch.float).uniform_() < self.mask_prob)
        oov_mask = dropout_mask & padding_mask

        oov_fill = tokens.new_empty(tokens.size(), dtype=torch.long).fill_(self.oov)

        result = torch.where(oov_mask, oov_fill, tokens)
        return result, oov_mask
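For illustration, here is a small, self-contained run of the same masking logic on a toy batch; the pad id (0), OOV id (100), mask probability (0.3), and the tensors themselves are made up for the sketch:

import torch

torch.manual_seed(0)

tokens = torch.tensor([[5, 7, 9, 0, 0],
                       [3, 4, 0, 0, 0]])                 # 0 is the assumed padding id
prefix_mask = torch.tensor([[1, 1, 1, 0, 0],
                            [1, 1, 0, 0, 0]], dtype=torch.bool)
excludes, mask_prob, oov = [0], 0.3, 100

# Same logic as __call__ above, inlined.
padding_mask = tokens.new_ones(tokens.size(), dtype=torch.bool)
for pad in excludes:
    padding_mask &= (tokens != pad)
padding_mask &= prefix_mask
dropout_mask = tokens.new_empty(tokens.size(), dtype=torch.float).uniform_() < mask_prob
oov_mask = dropout_mask & padding_mask
result = torch.where(oov_mask, tokens.new_full(tokens.size(), oov), tokens)
print(result)   # positions where oov_mask is True now hold 100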
Example No. 2
from typing import Optional, Tuple

import torch
from torch import LongTensor, Tensor


def scatter_sort(
    src: Tensor,
    index: LongTensor,
    descending: bool = False,
    dim_size: Optional[int] = None,
    out: Optional[Tuple[Tensor, LongTensor]] = None,
) -> Tuple[Tensor, LongTensor]:
    """Sort each segment of ``src`` independently, where a segment is a contiguous
    run of equal values in ``index`` (i.e. ``index`` is assumed to be sorted)."""
    if src.ndimension() > 1:
        raise ValueError("Only implemented for 1D tensors")

    if dim_size is None:
        # Keep dim_size a plain Python int rather than a 0-dim tensor.
        dim_size = int(index.max()) + 1

    if out is None:
        result_values = torch.empty_like(src)
        result_indexes = index.new_empty(src.shape)
    else:
        result_values, result_indexes = out

    # Number of elements that fall into each segment of ``index``.
    sizes = (
        index.new_zeros(dim_size)
        .scatter_add_(dim=0, index=index, src=torch.ones_like(index))
        .tolist()
    )

    start = 0
    for size in sizes:
        end = start + size
        values, indexes = torch.sort(src[start:end], dim=0, descending=descending)
        result_values[start:end] = values
        result_indexes[start:end] = indexes + start
        start = end

    return result_values, result_indexes
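A quick usage sketch of `scatter_sort`: each contiguous run of equal `index` values is sorted independently, and the returned indexes point back into the original `src` (the tensors below are made-up toy data):

import torch

src = torch.tensor([3.0, 1.0, 2.0, 5.0, 4.0])
index = torch.tensor([0, 0, 0, 1, 1])   # two contiguous segments: [3, 1, 2] and [5, 4]

values, perm = scatter_sort(src, index)
print(values)   # tensor([1., 2., 3., 4., 5.])
print(perm)     # tensor([1, 2, 0, 4, 3])  -- positions in the original src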
Example No. 3
    def token_dropout(tokens: torch.LongTensor,
                      oov_token: int,
                      exclude_tokens: List[int],
                      p: float = 0.2,
                      training: bool = True) -> torch.LongTensor:
        """During training, randomly replaces some of the non-padding tokens to a mask token with probability ``p``
        
        Adopted from https://github.com/Hyperparticle/udify

        Args:
          tokens: The current batch of padded sentences with word ids
          oov_token: The mask token
          exclude_tokens: The tokens for padding the input batch
          p: The probability a word gets mapped to the unknown token
          training: Applies the dropout if set to ``True``
          tokens: torch.LongTensor: 
          oov_token: int: 
          exclude_tokens: List[int]: 
          p: float:  (Default value = 0.2)
          training: float:  (Default value = True)

        Returns:
          A copy of the input batch with token dropout applied

        """
        if training and p > 0:
            # This creates a mask that only considers unpadded tokens for mapping to oov
            padding_mask = tokens.new_ones(tokens.size(), dtype=torch.bool)
            for pad in exclude_tokens:
                padding_mask &= (tokens != pad)

            # Create a uniformly random mask selecting either the original words or OOV tokens
            dropout_mask = (tokens.new_empty(tokens.size(),
                                             dtype=torch.float).uniform_() < p)
            oov_mask = dropout_mask & padding_mask

            oov_fill = tokens.new_empty(tokens.size(),
                                        dtype=torch.long).fill_(oov_token)

            result = torch.where(oov_mask, oov_fill, tokens)

            return result
        else:
            return tokens
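A minimal usage sketch of `token_dropout`; in its source it is defined inside a class (presumably as a static method, since it takes no `self`), so for the sketch it is called as a plain function. The token ids, the pad id 0, the OOV id 100, and `p=0.5` are made up:

import torch

torch.manual_seed(13)

batch = torch.tensor([[101, 2023, 2003, 102, 0, 0],
                      [101, 7592,  102,   0, 0, 0]])

dropped = token_dropout(batch,
                        oov_token=100,                 # made-up [UNK]/mask id
                        exclude_tokens=[0, 101, 102],  # never drop padding or special tokens
                        p=0.5,
                        training=True)
print(dropped)   # non-excluded ids are replaced by 100 with probability 0.5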
Example No. 4
    def forward(self, inp: torch.FloatTensor, tgt: torch.LongTensor):
        if inp.size(0) != tgt.size(0):
            raise RuntimeError('Input and target should have the same size '
                               'in the batch dimension.')
        num_elms = 0
        entry_size = tgt.size(0)
        output = inp.new_zeros(entry_size)  # log probabilities
        gather_inds = tgt.new_empty(entry_size)  # tgt indices in head

        for i in range(self.n_clusters + 1):
            target_mask, rel_inds = \
                get_cluster_members(i, tgt, self.cutoffs, self.ent_slices)
            # Rows of the batch whose targets fall into the current cluster
            members = target_mask.nonzero().squeeze(-1)  # keep 1-D even for a single member
            if members.numel() == 0:
                continue
            if i == 0:  # Head cluster
                # Head cluster also needs to compute relative indices
                gather_inds.index_copy_(0, members, rel_inds[target_mask])
            else:  # Tail clusters including entity clusters
                cluster_index = self.cutoffs[0] + i - 1
                gather_inds.index_fill_(0, members, cluster_index)

                # Subset of the input whose targets fall into this cluster
                input_subset = inp.index_select(0, members)
                # Forward
                cluster_output = self.tail[i - 1](input_subset)
                cluster_logprob = F.log_softmax(cluster_output, dim=1)
                relative_target = rel_inds[target_mask]
                local_logprob = \
                    cluster_logprob.gather(1, relative_target.unsqueeze(1))
                output.index_copy_(0, members, local_logprob.squeeze(1))

            num_elms += members.numel()

        if num_elms != entry_size:
            logger.error('used_rows ({}) and batch_size ({}) do not match'
                         ''.format(num_elms, entry_size))
            raise RuntimeError("Target values should be in [0, {}], "
                               "but values in range [{}, {}] "
                               "were found. ".format(self.n_classes - 1,
                                                     tgt.min().item(),
                                                     tgt.max().item()))

        head_output = self.head(inp)
        head_logprob = F.log_softmax(head_output, dim=1)
        output += head_logprob.gather(1, gather_inds.unsqueeze(1)).squeeze()

        # Return the negative log-likelihood of each target
        return -output
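The accumulation at the end follows the usual adaptive-softmax factorisation: for a word in a tail cluster, log P(w | h) = log P(cluster | h) + log P(w | cluster, h), while words in the head only use the first term; `forward` returns the negative of that sum. A toy numeric sketch of the composition (all scores below are made up):

import torch
import torch.nn.functional as F

# Pretend head scores for one example: [head word 0, head word 1, cluster 1, cluster 2],
# and tail scores over the three words inside cluster 1.
head_logprob = F.log_softmax(torch.tensor([[2.0, 0.5, 1.0, 0.2]]), dim=1)
tail_logprob = F.log_softmax(torch.tensor([[0.3, 1.7, 0.1]]), dim=1)

cluster_col = 2        # what gather_inds would hold for this example (cutoffs[0] + i - 1)
relative_target = 1    # rel_inds[target_mask] -- the word's index inside its cluster

logp = head_logprob[0, cluster_col] + tail_logprob[0, relative_target]
print(-logp)           # the negative log-likelihood this forward() would return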