def _check_param_device(param: torch.Tensor,
                        old_param_device: Optional[int]) -> int:
    r"""This helper function is to check if the parameters are located
    in the same device. Currently, the conversion between model parameters
    and single vector form is not supported for multiple allocations,
    e.g. parameters in different GPUs, or mixture of CPU/GPU.

    Arguments:
        param ([Tensor]): a Tensor of a parameter of a model
        old_param_device (int): the device where the first parameter of a
                                model is allocated.

    Returns:
        old_param_device (int): report device for the first time
    """

    # First parameter encountered: record its device
    if old_param_device is None:
        old_param_device = param.get_device() if param.is_cuda else -1
    else:
        warn = False
        if param.is_cuda:  # Check if in same GPU
            warn = (param.get_device() != old_param_device)
        else:  # Check if in CPU
            warn = (old_param_device != -1)
        if warn:
            raise TypeError('Found two parameters on different devices, '
                            'this is currently not supported.')
    return old_param_device
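
A minimal usage sketch, assuming _check_param_device is in scope as defined above (it is a private helper in torch.nn.utils): thread the returned device index through a loop over a model's parameters, so any device mismatch raises immediately.

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
param_device = None
for param in model.parameters():
    # First call records the device; later calls validate against it.
    param_device = _check_param_device(param, param_device)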
Example #2
    def check_hidden_size(
            self,
            hx: Tensor,
            expected_hidden_size: Tuple[int, int, int],
            msg: str = 'Expected hidden size {}, got {}') -> None:
        if hx.size() != expected_hidden_size:
            raise RuntimeError(
                msg.format(expected_hidden_size, tuple(hx.size())))
Example #3
    def check_input(self, input: Tensor,
                    batch_sizes: Optional[Tensor]) -> None:
        expected_input_dim = 2 if batch_sizes is not None else 3
        if input.dim() != expected_input_dim:
            raise RuntimeError('input must have {} dimensions, got {}'.format(
                expected_input_dim, input.dim()))
        if self.input_size != input.size(-1):
            raise RuntimeError(
                'input.size(-1) must be equal to input_size. Expected {}, got {}'
                .format(self.input_size, input.size(-1)))
Example #4
    def get_expected_hidden_size(
            self, input: Tensor,
            batch_sizes: Optional[Tensor]) -> Tuple[int, int, int]:
        if batch_sizes is not None:
            mini_batch = batch_sizes[0]
            mini_batch = int(mini_batch)
        else:
            mini_batch = input.size(0) if self.batch_first else input.size(1)
        num_directions = 2 if self.bidirectional else 1
        expected_hidden_size = (self.num_layers * num_directions, mini_batch,
                                self.hidden_size)
        return expected_hidden_size
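
For context, a runnable sketch of how the three validators above compose, assuming they are exposed on an nn.RNN instance as in the PyTorch version these snippets come from:

import torch
import torch.nn as nn

rnn = nn.RNN(input_size=8, hidden_size=16, num_layers=2)
x = torch.randn(5, 3, 8)     # (seq_len, batch, input_size)
h0 = torch.zeros(2, 3, 16)   # (num_layers * num_directions, batch, hidden_size)

# Check the input, derive the hidden shape it implies, then check the hidden state.
rnn.check_input(x, None)
expected = rnn.get_expected_hidden_size(x, None)
rnn.check_hidden_size(h0, expected)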
Example #5
    def forward(self, input: Tensor, hx: Optional[Tensor] = None) -> Tensor:
        self.check_forward_input(input)
        if hx is None:
            hx = torch.zeros(input.size(0),
                             self.hidden_size,
                             dtype=input.dtype,
                             device=input.device)
        self.check_forward_hidden(input, hx, '')
        if self.nonlinearity == "tanh":
            ret = _VF.rnn_tanh_cell(
                input,
                hx,
                self.weight_ih,
                self.weight_hh,
                self.bias_ih,
                self.bias_hh,
            )
        elif self.nonlinearity == "relu":
            ret = _VF.rnn_relu_cell(
                input,
                hx,
                self.weight_ih,
                self.weight_hh,
                self.bias_ih,
                self.bias_hh,
            )
        else:
            ret = input  # TODO: remove when jit supports exception flow
            raise RuntimeError("Unknown nonlinearity: {}".format(
                self.nonlinearity))
        return ret
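
A usage sketch for this forward via the public nn.RNNCell wrapper (shapes chosen for illustration): one cell call per time step.

import torch
import torch.nn as nn

cell = nn.RNNCell(input_size=10, hidden_size=20, nonlinearity='tanh')
x = torch.randn(6, 3, 10)   # (seq_len, batch, input_size)
hx = torch.zeros(3, 20)     # same zeros default as in forward() above
for t in range(x.size(0)):
    hx = cell(x[t], hx)     # one tanh cell step per time step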
Example #6
    def forward(self,
                input: Tensor,
                hx: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:
        is_packed = isinstance(input, PackedSequence)
        if is_packed:
            input, batch_sizes, sorted_indices, unsorted_indices = input
            max_batch_size = batch_sizes[0]
            max_batch_size = int(max_batch_size)
        else:
            batch_sizes = None
            max_batch_size = input.size(0) if self.batch_first else input.size(
                1)
            sorted_indices = None
            unsorted_indices = None

        if hx is None:
            num_directions = 2 if self.bidirectional else 1
            hx = torch.zeros(self.num_layers * num_directions,
                             max_batch_size,
                             self.hidden_size,
                             dtype=input.dtype,
                             device=input.device)
        else:
            # Each batch of the hidden state should match the input sequence
            # that the user is passing in.
            hx = self.permute_hidden(hx, sorted_indices)

        self.check_forward_args(input, hx, batch_sizes)
        _impl = _rnn_impls[self.mode]
        if batch_sizes is None:
            result = _impl(input, hx, self._flat_weights, self.bias,
                           self.num_layers, self.dropout, self.training,
                           self.bidirectional, self.batch_first)
        else:
            result = _impl(input, batch_sizes, hx, self._flat_weights,
                           self.bias, self.num_layers, self.dropout,
                           self.training, self.bidirectional)
        output = result[0]
        hidden = result[1]

        if is_packed:
            output = PackedSequence(output, batch_sizes, sorted_indices,
                                    unsorted_indices)
        return output, self.permute_hidden(hidden, unsorted_indices)
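
A short sketch exercising both branches of this forward through nn.RNN (shapes are illustrative); a PackedSequence input takes the batch_sizes path above.

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

rnn = nn.RNN(input_size=8, hidden_size=16)
seqs = torch.randn(5, 2, 8)                     # (seq_len, batch, input_size)

output, hn = rnn(seqs)                          # padded branch: batch_sizes is None

packed = pack_padded_sequence(seqs, torch.tensor([5, 3]))
packed_out, hn = rnn(packed)                    # packed branch: PackedSequence in and out
output, lengths = pad_packed_sequence(packed_out)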
Example #7
    def forward(self, input: Tensor, target: Tensor) -> _ASMoutput:
        if input.size(0) != target.size(0):
            raise RuntimeError('Input and target should have the same size '
                               'in the batch dimension.')

        used_rows = 0
        batch_size = target.size(0)

        output = input.new_zeros(batch_size)
        gather_inds = target.new_empty(batch_size)

        cutoff_values = [0] + self.cutoffs
        for i in range(len(cutoff_values) - 1):

            low_idx = cutoff_values[i]
            high_idx = cutoff_values[i + 1]

            target_mask = (target >= low_idx) & (target < high_idx)
            row_indices = target_mask.nonzero().squeeze()

            if row_indices.numel() == 0:
                continue

            if i == 0:
                gather_inds.index_copy_(0, row_indices, target[target_mask])

            else:
                relative_target = target[target_mask] - low_idx
                input_subset = input.index_select(0, row_indices)

                cluster_output = self.tail[i - 1](input_subset)
                cluster_index = self.shortlist_size + i - 1

                gather_inds.index_fill_(0, row_indices, cluster_index)

                cluster_logprob = log_softmax(cluster_output, dim=1)
                local_logprob = cluster_logprob.gather(1, relative_target.unsqueeze(1))
                output.index_copy_(0, row_indices, local_logprob.squeeze(1))

            used_rows += row_indices.numel()

        if used_rows != batch_size:
            raise RuntimeError("Target values should be in [0, {}], "
                               "but values in range [{}, {}] "
                               "were found. ".format(self.n_classes - 1,
                                                     target.min().item(),
                                                     target.max().item()))

        head_output = self.head(input)
        head_logprob = log_softmax(head_output, dim=1)
        output += head_logprob.gather(1, gather_inds.unsqueeze(1)).squeeze()
        loss = (-output).mean()

        return _ASMoutput(output, loss)
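
A usage sketch via nn.AdaptiveLogSoftmaxWithLoss (dimensions illustrative); targets must lie in [0, n_classes - 1], matching the range check above.

import torch
import torch.nn as nn

asm = nn.AdaptiveLogSoftmaxWithLoss(in_features=16, n_classes=100, cutoffs=[10, 40])
input = torch.randn(8, 16)
target = torch.randint(0, 100, (8,))
result = asm(input, target)
# result.output holds per-sample target log-probabilities, shape (8,);
# result.loss is their negated mean, i.e. (-result.output).mean().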
Example #8
    def forward(self, input: Tensor, hx: Optional[Tensor] = None) -> Tensor:
        self.check_forward_input(input)
        if hx is None:
            hx = torch.zeros(input.size(0),
                             self.hidden_size,
                             dtype=input.dtype,
                             device=input.device)
        self.check_forward_hidden(input, hx, '')
        return _VF.gru_cell(
            input,
            hx,
            self.weight_ih,
            self.weight_hh,
            self.bias_ih,
            self.bias_hh,
        )
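
Usage sketch via nn.GRUCell; omitting hx on the first call exercises the zeros default above.

import torch
import torch.nn as nn

cell = nn.GRUCell(input_size=10, hidden_size=20)
x = torch.randn(3, 10)   # (batch, input_size)
hx = cell(x)             # first step: hx defaults to zeros
hx = cell(x, hx)         # later steps reuse the returned hidden state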
Example #9
    def check_forward_hidden(self,
                             input: Tensor,
                             hx: Tensor,
                             hidden_label: str = '') -> None:
        if input.size(0) != hx.size(0):
            raise RuntimeError(
                "Input batch size {} doesn't match hidden{} batch size {}".
                format(input.size(0), hidden_label, hx.size(0)))

        if hx.size(1) != self.hidden_size:
            raise RuntimeError(
                "hidden{} has inconsistent hidden_size: got {}, expected {}".
                format(hidden_label, hx.size(1), self.hidden_size))
Example #10
    def from_pretrained(cls, embeddings: Tensor, freeze: bool = True, max_norm: Optional[float] = None,
                        norm_type: float = 2., scale_grad_by_freq: bool = False,
                        mode: str = 'mean', sparse: bool = False, include_last_offset: bool = False) -> 'EmbeddingBag':
        r"""Creates EmbeddingBag instance from given 2-dimensional FloatTensor.

        Args:
            embeddings (Tensor): FloatTensor containing weights for the EmbeddingBag.
                The first dimension is passed to EmbeddingBag as 'num_embeddings', the second as 'embedding_dim'.
            freeze (boolean, optional): If ``True``, the tensor does not get updated in the learning process.
                Equivalent to ``embeddingbag.weight.requires_grad = False``. Default: ``True``
            max_norm (float, optional): See module initialization documentation. Default: ``None``
            norm_type (float, optional): See module initialization documentation. Default: ``2``.
            scale_grad_by_freq (boolean, optional): See module initialization documentation. Default: ``False``.
            mode (string, optional): See module initialization documentation. Default: ``"mean"``
            sparse (bool, optional): See module initialization documentation. Default: ``False``.
            include_last_offset (bool, optional): See module initialization documentation. Default: ``False``.

        Examples::

            >>> # FloatTensor containing pretrained weights
            >>> weight = torch.FloatTensor([[1, 2.3, 3], [4, 5.1, 6.3]])
            >>> embeddingbag = nn.EmbeddingBag.from_pretrained(weight)
            >>> # Get embeddings for index 1
            >>> input = torch.LongTensor([[1, 0]])
            >>> embeddingbag(input)
            tensor([[ 2.5000,  3.7000,  4.6500]])
        """
        assert embeddings.dim() == 2, \
            'Embeddings parameter is expected to be 2-dimensional'
        rows, cols = embeddings.shape
        embeddingbag = cls(
            num_embeddings=rows,
            embedding_dim=cols,
            _weight=embeddings,
            max_norm=max_norm,
            norm_type=norm_type,
            scale_grad_by_freq=scale_grad_by_freq,
            mode=mode,
            sparse=sparse,
            include_last_offset=include_last_offset)
        embeddingbag.weight.requires_grad = not freeze
        return embeddingbag
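
Beyond the docstring example, a sketch of the 1-D input plus offsets calling convention (values illustrative):

import torch
import torch.nn as nn

weight = torch.FloatTensor([[1, 2.3, 3], [4, 5.1, 6.3]])
embeddingbag = nn.EmbeddingBag.from_pretrained(weight, mode='sum')
input = torch.LongTensor([0, 1, 1])   # flat indices for all bags
offsets = torch.LongTensor([0, 2])    # bags are input[0:2] and input[2:]
embeddingbag(input, offsets)          # one pooled embedding per bag
# Weights stay frozen: requires_grad is False unless freeze=False is passed.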
Example #11
    def forward(self,
                input: Tensor,
                hx: Optional[Tuple[Tensor,
                                   Tensor]] = None) -> Tuple[Tensor, Tensor]:
        self.check_forward_input(input)
        if hx is None:
            zeros = torch.zeros(input.size(0),
                                self.hidden_size,
                                dtype=input.dtype,
                                device=input.device)
            hx = (zeros, zeros)
        self.check_forward_hidden(input, hx[0], '[0]')
        self.check_forward_hidden(input, hx[1], '[1]')
        return _VF.lstm_cell(
            input,
            hx,
            self.weight_ih,
            self.weight_hh,
            self.bias_ih,
            self.bias_hh,
        )
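
Usage sketch via nn.LSTMCell; the tuple state mirrors the (zeros, zeros) default above.

import torch
import torch.nn as nn

cell = nn.LSTMCell(input_size=10, hidden_size=20)
x = torch.randn(3, 10)    # (batch, input_size)
h, c = cell(x)            # hx is None: both states default to zeros
h, c = cell(x, (h, c))    # next step reuses the returned (h, c) pair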
Example #12
    def check_forward_input(self, input: Tensor) -> None:
        if input.size(1) != self.input_size:
            raise RuntimeError(
                "input has inconsistent input_size: got {}, expected {}".
                format(input.size(1), self.input_size))
Example #13
def apply_permutation(tensor: Tensor,
                      permutation: Tensor,
                      dim: int = 1) -> Tensor:
    return tensor.index_select(dim, permutation)
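
A quick demonstration with a toy tensor, assuming apply_permutation as defined above is in scope; index_select reorders slices along dim, which is how RNNs permute hidden states along the batch dimension.

import torch

t = torch.arange(6.).reshape(2, 3)
perm = torch.tensor([1, 0])
apply_permutation(t, perm, dim=0)
# tensor([[3., 4., 5.],
#         [0., 1., 2.]])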
Example #14
    def forward(self,
                src: Tensor,
                tgt: Tensor,
                src_mask: Optional[Tensor] = None,
                tgt_mask: Optional[Tensor] = None,
                memory_mask: Optional[Tensor] = None,
                src_key_padding_mask: Optional[Tensor] = None,
                tgt_key_padding_mask: Optional[Tensor] = None,
                memory_key_padding_mask: Optional[Tensor] = None) -> Tensor:
        r"""Take in and process masked source/target sequences.

        Args:
            src: the sequence to the encoder (required).
            tgt: the sequence to the decoder (required).
            src_mask: the additive mask for the src sequence (optional).
            tgt_mask: the additive mask for the tgt sequence (optional).
            memory_mask: the additive mask for the encoder output (optional).
            src_key_padding_mask: the ByteTensor mask for src keys per batch (optional).
            tgt_key_padding_mask: the ByteTensor mask for tgt keys per batch (optional).
            memory_key_padding_mask: the ByteTensor mask for memory keys per batch (optional).

        Shape:
            - src: :math:`(S, N, E)`.
            - tgt: :math:`(T, N, E)`.
            - src_mask: :math:`(S, S)`.
            - tgt_mask: :math:`(T, T)`.
            - memory_mask: :math:`(T, S)`.
            - src_key_padding_mask: :math:`(N, S)`.
            - tgt_key_padding_mask: :math:`(N, T)`.
            - memory_key_padding_mask: :math:`(N, S)`.

            Note: [src/tgt/memory]_mask ensures that position i is allowed to attend to the unmasked
            positions. If a ByteTensor is provided, the non-zero positions are not allowed to attend
            while the zero positions will be unchanged. If a BoolTensor is provided, positions with ``True``
            are not allowed to attend while ``False`` values will be unchanged. If a FloatTensor
            is provided, it will be added to the attention weight. 
            [src/tgt/memory]_key_padding_mask specifies elements in the key to be ignored by
            the attention. If a ByteTensor is provided, the non-zero positions will be ignored while the zero
            positions will be unchanged. If a BoolTensor is provided, the positions with the
            value of ``True`` will be ignored while the position with the value of ``False`` will be unchanged.

            - output: :math:`(T, N, E)`.

            Note: Due to the multi-head attention architecture in the transformer model,
            the output sequence length of a transformer is the same as the input sequence
            (i.e. target) length of the decoder.

            where S is the source sequence length, T is the target sequence length, N is the
            batch size, and E is the feature number.

        Examples:
            >>> output = transformer_model(src, tgt, src_mask=src_mask, tgt_mask=tgt_mask)
        """

        if src.size(1) != tgt.size(1):
            raise RuntimeError("the batch number of src and tgt must be equal")

        if src.size(2) != self.d_model or tgt.size(2) != self.d_model:
            raise RuntimeError(
                "the feature number of src and tgt must be equal to d_model")

        memory = self.encoder(src,
                              mask=src_mask,
                              src_key_padding_mask=src_key_padding_mask)
        output = self.decoder(tgt,
                              memory,
                              tgt_mask=tgt_mask,
                              memory_mask=memory_mask,
                              tgt_key_padding_mask=tgt_key_padding_mask,
                              memory_key_padding_mask=memory_key_padding_mask)
        return output
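
A minimal sketch matching the docstring shapes (S=10, T=20, N=32, E=512 chosen for illustration):

import torch
import torch.nn as nn

transformer_model = nn.Transformer(d_model=512, nhead=8)
src = torch.rand(10, 32, 512)       # (S, N, E)
tgt = torch.rand(20, 32, 512)       # (T, N, E)
out = transformer_model(src, tgt)   # (T, N, E): output has the target length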