Example #1
def make_pad_mask(lengths: Union[torch.LongTensor, List[int]],
                  xs: torch.FloatTensor = None, length_dim: int = -1):
    """Make a boolean mask that is True at padded positions."""
    if length_dim == 0:
        raise ValueError(f"length_dim cannot be {length_dim}")
    if not isinstance(lengths, list):
        lengths = lengths.tolist()
    bs = int(len(lengths))
    if xs is None:
        maxlen = int(max(lengths))
    else:
        maxlen = xs.shape[length_dim]

    seq_range = torch.arange(0, maxlen, dtype=torch.int64)
    seq_range_expand = seq_range.unsqueeze(0).expand(bs, maxlen)
    seq_length_expand = seq_range_expand.new(lengths).unsqueeze(-1)
    mask = seq_range_expand >= seq_length_expand

    if xs is not None:
        assert xs.shape[0] == bs, (xs.shape[0], bs)
        if length_dim < 0:
            length_dim = xs.dim() + length_dim
        ind = tuple(
            slice(None) if i in (0, length_dim) else None
            for i in range(xs.dim()))
        mask = mask[ind].expand_as(xs).to(xs.device)
    return mask
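A minimal usage sketch (toy lengths, plain PyTorch assumed): with xs=None the mask is built from the lengths alone, marking padded positions with True.

# Illustrative call of make_pad_mask (values chosen for the example only).
import torch

lengths = [3, 1]
mask = make_pad_mask(lengths, xs=None, length_dim=-1)
# mask -> tensor([[False, False, False],
#                 [False,  True,  True]])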
Example #2
 def forward(self, src: torch.FloatTensor, attn_mask: torch.FloatTensor) -> torch.FloatTensor:
     # attn mask
     if attn_mask.dim() == 2:
         attn_mask = attn_mask.unsqueeze(0)
     if attn_mask.dim() == 3:
         attn_mask = attn_mask.unsqueeze(1)
     # generate q, k, v by Linear
     q, k, v = self.qkv_linear(src).chunk(3, dim=-1)  # bsz*seq_len*hid
     # change shape for multi head
     # q = q.contiguous().view(src.shape[0] * self.n_head, src.shape[1], src.shape[2] // self.n_head)
     # k = k.contiguous().view(src.shape[0] * self.n_head, src.shape[1], src.shape[2] // self.n_head)
     # v = v.contiguous().view(src.shape[0] * self.n_head, src.shape[1], src.shape[2] // self.n_head)
     q = q.contiguous().view(src.shape[0], src.shape[1], self.n_head, src.shape[2] // self.n_head).permute(0, 2, 1, 3) # bsz*n_head*seq_len*h
     k = k.contiguous().view(src.shape[0], src.shape[1], self.n_head, src.shape[2] // self.n_head).permute(0, 2, 3, 1) # bsz*n_head*h*seq_len
     v = v.contiguous().view(src.shape[0], src.shape[1], self.n_head, src.shape[2] // self.n_head).permute(0, 2, 1, 3) # bsz*n_head*seq_len*h
     # compute weight
     attn_weights = torch.matmul(q, k)  # bsz * n_head * seq_len * seq_len
     attn_weights = attn_weights * float((src.shape[2] // self.n_head)) ** -0.5
     # zero out masked positions and shift them to a large negative value before softmax
     attn_weights = attn_weights * attn_mask + (attn_mask - 1) * 1e4
     attn_weights = F.softmax(attn_weights, dim=-1)  # TODO: add dropout to attn_weights
     attn_weights = self.dropout(attn_weights)
     # compute value
     attn_output = torch.matmul(attn_weights, v)
     attn_output = attn_output.permute(0, 2, 1, 3).contiguous().view(src.shape)
     attn_output = self.output_linear(attn_output)
     return attn_output
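A small standalone sketch of the additive masking used above (toy values, not part of the original module): positions where attn_mask is 0 are pushed to roughly -1e4 so that softmax assigns them near-zero weight.

# Toy illustration of masking attention scores before softmax.
import torch
import torch.nn.functional as F

scores = torch.tensor([[2.0, 1.0, 0.5]])
attn_mask = torch.tensor([[1.0, 1.0, 0.0]])      # 0 marks a padded position
masked = scores * attn_mask + (attn_mask - 1) * 1e4
weights = F.softmax(masked, dim=-1)              # last weight is ~0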
Example #3
def cov(m: torch.FloatTensor, rowvar: bool = True, inplace: bool = False):
    '''Estimate a covariance matrix given data.

    Covariance indicates the level to which two variables vary together.
    If we examine N-dimensional samples, `X = [x_1, x_2, ... x_N]^T`,
    then the covariance matrix element `C_{ij}` is the covariance of
    `x_i` and `x_j`. The element `C_{ii}` is the variance of `x_i`.

    Args:
        m: A 1-D or 2-D array containing multiple variables and observations.
            Each row of `m` represents a variable, and each column a single
            observation of all those variables.
        rowvar: If `rowvar` is True, then each row represents a
            variable, with observations in the columns. Otherwise, the
            relationship is transposed: each column represents a variable,
            while the rows contain observations.
        inplace: If True, subtract the mean from `m` in place.

    Returns:
        The covariance matrix of the variables.
    '''
    if m.dim() > 2:
        raise ValueError('m has more than 2 dimensions')
    if m.dim() < 2:
        m = m.view(1, -1)
    if not rowvar and m.size(0) != 1:
        m = m.t()
    # m = m.type(torch.double)  # uncomment this line if desired
    fact = 1.0 / (m.size(1) - 1)
    if inplace:
        m -= torch.mean(m, dim=1, keepdim=True)
    else:
        m = m - torch.mean(m, dim=1, keepdim=True)
    mt = m.t()  # if complex: mt = m.t().conj()
    return fact * m.matmul(mt).squeeze()
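A quick sanity check of cov() against numpy.cov, a sketch assuming NumPy is available:

# Compare the covariance estimate with numpy.cov (illustrative only).
import numpy as np
import torch

x = torch.randn(3, 100)            # 3 variables, 100 observations each
c = cov(x)                         # (3, 3) covariance matrix
assert np.allclose(c.numpy(), np.cov(x.numpy()), atol=1e-4)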
Example #4
    def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:  # type: ignore
        """Adds the stored bias parameters to `x`."""
        assert x.dim() in [2, 4]

        if x.dim() == 2:
            bias = self._bias.t().view(1, -1)
        else:
            bias = self._bias.t().view(1, -1, 1, 1)

        return x + bias  # type:ignore
Example #5
def img_derivative(input: torch.FloatTensor,
                   sobel_kernel: torch.FloatTensor) -> torch.FloatTensor:
    assert input.dim() == 4
    assert sobel_kernel.dim() == 4
    conv = torch.nn.Conv2d(1,
                           1,
                           kernel_size=3,
                           stride=1,
                           padding=1,
                           bias=False)
    conv.weight = torch.nn.Parameter(sobel_kernel.type_as(input),
                                     requires_grad=False)
    return conv(input)  # [N, 1, H, W]; the fixed Sobel kernel acts as a non-trainable derivative filter
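A usage sketch with a horizontal Sobel kernel (the kernel is the standard Sobel filter; the image is random toy data):

# Compute the horizontal image derivative of a single-channel image batch.
import torch

sobel_x = torch.tensor([[[[-1., 0., 1.],
                          [-2., 0., 2.],
                          [-1., 0., 1.]]]])      # [out_ch=1, in_ch=1, 3, 3]
img = torch.rand(1, 1, 64, 64)                   # [N, 1, H, W]
dx = img_derivative(img, sobel_x)                # [1, 1, 64, 64]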
Example #6
    def beam_search(
        self,
        img: FloatTensor,
        beam_size: int = 10,
        max_len: int = 200,
        alpha: float = 1.0,
    ) -> str:
        """for inference, one image at a time

        Parameters
        ----------
        img : FloatTensor
            [1, h, w]
        beam_size : int, optional
            by default 10
        max_len : int, optional
            by default 200
        alpha : float, optional
            by default 1.0

        Returns
        -------
        str
            LaTeX string
        """
        assert img.dim() == 3
        img_mask = torch.zeros_like(img, dtype=torch.long)  # squeeze channel
        hyps = self.bttr.beam_search(img.unsqueeze(0), img_mask, beam_size,
                                     max_len)
        best_hyp = max(hyps, key=lambda h: h.score / (len(h)**alpha))
        return vocab.indices2label(best_hyp.seq)
Example #7
def rank_by_plackettluce(
        scores: _torch.FloatTensor, n: _torch.LongTensor,
        generator: Optional[_torch.Generator] = None) -> _torch.LongTensor:
    """Samples a ranking from a plackett luce distribution.

    This method ensures that padded documents are placed last.

    Args:
        scores: A tensor of size (batch_size, list_size, 1) or
                (batch_size, list_size) containing scores.
        n: A tensor of size (batch_size) containing the list size of each query.
        generator: An optional random number generator used for sampling.
    """
    if scores.dim() == 3:
        scores = scores.reshape((scores.shape[0], scores.shape[1]))
    masked_scores = mask_padded_values(scores, n)

    # This implementation uses reservoir sampling, which comes down to doing
    # Uniform(0, 1) ^ (1 / p) and then sorting by the resulting values. The
    # following implementation is a numerically stable variant that operates in
    # log-space.
    log_p = _torch.nn.LogSoftmax(dim=1)(masked_scores)
    rng_kwargs = {"generator": generator} if generator is not None else {}
    u = _torch.rand(log_p.shape, device=scores.device, **rng_kwargs)
    r = _torch.log(-_torch.log(u)) - log_p
    return tiebreak_argsort(r, descending=False, generator=generator)
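A small usage sketch (toy scores; mask_padded_values and tiebreak_argsort are assumed to come from the same module):

# Sample a ranking per query; padded documents always end up last.
import torch

scores = torch.tensor([[0.5, 2.0, 1.0],
                       [3.0, 0.1, 0.0]])
n = torch.tensor([3, 2])                     # second query has one padded document
ranking = rank_by_plackettluce(scores, n)    # LongTensor of shape (2, 3)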
Example #8
    def forward(self, input: torch.FloatTensor, target: torch.LongTensor):
        """
        :param input: (N, C) where C = number of classes.
        :param target: (N) where each value is 0 <= targets[i] <= C-1
        :return: Scalar loss value.
        """
        if input.dim() > 2:
            input = input.view(input.size(0), input.size(1),
                               -1)  # N,C,H,W => N,C,H*W
            input = input.transpose(1, 2)  # N,C,H*W => N,H*W,C
            input = input.contiguous().view(
                -1, input.size(2))  # N,H*W,C => N*H*W,C

        target = target.view(-1, 1)
        logpt = F.log_softmax(input, dim=1)
        logpt = logpt.gather(1, target)
        logpt = logpt.view(-1)
        pt = logpt.exp()

        if self.alpha is not None:
            if self.alpha.type() != input.data.type():
                self.alpha = self.alpha.type_as(input.data)

            at = self.alpha.gather(0, target.data.view(-1))
            logpt = logpt * at

        loss = -1 * (1 - pt)**self.gamma * logpt

        if self.size_average:
            return loss.mean()
        else:
            return loss.sum()
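A standalone sketch of the focal weighting computed above (gamma and the logits are illustrative; no alpha weighting):

# Focal loss on toy logits: (1 - pt)^gamma down-weights easy examples.
import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 0.5], [0.2, 1.5]])      # (N, C)
target = torch.tensor([0, 1])
gamma = 2.0
logpt = F.log_softmax(logits, dim=1).gather(1, target.view(-1, 1)).view(-1)
pt = logpt.exp()
loss = (-(1 - pt) ** gamma * logpt).mean()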
Example #9
    def forward(
        self,
        s_hidden_states: FloatTensor,
        t_hidden_states: FloatTensor,
        attention_mask: LongTensor = None,
    ) -> FloatTensor:
        if s_hidden_states.dim() > 3:
            raise TypeError(
                "Cosine loss can be applied only to flattened hidden states")

        if attention_mask is not None:
            # HF transformers case
            return _cosine_loss_hf(
                s_hidden_states=s_hidden_states,
                t_hidden_states=t_hidden_states,
                attention_mask=attention_mask,
            )
        if self.need_mapping:
            assert s_hidden_states.size(-1) == self.student_hidden_state_dim
            assert t_hidden_states.size(-1) == self.teacher_hidden_state_dim
            s_hidden_states = s_hidden_states.reshape(
                -1, self.student_hidden_state_dim)
            t_hidden_states = self.proj(
                t_hidden_states.reshape(-1, self.teacher_hidden_state_dim))
        else:
            hidden_dim = s_hidden_states.size(-1)
            s_hidden_states = s_hidden_states.reshape(-1, hidden_dim)
            t_hidden_states = t_hidden_states.reshape(-1, hidden_dim)

        assert s_hidden_states.shape == t_hidden_states.shape
        target = torch.ones(t_hidden_states.size(0), device=t_hidden_states.device)  # keep target on the same device as the inputs
        return self.loss_fn(s_hidden_states, t_hidden_states, target)
Example #10
def format_tensor_img(t_img: torch.FloatTensor,
                      code: str) -> torch.FloatTensor:
    '''
    Transform the tensor image to a specified format.

    Args:
        t_img: tensor image; must be a torch.FloatTensor with values in [0, 1]
        code: str, one of 'RGB_1', 'RGB_1_norm', 'BGR_255_norm'
    '''
    assert isinstance(t_img, torch.FloatTensor) and 0 <= t_img.mean() <= 1
    assert t_img.dim() == 3 and t_img.shape[0] == 3
    if code == 'RGB_1':
        pass
    elif code == 'RGB_1_norm':
        means = [0.485, 0.456, 0.406]
        stds = [0.229, 0.224, 0.225]
        t_img = tvf.normalize(t_img, means, stds)
    elif code == 'BGR_255_norm':
        # to BGR, to 255
        t_img = t_img[[2, 1, 0], :, :] * 255
        # normalization
        t_img = tvf.normalize(t_img, [102.9801, 115.9465, 122.7717], [1, 1, 1])
    else:
        raise NotImplementedError()
    return t_img
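An example call, assuming torchvision.transforms.functional is imported as tvf in the module that defines this function:

# Normalize an RGB image in [0, 1] with ImageNet statistics.
import torch

img = torch.rand(3, 224, 224)
normed = format_tensor_img(img, code='RGB_1_norm')   # same shape, normalized channels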
Example #11
    def __viterbi_decode(self, emissions: torch.FloatTensor,
                         mask: torch.ByteTensor) -> List[List[int]]:
        assert emissions.dim() == 3 and mask.dim() == 2
        assert emissions.shape[:2] == mask.shape
        assert emissions.size(2) == self.num_tags
        assert mask[0].all()

        seq_length, batch_size = mask.shape
        # self.start_transitions: scores from <start> to every other tag (excluding <end>)
        score = self.start_transitions + emissions[0]
        history = []
        for i in range(1, seq_length):
            broadcast_score = score.unsqueeze(2)
            broadcast_emissions = emissions[i].unsqueeze(1)
            next_score = broadcast_score + self.transitions + broadcast_emissions
            next_score, indices = next_score.max(dim=1)
            score = torch.where(mask[i].unsqueeze(1), next_score, score)
            history.append(indices)
        score += self.end_transitions
        seq_ends = mask.long().sum(dim=0) - 1
        best_tags_list = []

        for idx in range(batch_size):
            _, best_last_tag = score[idx].max(dim=0)
            best_tags = [best_last_tag.item()]
            for hist in reversed(history[:seq_ends[idx]]):
                best_last_tag = hist[idx][best_tags[-1]]
                best_tags.append(best_last_tag.item())
            best_tags.reverse()
            best_tags_list.append(best_tags)
        return best_tags_list
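A shape-only sketch of one step of the recursion above (toy sizes batch_size=2, num_tags=3):

# One Viterbi step: combine previous scores, transitions and emissions, then take the max.
import torch

score = torch.zeros(2, 3)               # best score ending in each tag so far
transitions = torch.randn(3, 3)         # transitions[i, j]: score of moving from tag i to tag j
emission = torch.randn(2, 3)            # emission scores at the current position
next_score = score.unsqueeze(2) + transitions + emission.unsqueeze(1)   # (2, 3, 3)
next_score, indices = next_score.max(dim=1)                             # both (2, 3)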
Example #12
    def forward(self,
                queries: torch.FloatTensor,
                keys: torch.FloatTensor,
                values: torch.FloatTensor,
                mask: torch.ByteTensor = None) -> torch.Tensor:
        """Runs the attention mechanism.

        Args:
            queries (torch.FloatTensor): The queries as (batch_size x Q x dim_model)-tensor.
            keys (torch.FloatTensor): The keys as (batch_size x KV x dim_model)-tensor.
            values (torch.FloatTensor): The values as (batch_size x KV x dim_model)-tensor.
            mask (torch.ByteTensor, optional): An optional binary mask that indicates which key-value pairs to consider
                for each of the queries. If provided, then this has to be a (batch_size x Q x KV)-tensor.

        Returns:
            torch.FloatTensor: The values computed by the attention mechanism as (batch_size x Q x dim_model)-tensor.
        """
        assert isinstance(queries, torch.FloatTensor) or isinstance(
            queries, torch.cuda.FloatTensor)
        assert isinstance(keys, torch.FloatTensor) or isinstance(
            keys, torch.cuda.FloatTensor)
        assert isinstance(values, torch.FloatTensor) or isinstance(
            values, torch.cuda.FloatTensor)
        assert queries.dim() == 3
        assert keys.dim() == 3
        assert values.dim() == 3
        assert queries.size(0) == keys.size(0)
        assert queries.size(0) == values.size(0)
        assert queries.size(2) == keys.size(2)
        assert queries.size(2) == values.size(2)
        assert keys.size(1) == values.size(1)
        if mask is not None:
            assert isinstance(mask, torch.ByteTensor) or isinstance(
                mask, torch.cuda.ByteTensor)
            assert mask.dim() == 3
            assert queries.size(0) == mask.size(0)
            assert queries.size(1) == mask.size(1)
            assert keys.size(1) == mask.size(2)

        # for each of the attention heads, project inputs to the needed dimensions
        queries, keys, values = self._project_inputs(queries, keys, values)

        # compute attention value
        attn_values = self._apply_attention(queries, keys, values, mask)

        # project retrieved values to needed dimensions
        return self._project_output(attn_values)
Example #13
 def eval(self, x: torch.FloatTensor,
          xe: torch.LongTensor) -> torch.FloatTensor:
     # XXX: not to be used for population-based optimisation method
     assert (x.dim() == 2)
     assert (x.shape[0] == self.q)
     sample = self.model.sample_y(x, xe, n_sample=20)
     best_y = sample.min(dim=1).values
     return best_y.mean()
Example #14
 def fit(self, x: torch.FloatTensor):
     assert (x.dim() == 2)
     with torch.no_grad():
         self.data_lb = x.min(dim=0).values.detach().clone()
         self.data_ub = x.max(dim=0).values.detach().clone()
         self.fitted = True
         assert (torch.isfinite(self.data_lb).all())
         assert (torch.isfinite(self.data_ub).all())
         return self
Example #15
 def fit(self, x: torch.FloatTensor):
     assert (x.dim() == 2)
     with torch.no_grad():
         scaler = MinMaxScaler((self.range_lb, self.range_ub))
         scaler.fit(x.detach().numpy())
         self.scale_ = torch.FloatTensor(scaler.scale_)
         self.min_ = torch.FloatTensor(scaler.min_)
         self.fitted = True
     return self
Example #16
    def _forward_hidden(self,
                        s_hidden_states: FloatTensor,
                        t_hidden_states: FloatTensor,
                        layer_idx: int = None) -> FloatTensor:

        if self.need_mapping:
            if s_hidden_states.dim() > 3:
                raise TypeError(
                    "MSE loss with mapping can be applied only to flattened hidden states"
                )
            assert s_hidden_states.size(-1) == self.student_hidden_state_dim
            assert t_hidden_states.size(-1) == self.teacher_hidden_state_dim
            s_hidden_states = s_hidden_states.reshape(
                -1, self.student_hidden_state_dim)
            if self.layer_idx is not None:
                t_hidden_states = self.proj[layer_idx](t_hidden_states.reshape(
                    -1, self.teacher_hidden_state_dim))
            else:
                t_hidden_states = self.proj(
                    t_hidden_states.reshape(-1, self.teacher_hidden_state_dim))
            if self.normalize:
                s_hidden_states = F.normalize(s_hidden_states)
                t_hidden_states = F.normalize(t_hidden_states)
        else:

            if s_hidden_states.dim() <= 3:
                hidden_dim = s_hidden_states.size(-1)
                s_hidden_states = s_hidden_states.reshape(-1, hidden_dim)
                t_hidden_states = t_hidden_states.reshape(-1, hidden_dim)
                if self.normalize:
                    s_hidden_states = F.normalize(s_hidden_states)
                    t_hidden_states = F.normalize(t_hidden_states)
            else:
                if self.normalize:
                    raise TypeError(
                        "Normalizing can be applied only to flattened hidden states"
                    )
                s_hidden_states = s_hidden_states.flatten()
                t_hidden_states = t_hidden_states.flatten()

        assert s_hidden_states.shape == t_hidden_states.shape
        return self.loss_fn(s_hidden_states, t_hidden_states)
Example #17
    def forward(self,
                in_sequence: torch.FloatTensor,
                out_sequence: torch.FloatTensor,
                padding_mask: torch.ByteTensor = None) -> torch.FloatTensor:
        """Runs the decoder.

        Args:
            in_sequence (torch.FloatTensor): The input sequence as (batch-size x in-seq-len x dim_model)-tensor.
            out_sequence (torch.FloatTensor): The output sequence as (batch-size x out-seq-len x dim_model)-tensor.
            padding_mask (torch.ByteTensor, optional): Optionally, a padding mask as
                (batch-size x in-seq-len x in-seq-len)-tensor. To that end, ``1``s indicate those positions that are
                part of the corresponding sequence, and ``0``s mark padding tokens.

        Returns:
            FloatTensor: The computed output as (batch_size x out-seq-len x dim_model)-tensor.
        """
        assert in_sequence.dim() == 3
        assert in_sequence.size(2) == self._dim_model
        assert out_sequence.dim() == 3
        assert out_sequence.size(0) == in_sequence.size(0)
        assert out_sequence.size(2) == self._dim_model
        if padding_mask is not None:
            assert padding_mask.dim() == 3
            assert padding_mask.size(0) == in_sequence.size(0)
            assert padding_mask.size(1) == in_sequence.size(1)
            assert padding_mask.size(2) == in_sequence.size(1)

        # create shifted output mask
        shifted_output_mask = util.create_shifted_output_mask(out_sequence)

        # shift provided target output to the right
        out_sequence = util.shift_output_sequence(out_sequence)

        # apply all layers to the input
        for layer in self._layers:
            out_sequence = layer(in_sequence, out_sequence, padding_mask,
                                 shifted_output_mask)

        # provide the created output
        return out_sequence
Example #18
def masked_topk(
    input_: torch.FloatTensor,
    mask: torch.BoolTensor,
    k: Union[int, torch.LongTensor],
    dim: int = -1,
) -> Tuple[torch.LongTensor, torch.LongTensor, torch.FloatTensor]:
    """Return the top-k values of `input_` along `dim`, restricted to unmasked
    positions, together with a mask over the selected entries and their indices."""
    if input_.size() != mask.size():
        raise ValueError("`input_` and `mask` must have the same shape.")
    if not -input_.dim() <= dim < input_.dim():
        raise ValueError("`dim` must be in `[-input_.dim(), input_.dim())`")
    dim = (dim + input_.dim()) % input_.dim()

    max_k = k if isinstance(k, int) else k.max()

    permutation = list(range(input_.dim()))
    permutation.pop(dim)
    permutation += [dim]

    reverse_permutation = list(range(input_.dim() - 1))
    reverse_permutation.insert(dim, -1)

    other_dims_size = list(input_.size())
    other_dims_size.pop(dim)
    permuted_size = other_dims_size + [max_k]  # for restoration

    if isinstance(k, int):
        k = k * torch.ones(*other_dims_size, dtype=torch.long, device=mask.device)
    else:
        if list(k.size()) != other_dims_size:
            raise ValueError(
                "`k` must have the same shape as `input_` with dimension `dim` removed."
            )

    num_items = input_.size(dim)
    input_ = input_.permute(*permutation).reshape(-1, num_items)
    mask = mask.permute(*permutation).reshape(-1, num_items)
    k = k.reshape(-1)

    input_ = replace_masked_values(input_, mask, min_value_of_dtype(input_.dtype))

    _, top_indices = input_.topk(max_k, 1)

    top_indices_mask = get_mask_from_sequence_lengths(k, max_k).bool()

    fill_value, _ = top_indices.max(dim=1, keepdim=True)
    top_indices = torch.where(top_indices_mask, top_indices, fill_value)

    top_indices, _ = top_indices.sort(1)

    sequence_mask = mask.gather(1, top_indices)
    top_mask = top_indices_mask & sequence_mask

    top_input = input_.gather(1, top_indices)

    return (
        top_input.reshape(*permuted_size).permute(*reverse_permutation),
        top_mask.reshape(*permuted_size).permute(*reverse_permutation),
        top_indices.reshape(*permuted_size).permute(*reverse_permutation),
    )
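A small example (toy values; replace_masked_values, min_value_of_dtype and get_mask_from_sequence_lengths are assumed to come from the same utility module):

# Top-2 values along the last dimension, ignoring the masked-out entry.
import torch

x = torch.tensor([[5.0, 1.0, 3.0, 2.0]])
mask = torch.tensor([[True, True, True, False]])
top_values, top_mask, top_indices = masked_topk(x, mask, k=2)
# top_values -> tensor([[5., 3.]]); top_indices -> tensor([[0, 2]])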
Example #19
    def fit(self, x: torch.FloatTensor):
        assert (x.dim() == 2)
        with torch.no_grad():
            scaler = StandardScaler()
            scaler.fit(x.detach().numpy())

            self.mean = torch.FloatTensor(scaler.mean_.copy()).view(-1)
            self.std = torch.FloatTensor(scaler.scale_.copy()).view(-1)
            invalid = ~(torch.isfinite(self.mean) & torch.isfinite(self.std))
            self.mean[invalid] = 0.  # sometimes we encounter data with all-NaN columns
            self.std[invalid] = 1.
            return self
Example #20
    def forward(self, sequence: torch.FloatTensor) -> torch.FloatTensor:
        """Runs the feed-forward layer.

        Args:
            sequence (torch.FloatTensor): The input sequence given as (batch_size x seq_len x dim_model)-tensor.

        Returns:
            torch.FloatTensor: The computed values as (batch_size x seq_len x dim_model)-tensor.
        """
        assert sequence.dim() == 3
        assert sequence.size(2) == self._dim_model

        sequence = functional.relu(self._layer_1(sequence.transpose(1, 2)))
        sequence = self._layer_2(sequence).transpose(1, 2)

        return sequence
Example #21
def rank_by_score(
        scores: _torch.FloatTensor,
        n: _torch.LongTensor,
        generator: Optional[_torch.Generator] = None) -> _torch.LongTensor:
    """Sorts scores in decreasing order.

    This method ensures that padded documents are placed last and ties are
    broken randomly.

    Args:
        scores: A tensor of size (batch_size, list_size, 1) or
                (batch_size, list_size) containing scores.
        n: A tensor of size (batch_size) containing the list size of each query.
        generator: An optional random number generator used to break ties.
    """
    if scores.dim() == 3:
        scores = scores.reshape((scores.shape[0], scores.shape[1]))
    return tiebreak_argsort(mask_padded_values(scores, n), generator=generator)
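A deterministic counterpart to the sampling example above (same toy inputs; mask_padded_values and tiebreak_argsort assumed from this module):

# Rank documents by decreasing score; padded documents are placed last.
import torch

scores = torch.tensor([[0.5, 2.0, 1.0],
                       [3.0, 0.1, 0.0]])
n = torch.tensor([3, 2])
ranking = rank_by_score(scores, n)       # LongTensor of shape (2, 3)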
Example #22
    def forward(self, sequence: torch.FloatTensor,
                mask: Union[None, torch.ByteTensor]) -> torch.FloatTensor:
        """
        执行模型。对多个 kernel size 最终会将每一个 kernel 输出的向量, concat 在一起。
        pooling 使用的 max pooling.
        :param sequence: 输入的token 序列, shape: (batch_size, seq_len, embedding_dim)
        :param mask: mask
        :return: cnn 编码向量, shape: (batch_size, num_filter * len(kernel_sizes))
        """

        assert sequence.dim(
        ) == 3, f"tokens.dim: {sequence.dim()} 与 shape: (batch_size, seq_len, embedding_dim) 不匹配"

        if mask is not None:
            assert mask.dim(
            ) == 2, f"mask.dim: {mask.dim()} 与 shape: (batch_size, seq_len) 不匹配"

            # 将 mask 的 token 清零,避免影响 cnn
            sequence = sequence * mask.unsqueeze(dim=-1).float()

        # 将 1 和 2 转置, 转置后 shape: (batch_size, embedding_dim, seq_len)
        sequence = torch.transpose(sequence, 1, 2)

        # 每一个 cnn_vector_i: (batch_size, embedding_dim, new_seq_len_i)
        # 注意不同 kernel_size 的 cnn, 产生的 new_seq_len 长度是不同的 所以这里用下标 i 来表示.
        cnn_vectors = [
            self.activtion(cnn(sequence)) for cnn in self.cnn_layers
        ]

        assert cnn_vectors[0].dim() == 3, \
            f"cnn_vectors.dim: {cnn_vectors[0].dim()} 与 shape: (batch_size, num_filter, new_seq_len) 不匹配"
        assert cnn_vectors[0].size(1) == self.num_filters

        # max pooling, 直接使用 max,而不是使用 MaxPool1D, max 更方便,MaxPool1D 需要设置 kernel size 为 seq_len.
        max_pooled_cnn_vectors = [
            cnn_vector.max(dim=-1)[0] for cnn_vector in cnn_vectors
        ]
        assert max_pooled_cnn_vectors[0].dim() == 2, \
            f"max_pooled_cnn_vectors.dim: {max_pooled_cnn_vectors[0].dim()} 与 shape: (batch_size, num_filter) 不匹配"

        # 最后 max_pooled_cnn_vectors concat 在一起
        vector = \
            torch.cat(max_pooled_cnn_vectors, dim=-1) if len(max_pooled_cnn_vectors) > 1 else max_pooled_cnn_vectors[0]

        return vector
Example #23
def tensor_to_np(t_img: torch.FloatTensor, encoding: str, out_format: str):
    '''
    Convert a tensor image to a numpy image.
    This is roughly the inverse operation of format_tensor_img().
    NOTE: this function is not optimized for speed.

    Args:
        t_img: tensor image
        encoding: how tensor image is transformed.
                  Available: 'RGB_1', 'RGB_1_norm', 'BGR_255_norm'
        out_format: 'RGB_1', 'BGR_1'
    '''
    assert torch.is_tensor(t_img) and t_img.dim() == 3 and t_img.shape[0] == 3
    assert encoding in {'RGB_1', 'RGB_1_norm', 'BGR_255_norm'}
    assert out_format in {'RGB_1', 'BGR_1', 'BGR_uint8', 'RGB_uint8'}

    t_img = t_img.clone()
    # 0. convert everything to RGB_1
    if encoding == 'RGB_1':
        pass
    elif encoding == 'RGB_1_norm':
        means = [0.485, 0.456, 0.406]
        stds = [0.229, 0.224, 0.225]
        for channel, m, sd in zip(t_img, means, stds):
            channel.mul_(sd).add_(m)
    elif encoding == 'BGR_255_norm':
        raise NotImplementedError()
    else:
        raise NotImplementedError()
    im = t_img.permute(1, 2, 0).numpy()
    # 1. convert RGB_1 to output format
    if out_format == 'RGB_1':
        pass
    elif out_format == 'BGR_1':
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
    elif out_format == 'RGB_uint8':
        im = (im * 255).astype('uint8')
    elif out_format == 'BGR_uint8':
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
        im = (im * 255).astype('uint8')
    else:
        raise NotImplementedError()
    return im
Example #24
    def forward(self,
                sequence: torch.FloatTensor,
                padding_mask: torch.ByteTensor = None) -> torch.FloatTensor:
        """Runs the encoder.

        Args:
            sequence (torch.FloatTensor): The input sequence as (batch-size x seq-len x dim-model)-tensor.
            padding_mask (torch.ByteTensor, optional):  Optionally, a padding mask as
                (batch-size x in-seq-len x in-seq-len)-tensor. To that end, ``1``s indicate those positions that are
                part of the corresponding sequence, and ``0``s mark padding tokens.

        Returns:
            FloatTensor: The encoded sequence as (batch_size x seq_len x dim_model)-tensor.
        """
        assert sequence.dim() == 3
        assert sequence.size(2) == self._dim_model

        # apply all layers to the input
        for layer in self._layers:
            sequence = layer(sequence, padding_mask)

        # provide the final sequence
        return sequence
Example #25
def bbox_to_mask(bboxes: torch.FloatTensor,
                 bb_format='cxcywhd',
                 mask_size=2048) -> torch.BoolTensor:
    '''
    Convert bounding boxes to binary masks

    Args:
        bboxes: bounding boxes, shape [N, bb_param]
        bb_format: bounding box format; currently only 'cxcywhd'
            (center x, center y, width, height, angle in degrees) is supported
        mask_size: side length of the square output masks

    Return:
        masks: shape [N, mask_size, mask_size]
    '''
    assert isinstance(bboxes, torch.FloatTensor) and bboxes.dim() == 2
    if bb_format == 'cxcywhd':
        assert bboxes.shape[1] == 5
        bboxes = bboxes.clone()
        bboxes[:, 4] = bboxes[:, 4] / 180 * pi
        vertices = xywha2vertex(bboxes, is_degree=False)
        masks = vertex2masks(vertices, mask_size=mask_size)
    else:
        raise NotImplementedError()

    return masks
Example #26
    def _viterbi_decode(self, emissions: torch.FloatTensor,
                        mask: torch.ByteTensor) -> List[List[int]]:
        # emissions: (seq_length, batch_size, num_tags)
        # mask: (seq_length, batch_size)
        assert emissions.dim() == 3 and mask.dim() == 2
        assert emissions.shape[:2] == mask.shape
        assert emissions.size(2) == self.num_tags
        assert mask[0].all()

        seq_length, batch_size = mask.shape

        # Start transition and first emission
        # shape: (batch_size, num_tags)
        temp1 = torch.matmul(self.start_transitions, self.transitions)
        temp2 = torch.transpose(self.labelembedding.weight, 0, 1)
        temp = torch.matmul(temp1, temp2).squeeze()

        score = temp + emissions[0]
        history = []
        trans = torch.matmul(
            torch.matmul(self.labelembedding.weight, self.transitions),
            torch.transpose(self.labelembedding.weight, 0, 1))
        # score is a tensor of size (batch_size, num_tags) where for every batch,
        # value at column j stores the score of the best tag sequence so far that ends
        # with tag j
        # history saves where the best tags candidate transitioned from; this is used
        # when we trace back the best tag sequence

        # Viterbi algorithm recursive case: we compute the score of the best tag sequence
        # for every possible next tag
        for i in range(1, seq_length):
            # Broadcast viterbi score for every possible next tag
            # shape: (batch_size, num_tags, 1)
            broadcast_score = score.unsqueeze(2)

            # Broadcast emission score for every possible current tag
            # shape: (batch_size, 1, num_tags)
            broadcast_emission = emissions[i].unsqueeze(1)

            # Compute the score tensor of size (batch_size, num_tags, num_tags) where
            # for each sample, entry at row i and column j stores the score of the best
            # tag sequence so far that ends with transitioning from tag i to tag j and emitting
            # shape: (batch_size, num_tags, num_tags)
            next_score = broadcast_score + trans + broadcast_emission

            # Find the maximum score over all possible current tag
            # shape: (batch_size, num_tags)
            next_score, indices = next_score.max(dim=1)

            # Set score to the next score if this timestep is valid (mask == 1)
            # and save the index that produces the next score
            # shape: (batch_size, num_tags)
            score = torch.where(mask[i].unsqueeze(1), next_score, score)
            history.append(indices)

        # End transition score
        # shape: (batch_size, num_tags)
        temp1 = torch.matmul(self.labelembedding.weight, self.transitions)
        temp = torch.matmul(temp1, self.end_transitions).squeeze()
        score += temp

        # Now, compute the best path for each sample

        # shape: (batch_size,)
        seq_ends = mask.long().sum(dim=0) - 1
        best_tags_list = []

        for idx in range(batch_size):
            # Find the tag which maximizes the score at the last timestep; this is our best tag
            # for the last timestep
            _, best_last_tag = score[idx].max(dim=0)
            best_tags = [best_last_tag.item()]

            # We trace back where the best last tag comes from, append that to our best tag
            # sequence, and trace it back again, and so on
            for hist in reversed(history[:seq_ends[idx]]):
                best_last_tag = hist[idx][best_tags[-1]]
                best_tags.append(best_last_tag.item())

            # Reverse the order because we start from the last timestep
            best_tags.reverse()
            best_tags_list.append(best_tags)

        return best_tags_list
Example #27
    def forward(
        self,
        source: torch.FloatTensor,  # [batch, tgt_len, dim]
        memory_bank_list: List[
            torch.FloatTensor],  # [num_srcs] x [batch, src_len, dim]
        memory_lengths_list: List[
            torch.FloatTensor] = None,  # [num_srcs] x [batch]
        coverage=None
    ) -> Tuple[torch.FloatTensor, torch.FloatTensor]:
        assert coverage is None

        # one step input
        if source.dim() == 2:
            one_step = True
            source = source.unsqueeze(1)
        else:
            one_step = False
        # end if

        # Join memory bank
        memory_bank = torch.cat(memory_bank_list, dim=1)

        batch, source_l, dim = memory_bank.size()
        batch_, target_l, dim_ = source.size()
        aeq(batch, batch_)
        aeq(dim, dim_)
        aeq(self.dim, dim)
        if coverage is not None:
            batch_, source_l_ = coverage.size()
            aeq(batch, batch_)
            aeq(source_l, source_l_)

        if coverage is not None:
            cover = coverage.view(-1).unsqueeze(1)
            memory_bank += self.linear_cover(cover).view_as(memory_bank)
            memory_bank = torch.tanh(memory_bank)

        # compute attention scores, as in Luong et al.
        align = self.score(source, memory_bank)

        if memory_lengths_list is not None:
            mask = torch.cat([
                sequence_mask(memory_lengths,
                              max_len=memory_bank_list[src_i].size(1))
                for src_i, memory_lengths in enumerate(memory_lengths_list)
            ],
                             dim=1)
            mask = mask.unsqueeze(1)  # Make it broadcastable.
            align.masked_fill_(1 - mask, -float('inf'))
        # end if

        # Softmax or sparsemax to normalize attention weights
        if self.attn_func == "softmax":
            align_vectors = F.softmax(align.view(batch * target_l, source_l),
                                      -1)
        else:
            align_vectors = sparsemax(align.view(batch * target_l, source_l),
                                      -1)
        align_vectors = align_vectors.view(batch, target_l, source_l)

        # each context vector c_t is the weighted average
        # over all the source hidden states
        c = torch.bmm(align_vectors, memory_bank)

        # concatenate
        concat_c = torch.cat([c, source], 2).view(batch * target_l, dim * 2)
        attn_h = self.linear_out(concat_c).view(batch, target_l, dim)
        if self.attn_type in ["general", "dot"]:
            attn_h = torch.tanh(attn_h)
        # end if

        if one_step:
            attn_h = attn_h.squeeze(1)
            align_vectors = align_vectors.squeeze(1)

            # Check output sizes
            batch_, dim_ = attn_h.size()
            aeq(batch, batch_)
            aeq(dim, dim_)
            batch_, source_l_ = align_vectors.size()
            aeq(batch, batch_)
            aeq(source_l, source_l_)

        else:
            attn_h = attn_h.transpose(0, 1).contiguous()
            align_vectors = align_vectors.transpose(0, 1).contiguous()
            # Check output sizes
            target_l_, batch_, dim_ = attn_h.size()
            aeq(target_l, target_l_)
            aeq(batch, batch_)
            aeq(dim, dim_)
            target_l_, batch_, source_l_ = align_vectors.size()
            aeq(target_l, target_l_)
            aeq(batch, batch_)
            aeq(source_l, source_l_)
        # end if

        return attn_h, align_vectors
Example #28
    def _viterbi_decode(self, emissions: torch.FloatTensor,
                        mask: torch.ByteTensor) -> List[List[int]]:
        # emissions: (seq_length, batch_size, num_tags)
        # mask: (seq_length, batch_size)
        assert emissions.dim() == 3 and mask.dim() == 2
        assert emissions.shape[:2] == mask.shape
        assert emissions.size(2) == self.num_tags
        mask = mask.to(dtype=torch.uint8).cuda()
        assert mask[0].all()

        seq_length, batch_size = mask.shape
        # self.start_transitions: scores from <start> to every other tag (excluding <end>)
        # <start> -> tag transition score plus the emission score of the first token of each sentence
        # emissions.shape = [62, 32, 3], start_transitions.shape = [3]
        score = self.start_transitions + emissions[0]  # broadcast
        history = []

        for i in range(1, seq_length):
            # score.shape = [32, 3] -> [32, 3, 1]
            # previous scores, with an extra dimension ("expand previous" in the formula)
            broadcast_score = score.unsqueeze(2)

            # emissions.shape = [62, 32, 3]; emissions[i] holds the emission scores of the
            # i-th token of every sentence
            # emissions[i].shape = [32, 3].unsqueeze(1) = [32, 1, 3]
            broadcast_emission = emissions[i].unsqueeze(1)

            #              previous score + transition score + emission score
            # [32, 3, 3] = [32, 3, 1]     + [3, 3]            + [32, 1, 3]
            next_score = broadcast_score + self.transitions + broadcast_emission

            # For every sentence, keep the best score of moving from tag i to tag j
            # next_score.shape = [32, 3] = indices.shape
            next_score, indices = next_score.max(dim=1)

            # Do not update the score at padded positions
            score = torch.where(mask[i].unsqueeze(1), next_score, score)

            history.append(indices)

        # After the whole sentence, add the <end> transition score  [32, 3]
        score += self.end_transitions

        # Index of the last real token of each sentence
        seq_ends = mask.long().sum(dim=0) - 1  # [32]
        best_tags_list = []

        for idx in range(batch_size):  # 32
            # score.shape = [32, 3]; pick the best last tag of each sentence
            _, best_last_tag = score[idx].max(dim=0)

            best_tags = [best_last_tag.item()]

            # history has one entry per step (each of shape [32, 3]);
            # history[:seq_ends[idx]] keeps only the real tokens (the rest is padding),
            # and reversed() walks the sentence backwards, so hist first visits the last token
            for hist in reversed(history[:seq_ends[idx]]):
                best_last_tag = hist[idx][best_tags[-1]]
                best_tags.append(best_last_tag.item())

            best_tags.reverse()
            best_tags_list.append(best_tags)
        return best_tags_list
Example #29
    def _viterbi_decode(self, emissions: torch.FloatTensor,
                        mask: torch.ByteTensor) -> List[List[int]]:
        # emissions: (seq_length, batch_size, num_tags)
        # mask: (seq_length, batch_size)
        assert emissions.dim() == 3 and mask.dim() == 2
        assert emissions.shape[:2] == mask.shape
        assert emissions.size(2) == self.num_tags
        assert mask[0].all()

        seq_length, batch_size = mask.shape

        # Start transition and first emission
        # shape: (batch_size, num_tags)
        score = self.start_transitions + emissions[0]
        history = []

        # Keep the scores to return later if the user wants them, based on
        # the score reduction, which will have shape:
        # (batch_size, seq_len, num_tags)
        if self.score_reduction == 'tags':
            tag_scores = []
        else:
            tag_scores = None

        # score is a tensor of size (batch_size, num_tags) where for every batch,
        # value at column j stores the score of the best tag sequence so far that ends
        # with tag j
        # history saves where the best tags candidate transitioned from; this is used
        # when we trace back the best tag sequence

        # Viterbi algorithm recursive case: we compute the score of the best tag sequence
        # for every possible next tag
        for i in range(1, seq_length):
            # Broadcast viterbi score for every possible next tag
            # shape: (batch_size, num_tags, 1)
            broadcast_score = score.unsqueeze(2)

            # Broadcast emission score for every possible current tag
            # shape: (batch_size, 1, num_tags)
            broadcast_emission = emissions[i].unsqueeze(1)

            # Compute the score tensor of size (batch_size, num_tags, num_tags) where
            # for each sample, entry at row i and column j stores the score of the best
            # tag sequence so far that ends with transitioning from tag i to tag j and emitting
            # shape: (batch_size, num_tags, num_tags)
            next_score = broadcast_score + self.transitions + broadcast_emission

            # Find the maximum score over all possible current tag
            # shape: (batch_size, num_tags)
            next_score, indices = next_score.max(dim=1)

            # Set score to the next score if this timestep is valid (mask == 1)
            # and save the index that produces the next score
            # shape: (batch_size, num_tags)
            score = torch.where(mask[i].unsqueeze(1), next_score, score)

            # Add scores to cat them later based on the score reduction
            # shape: (batch_size, num_tags)
            if tag_scores is not None:
                tag_scores.append(score.detach().unsqueeze(1))

            history.append(indices)

        # End transition score
        # shape: (batch_size, num_tags)
        score += self.end_transitions

        # Add the final transition score to our returned scores
        # shape: (batch_size, num_tags)
        if tag_scores is not None:
            tag_scores.append(score.detach().unsqueeze(1))

        # Now, compute the best path for each sample

        # shape: (batch_size,)
        seq_ends = mask.long().sum(dim=0) - 1
        best_tags_list = []

        for idx in range(batch_size):
            # Find the tag which maximizes the score at the last timestep; this is our best tag
            # for the last timestep
            _, best_last_tag = score[idx].max(dim=0)
            best_tags = [best_last_tag.item()]

            # We trace back where the best last tag comes from, append that to our best tag
            # sequence, and trace it back again, and so on
            for hist in reversed(history[:seq_ends[idx]]):
                best_last_tag = hist[idx][best_tags[-1]]
                best_tags.append(best_last_tag.item())

            # Reverse the order because we start from the last timestep
            best_tags.reverse()
            best_tags_list.append(best_tags)

        # If the user wants scores, return them in the desired format
        if self.score_reduction == 'skip':
            return best_tags_list
        else:
            if self.score_reduction == 'tags':
                # shape: (batch_size, seq_length, num_tags)
                score = torch.cat(tag_scores, 1)
            elif self.score_reduction != 'none':
                score = score.max(dim=1)[0]
                score = {
                    'sum': score.sum(),
                    'max': score.max(),
                    'min': score.min(),
                    'mean': score.mean()
                }[self.score_reduction]
                score = score / batch_size
            return best_tags_list, score
Example #30
 def _audio_postprocess(self,
                        feats: torch.FloatTensor) -> torch.FloatTensor:
     if feats.dim() == 2:
         feats = feats.mean(-1)
     assert feats.dim() == 1, feats.dim()
     return F.layer_norm(feats, feats.shape)
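A standalone sketch of the postprocessing above (toy waveform; a 2-D input is treated as (num_samples, num_channels)):

# Average the channels of a stereo waveform and layer-normalize the result.
import torch
import torch.nn.functional as F

wav = torch.randn(16000, 2)
feats = wav.mean(-1)                        # -> (16000,)
feats = F.layer_norm(feats, feats.shape)    # zero mean, unit variance over the clip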