def forward(self,
                query: Tensor,
                key: Tensor,
                value: Tensor,
                mask: Tensor = None) -> Tensor:
        """
        @param query shape -> [batch_size, max_length, emb_size]
        @param key shape -> [batch_size, max_length, emb_size]
        @param value shape -> [batch_size, max_length, emb_size]
        @param mask shape -> [1, max_length, max_length]
        @return a tensor with shape -> [batch_size, max_length, emb_size]
        """
        if mask is not None:
            # 1, n, n -> 1, 1, n, n; n is max length of sentence
            mask = mask.unsqueeze(1)
        batch_size = query.size(0)

        # do projection
        query, key, value = [
            linear_f(x).view(batch_size, -1, self.head_count,
                             self.model_k_dim).transpose(1, 2)
            for linear_f, x in zip(self.linears, (query, key, value))
        ]
        # do attention
        x, self.attn = attention(query, key, value, mask, self.dropout)
        # do concatenation
        x = x.transpose(1, 2).contiguous().view(
            batch_size, -1, self.head_count * self.model_k_dim)
        return self.linears[-1](x)
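A toy sketch of the head split and merge performed above, with hypothetical sizes batch_size=2, max_length=5, head_count=4, model_k_dim=8 (so emb_size=32):

import torch

x = torch.randn(2, 5, 32)                      # [batch, length, emb]
heads = x.view(2, -1, 4, 8).transpose(1, 2)    # [batch, head, length, d_k]
print(heads.shape)                             # torch.Size([2, 4, 5, 8])
merged = heads.transpose(1, 2).contiguous().view(2, -1, 32)
print(merged.shape)                            # torch.Size([2, 5, 32])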
Example #2
    def forward(self, X: Tensor, initial_states=None):
        # Fresh zero-valued initial hidden states for the GRU, sized to this
        # batch: [gru_hidden_layers * num_directions, batch, hidden_dimensions].
        self.init_states = torch.zeros(
            self.gru_hidden_layers * self.num_directions,
            X.size(self.batch_index), self.hidden_dimensions)

        output_gru, initial_states = self.gru_encoder(X, self.init_states)

        # TODO: handle batchnorm differently if self.use_batchnorm is set.
        # TODO: handle the bidirectional case; this task should not need a
        # bidirectional RNN. The final hidden states have shape
        # [gru_hidden_layers * num_directions, batch, hidden_dimensions].

        # Return the last time step of the forward direction, shaped
        # [1, batch, hidden_dimensions].
        return output_gru[:, -1, :self.hidden_dimensions].view(
            1, -1, self.hidden_dimensions)
Example #3
    def forward(self, input: Tensor, mask: Tensor = None, hx: Tuple[Tensor, Tensor] = None) -> Tuple[Tensor, Tensor]:
        batch_size = input.size(0) if self.batch_first else input.size(1)
        if hx is None:
            num_directions = 2 if self.bidirectional else 1
            hx = input.new_zeros((self.num_layers * num_directions, batch_size, self.hidden_size))
            hx = (hx, hx)

        func = rnn_f.autograd_var_masked_rnn(num_layers=self.num_layers,
                                             batch_first=self.batch_first,
                                             bidirectional=self.bidirectional,
                                             lstm=True)

        self.reset_noise(batch_size)

        output, hidden = func(input, self.all_cells, hx, None if mask is None else mask.view(mask.size() + (1,)))
        return output, hidden
Example #4
    def forward(self, input: Tensor, mask: Tensor = None) -> Tensor:
        """
        Args:
            input: Tensor
                the input tensor with shape = [batch, length, input_size]
            mask: Tensor or None
                the mask tensor with shape = [batch, length]

        Returns: Tensor
            the energy tensor with shape = [batch, length, num_label, num_label]

        """
        batch, length, _ = input.size()

        # compute out_s by tensor dot [batch, length, input_size] * [input_size, num_label]
        # thus out_s should be [batch, length, num_label] --> [batch, length, 1, num_label]
        out_s = self.state_nn(input)

        if mask is not None:
            out_s[:, :, self.index_eos] += (mask == 0).float() * 2e4

        # [batch, length, num_label, num_label]
        output = self.trans_matrix + out_s.unsqueeze(2)

        return output
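A toy sketch of the broadcast that builds the energy tensor above, with hypothetical sizes batch=2, length=3, num_label=4: the state scores gain a singleton "previous label" axis and are added to the shared transition matrix.

import torch

trans_matrix = torch.randn(4, 4)     # [num_label, num_label]
out_s = torch.randn(2, 3, 4)         # [batch, length, num_label]
energy = trans_matrix + out_s.unsqueeze(2)
print(energy.shape)                  # torch.Size([2, 3, 4, 4])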
Example #5
 def predict_recursively(preds: Tensor, energy: Tensor,
                         offset: int) -> NestedSequenceLabel:
     # Walk the BIOES prediction sequence; for every complete B-I-E span,
     # recursively decode the nested structure inside that span.
     length = preds.size(0)
     nested_preds_list = []
     index = 0
     while index < length:
         id = preds[index]
         if id == eos_id:
             break
         if id != o_id:
             if id == b_id:  # B-XXX
                 start_tmp = index
                 index += 1
                 if index == length:
                     break
                 id = preds[index]
                 while id == i_id:  # I-XXX
                     index += 1
                     if index == length:
                         break
                     id = preds[index]
                 if id == e_id:  # E-XXX
                     end_tmp = index + 1
                     nested_preds = decode_nest(
                         energy[start_tmp:end_tmp, :, :])
                     nested_preds_list.append(
                         predict_recursively(
                             nested_preds,
                             energy[start_tmp:end_tmp, :, :],
                             start_tmp + offset))
         index += 1
     return NestedSequenceLabel(offset, length + offset, preds,
                                nested_preds_list)
Example #6
0
def slice_last_dim(d: Tensor, length: int = 160) -> Tensor:
    """
    Randomly slice the last dimension down to `length` if it is too long.
    If the input is shorter than `length`, `torch.randint` raises an error
    because the sampling range is empty.
    [..., L>160] => [..., L==160]
    """
    # pick a random start so that start + length <= d.size(-1)
    start = torch.randint(0, d.size()[-1] - (length - 1), (1, )).item()
    return torch.narrow(d, -1, start, length)
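A minimal usage sketch, assuming `slice_last_dim` above is in scope:

import torch

d = torch.randn(2, 3, 200)    # last dimension longer than 160
out = slice_last_dim(d)       # random 160-sample window
print(out.shape)              # torch.Size([2, 3, 160])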
Example #7
    def tolist(paired_wavs: List[Tensor], paired_feature: Tensor):
        assert paired_feature.dim() == 3
        # (batch_size, max_seq_len, feat_dim)

        # samples-per-frame ratio, estimated from the longest wav
        ratio = max([len(wav) for wav in paired_wavs]) / paired_feature.size(1)
        feature_len = [round(len(wav) / ratio) for wav in paired_wavs]
        # trim each padded feature back to its true length
        feature = [f[:l] for f, l in zip(paired_feature, feature_len)]
        return feature
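A minimal usage sketch, assuming `tolist` above is accessible as a plain function (it takes no `self`): two wavs of different lengths share one padded feature batch of 100 frames.

import torch

paired_wavs = [torch.zeros(16000), torch.zeros(8000)]
paired_feature = torch.randn(2, 100, 80)
features = tolist(paired_wavs, paired_feature)
print([f.size(0) for f in features])    # [100, 50]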
Example #8
    def nests_loss(self, energy: Tensor, target: Tensor) -> Tensor:
        """
        Args:
            energy: Tensor
                the energy tensor with shape = [length, num_label, num_label]
            target: Tensor
                the tensor of target labels with shape [length]

        Returns: Tensor
                A 0D tensor holding the negative log-likelihood loss
        """
        length, _, _ = energy.size()

        num_label_3 = self.indices_is.size(0)

        indices_3 = energy.new_empty((length, num_label_3)).long()
        indices_3[0, :] = self.indices_bs
        if length > 2:
            indices_3[1:length - 1, :] = self.indices_is.repeat(
                (length - 2, 1))
        indices_3[length - 1, :] = self.indices_es

        # shape = [num_label]
        partition_1 = None
        partition_3 = None

        # shape = []
        prev_label = self.index_bos
        tgt_energy = 0

        for t in range(length):
            # shape = [num_label, num_label]
            curr_energy = energy[t]
            if t == 0:
                partition_1 = curr_energy[self.index_bos, :]
                partition_3 = energy.new_full((num_label_3, ), -1e4)
            else:
                # shape = [num_label]
                partition = partition_1.clone()
                partition[indices_3[t - 1]] = partition_3
                partition_1 = logsumexp(curr_energy + partition_1.unsqueeze(1),
                                        dim=0)
                partition_3 = logsumexp(curr_energy[:, indices_3[t]] +
                                        partition.unsqueeze(1),
                                        dim=0)
            label = target[t]
            tgt_energy += curr_energy[prev_label, label]
            prev_label = label

        t = length - 1
        curr_energy = self.trans_matrix.data[:, self.index_eos]
        partition = curr_energy + partition_1
        partition[indices_3[t]] = curr_energy[indices_3[t]] + partition_3
        return logsumexp(partition, dim=0) - tgt_energy
Example #9
def pad_last_dim(d: Tensor, length_min: int = 160) -> Tensor:
    """
    Pad the last dimension with zeros if it is shorter than `length_min`.
    If the input is already `length_min` or longer, it is returned unchanged.
    [..., L<160] => [..., L==160]
    """
    shape = d.size()
    length_d = shape[-1]
    if length_d < length_min:
        # new_zeros keeps the padding on the same device/dtype as `d`
        a = d.new_zeros([*shape[:-1], length_min - length_d])
        return torch.cat((d, a), -1)
    else:
        return d
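A minimal usage sketch, assuming `pad_last_dim` above is in scope:

import torch

d = torch.randn(2, 3, 100)    # last dimension shorter than 160
out = pad_last_dim(d)         # zero-padded up to 160 samples
print(out.shape)              # torch.Size([2, 3, 160])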
Example #10
    def forward(self, img: Tensor, labels: Tensor):
        """Forward pass of the Discriminator.

        Args:
            img: the image that should be classified as fake or real.
            labels: the labels of the images, fed through the label embedding.

        Returns:
            the discriminator score for each image in the batch.
        """
        # flatten each image and concatenate its label embedding to it
        d_in = torch.cat(
            (img.view(img.size(0), -1), self.label_embedding(labels)), -1)
        score = self.model(d_in)
        return score
Example #11
    def _get_matrix(self, x: Tensor) -> Tensor:
        r'''
        Converts flat data to a matrix via lookup and reshaping; elements not
        present in the flat data are set to zero.

        Arguments:
            x: flat data

        Returns:
            2D matrix on device
        '''

        mat = x[:, self.lookup]
        mat[:, self.missing] = 0
        shape = ((x.size(0), len(self.vecs), len(self.fpv)) if self.row_wise
                 else (x.size(0), len(self.fpv), len(self.vecs)))
        mat = mat.reshape(shape)
        return to_device(mat)
Example #12
 def forward(self, x: Tensor, target: Tensor):
     """
     @param x: shape -> [length, number_of_class]
     @param target: shape -> [length]
     """
     assert x.size(-1) == self.number_of_class
     true_dist: Tensor = x.data.clone()
     # subtract 2: one slot is reserved for the target class, one for padding
     true_dist.fill_(self.smoothing / (self.number_of_class - 2))
     true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
     true_dist[:, self.padding_idx] = 0
     mask: Tensor = torch.nonzero(target == self.padding_idx)
     if mask.dim() > 0:
         # zero out every row whose target is the padding index
         true_dist.index_fill_(0, mask.squeeze(), 0.0)
     self.true_dist = true_dist
     return self.criteron(x, Variable(true_dist, requires_grad=False))
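A standalone sketch of the label-smoothing target construction above, with hypothetical settings number_of_class=5, padding_idx=0, smoothing=0.4 (so confidence=0.6):

import torch

num_class, padding_idx, smoothing, confidence = 5, 0, 0.4, 0.6
x = torch.randn(3, num_class)          # [length, number_of_class]
target = torch.tensor([2, 1, 0])       # the last target is the padding index

true_dist = x.clone()
true_dist.fill_(smoothing / (num_class - 2))            # spread mass over the other labels
true_dist.scatter_(1, target.unsqueeze(1), confidence)  # put confidence on the target
true_dist[:, padding_idx] = 0                           # never assign mass to padding
mask = torch.nonzero(target == padding_idx)
if mask.dim() > 0:
    true_dist.index_fill_(0, mask.squeeze(), 0.0)       # zero rows whose target is padding
print(true_dist)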
Example #13
    def loss(self,
             input: Tensor,
             target: Tensor,
             mask: Tensor = None) -> Tuple[Tensor, Tensor]:
        """
        Args:
            input: Tensor
                the input tensor with shape = [batch, length, input_size]
            target: Tensor
                the tensor of target labels with shape [batch, length]
            mask: Tensor or None
                the mask tensor with shape = [batch, length]

        Returns: Tuple[Tensor, Tensor]
                a 1D negative log-likelihood loss with shape [batch],
                and the energy tensor from the forward pass
        """
        batch, length, _ = input.size()
        energy = self.forward(input, mask=mask)
        # shape = [length, batch, num_label, num_label]
        energy_transpose = energy.transpose(0, 1)
        # shape = [length, batch]
        target_transpose = target.transpose(0, 1)

        # shape = [batch, num_label]
        partition = None

        # shape = [batch]
        batch_index = torch.arange(0, batch).type_as(input).long()
        prev_label = input.new_full((batch, ), self.index_bos).long()
        tgt_energy = input.new_zeros(batch)

        for t in range(length):
            # shape = [batch, num_label, num_label]
            curr_energy = energy_transpose[t]
            if t == 0:
                partition = curr_energy[:, self.index_bos, :]
            else:
                # shape = [batch, num_label]
                partition = logsumexp(curr_energy + partition.unsqueeze(2),
                                      dim=1)
            label = target_transpose[t]
            tgt_energy += curr_energy[batch_index, prev_label, label]
            prev_label = label

        return logsumexp(
            self.trans_matrix.data[:, self.index_eos].unsqueeze(0) + partition,
            dim=1) - tgt_energy, energy
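A toy, unbatched sketch of the forward-algorithm recursion used above, with hypothetical sizes length=4 and num_label=3, starting from a fixed BOS label at index 0 (the real code uses self.index_bos):

import torch

energy = torch.randn(4, 3, 3)               # [length, num_label, num_label]
partition = energy[0, 0, :]                 # first-step scores leaving BOS
for t in range(1, energy.size(0)):
    # partition[j] = logsumexp_i(partition[i] + energy[t, i, j])
    partition = torch.logsumexp(energy[t] + partition.unsqueeze(1), dim=0)
log_Z = torch.logsumexp(partition, dim=0)   # log partition function
print(log_Z)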
Example #14
def attention(query: Tensor,
              key: Tensor,
              value: Tensor,
              mask: Tensor = None,
              dropout=None):
    """
    scaled dot-product attention
    @param query shape -> batch_size, head_count, max_length, model_dim_size/head_count
    @param key shape -> batch_size, head_count, max_length, model_dim_size/head_count
    @param value shape -> batch_size, head_count, max_length, model_dim_size/head_count
    @return (attended values, attention weights)
    """
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        # masked_fill is out-of-place, so the result must be assigned back
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attention = F.softmax(scores, dim=-1)
    if dropout is not None:
        p_attention = dropout(p_attention)
    return torch.matmul(p_attention, value), p_attention
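A minimal usage sketch, assuming `attention` above is in scope (its module imports `torch`, `math`, and `torch.nn.functional as F`), with hypothetical sizes batch_size=2, head_count=4, max_length=5, model_dim_size=32:

import torch

q = torch.randn(2, 4, 5, 8)      # model_dim_size / head_count = 8
k = torch.randn(2, 4, 5, 8)
v = torch.randn(2, 4, 5, 8)
mask = torch.ones(1, 1, 5, 5)    # broadcast over batch and heads
out, p = attention(q, k, v, mask=mask)
print(out.shape, p.shape)        # torch.Size([2, 4, 5, 8]) torch.Size([2, 4, 5, 5])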
Example #15
    def step(self, input: Tensor, hx: Tuple[Tensor, Tensor] = None, mask: Tensor = None) -> Tuple[Tensor, Tensor]:
        """
        execute one step forward (only for one-directional RNN).
        Args:
            input (batch, input_size): input tensor of this step.
            hx (num_layers, batch, hidden_size): the hidden state of last step.
            mask (batch): the mask tensor of this step.

        Returns:
            output (batch, hidden_size): tensor containing the output of this step from the last layer of RNN.
            hn (num_layers, batch, hidden_size): tensor containing the hidden state of this step
        """
        assert not self.bidirectional, "step cannot be applied to a bidirectional RNN."
        batch_size = input.size(0)
        if hx is None:
            hx = input.new_zeros((self.num_layers, batch_size, self.hidden_size))
            hx = (hx, hx)

        func = rnn_f.autograd_var_masked_step(num_layers=self.num_layers, lstm=True)

        output, hidden = func(input, self.all_cells, hx, mask)
        return output, hidden
Example #16
 def make_std_mask(target: Tensor, pad):
     # combine the padding mask with the subsequent (no-peek) mask used by
     # Transformer decoders
     target_mask = (target != pad).unsqueeze(-2)
     target_mask = target_mask & Variable(subsequent_mask(target.size(-1)).type_as(target_mask.data))
     return target_mask
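A minimal usage sketch of `make_std_mask`, assuming the standard annotated-Transformer-style `subsequent_mask` helper sketched here:

import torch
from torch.autograd import Variable

def subsequent_mask(size: int) -> torch.Tensor:
    # True on and below the diagonal: future positions are masked out
    return torch.triu(torch.ones(1, size, size), diagonal=1) == 0

target = torch.tensor([[1, 5, 7, 0, 0]])    # 0 is the pad index here
mask = make_std_mask(target, pad=0)
print(mask.shape)                           # torch.Size([1, 5, 5])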
Example #17
 def forward(self, x: Tensor) -> Tensor:
     # induced set attention: attend the learned inducing points to x,
     # then attend x back to the induced summary H
     H = self.mab1(self.inducing_points.repeat(x.size(0), 1, 1), x)
     return self.mab2(x, H)
Example #18
 def forward(self, x: Tensor) -> Tensor:
     # pooling by attention: learned seed vectors attend over the set x
     return self.mab(self.seed_vectors.repeat(x.size(0), 1, 1), x)
Example #19
def adaptive_scaling_loss(logits: Tensor, targets: Tensor, positive_idx: Tensor,
                          mask: Tensor = None, beta: float = 1.0, reduction='none',
                          weight_trainable: bool = False):
    """

    :param logits: (batch, num_label)
    :param targets: (batch, )
    :param positive_idx: (num_label)
        size is the number of all labels, positive_idx is 1, negative_idx is 0
    :param mask: (batch, )
    :param beta: float
    :param reduction:
        Specifies the reduction to apply to the output:
        ``'none'`` | ``'mean'`` | ``'sum'``.
        ``'none'``: no reduction will be applied,
        ``'mean'``: the sum of the output will be divided by the number of elements in the output,
        ``'sum'``: the output will be summed.
    :param weight_trainable: bool
            False: stop gradients at the beta weight
            True: back-propagate gradients from the beta weight to the other parameters
    :return:
    """
    batch_size, num_label = logits.size()
    probs = allennlp_nn_utils.masked_softmax(logits, mask=mask)

    assert positive_idx.size(0) == num_label

    pos_label_mask = positive_idx.unsqueeze(0).expand(batch_size, num_label).to(logits.device)
    neg_label_mask = 1 - pos_label_mask

    targets_index = targets.unsqueeze(-1)

    tp = torch.sum(torch.gather(probs * pos_label_mask, 1, targets_index))
    tn = torch.sum(torch.gather(probs * neg_label_mask, 1, targets_index))

    p_vector = torch.gather(pos_label_mask, 1, targets_index).squeeze(-1).float()
    n_vector = torch.gather(neg_label_mask, 1, targets_index).squeeze(-1).float()
    p_sum = torch.sum(p_vector)
    n_sum = torch.sum(n_vector)
    weight_beta = tp / (beta * beta * p_sum + n_sum - tn)
    weight_beta = n_vector * weight_beta + p_vector

    if not weight_trainable:
        weight_beta.detach_()

    loss = nn.functional.cross_entropy(input=logits, target=targets, reduction='none')

    if mask is None:
        weight_loss = loss * weight_beta
    else:
        weight_loss = loss * weight_beta * mask

    if reduction == 'sum':
        return torch.sum(weight_loss)
    elif reduction == 'mean':
        if mask is None:
            return torch.mean(weight_loss)
        else:
            return torch.sum(weight_loss) / (torch.sum(mask) + 1e-13)
    elif reduction == 'none':
        return weight_loss
    else:
        raise NotImplementedError('reduction %s in ``adaptive_scaling_loss`` is not implemented' % reduction)
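A minimal usage sketch, assuming `adaptive_scaling_loss` above and its module imports are in scope, with a hypothetical 3-label setup where label 0 is the negative (NIL) class:

import torch

logits = torch.randn(4, 3)                  # (batch=4, num_label=3)
targets = torch.tensor([0, 1, 2, 1])
positive_idx = torch.tensor([0., 1., 1.])   # labels 1 and 2 are positive
loss = adaptive_scaling_loss(logits, targets, positive_idx, reduction='mean')
print(loss)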
Example #20
 def __call__(self, tensor: Tensor) -> Tensor:
     # return tensor + torch.randn(tensor.size()) * self.std + self.mean
     # Clamp output so image with noise is still greyscale:
     return torch.clamp(
         tensor + torch.randn(tensor.size()) * self.std + self.mean, 0, 1)
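A standalone sketch of the clamped-noise transform above, assuming hypothetical parameters mean=0.0 and std=0.1:

import torch

img = torch.rand(1, 28, 28)     # greyscale image in [0, 1]
noisy = torch.clamp(img + torch.randn(img.size()) * 0.1 + 0.0, 0, 1)
print(noisy.min().item() >= 0, noisy.max().item() <= 1)    # True True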
Example #21
 def _augment(self, data: Tensor) -> Tensor:
     # sample one of four predefined rotation angles per batch element
     index = torch.randint(size=(data.size(0), ), low=0, high=4)
     angles = self._pos_angles[index].squeeze(-1)
     return rotate(data, angles)