def forward(self, query: Tensor, key: Tensor, value: Tensor, mask: Tensor = None) -> Tensor:
    """
    Multi-head attention: project the inputs, split them into heads,
    run `attention`, merge the heads again and apply the last linear layer.

    @param query shape -> [batch_size, max_length, emb_size]
    @param key shape -> [batch_size, max_length, emb_size]
    @param value shape -> [batch_size, max_length, emb_size]
    @param mask shape -> [1, max_length, max_length]
    @return output of `self.linears[-1]` applied to a tensor of shape
            [batch_size, max_length, head_count * model_k_dim]
            (presumably [batch_size, max_length, emb_size] -- confirm
            against how `self.linears` is constructed)
    """
    # 1, n, n -> 1, 1, n, n (n is the max sentence length): add a head axis.
    head_mask = None if mask is None else mask.unsqueeze(1)
    n_batch = query.size(0)

    def split_heads(projection, tensor):
        # [batch, length, emb] -> [batch, head_count, length, model_k_dim]
        projected = projection(tensor)
        per_head = projected.view(n_batch, -1, self.head_count, self.model_k_dim)
        return per_head.transpose(1, 2)

    # zip stops after the three inputs, so the last entry of self.linears is
    # left over for the output projection below.
    query, key, value = [
        split_heads(projection, tensor)
        for projection, tensor in zip(self.linears, (query, key, value))
    ]

    # The attention weights are kept on the module as `self.attn`.
    x, self.attn = attention(query, key, value, head_mask, self.dropout)

    # Concatenate the heads back into a single feature axis.
    merged = x.transpose(1, 2).contiguous().view(
        n_batch, -1, self.head_count * self.model_k_dim)
    return self.linears[-1](merged)
def forward(self, X: Tensor, initial_states=None):
    """
    Run the GRU encoder from an all-zero initial state and return the
    features of the last time step.

    Args:
        X: input passed to `self.gru_encoder`; its batch size is read from
            axis `self.batch_index`.
        initial_states: currently ignored -- a fresh zero state is always
            used (kept in the signature for caller compatibility).

    Returns:
        Tensor of shape [1, batch, hidden_dimensions] taken from
        `output_gru[:, -1, :hidden_dimensions]`.
        NOTE(review): that indexing assumes a batch-first output
        [batch, seq, features] -- confirm against the encoder's config.
    """
    # Allocate the zero state from X so it lives on X's device with X's
    # dtype; a plain torch.zeros would be a default-dtype CPU tensor and
    # break for CUDA / float64 inputs.
    self.init_states = X.new_zeros(
        self.gru_hidden_layers * self.num_directions,
        X.size(self.batch_index),
        self.hidden_dimensions)

    output_gru, _ = self.gru_encoder(X, self.init_states)

    # TODO: if batchnorm handle differently
    # TODO: if bidirectional handle differently
    #       [Note?]: This task should not need bidirectional RNN
    # The final hidden state returned by the encoder has shape
    # [(gru_hidden_layers * num_directions), batch, hidden_dimensions];
    # the last-step output below is used instead.
    return output_gru[:, -1, :self.hidden_dimensions].view(
        1, -1, self.hidden_dimensions)
def forward(self, input: Tensor, mask: Tensor = None, hx: Tuple[Tensor, Tensor] = None) -> Tuple[Tensor, Tensor]:
    """
    Run the masked LSTM over a whole sequence.

    Args:
        input: the input sequence; the batch axis is 0 when
            `self.batch_first` is set, otherwise 1.
        mask: optional mask; a trailing singleton axis is appended before it
            is handed to the RNN function.
        hx: optional (h, c) initial state. When omitted, zeros of shape
            [num_layers * num_directions, batch, hidden_size] are used for
            both entries.

    Returns:
        The (output, hidden) pair produced by the RNN function.
    """
    batch_axis = 0 if self.batch_first else 1
    batch_size = input.size(batch_axis)

    if hx is None:
        direction_count = 2 if self.bidirectional else 1
        zero_state = input.new_zeros(
            (self.num_layers * direction_count, batch_size, self.hidden_size))
        # The same zero tensor serves as both hidden and cell state.
        hx = (zero_state, zero_state)

    run_rnn = rnn_f.autograd_var_masked_rnn(
        num_layers=self.num_layers,
        batch_first=self.batch_first,
        bidirectional=self.bidirectional,
        lstm=True,
    )

    self.reset_noise(batch_size)

    expanded_mask = None if mask is None else mask.view(mask.size() + (1,))
    output, hidden = run_rnn(input, self.all_cells, hx, expanded_mask)
    return output, hidden
def forward(self, input: Tensor, mask: Tensor = None) -> Tensor:
    """
    Compute the energy tensor from per-position state scores and the
    transition matrix.

    Args:
        input: Tensor
            the input tensor with shape = [batch, length, input_size]
        mask: Tensor or None
            the mask tensor with shape = [batch, length]

    Returns: Tensor
        the energy tensor with shape = [batch, length, num_label, num_label]
    """
    # Unpacking doubles as a check that `input` is 3-dimensional.
    _batch, _length, _ = input.size()

    # [batch, length, input_size] -> [batch, length, num_label]
    state_scores = self.state_nn(input)

    if mask is not None:
        # Add a large bonus to the EOS score wherever mask == 0.
        masked_bonus = (mask == 0).float() * 2e4
        state_scores[:, :, self.index_eos] += masked_bonus

    # [batch, length, 1, num_label] broadcast against the transition matrix
    # gives [batch, length, num_label, num_label].
    return self.trans_matrix + state_scores.unsqueeze(2)
def predict_recursively(preds: Tensor, energy: Tensor, offset: int) -> NestedSequenceLabel:
    """
    Build a NestedSequenceLabel for `preds`, recursing into every span that
    starts with `b_id`, continues with zero or more `i_id` and ends with
    `e_id`.

    For each such span the matching slice of `energy` is decoded with
    `decode_nest` and the result is processed recursively, with the span's
    start added to `offset`. Scanning stops at the first `eos_id`.
    """
    length = preds.size(0)
    children = []
    pos = 0
    while pos < length:
        tag = preds[pos]
        if tag == eos_id:
            break
        if tag != o_id and tag == b_id:  # B-XXX
            span_start = pos
            pos += 1
            if pos == length:
                break
            tag = preds[pos]
            while tag == i_id:  # I-XXX
                pos += 1
                if pos == length:
                    # Only leaves the inner loop; `tag` is still i_id, so the
                    # E-check below fails and the outer loop then terminates.
                    break
                tag = preds[pos]
            if tag == e_id:  # E-XXX
                span_end = pos + 1
                inner_preds = decode_nest(energy[span_start:span_end, :, :])
                child = predict_recursively(
                    inner_preds,
                    energy[span_start:span_end, :, :],
                    span_start + offset)
                children.append(child)
        pos += 1
    return NestedSequenceLabel(offset, length + offset, preds, children)
def slice_last_dim(d: Tensor, length: int = 160) -> Tensor:
    """
    Cut a random window of `length` elements out of the last dimension.

    The window start is drawn uniformly from all positions where the window
    still fits. If the input is shorter than `length`, an error is thrown.
    [..., L>160] => [..., L==160]
    """
    available = d.size()[-1]
    # randint's upper bound is exclusive: the last valid start is
    # available - length.
    start_bound = available - (length - 1)
    start = torch.randint(0, start_bound, (1, )).item()
    return torch.narrow(d, -1, start, length)
def tolist(paired_wavs: List[Tensor], paired_feature: Tensor):
    """
    Split a padded feature batch into a list of per-utterance features,
    trimming each one in proportion to its waveform length.

    The longest waveform is taken to correspond to the full padded feature
    length; every other feature is cut to round(len(wav) / ratio) frames.
    """
    assert paired_feature.dim() == 3  # (batch_size, max_seq_len, feat_dim)

    longest_wav = max(len(wav) for wav in paired_wavs)
    # Waveform samples per feature frame.
    ratio = longest_wav / paired_feature.size(1)
    frame_counts = [round(len(wav) / ratio) for wav in paired_wavs]

    trimmed = []
    for padded, n_frames in zip(paired_feature, frame_counts):
        trimmed.append(padded[:n_frames])
    return trimmed
def nests_loss(self, energy: Tensor, target: Tensor) -> Tensor:
    """
    Negative log-likelihood of `target` for a single (unbatched) sequence.

    The result is ``logsumexp(final partition) - tgt_energy``: a
    log-partition term accumulated over the time steps, minus the energy
    summed along the target label path. Two running log-partitions are
    kept: ``partition_1`` over all labels and ``partition_3`` over a
    position-dependent subset of labels listed in ``indices_3``.

    Args:
        energy: Tensor
            the energy tensor with shape = [length, num_label, num_label]
        target: Tensor
            the tensor of target labels with shape [length]

    Returns: Tensor
            A 0D tensor for minus log likelihood loss
    """
    length, _, _ = energy.size()
    num_label_3 = self.indices_is.size(0)

    # indices_3[t] lists the label indices tracked by partition_3 at step t:
    # indices_bs for the first step, indices_is for the interior steps and
    # indices_es for the last step (which overwrites row 0 when length == 1).
    # NOTE(review): presumably the B-/I-/E- label index sets -- confirm
    # where these attributes are built.
    indices_3 = energy.new_empty((length, num_label_3)).long()
    indices_3[0, :] = self.indices_bs
    if length > 2:
        indices_3[1:length - 1, :] = self.indices_is.repeat(
            (length - 2, 1))
    indices_3[length - 1, :] = self.indices_es

    # shape = [num_label]
    partition_1 = None
    # shape = [num_label_3]
    partition_3 = None

    # shape = []
    prev_label = self.index_bos
    tgt_energy = 0

    for t in range(length):
        # shape = [num_label, num_label]
        curr_energy = energy[t]
        if t == 0:
            # Start from the BOS row; the restricted track starts at -1e4.
            partition_1 = curr_energy[self.index_bos, :]
            partition_3 = energy.new_full((num_label_3, ), -1e4)
        else:
            # shape = [num_label]
            # Copy of the previous all-label partition in which the entries
            # of the previous step's restricted labels are replaced by
            # partition_3.
            partition = partition_1.clone()
            partition[indices_3[t - 1]] = partition_3
            # partition_1 is advanced from its own previous value (not from
            # the overlaid copy); partition_3 is advanced from the overlaid
            # copy, restricted to this step's label subset.
            partition_1 = logsumexp(curr_energy + partition_1.unsqueeze(1),
                                    dim=0)
            partition_3 = logsumexp(curr_energy[:, indices_3[t]] +
                                    partition.unsqueeze(1),
                                    dim=0)
        # Accumulate the energy of the target transition prev_label -> label.
        label = target[t]
        tgt_energy += curr_energy[prev_label, label]
        prev_label = label

    # Final step: add the transition-to-EOS column (read through `.data`)
    # and overlay the restricted track for the last step's label subset.
    # NOTE(review): tgt_energy has no matching last_label -> EOS term --
    # confirm this asymmetry is intended.
    t = length - 1
    curr_energy = self.trans_matrix.data[:, self.index_eos]
    partition = curr_energy + partition_1
    partition[indices_3[t]] = curr_energy[indices_3[t]] + partition_3
    return logsumexp(partition, dim=0) - tgt_energy
def pad_last_dim(d: Tensor, length_min: int = 160) -> Tensor:
    """
    Pad last dimension with 0 if length is not enough.

    If input is already at least `length_min` long, it is returned as-is
    (the very same tensor object, no copy).
    [..., L<160] => [..., L==160]

    The padding is allocated with the dtype and device of `d`, so integer
    tensors stay integer and non-CPU tensors can be padded.
    """
    shortfall = length_min - d.size(-1)
    if shortfall <= 0:
        return d
    # new_zeros inherits dtype/device from `d`; torch.zeros would not.
    padding = d.new_zeros((*d.shape[:-1], shortfall))
    return torch.cat((d, padding), -1)
def forward(self, img: Tensor, labels: int):
    """Forward pass of the Discriminator.

    The image is flattened per sample, concatenated with the embedding of
    its label along the last axis, and scored by `self.model`.

    Args:
        img: the image that should be classified in fake or real.
        labels: the label of the image.
            NOTE(review): passed straight to `self.label_embedding`, so in
            practice this is presumably a tensor of label indices rather
            than a plain int -- confirm against callers.

    Returns:
        Whatever `self.model` returns for the concatenated input.
    """
    flat_img = img.view(img.size(0), -1)
    label_features = self.label_embedding(labels)
    d_in = torch.cat((flat_img, label_features), -1)
    return self.model(d_in)
def _get_matrix(self, x:Tensor) -> Tensor:
    r'''
    Converts flat data to a matrix per sample via lookup-and-reshaping;
    elements not present in the flat data are set to zero.

    Arguments:
        x: flat data

    Returns:
        Tensor reshaped to (batch, len(vecs), len(fpv)) when `row_wise` is
        set, else (batch, len(fpv), len(vecs)), passed through `to_device`
    '''

    # Gather the columns in matrix order, then blank the slots that have no
    # source column.
    gathered = x[:,self.lookup]
    gathered[:,self.missing] = 0

    if self.row_wise:
        target_shape = (x.size(0),len(self.vecs),len(self.fpv))
    else:
        target_shape = (x.size(0),len(self.fpv),len(self.vecs))
    return to_device(gathered.reshape(target_shape))
def forward(self, x: Tensor, target: Tensor):
    """
    Label-smoothed loss: build the smoothed target distribution and score
    `x` against it with `self.criteron`.

    Every row of the target distribution gets `confidence` on the target
    class and `smoothing / (number_of_class - 2)` elsewhere; the padding
    column is zeroed, and rows whose target is the padding index are zeroed
    entirely. The distribution is also stored on `self.true_dist`.

    @param x: shape -> [length, number_of_class]
    @param target: shape -> [length]
    @return the value of `self.criteron(x, true_dist)`
    """
    assert x.size(-1) == self.number_of_class

    # 2: one slot goes to the target class/label, one to padding.
    # full_like on the detached input keeps shape/dtype/device and yields a
    # tensor outside the autograd graph.
    true_dist: Tensor = torch.full_like(
        x.detach(), self.smoothing / (self.number_of_class - 2))
    labels = target.detach()
    true_dist.scatter_(1, labels.unsqueeze(1), self.confidence)
    true_dist[:, self.padding_idx] = 0

    # nonzero() on a 1-D tensor is always [k, 1]; squeeze only that last
    # axis so a single hit stays a 1-D index, and test emptiness by element
    # count (the dimensionality never changes).
    padded_rows: Tensor = torch.nonzero(labels == self.padding_idx).squeeze(1)
    if padded_rows.numel() > 0:
        true_dist.index_fill_(0, padded_rows, 0.0)

    self.true_dist = true_dist
    return self.criteron(x, true_dist)
def loss(self, input: Tensor, target: Tensor, mask: Tensor = None) -> Tuple[Tensor, Tensor]:
    """
    Negative log-likelihood of `target` for a batch of sequences.

    Args:
        input: Tensor
            the input tensor with shape = [batch, length, input_size]
        target: Tensor
            the tensor of target labels with shape [batch, length]
        mask: Tensor or None
            the mask tensor with shape = [batch, length]

    Returns: Tuple[Tensor, Tensor]
        A 1D tensor (shape [batch]) for minus log likelihood loss, and the
        energy tensor returned by `self.forward`.
    """
    batch, length, _ = input.size()
    energy = self.forward(input, mask=mask)
    # shape = [length, batch, num_label, num_label]
    energy_transpose = energy.transpose(0, 1)
    # shape = [length, batch]
    target_transpose = target.transpose(0, 1)

    # shape = [batch, num_label]
    partition = None

    # shape = [batch]
    # Index tensors are created directly as int64 on the input's device;
    # building them in the input's float dtype and casting back is inexact
    # for half-precision inputs.
    batch_index = torch.arange(batch, device=input.device)
    prev_label = torch.full((batch, ), self.index_bos,
                            dtype=torch.long, device=input.device)
    tgt_energy = input.new_zeros(batch)

    for t in range(length):
        # shape = [batch, num_label, num_label]
        curr_energy = energy_transpose[t]
        if t == 0:
            # Start from the BOS row of the first step's energies.
            partition = curr_energy[:, self.index_bos, :]
        else:
            # shape = [batch, num_label]
            partition = logsumexp(curr_energy + partition.unsqueeze(2), dim=1)
        # Accumulate the energy of each target transition prev -> label.
        label = target_transpose[t]
        tgt_energy += curr_energy[batch_index, prev_label, label]
        prev_label = label

    # Add the transition-to-EOS column (read through `.data`) before the
    # final reduction over labels.
    final_partition = logsumexp(
        self.trans_matrix.data[:, self.index_eos].unsqueeze(0) + partition,
        dim=1)
    return final_partition - tgt_energy, energy
def attention(query: Tensor, key: Tensor, value: Tensor, mask: Tensor = None, dropout=None):
    """
    Scaled dot-product attention.

    @param query shape -> batch_size, head_count, max_length, model_dim_size/head_count
    @param key shape -> batch_size, head_count, max_length, model_dim_size/head_count
    @param value shape -> batch_size, head_count, max_length, model_dim_size/head_count
    @param mask optional tensor broadcastable to the score matrix; positions
                where it equals 0 are excluded from attention
    @param dropout optional callable applied to the attention weights
    @return (weighted values, attention weights)
    """
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        # masked_fill is out-of-place: the result must be kept, otherwise
        # the mask has no effect at all.
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attention = F.softmax(scores, dim=-1)
    if dropout is not None:
        p_attention = dropout(p_attention)
    return torch.matmul(p_attention, value), p_attention
def step(self, input: Tensor, hx: Tuple[Tensor, Tensor] = None, mask: Tensor = None) -> Tuple[Tensor, Tensor]:
    """
    Advance the RNN by a single time step (one-directional RNN only).

    Args:
        input (batch, input_size): input tensor of this step.
        hx (num_layers, batch, hidden_size): the hidden state of last step;
            zeros are used for both tuple entries when omitted.
        mask (batch): the mask tensor of this step.

    Returns:
        output (batch, hidden_size): tensor containing the output of this
            step from the last layer of RNN.
        hn (num_layers, batch, hidden_size): tensor containing the hidden
            state of this step
    """
    assert not self.bidirectional, "step only cannot be applied to bidirectional RNN."

    if hx is None:
        n_batch = input.size(0)
        zero_state = input.new_zeros(
            (self.num_layers, n_batch, self.hidden_size))
        # The same zero tensor serves as both entries of the state tuple.
        hx = (zero_state, zero_state)

    run_step = rnn_f.autograd_var_masked_step(num_layers=self.num_layers,
                                              lstm=True)
    output, hidden = run_step(input, self.all_cells, hx, mask)
    return output, hidden
def make_std_mask(target: Tensor, pad):
    """
    Build the target-side mask: positions different from `pad`, combined
    (logical AND) with the mask from `subsequent_mask` for the target length.
    """
    # Non-padding positions, with an extra axis inserted before the last one.
    not_padding = (target != pad).unsqueeze(-2)
    length = target.size(-1)
    future_mask = subsequent_mask(length).type_as(not_padding.data)
    return not_padding & Variable(future_mask)
def forward(self, x: Tensor) -> Tensor:
    """
    Two-stage attention through the learned inducing points.

    The inducing points are tiled once per batch element and passed with
    `x` to `mab1`; the result `H` is then passed with `x` to `mab2`.
    """
    n_batch = x.size(0)
    tiled_points = self.inducing_points.repeat(n_batch, 1, 1)
    H = self.mab1(tiled_points, x)
    return self.mab2(x, H)
def forward(self, x: Tensor) -> Tensor:
    """
    Pass the learned seed vectors, tiled once per batch element, together
    with `x` to `self.mab` and return its result.
    """
    n_batch = x.size(0)
    tiled_seeds = self.seed_vectors.repeat(n_batch, 1, 1)
    return self.mab(tiled_seeds, x)
def adaptive_scaling_loss(logits: Tensor, targets: Tensor, positive_idx: Tensor, mask: Tensor = None, beta: float = 1.0, reduction='none', weight_trainable: bool = False):
    """
    Cross-entropy loss in which examples with a negative gold label are
    re-weighted by a batch-level factor, while examples with a positive gold
    label keep weight 1.

    :param logits: (batch, num_label)
    :param targets: (batch, )
    :param positive_idx: (num_label)
        size is the number of all labels, positive_idx is 1, negative_idx is 0
    :param mask: (batch, )
    :param beta: float
    :param reduction:
        Specifies the reduction to apply to the output:
        ``'none'`` | ``'mean'`` | ``'sum'``.
        ``'none'``: no reduction will be applied,
        ``'mean'``: the sum of the output will be divided by the number of
        elements in the output (by the mask sum when a mask is given),
        ``'sum'``: the output will be summed.
    :param weight_trainable: bool
        False, Stop gradient at weight beta
        True, gradient from beta weight back propagated to other parameters
    :return: the weighted loss, reduced as requested
    """
    batch_size, num_label = logits.size()
    probs = allennlp_nn_utils.masked_softmax(logits, mask=mask)

    assert positive_idx.size(0) == num_label
    is_positive = positive_idx.unsqueeze(0).expand(batch_size, num_label).to(logits.device)
    is_negative = 1 - is_positive

    gold_index = targets.unsqueeze(-1)

    def at_gold(table):
        # Pick, for every example, the column of its gold label.
        return torch.gather(table, 1, gold_index)

    # Probability mass on the gold label, summed separately over examples
    # with a positive / a negative gold label.
    tp = torch.sum(at_gold(probs * is_positive))
    tn = torch.sum(at_gold(probs * is_negative))

    # Per-example 0/1 indicators of a positive / negative gold label.
    p_vector = at_gold(is_positive).squeeze(-1).float()
    n_vector = at_gold(is_negative).squeeze(-1).float()
    p_sum = torch.sum(p_vector)
    n_sum = torch.sum(n_vector)

    negative_weight = tp / (beta * beta * p_sum + n_sum - tn)
    weight_beta = n_vector * negative_weight + p_vector
    if not weight_trainable:
        weight_beta.detach_()

    loss = nn.functional.cross_entropy(input=logits, target=targets, reduction='none')

    if mask is not None:
        weight_loss = loss * weight_beta * mask
    else:
        weight_loss = loss * weight_beta

    if reduction == 'none':
        return weight_loss
    if reduction == 'sum':
        return torch.sum(weight_loss)
    if reduction == 'mean':
        if mask is not None:
            # Small epsilon guards against an all-zero mask.
            return torch.sum(weight_loss) / (torch.sum(mask) + 1e-13)
        return torch.mean(weight_loss)
    raise NotImplementedError('reduction %s in ``adaptive_scaling_loss`` is not Implemented' % reduction)
def __call__(self, tensor: Tensor) -> Tensor:
    """
    Add Gaussian noise (scaled by `self.std`, shifted by `self.mean`) to
    `tensor` and clamp the result to [0, 1].

    The clamp keeps a noisy image inside the valid greyscale range. The
    noise is sampled on the same device as `tensor`, so tensors that do not
    live on the CPU can be augmented as well.
    """
    gaussian = torch.randn(tensor.size(), device=tensor.device)
    return torch.clamp(tensor + gaussian * self.std + self.mean, 0, 1)
def _augment(self, data: Tensor) -> Tensor:
    """
    Rotate every sample in `data` by an angle picked at random from
    `self._pos_angles`.

    One index in [0, 4) is drawn per sample along the first axis of `data`
    and used to look up that sample's angle.
    """
    n_samples = data.size(0)
    choice = torch.randint(size=(n_samples, ), low=0, high=4)
    picked = self._pos_angles[choice]
    angles = picked.squeeze(-1)
    return rotate(data, angles)