def forward(ctx, spike_a: torch.Tensor, spike_b: torch.Tensor):
    # y = spike_a * spike_b
    assert spike_a.shape == spike_b.shape, 'spike_a.shape != spike_b.shape'  # disable broadcasting
    if spike_a.dtype == torch.bool:
        spike_a_bool = spike_a
    else:
        spike_a_bool = spike_a.bool()
    if spike_b.dtype == torch.bool:
        spike_b_bool = spike_b
    else:
        spike_b_bool = spike_b.bool()

    if spike_a.dtype == torch.bool and spike_b.dtype == torch.bool:
        # If both spike_a and spike_b are bool, no gradient should be needed,
        # because a bool tensor cannot carry a grad.
        return spike_a_bool.logical_and(spike_b_bool)

    if spike_a.requires_grad and spike_b.requires_grad:
        ctx.save_for_backward(spike_b_bool, spike_a_bool)
    elif spike_a.requires_grad and not spike_b.requires_grad:
        ctx.save_for_backward(spike_b_bool)
    elif not spike_a.requires_grad and spike_b.requires_grad:
        ctx.save_for_backward(spike_a_bool)

    ret = spike_a_bool.logical_and(spike_b_bool).float()
    ret.requires_grad_(spike_a.requires_grad or spike_b.requires_grad)
    return ret
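# --- Illustrative sketch (not from the original sources) ---
# A minimal torch.autograd.Function in the same spirit as the forward above: spike
# multiplication computed as a logical AND. The backward shown here is simply the
# product rule for y = a * b; that choice is an assumption for illustration only,
# not the original implementation.
import torch

class SpikeAnd(torch.autograd.Function):
    @staticmethod
    def forward(ctx, a, b):
        ctx.save_for_backward(a, b)
        return a.bool().logical_and(b.bool()).float()

    @staticmethod
    def backward(ctx, grad_out):
        a, b = ctx.saved_tensors
        return grad_out * b, grad_out * a

a = torch.tensor([0., 1., 1.], requires_grad=True)
b = torch.tensor([1., 1., 0.], requires_grad=True)
y = SpikeAnd.apply(a, b)
y.sum().backward()
print(y, a.grad, b.grad)  # tensor([0., 1., 0.]) tensor([1., 1., 0.]) tensor([0., 1., 1.])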
def ranking(self, scores: torch.Tensor, targets: torch.Tensor, filtered_mask: torch.Tensor):
    query_size = scores.size(0)
    vocabulary_size = scores.size(1)
    target_scores = scores[range(query_size), targets].unsqueeze(1).repeat((1, vocabulary_size))

    # TODO(gengyuan)
    assert self.preference in ["optimistic", "pessimistic"]
    assert self.ordering in ["ascending", "descending"]

    if self.ordering == "ascending":
        scores = scores.masked_fill(filtered_mask.bool(), 1e6)
        if self.preference == "optimistic":
            comp = scores.lt(target_scores)
        else:
            comp = scores.le(target_scores)
    else:
        scores = scores.masked_fill(filtered_mask.bool(), -1e6)
        if self.preference == "optimistic":
            comp = scores.gt(target_scores)
        else:
            comp = scores.ge(target_scores)

    ranks = comp.sum(1) + 1
    return ranks.float()
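# --- Illustrative sketch (not from the original sources) ---
# Toy filtered ranking, assuming higher scores are better ("descending") and optimistic
# tie handling: rank = 1 + number of candidates scoring strictly above the target.
# All tensors below are made up for the example.
import torch

scores = torch.tensor([[0.9, 0.2, 0.7, 0.7]])   # one query, four candidates
targets = torch.tensor([2])                     # gold candidate is index 2
filtered_mask = torch.tensor([[1, 0, 0, 0]])    # candidate 0 is a known true answer: filter it

target_scores = scores[range(scores.size(0)), targets].unsqueeze(1)
masked = scores.masked_fill(filtered_mask.bool(), -1e6)
ranks = masked.gt(target_scores).sum(1) + 1
print(ranks)  # tensor([1]) -- after filtering, nothing scores above 0.7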
def step(self, Ybar_t: torch.Tensor,
         dec_state: Tuple[torch.Tensor, torch.Tensor],
         enc_hiddens: torch.Tensor,
         enc_hiddens_proj: torch.Tensor,
         enc_masks: torch.Tensor) -> Tuple[Tuple, torch.Tensor, torch.Tensor]:
    """ Compute one forward step of the LSTM decoder, including the attention computation.

    @param Ybar_t (Tensor): Concatenated Tensor of [Y_t o_prev], with shape (b, e + h). The input for the decoder,
        where b = batch size, e = embedding size, h = hidden size.
    @param dec_state (tuple(Tensor, Tensor)): Tuple of tensors both with shape (b, h), where b = batch size,
        h = hidden size. First tensor is decoder's prev hidden state, second tensor is decoder's prev cell.
    @param enc_hiddens (Tensor): Encoder hidden states Tensor, with shape (b, src_len, h * 2), where b = batch size,
        src_len = maximum source length, h = hidden size.
    @param enc_hiddens_proj (Tensor): Encoder hidden states Tensor, projected from (h * 2) to h. Tensor is with shape
        (b, src_len, h), where b = batch size, src_len = maximum source length, h = hidden size.
    @param enc_masks (Tensor): Tensor of sentence masks shape (b, src_len), where b = batch size,
        src_len is maximum source length.
    @returns dec_state (tuple(Tensor, Tensor)): Tuple of tensors both shape (b, h), where b = batch size,
        h = hidden size. First tensor is decoder's new hidden state, second tensor is decoder's new cell.
    @returns combined_output (Tensor): Combined output Tensor at timestep t, shape (b, h), where b = batch size,
        h = hidden size.
    @returns e_t (Tensor): Tensor of shape (b, src_len). It is attention scores distribution.
        Note: You will not use this outside of this function. We are simply returning this value
        so that we can sanity check your implementation.
    """
    combined_output = None

    ### COPY OVER YOUR CODE FROM ASSIGNMENT 4
    # 1. Apply the decoder cell to obtain the new decoder state.
    dec_state = self.decoder(Ybar_t, dec_state)
    (dec_hidden, dec_cell) = dec_state
    # 2. Attention scores: (b, src_len, h) bmm (b, h, 1) -> (b, src_len, 1) -> (b, src_len)
    e_t = enc_hiddens_proj.bmm(dec_hidden.unsqueeze(2)).squeeze(2)
    ### END YOUR CODE FROM ASSIGNMENT 4

    # Set e_t to -inf where enc_masks has 1
    if enc_masks is not None:
        # e_t.data.masked_fill_(enc_masks.byte(), -float('inf'))
        e_t.data.masked_fill_(enc_masks.bool(), -float('inf'))

    ### COPY OVER YOUR CODE FROM ASSIGNMENT 4
    # 1. Apply softmax to e_t to get the attention distribution.
    alpha_t = F.softmax(e_t, dim=1)  # (b, src_len)
    # 2. (b, 1, src_len) x (b, src_len, 2h) = (b, 1, 2h) -> (b, 2h)
    # a_t = e_t.unsqueeze(1).bmm(enc_hiddens).squeeze(1)
    att_view = (alpha_t.size(0), 1, alpha_t.size(1))
    a_t = torch.bmm(alpha_t.view(*att_view), enc_hiddens).squeeze(1)
    # 3. Concatenate a_t (b, 2h) and dec_hidden (b, h) into U_t (b, 3h).
    U_t = torch.cat((a_t, dec_hidden), dim=1)
    # 4. Apply the combined output projection to U_t -> V_t, shape (b, h).
    V_t = self.combined_output_projection(U_t)
    O_t = self.dropout(torch.tanh(V_t))
    ### END YOUR CODE FROM ASSIGNMENT 4

    combined_output = O_t
    return dec_state, combined_output, e_t
def compute_dice_across_patches(segmentation: torch.Tensor,
                                ground_truth: torch.Tensor,
                                allow_multiple_classes_for_each_pixel: bool = False) -> torch.Tensor:
    """
    Computes the Dice scores for all classes across all patches in the arguments.

    :param segmentation: Tensor containing class ids predicted by a model.
    :param ground_truth: One-hot encoded torch tensor containing ground-truth label ids.
    :param allow_multiple_classes_for_each_pixel: If set to False, the ground-truth tensor has to contain
        only one foreground label for each pixel.
    :return: A torch tensor of size (Patches, Classes) with the Dice scores. Dice scores are computed for
        all classes, including the background class at index 0.
    """
    check_size_matches(segmentation, ground_truth, 4, 5, [0, -3, -2, -1],
                       arg1_name="segmentation", arg2_name="ground_truth")

    # One-hot encoded ground-truth values should sum up to one for all pixels
    if not allow_multiple_classes_for_each_pixel:
        if not torch.allclose(torch.sum(ground_truth, dim=1).float(),
                              torch.ones(segmentation.shape, device=ground_truth.device).float()):
            raise Exception("Ground-truth one-hot matrix does not sum up to one for all pixels")

    # Convert the predicted segmentation to one-hot encoding
    [num_patches, num_classes] = ground_truth.size()[:2]
    one_hot_segmentation = F.one_hot(segmentation, num_classes=num_classes).permute(0, 4, 1, 2, 3)

    # Convert the tensors to bool tensors
    one_hot_segmentation = one_hot_segmentation.bool().view(num_patches, num_classes, -1)
    ground_truth = ground_truth.bool().view(num_patches, num_classes, -1)

    # AND between segmentation and ground-truth, then count the overlapping samples per class
    intersection = 2.0 * torch.sum(one_hot_segmentation & ground_truth, dim=-1).float()
    union = torch.sum(one_hot_segmentation, dim=-1) + torch.sum(ground_truth, dim=-1).float() + 1.0e-6
    return intersection / union
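# --- Illustrative sketch (not from the original sources) ---
# Minimal per-class Dice on a toy 3D patch, mirroring the boolean-intersection approach
# above: class-id predictions are one-hot encoded and compared against one-hot ground truth.
# Shapes and class counts are made up for the example.
import torch
import torch.nn.functional as F

num_classes = 3
pred = torch.randint(0, num_classes, (2, 4, 4, 4))              # (patches, Z, Y, X) class ids
gt = F.one_hot(torch.randint(0, num_classes, (2, 4, 4, 4)),
               num_classes=num_classes).permute(0, 4, 1, 2, 3)  # (patches, classes, Z, Y, X)

pred_oh = F.one_hot(pred, num_classes=num_classes).permute(0, 4, 1, 2, 3).bool()
gt_bool = gt.bool()
intersection = 2.0 * (pred_oh & gt_bool).flatten(2).sum(-1).float()
denominator = pred_oh.flatten(2).sum(-1).float() + gt_bool.flatten(2).sum(-1).float() + 1e-6
dice = intersection / denominator
print(dice.shape)  # torch.Size([2, 3]) -- one Dice score per patch and class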
def step_qa(self, Ybar_t: torch.Tensor,
            dec_state: Tuple[torch.Tensor, torch.Tensor],
            enc_hiddens: torch.Tensor,
            enc_hiddens_proj: torch.Tensor,
            enc_masks: torch.Tensor):
    """One forward step of the decoder.

    :param Ybar_t: (batch_size, embed_size) The first tokens of each sentence in the mini-batch.
    :param dec_state: ...
    :returns dec_state: the current state of the decoder.
    :returns output: the current hidden state of the decoder.
    """
    dec_state = self.decoder2(Ybar_t, dec_state)
    dec_hidden, dec_cell = dec_state
    e_t = torch.squeeze(torch.bmm(enc_hiddens_proj, torch.unsqueeze(dec_hidden, 2)), dim=2)

    # Set e_t to -inf where enc_masks has 1
    if enc_masks is not None:
        e_t.data.masked_fill_(enc_masks.bool(), -float('inf'))

    alpha_t = F.softmax(e_t, dim=1)
    a_t = torch.squeeze(torch.bmm(torch.unsqueeze(alpha_t, 1), enc_hiddens), 1)
    U_t = torch.cat((a_t, dec_hidden), dim=1)
    V_t = self.combined_output_projection2(U_t)
    O_t = self.dropout2(torch.tanh(V_t))
    combined_output = O_t
    return dec_state, combined_output, e_t
def forward(self, query: torch.Tensor, states: torch.Tensor, states_features: torch.Tensor,
            source_mask: torch.Tensor, coverage: torch.Tensor, is_coverage: bool) \
        -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Calculates the attention using the Bahdanau approach.

    :param is_coverage: Whether to use coverage or not
    :param states_features: Precalculated states features
    :param coverage: The previous coverage (B, L_src, 1)
    :param source_mask: Mask for source (B, L_src, 1)
    :param query: The state of target (B, L_tgt, H)
    :param states: The memory states of encoder (B, L_src, 2*H)
    :return: The attentional hidden state, the updated coverage, and the attention weights
    """
    # (B, L_tgt, L_src)
    alignments = self.score(query, states, states_features, coverage, is_coverage)

    # Set padding positions to -inf so their attention weights become zero after the softmax
    alignments = alignments.masked_fill(~source_mask.bool().unsqueeze(2), float('-inf'))
    align_vectors = self._softmax(alignments)

    # (B, L_tgt, L_src) x (B, L_src, 2*H) = (B, L_tgt, 2*H)
    c = torch.bmm(align_vectors.transpose(1, 2), states)

    concat_c = torch.cat([c, query], 2)
    attn_h = self.linear_out(concat_c)

    # (B, L_src, 1)
    # attentions = attentions.transpose(1, 2).contiguous()
    new_coverage = coverage + align_vectors
    return attn_h, new_coverage, align_vectors
def add_sample(self, y_true: torch.Tensor, y_pred):
    if self.ignore_last_class:
        # Ignore the background class (last channel)
        # y_pred = y_pred[:, :-1]
        # y_true = y_true[:, :-1]
        y_true_mask = 1 - y_true[:, [-1]]  # [B, 1, H, W]

    y_true_bin = y_true.bool()  # [B, C, ...]

    # Convert y_pred from class scores to one-hot along dim C
    y_pred_argmax = torch.argmax(y_pred, dim=1, keepdim=True)  # [B, 1, ...]
    y_pred_oh = torch.zeros_like(y_pred, dtype=torch.bool).scatter_(1, y_pred_argmax, True)  # one-hot

    # Remove the background channel
    _tp = (y_true_bin & y_pred_oh)[:, :-1]
    _tn = ((~y_true_bin & ~y_pred_oh) * y_true_mask)[:, :-1]
    _fp = ((~y_true_bin & y_pred_oh) * y_true_mask)[:, :-1]
    _fn = ((y_true_bin & ~y_pred_oh) * y_true_mask)[:, :-1]

    tp = _tp.float().sum(dim=self._data_dims)
    tn = _tn.float().sum(dim=self._data_dims)
    fp = _fp.float().sum(dim=self._data_dims)
    fn = _fn.float().sum(dim=self._data_dims)

    self.TP += tp
    self.TN += tn
    self.FP += fp
    self.FN += fn
    return tp, tn, fp, fn
def forward(
    self,
    pred: torch.Tensor,
    ref_kspace: torch.Tensor,
    sens_maps: torch.Tensor,
    mask: torch.Tensor,
) -> torch.Tensor:
    """
    Parameters
    ----------
    pred: Input data.
    ref_kspace: Reference k-space data.
    sens_maps: Coil sensitivity maps.
    mask: Mask to apply to the data.

    Returns
    -------
    Reconstructed image.
    """
    zero = torch.zeros(1, 1, 1, 1, 1).to(pred)
    soft_dc = torch.where(mask.bool(), pred - ref_kspace, zero) * self.dc_weight

    eta = self.sens_reduce(pred, sens_maps)
    eta = self.model(eta)
    eta = self.sens_expand(eta, sens_maps)

    if not self.no_dc:
        eta = pred - soft_dc - eta
    return eta
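# --- Illustrative sketch (not from the original sources) ---
# Standalone version of the "soft" data-consistency term used above: where the sampling
# mask is 1, pull the prediction toward the measured k-space, scaled by a weight. The
# tensor layout (batch, coils, H, W, real/imag) and the scalar dc_weight are assumptions
# made only for this example.
import torch

pred = torch.randn(2, 4, 8, 8, 2)
ref_kspace = torch.randn_like(pred)
mask = torch.rand(2, 1, 1, 8, 1) > 0.5    # sampled k-space columns, broadcastable
dc_weight = torch.tensor(0.1)

zero = torch.zeros(1, 1, 1, 1, 1)
soft_dc = torch.where(mask, pred - ref_kspace, zero) * dc_weight
updated = pred - soft_dc                  # one data-consistency update step
print(updated.shape)  # torch.Size([2, 4, 8, 8, 2])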
def forward(self, query: torch.Tensor, states: torch.Tensor, source_mask: torch.Tensor) \
        -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Calculates the attention using the Bahdanau approach.

    :param source_mask: Mask for source (B, L_src, 1)
    :param query: The state of target (B, L_tgt, H)
    :param states: The memory states of encoder (B, L_src, 2*H)
    :return: The weighted context, the attentional hidden state, and the attention weights
    """
    # (B, L_tgt, L_src)
    alignments = self.score(query, states)

    # Set padding positions to -inf so their attention weights become zero after the softmax
    alignments = alignments.masked_fill(~source_mask.bool().unsqueeze(1), float('-inf'))
    attentions = self._softmax(alignments)

    # (B, L_tgt, L_src) x (B, L_src, 2*H) = (B, L_tgt, 2*H)
    context = torch.bmm(attentions, states)

    concat_context = torch.cat((context, query), dim=2)
    # (B, L_tgt, H)
    attention_hidden = self._context(concat_context)

    # (B, L_src, 1)
    attentions = attentions.transpose(1, 2).contiguous()
    return context, attention_hidden, attentions
def dot_prod_attention(h_t: Tensor,
                       src_encodings: Tensor,
                       src_encoding_att_linear: Tensor,
                       mask: Tensor = None) -> Tuple[Tensor, Tensor]:
    """
    :param h_t: (batch_size, hidden_state)
    :param src_encodings: (batch_size, src_sent_len, src_output_size)
    :param src_encoding_att_linear: (batch_size, src_sent_len, hidden_state)
    :param mask: (batch_size, src_sent_len), paddings are marked as 1
    :return:
        ctx_vec: (batch_size, src_output_size)
        softmaxed_att_weight: (batch_size, src_sent_len)
    """
    # (batch_size, src_sent_len)
    att_weight = torch.bmm(src_encoding_att_linear, h_t.unsqueeze(2)).squeeze(2)
    if mask is not None:
        att_weight.data.masked_fill_(mask.bool(), -float('inf'))

    softmaxed_att_weight = F.softmax(att_weight, dim=-1)

    att_view = (att_weight.size(0), 1, att_weight.size(1))
    # (batch_size, src_output_size)
    ctx_vec = torch.bmm(softmaxed_att_weight.view(*att_view), src_encodings).squeeze(1)

    return ctx_vec, softmaxed_att_weight
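# --- Illustrative sketch (not from the original sources) ---
# Self-contained masked dot-product attention on random tensors; names and sizes are
# made up for the example. Padded positions receive -inf scores and therefore zero weight.
import torch
import torch.nn.functional as F

batch, src_len, hidden, enc_size = 2, 5, 8, 16
h_t = torch.randn(batch, hidden)
src_encodings = torch.randn(batch, src_len, enc_size)
src_proj = torch.randn(batch, src_len, hidden)       # encoder states projected to hidden size
mask = torch.tensor([[0, 0, 0, 1, 1],
                     [0, 0, 1, 1, 1]])               # 1 marks padding

scores = torch.bmm(src_proj, h_t.unsqueeze(2)).squeeze(2)            # (batch, src_len)
scores = scores.masked_fill(mask.bool(), float('-inf'))
weights = F.softmax(scores, dim=-1)
context = torch.bmm(weights.unsqueeze(1), src_encodings).squeeze(1)  # (batch, enc_size)
print(weights.sum(-1), context.shape)  # tensor([1., 1.]) torch.Size([2, 16])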
def _generate_from_sum_logits(
    self,
    logits: torch.Tensor,
    mask: torch.Tensor,
    disable_special_ids: bool = True,
    sample: bool = False,
    disable: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    # bsz, seq_len, vocab_size
    assert logits.dim() == 3

    # bart-large and bart-large-cnn have different vocab_size, but there is some
    # inconsistency in the implementation.
    if disable_special_ids:
        specials_ids = self._tokenizer.all_special_ids
        specials_ids = [x for x in specials_ids if x < self._model.config.vocab_size]
        logits[..., specials_ids] = float("-inf")

    if disable is not None:
        disable[disable == -100] = self._tokenizer.pad_token_id
        logits.scatter_(dim=-1, index=disable[:, :, None],
                        src=logits.new_tensor(float("-inf")))

    if sample:
        bsz, seq_len, vocab_size = logits.size()
        query_input_ids = torch.multinomial(logits.view(-1, vocab_size).softmax(dim=-1), 1)
        query_input_ids = query_input_ids.reshape(bsz, seq_len)
    else:
        query_input_ids = logits.argmax(-1)

    query_input_ids[~mask.bool()] = self._tokenizer.pad_token_id
    return query_input_ids
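# --- Illustrative sketch (not from the original sources) ---
# The two decoding modes above on toy logits: greedy argmax vs. per-position multinomial
# sampling from the softmaxed distribution. Shapes are made up for the example.
import torch

bsz, seq_len, vocab = 2, 3, 5
logits = torch.randn(bsz, seq_len, vocab)
greedy = logits.argmax(-1)                                                            # (bsz, seq_len)
sampled = torch.multinomial(logits.view(-1, vocab).softmax(dim=-1), 1).reshape(bsz, seq_len)
print(greedy.shape, sampled.shape)  # torch.Size([2, 3]) torch.Size([2, 3])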
def accuracy_thresh(y_pred: Tensor, y_true: Tensor,
                    thresh: float = CLASSIFICATION_THRESHOLD,
                    sigmoid: bool = True):
    "Compute accuracy when `y_pred` and `y_true` are the same size."
    if sigmoid:
        y_pred = y_pred.sigmoid()
    return ((y_pred > thresh) == y_true.bool()).float().mean().item()
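# --- Illustrative sketch (not from the original sources) ---
# Toy check of thresholded multi-label accuracy, assuming a 0.5 threshold; the logits
# and labels are made up for the example.
import torch

y_pred = torch.tensor([[2.0, -1.0], [-3.0, 0.5]])   # raw logits
y_true = torch.tensor([[1.0, 0.0], [0.0, 0.0]])
probs = y_pred.sigmoid()
acc = ((probs > 0.5) == y_true.bool()).float().mean()
print(acc)  # tensor(0.7500) -- the last logit (0.5 -> 0.62) is a false positive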
def iou_pytorch(self, outputs: torch.Tensor, labels: torch.Tensor):
    smooth = 1e-6
    outputs = outputs[:, 0] > 0.5
    labels = labels.squeeze(1).round() if labels.dtype is not torch.bool else labels

    iou = 0.0
    outputs_cls = outputs.bool()
    labels_cls = labels.bool()
    intersection = (outputs_cls & labels_cls).float().sum((1, 2))  # zero if Truth=0 or Prediction=0
    union = (outputs_cls | labels_cls).float().sum((1, 2))         # zero if both are 0
    iou += (intersection + smooth) / (union + smooth)              # smooth the division to avoid 0/0
    return torch.mean(iou)
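# --- Illustrative sketch (not from the original sources) ---
# Tiny IoU example on two 4x4 binary masks (batch of one), with the same smoothing idea.
# The masks are made up for the example.
import torch

pred = torch.zeros(1, 4, 4, dtype=torch.bool)
target = torch.zeros(1, 4, 4, dtype=torch.bool)
pred[0, :2, :2] = True      # 4 predicted pixels
target[0, 1:3, :2] = True   # 4 target pixels, 2 of them overlap
smooth = 1e-6
inter = (pred & target).float().sum((1, 2))
union = (pred | target).float().sum((1, 2))
print((inter + smooth) / (union + smooth))  # tensor([0.3333]) -- 2 / 6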
def metrics(prediction: torch.Tensor, target: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    prediction = prediction.squeeze() > PREDICTION_THRESHOLD
    target = target.bool().squeeze()
    tp = float(torch.sum(target & prediction).cpu())
    precision = tp / (float(prediction.sum().cpu()) + .1)
    recall = tp / float(target.sum().cpu() + .1)
    return precision, recall
def forward(ctx, x: torch.Tensor, spike: torch.Tensor):
    # y = x + spike
    assert x.shape == spike.shape, 'x.shape != spike.shape'  # disable broadcasting
    if spike.dtype == torch.bool:
        spike_bool = spike
    else:
        spike_bool = spike.bool()
    return x + spike_bool
def forward(ctx, v: torch.Tensor, spike: torch.Tensor, v_threshold: float):
    # v = v - spike * v_threshold
    mask = spike.bool()  # positions where a spike was fired
    if spike.requires_grad:
        ctx.v_threshold = v_threshold
    ret = v.clone()
    ret[mask] -= v_threshold
    return ret  # subtract v_threshold at the spiking positions (out-of-place operation)
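# --- Illustrative sketch (not from the original sources) ---
# A standalone "soft reset": after firing, subtract the threshold from the membrane
# potential only at the spiking positions, out-of-place. Values are made up for the example.
import torch

v = torch.tensor([0.3, 1.2, 0.9, 1.5])
v_threshold = 1.0
spike = v >= v_threshold
v_new = v.clone()
v_new[spike] -= v_threshold
print(spike, v_new)  # tensor([False,  True, False,  True]) tensor([0.3000, 0.2000, 0.9000, 0.5000])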
def step(self, Ybar_t: torch.Tensor,
         dec_state: Tuple[torch.Tensor, torch.Tensor],
         enc_hiddens: torch.Tensor,
         enc_hiddens_proj: torch.Tensor,
         enc_masks: torch.Tensor) -> Tuple[Tuple, torch.Tensor, torch.Tensor]:
    """ Compute one forward step of the LSTM decoder, including the attention computation.

    @param Ybar_t (Tensor): Concatenated Tensor of [Y_t o_prev], with shape (b, e + h). The input for the decoder,
        where b = batch size, e = embedding size, h = hidden size.
    @param dec_state (tuple(Tensor, Tensor)): Tuple of tensors both with shape (b, h), where b = batch size,
        h = hidden size. First tensor is decoder's prev hidden state, second tensor is decoder's prev cell.
    @param enc_hiddens (Tensor): Encoder hidden states Tensor, with shape (b, src_len, h * 2), where b = batch size,
        src_len = maximum source length, h = hidden size.
    @param enc_hiddens_proj (Tensor): Encoder hidden states Tensor, projected from (h * 2) to h. Tensor is with shape
        (b, src_len, h), where b = batch size, src_len = maximum source length, h = hidden size.
    @param enc_masks (Tensor): Tensor of sentence masks shape (b, src_len), where b = batch size,
        src_len is maximum source length.
    @returns dec_state (tuple(Tensor, Tensor)): Tuple of tensors both shape (b, h), where b = batch size,
        h = hidden size. First tensor is decoder's new hidden state, second tensor is decoder's new cell.
    @returns combined_output (Tensor): Combined output Tensor at timestep t, shape (b, h), where b = batch size,
        h = hidden size.
    @returns e_t (Tensor): Tensor of shape (b, src_len). It is attention scores distribution.
        Note: You will not use this outside of this function. We are simply returning this value
        so that we can sanity check your implementation.
    """
    combined_output = None

    ### COPY OVER YOUR CODE FROM ASSIGNMENT 4
    # The input to the LSTM layer should be 3-dimensional: [seq, batch, embedding size]
    Ybar_t = torch.unsqueeze(Ybar_t, 0)
    dec_state = (torch.unsqueeze(dec_state[0], 0), torch.unsqueeze(dec_state[1], 0))
    _, dec_state = self.decoder(Ybar_t, dec_state)
    dec_state = (torch.squeeze(dec_state[0], 0), torch.squeeze(dec_state[1], 0))
    dec_hidden = torch.unsqueeze(dec_state[0], 2)
    e_t = torch.squeeze(torch.bmm(enc_hiddens_proj, dec_hidden), 2)
    dec_hidden = dec_state[0]
    ### END YOUR CODE FROM ASSIGNMENT 4

    # Set e_t to -inf where enc_masks has 1
    if enc_masks is not None:
        e_t.data.masked_fill_(enc_masks.bool(), -float('inf'))

    ### COPY OVER YOUR CODE FROM ASSIGNMENT 4
    alpha_t = torch.unsqueeze(nn.functional.softmax(e_t, dim=1), 1)
    a_t = torch.squeeze(torch.bmm(alpha_t, enc_hiddens), 1)
    U_t = torch.cat((a_t, dec_hidden), 1)
    V_t = self.combined_output_projection(U_t)
    O_t = self.dropout(torch.tanh(V_t))
    ### END YOUR CODE FROM ASSIGNMENT 4

    combined_output = O_t
    return dec_state, combined_output, e_t
def forward(self, x: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
    """Forward call."""
    if mask is not None:
        x_mask = (~mask.bool()).float() * (-x.max()).float()
        x = torch.sum(x + x_mask, dim=1, keepdim=True)
    x_out = x.max(1, keepdim=True)[0]
    return x_out
def forward(self, query_ids: torch.Tensor, query_masks: torch.Tensor,
            doc_ids: torch.Tensor, doc_masks: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    query_embed = self._embedder(query_ids)
    doc_embed = self._embedder(doc_ids)

    query_context = self._encoder(
        query_embed,
        ~query_masks.bool().unsqueeze(1).expand(-1, query_masks.size(1), -1))
    doc_context = self._encoder(
        doc_embed,
        ~doc_masks.bool().unsqueeze(1).expand(-1, doc_masks.size(1), -1))

    query_embed = (self._mixer * query_embed + (1 - self._mixer) * query_context)
    doc_embed = (self._mixer * doc_embed + (1 - self._mixer) * doc_context)

    logits = self._matcher(query_embed, query_masks, doc_embed, doc_masks)
    score = self._dense(logits).squeeze(-1)
    return score, logits
def add_sample(self, y_true: torch.Tensor, y_pred):
    y_true = y_true.bool()[None, ...]   # [Thresh, B, C, ...]
    y_pred = y_pred[None, ...]          # [Thresh, B, C, ...]
    # Binarize the predictions at every threshold simultaneously
    y_pred_offset = (y_pred - self._thresholds + 0.5).round().bool()

    self.TP += (y_true & y_pred_offset).sum(dim=self._data_dims).float()
    self.TN += (~y_true & ~y_pred_offset).sum(dim=self._data_dims).float()
    self.FP += (~y_true & y_pred_offset).sum(dim=self._data_dims).float()   # predicted positive, truly negative
    self.FN += (y_true & ~y_pred_offset).sum(dim=self._data_dims).float()   # truly positive, predicted negative
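# --- Illustrative sketch (not from the original sources) ---
# Counting TP/FP at several thresholds at once via broadcasting, in the same style as above.
# The thresholds, labels, and probabilities are made up for the example.
import torch

thresholds = torch.tensor([0.3, 0.5, 0.7]).view(3, 1, 1)   # [Thresh, 1, 1]
y_true = torch.tensor([[1, 0, 1, 0]]).bool()[None, ...]    # [1, B=1, 4]
y_prob = torch.tensor([[0.8, 0.6, 0.4, 0.2]])[None, ...]   # [1, B=1, 4]

y_pred = (y_prob - thresholds + 0.5).round().bool()        # [Thresh, B, 4]
tp = (y_true & y_pred).sum(dim=(1, 2))
fp = (~y_true & y_pred).sum(dim=(1, 2))
print(tp, fp)  # tensor([2, 1, 1]) tensor([1, 1, 0])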
def forward(ctx, x: torch.Tensor, spike: torch.Tensor):
    # y = x - spike
    # Subtracting spike from x is equivalent to subtracting 1 at the positions where spike == 1.
    assert x.shape == spike.shape, 'x.shape != spike.shape'  # disable broadcasting
    mask = spike.bool()
    y = x.clone()
    y[mask] -= 1
    return y
def forward(self, logit: torch.Tensor, label: torch.Tensor) -> (torch.Tensor, list):
    assert logit.shape[1] == 1
    label = label.bool().float()
    # Negative soft Dice: minimizing this loss maximizes the overlap between logit and label.
    loss = -(2.0 * torch.sum(logit * label) + 1e-4) / (torch.sum(logit) + torch.sum(label) + 1e-4)
    return loss, [loss.detach()]
def contrastive_distance_loss(input_vectors: torch.Tensor,
                              ref_vectors: torch.Tensor,
                              labels_one_hot: torch.Tensor,
                              p: Optional[Union[float, str]] = 2,
                              temperature: float = 1.) -> Tuple[torch.Tensor, torch.Tensor]:
    dist = l_distance(input_vectors, ref_vectors, p=p)
    sim = -dist / temperature
    total_sim = torch.logsumexp(sim, dim=-1)
    loss = -torch.mean(sim.masked_select(labels_one_hot.bool()) - total_sim)
    pred = torch.argmax(sim, dim=-1)
    return loss, pred
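# --- Illustrative sketch (not from the original sources) ---
# A self-contained distance-based contrastive loss: similarities are negative L2 distances
# to a set of reference vectors, and the loss is the usual log-softmax cross-entropy over
# them. torch.cdist stands in for l_distance here; that substitution is an assumption.
import torch

inputs = torch.randn(4, 16)          # batch of embeddings
refs = torch.randn(10, 16)           # reference/prototype vectors
labels = torch.randint(0, 10, (4,))
labels_one_hot = torch.nn.functional.one_hot(labels, num_classes=10)

sim = -torch.cdist(inputs, refs, p=2) / 1.0                                          # (4, 10)
loss = -(sim.masked_select(labels_one_hot.bool()) - torch.logsumexp(sim, dim=-1)).mean()
pred = sim.argmax(dim=-1)
print(loss, (pred == labels).float().mean())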
def step(self, Ybar_t: torch.Tensor,
         dec_state: Tuple[torch.Tensor, torch.Tensor],
         enc_hiddens: torch.Tensor,
         enc_hiddens_proj: torch.Tensor,
         enc_masks: torch.Tensor) -> Tuple[Tuple, torch.Tensor, torch.Tensor]:
    """ Compute one forward step of the LSTM decoder, including the attention computation.

    @param Ybar_t (Tensor): Concatenated Tensor of [Y_t o_prev], with shape (b, e + h). The input for the decoder,
        where b = batch size, e = embedding size, h = hidden size.
    @param dec_state (tuple(Tensor, Tensor)): Tuple of tensors both with shape (b, h), where b = batch size,
        h = hidden size. First tensor is decoder's prev hidden state, second tensor is decoder's prev cell.
    @param enc_hiddens (Tensor): Encoder hidden states Tensor, with shape (b, src_len, h * 2), where b = batch size,
        src_len = maximum source length, h = hidden size.
    @param enc_hiddens_proj (Tensor): Encoder hidden states Tensor, projected from (h * 2) to h. Tensor is with shape
        (b, src_len, h), where b = batch size, src_len = maximum source length, h = hidden size.
    @param enc_masks (Tensor): Tensor of sentence masks shape (b, src_len), where b = batch size,
        src_len is maximum source length.
    @returns dec_state (tuple(Tensor, Tensor)): Tuple of tensors both shape (b, h), where b = batch size,
        h = hidden size. First tensor is decoder's new hidden state, second tensor is decoder's new cell.
    @returns combined_output (Tensor): Combined output Tensor at timestep t, shape (b, h), where b = batch size,
        h = hidden size.
    @returns e_t (Tensor): Tensor of shape (b, src_len). It is attention scores distribution.
        Note: You will not use this outside of this function. We are simply returning this value
        so that we can sanity check your implementation.
    """
    # Apply the decoder to `Ybar_t` and the previous decoder hidden states to obtain the new decoder hidden states.
    # `Ybar_t` is the new input vector to the decoder at timestep t.
    dec_state = self.decoder(Ybar_t, dec_state)
    # Split dec_state into its two parts (dec_hidden, dec_cell)
    (dec_hidden, dec_cell) = dec_state
    # Compute the attention scores e_t
    e_t = torch.bmm(enc_hiddens_proj, dec_hidden.unsqueeze(2)).squeeze(2)

    # Set e_t to -inf where enc_masks has 1
    if enc_masks is not None:
        e_t.data.masked_fill_(enc_masks.bool(), -float('inf'))

    # Apply softmax to e_t to get alpha_t
    alpha_t = F.softmax(e_t, dim=-1)
    alpha_t_view = (alpha_t.size(0), 1, alpha_t.size(1))
    # Obtain the attention output vector a_t
    a_t = torch.bmm(alpha_t.view(*alpha_t_view), enc_hiddens).squeeze(1)
    # Concatenate dec_hidden with a_t to compute U_t
    U_t = torch.cat([dec_hidden, a_t], 1)
    # Apply the combined output projection to compute V_t
    V_t = self.combined_output_projection(U_t)
    # Compute the tensor O_t by applying tanh and dropout
    O_t = self.dropout(torch.tanh(V_t))

    combined_output = O_t
    return dec_state, combined_output, e_t
def my_accuracy_thresh(
    y_pred: Tensor,
    y_true: Tensor,
    thresh: float = 0.7,
    sigmoid: bool = False,
):
    "Compute accuracy when `y_pred` and `y_true` are the same size."
    if sigmoid:
        y_pred = y_pred.sigmoid()
    return ((y_pred > thresh) == y_true.bool()).float().mean().item()
def forward(self, outs: torch.Tensor, labs: torch.Tensor) -> list:
    assert outs.shape[0] == 1
    labs = labs.bool().cuda().float()
    outs = (outs.cuda() > 0.5).float()
    # Thresholded Dice coefficient (higher is better), returned detached for logging.
    loss = (2.0 * torch.sum(outs * labs) + 1e-4) / (torch.sum(outs) + torch.sum(labs) + 1e-4)
    return [loss.detach()]
def multilabel_accuracy(y_true: torch.Tensor, y_pred: torch.Tensor,
                        threshhold: float = 0.5) -> float:
    """Calculates the accuracy of a multilabel classification as the intersection over union of
    predicted and correct classes.

    Args:
        y_true (torch.Tensor): Correct classes
        y_pred (torch.Tensor): Predicted probabilities
        threshhold (float, optional): Where to cut predicted probabilities. Defaults to 0.5.

    Returns:
        float: Mean intersection over union of predicted and correct classes across the batch.
    """
    pred_labels = y_pred > threshhold
    intersect = pred_labels & y_true.bool()
    union = pred_labels | y_true.bool()
    acc = intersect.sum(dim=1).float() / union.sum(dim=1).float()
    return acc.mean().item()
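# --- Illustrative sketch (not from the original sources) ---
# Toy example of the IoU-style multilabel accuracy above: two samples, four labels,
# per-sample IoU averaged over the batch. Values are made up for the example.
import torch

y_true = torch.tensor([[1, 0, 1, 0],
                       [0, 1, 0, 0]]).float()
y_prob = torch.tensor([[0.9, 0.2, 0.4, 0.1],
                       [0.1, 0.8, 0.7, 0.1]])
pred = y_prob > 0.5
iou = (pred & y_true.bool()).sum(1).float() / (pred | y_true.bool()).sum(1).float()
print(iou, iou.mean())  # tensor([0.5000, 0.5000]) tensor(0.5000)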
def step(self, Ybar_t: torch.Tensor,
         dec_state: Tuple[torch.Tensor, torch.Tensor],
         enc_hiddens: torch.Tensor,
         enc_hiddens_proj: torch.Tensor,
         enc_masks: torch.Tensor) -> Tuple[Tuple, torch.Tensor, torch.Tensor]:
    """ Compute one forward step of the LSTM decoder, including the attention computation.

    @param Ybar_t (Tensor): Concatenated Tensor of [Y_t o_prev], with shape (b, e + h). The input for the decoder,
        where b = batch size, e = embedding size, h = hidden size.
    @param dec_state (tuple(Tensor, Tensor)): Tuple of tensors both with shape (b, h), where b = batch size,
        h = hidden size. First tensor is decoder's prev hidden state, second tensor is decoder's prev cell.
    @param enc_hiddens (Tensor): Encoder hidden states Tensor, with shape (b, src_len, h * 2), where b = batch size,
        src_len = maximum source length, h = hidden size.
    @param enc_hiddens_proj (Tensor): Encoder hidden states Tensor, projected from (h * 2) to h. Tensor is with shape
        (b, src_len, h), where b = batch size, src_len = maximum source length, h = hidden size.
    @param enc_masks (Tensor): Tensor of sentence masks shape (b, src_len), where b = batch size,
        src_len is maximum source length.
    @returns dec_state (tuple(Tensor, Tensor)): Tuple of tensors both shape (b, h), where b = batch size,
        h = hidden size. First tensor is decoder's new hidden state, second tensor is decoder's new cell.
    @returns combined_output (Tensor): Combined output Tensor at timestep t, shape (b, h), where b = batch size,
        h = hidden size.
    @returns e_t (Tensor): Tensor of shape (b, src_len). It is attention scores distribution.
        Note: You will not use this outside of this function. We are simply returning this value
        so that we can sanity check your implementation.
    """
    combined_output = None

    dec_state = self.decoder(Ybar_t, dec_state)
    dec_hidden, dec_cell = dec_state
    # print(enc_hiddens_proj.shape, dec_hidden.shape)
    e_t = torch.bmm(enc_hiddens_proj, dec_hidden.unsqueeze(2))
    e_t = e_t.squeeze(2)
    ### END YOUR CODE

    # Set e_t to -inf where enc_masks has 1
    if enc_masks is not None:
        e_t.data.masked_fill_(enc_masks.bool(), -float('inf'))
        e_t[enc_masks == 1] = float("-inf")  # redundant with the masked_fill_ above, kept for safety

    softmax = nn.Softmax(dim=1)
    alpha_t = softmax(e_t)
    alpha_t = alpha_t.unsqueeze(1)
    # torch.bmm: (b, n, m) x (b, m, p)
    a_t = torch.bmm(alpha_t, enc_hiddens)
    a_t = a_t.squeeze(1)
    U_t = torch.cat([a_t, dec_hidden], dim=1)
    v_t = self.combined_output_projection(U_t)
    O_t = self.dropout(torch.tanh(v_t))
    ### END YOUR CODE

    combined_output = O_t
    return dec_state, combined_output, e_t
def step(self, Ybar_t: torch.Tensor,
         dec_state: Tuple[torch.Tensor, torch.Tensor],
         enc_hiddens: torch.Tensor,
         enc_hiddens_proj: torch.Tensor,
         enc_masks: torch.Tensor) -> Tuple[Tuple, torch.Tensor, torch.Tensor]:
    """ Compute one forward step of the LSTM decoder, including the attention computation.

    @param Ybar_t (Tensor): Concatenated Tensor of [Y_t o_prev], with shape (b, e + h). The input for the decoder,
        where b = batch size, e = embedding size, h = hidden size.
    @param dec_state (tuple(Tensor, Tensor)): Tuple of tensors both with shape (b, h), where b = batch size,
        h = hidden size. First tensor is decoder's prev hidden state, second tensor is decoder's prev cell.
    @param enc_hiddens (Tensor): Encoder hidden states Tensor, with shape (b, src_len, h * 2), where b = batch size,
        src_len = maximum source length, h = hidden size.
    @param enc_hiddens_proj (Tensor): Encoder hidden states Tensor, projected from (h * 2) to h. Tensor is with shape
        (b, src_len, h), where b = batch size, src_len = maximum source length, h = hidden size.
    @param enc_masks (Tensor): Tensor of sentence masks shape (b, src_len), where b = batch size,
        src_len is maximum source length.
    @returns dec_state (tuple(Tensor, Tensor)): Tuple of tensors both shape (b, h), where b = batch size,
        h = hidden size. First tensor is decoder's new hidden state, second tensor is decoder's new cell.
    @returns combined_output (Tensor): Combined output Tensor at timestep t, shape (b, h), where b = batch size,
        h = hidden size.
    @returns e_t (Tensor): Tensor of shape (b, src_len). It is attention scores distribution.
        Note: You will not use this outside of this function. We are simply returning this value
        so that we can sanity check your implementation.
    """
    combined_output = None

    dec_hiddens, dec_state = self.decoder(
        Ybar_t.unsqueeze(0),
        (dec_state[0].unsqueeze(0), dec_state[1].unsqueeze(0)))
    dec_state = (dec_state[0].squeeze(0), dec_state[1].squeeze(0))
    dec_hidden, dec_cell = dec_state
    if dec_hidden.ndimension() == 2:
        dec_hidden = dec_hidden.unsqueeze(1)
    else:
        dec_hidden = dec_hidden.squeeze(0).unsqueeze(1)
    e_t = torch.bmm(dec_hidden, enc_hiddens_proj.permute(0, 2, 1))
    e_t = e_t.permute(0, 2, 1).squeeze(2)

    # Set e_t to -inf where enc_masks has 1
    if enc_masks is not None:
        e_t.data.masked_fill_(enc_masks.bool(), -float('inf'))

    alpha_t = F.softmax(e_t, dim=1)
    a_t = torch.bmm(alpha_t.unsqueeze(1), enc_hiddens)
    a_t = a_t.squeeze(1)
    if dec_hidden.ndimension() == 3 and a_t.ndimension() == 2:
        dec_hidden = dec_hidden.squeeze(1)
    U_t = torch.cat((dec_hidden, a_t), dim=1)
    V_t = self.combined_output_projection(U_t)
    O_t = self.dropout(V_t.tanh())

    combined_output = O_t
    return dec_state, combined_output, e_t
def forward(self, indicator: torch.Tensor, upostag: torch.Tensor,
            words: torch.Tensor = None, mask: torch.Tensor = None,
            seq_lens: torch.Tensor = None, labels: torch.Tensor = None,
            **kwargs) -> Tuple[Union[torch.Tensor, List, Dict]]:
    feat = self.word_embedding(words, mask=mask, **kwargs)
    if self.word_transform is not None:
        feat = self.word_transform(feat)

    embs = [self.indicator_embedding(indicator), self.pos_embedding(upostag)]

    if self.depsawr_forward is not None:
        dep_time = time.time()
        dep_emb = self.depsawr_mix(
            self.depsawr_forward(kwargs['dw'], kwargs['ew'], mask, self.projections), mask)
        dep_time = time.time() - dep_time
        embs.append(dep_emb)
    else:
        dep_time = 0

    feat = torch.cat([feat, *embs], dim=-1)
    feat = self.word_dropout(feat)
    if self.encoder is not None:
        feat = self.encoder(feat, seq_lens, **kwargs)
        feat = self.word_dropout(feat)

    scores = self.tag_projection_layer(feat)
    output = {'scores': scores, 'dep_time': dep_time}

    if not self.training:
        best_paths = self.crf.viterbi_tags(scores, mask, top_k=self.top_k)
        # Just get the top tags and ignore the scores.
        predicted_tags = cast(List[List[int]], [x[0][0] for x in best_paths])
        output['predicted_tags'] = predicted_tags

    if labels is not None:
        # Add negative log-likelihood as loss
        output['loss'] = -self.crf(scores, labels, mask.bool())
        if not self.training:
            predicted = torch.zeros_like(labels)
            for i, tags in enumerate(predicted_tags):
                predicted[i, :len(tags)] = torch.tensor(tags, dtype=torch.long, device=labels.device)
            output['metric'] = self.metric(indicator, mask, predicted, labels)

    return output