def __init__(self, in_features, out_features, _dim_noise_input):
    """Set up the noise distribution and the network that maps noise to a parameter vector.

    Args:
        in_features: Input width used to size the generated parameter vector.
        out_features: Output width used to size the generated parameter vector.
        _dim_noise_input: Length of the noise vector fed to ``sivi_net``.
    """
    super().__init__()
    self.dim_input = in_features
    self.dim_output = out_features
    self.dim_noise_input = _dim_noise_input

    # weight matrix x (mu, logsigma) + bias x (mu, logsigma)
    weight_entries = in_features * out_features * 2
    bias_entries = out_features * 2
    self.dim_output_params = weight_entries + bias_entries

    # Hidden width: the smaller of the output size and the input/output midpoint.
    midpoint = int((_dim_noise_input + self.dim_output_params) / 2)
    self.num_hidden = np.min([self.dim_output_params, midpoint])
    self.prior_sigma = torch.scalar_tensor(1.0)

    layers = [
        Linear(self.dim_noise_input, self.num_hidden),
        Tanh(),
        Linear(self.num_hidden, self.num_hidden),
        Tanh(),
        Linear(self.num_hidden, self.dim_output_params),
    ]
    self.sivi_net = Sequential(*layers)

    # Standard normal noise with one entry per noise dimension.
    noise_shape = (self.dim_noise_input, )
    self.noise_dist = Normal(loc=torch.zeros(noise_shape),
                             scale=torch.ones(noise_shape))
    self.sivi_net.apply(self.init_weights)
def compute_score(self, references: List[str],
                  hypothesis: List[List[str]]) -> Tensor:
    """Return the ROUGE-L F-score reported by ``self.rouge`` as a scalar tensor.

    Args:
        references: Reference strings, passed first to ``self.rouge.evaluate``.
        hypothesis: Hypotheses, passed second to ``self.rouge.evaluate``.

    Returns:
        A 0-dim tensor holding the ``'f'`` entry of the ``'rouge-l'`` result.
    """
    # 'rouge-l' holds three scores: recall 'r', precision 'p' and f-score 'f'.
    rouge_l = self.rouge.evaluate(references, hypothesis)['rouge-l']
    return torch.scalar_tensor(rouge_l['f'])
def _create_classification_targets(self, groundtruth_labels, match: Match):
    """Create classification targets for each anchor.

    Assign a classification target to each anchor from the matching
    groundtruth label that is provided by match. Anchors that are not
    matched to anything (or are ignored) are given the target
    self._unmatched_cls_target, or a scalar 0 on the labels' device when
    that attribute is None.

    Args:
      groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k]
        with labels for each of the ground_truth boxes. The subshape
        [d_1, ... d_k] can be empty (corresponding to scalar labels).
      match: a matcher.Match object that provides a matching between anchors
        and groundtruth boxes.

    Returns:
      a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the
      subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has
      shape [num_gt_boxes, d_1, d_2, ... d_k].
    """
    fill_value = self._unmatched_cls_target
    if fill_value is None:
        fill_value = torch.scalar_tensor(0, device=groundtruth_labels.device)
    # The same value is used for both unmatched and ignored anchors.
    return match.gather_based_on_match(groundtruth_labels,
                                       unmatched_value=fill_value,
                                       ignored_value=fill_value)
def fit(self, X, y, max_epochs):
    """
    Learn the weight vector with one gradient update per sample.

    Every 100th epoch the loss is printed and training stops early when the
    change in loss falls below a relative threshold.

    :param X: Input Tensor
    :param y: Output tensor
    :param max_epochs: Number of epochs the complete dataset is passed through the model
    :return: learned weight of the svm model
    """
    n_features = X.shape[1]
    init_scale = torch.sqrt(torch.scalar_tensor(1. / n_features))
    weight = torch.randn((1, n_features), dtype=torch.double) * init_scale

    cost_threshold = 0.0001
    previous_cost = float('inf')

    for epoch in range(1, max_epochs + 1):
        X, y = shuffle(X, y)
        for idx, x in enumerate(X):
            sample = torch.tensor(x).unsqueeze(0)
            weight_update = self.gradient_update(weight, sample, y[idx])
            weight = weight - (self.learning_rate * weight_update)

        if epoch % 100 != 0:
            continue

        cost = self.loss(X, weight, y)
        print(f'Loss at epoch {epoch}: {cost}')
        # Stop once the loss moved by less than 0.01% of its previous value.
        if abs(previous_cost - cost) < cost_threshold * previous_cost:
            return weight
        previous_cost = cost
    return weight
def training_step(self, batch, batch_idx):
    """Compute the training loss for one batch and build the logging payload.

    The batch is split along dimension 1 into the first ``self.n_steps_past``
    steps (model input) and the remaining steps (target).

    Args:
        batch: 5-D tensor; presumably (batch, time, H, W, channels) - TODO confirm.
        batch_idx: Index of the batch (unused).

    Returns:
        dict with ``'loss'`` and a ``'log'`` dict holding ``'train_mse_loss'``
        and ``'learning_rate'``.
    """
    past = batch[:, 0:self.n_steps_past, :, :, :]
    future = batch[:, self.n_steps_past:, :, :, :]
    x = past.permute(0, 1, 4, 2, 3)
    y = future.squeeze()
    y_hat = self.forward(x).squeeze()  # is squeeze necessary?

    # Permutation added so that y and y_hat line up in the loss function
    # (noted by the original author as possibly not the right solution).
    y_hat = y_hat.permute(0, 2, 3, 4, 1)

    loss = self.criterion(y_hat, y)

    # Log the current learning rate on the same device as the loss. The
    # previous unconditional ``.cuda()`` crashed on CPU-only machines.
    lr_saved = self.trainer.optimizers[0].param_groups[-1]['lr']
    lr_saved = torch.scalar_tensor(lr_saved, device=loss.device)

    # save predicted images every 250 global_step
    if self.log_images and self.global_step % 250 == 0:
        final_image = self.create_video(x, y_hat, y)
        self.logger.experiment.add_image(
            'epoch_' + str(self.current_epoch) + '_step' +
            str(self.global_step) + '_generated_images', final_image, 0)
        plt.close()

    tensorboard_logs = {'train_mse_loss': loss, 'learning_rate': lr_saved}
    return {'loss': loss, 'log': tensorboard_logs}
def __init__(self,
             num_embeddings: int,
             embedding_dim: int,
             max_len: int = 1024,
             dropout: float = 0.1):
    """Build the embedding together with fixed sinusoidal position encodings.

    Args:
        num_embeddings: Forwarded to the parent initializer.
        embedding_dim: Forwarded to the parent initializer; also the width of
            the position encodings.
        max_len: Number of positions for which encodings are precomputed.
        dropout: Probability used for the dropout layer.
    """
    self.num_embeddings = num_embeddings
    self.embedding_dim = embedding_dim
    self.max_len = max_len
    self.dropout = dropout

    # positions: (max_len, 1)
    positions = torch.arange(self.max_len).unsqueeze(1)
    # divisors: (emb_dim,), 10000 ** (j / emb_dim) for each column j
    exponents = torch.arange(self.embedding_dim).float() / self.embedding_dim
    divisors = torch.pow(10000, exponents)

    # table: (max_len, emb_dim); sine on even columns, cosine on odd columns
    table = torch.zeros(self.max_len, self.embedding_dim)
    table[:, 0::2] = torch.sin(positions / divisors[0::2])
    table[:, 1::2] = torch.cos(positions / divisors[1::2])
    # table: (max_len, 1, emb_dim)
    table = table.unsqueeze(1)

    scale = torch.sqrt(torch.scalar_tensor(self.embedding_dim))

    # The parent initializer has to run before buffers can be registered.
    super().__init__(num_embeddings, embedding_dim, padding_idx=0)

    # Register non-learnable params as buffers
    self.register_buffer('pos_embs', table)
    self.register_buffer('sqrt_dim', scale)

    # Create dropout layer
    self.dropout_layer = torch.nn.Dropout(p=self.dropout)
def heatmaps_to_keypoints(maps, rois):
    """Extract predicted keypoint locations from heatmaps.

    Returns a pair ``(xy_preds, end_scores)``. ``xy_preds`` has shape
    (#rois, #keypoints, 3) with the last axis holding (x, y, 1) for each
    keypoint; ``end_scores`` has shape (#rois, #keypoints) and holds the
    resized heatmap value at the selected location.
    """
    # This function converts a discrete image coordinate in a HEATMAP_SIZE x
    # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain
    # consistency with keypoints_to_heatmap_labels by using the conversion from
    # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a
    # continuous coordinate.
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]

    # Box extents are clamped to at least 1 before rounding up.
    widths = (rois[:, 2] - rois[:, 0]).clamp(min=1)
    heights = (rois[:, 3] - rois[:, 1]).clamp(min=1)
    widths_ceil = widths.ceil()
    heights_ceil = heights.ceil()

    num_keypoints = maps.shape[1]

    if torchvision._is_tracing():
        xy_preds, end_scores = _onnx_heatmaps_to_keypoints_loop(
            maps, rois, widths_ceil, heights_ceil, widths, heights,
            offset_x, offset_y,
            torch.scalar_tensor(num_keypoints, dtype=torch.int64))
        return xy_preds.permute(0, 2, 1), end_scores

    num_rois = len(rois)
    xy_preds = torch.zeros((num_rois, 3, num_keypoints),
                           dtype=torch.float32,
                           device=maps.device)
    end_scores = torch.zeros((num_rois, num_keypoints),
                             dtype=torch.float32,
                             device=maps.device)

    for i in range(num_rois):
        roi_map_width = int(widths_ceil[i].item())
        roi_map_height = int(heights_ceil[i].item())
        width_correction = widths[i] / roi_map_width
        height_correction = heights[i] / roi_map_height

        # Resize this roi's heatmaps to the (rounded-up) box size.
        resized = torch.nn.functional.interpolate(
            maps[i][None],
            size=(roi_map_height, roi_map_width),
            mode='bicubic',
            align_corners=False)
        roi_map = resized[0]

        # Flat argmax per keypoint, then split into column and row indices.
        w = roi_map.shape[2]
        pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)
        x_int = pos % w
        y_int = tensor_floordiv((pos - x_int), w)

        x = (x_int.float() + 0.5) * width_correction
        y = (y_int.float() + 0.5) * height_correction

        xy_preds[i, 0, :] = x + offset_x[i]
        xy_preds[i, 1, :] = y + offset_y[i]
        xy_preds[i, 2, :] = 1
        end_scores[i, :] = roi_map[torch.arange(num_keypoints), y_int, x_int]

    return xy_preds.permute(0, 2, 1), end_scores
def f(k, n, P1, P2):
    """Accumulate ``fn`` over every ``n0`` from ``sum(k)`` to ``(sum(k) + n) // 2``.

    For each ``n0`` the second argument passed to ``fn`` is the array
    ``[n0, sum(k) + n - 2 * n0, n0 - sum(k)]``.

    Returns:
        The accumulated value as a tensor (a scalar 0 when the range is empty).
    """
    kappa = sum(k)
    total = kappa + n
    last_n0 = total // 2

    result = torch.scalar_tensor(0)
    for n0 in range(kappa, last_n0 + 1):
        counts = numpy.array([n0, total - 2 * n0, n0 - kappa])
        result += fn(k, counts, P1, P2)
    return result
def finish_episode():
    """Run one optimisation step from the rollouts stored on ``policy``.

    Reads ``policy.rewards`` and ``policy.saved_log_probs`` (one list per
    episode, the latter holding ``(log_prob, value)`` pairs), builds the
    policy and value losses, steps ``optimizer`` and then clears both
    buffers. Relies on the module-level ``policy``, ``optimizer``, ``args``
    and ``is_cuda``.
    """
    policy_loss = []
    value_loss = []
    rewards = []

    for episode_id, episode_reward_list in enumerate(policy.rewards):
        last_step = len(episode_reward_list) - 1
        for i, r in enumerate(episode_reward_list):
            if i == last_step:
                # Final step of the episode: the target is the raw reward.
                R = torch.scalar_tensor(r)
            else:
                # One-step target: r + gamma * V(s_{t+1}).
                next_value = policy.saved_log_probs[episode_id][i + 1][1]
                R = r + args.gamma * next_value
            rewards.append(R)

    if is_cuda:
        # ``rewards`` is a Python list, so each tensor is moved individually
        # (calling ``.cuda()`` on the list itself raised AttributeError).
        rewards = [reward.cuda() for reward in rewards]

    flatten_log_probs = [
        sample for episode in policy.saved_log_probs for sample in episode
    ]
    assert len(flatten_log_probs) == len(rewards)

    for (log_prob, value), reward in zip(flatten_log_probs, rewards):
        advantage = reward - value  # A(s,a) = r + gamma V(s_t+1) - V(s_t)
        policy_loss.append(-log_prob * advantage)  # policy gradient
        value_loss.append(
            F.smooth_l1_loss(value.reshape(-1),
                             reward.reshape(-1)))  # value function approximation

    optimizer.zero_grad()
    policy_loss = torch.stack(policy_loss).sum()
    value_loss = torch.stack(value_loss).sum()
    # The loss already lives on the device of its inputs; the former
    # ``loss.cuda()`` call discarded its result and had no effect.
    loss = policy_loss + value_loss
    loss.backward()
    optimizer.step()

    del policy.rewards[:]
    del policy.saved_log_probs[:]
def __init__(
    self,
    reduction: str = 'mean',
    dim: Optional[int] = -1,
    epsilon: float = DEFAULT_EPSILON,
    base: Optional[float] = None,
    log_input: bool = False,
):
    """
    Compute the entropy of a distribution.

    :param reduction: The reduction used between batch entropies. (default: 'mean')
    :param dim: The dimension to apply the sum in entropy formula. (default: -1)
    :param epsilon: The epsilon precision to use. Must be a small positive float. (default: DEFAULT_EPSILON)
    :param base: The log-base used. If None, use the natural logarithm (i.e. base e). (default: None)
    :param log_input: If True, the input must be log-probabilities. (default: False)
    """
    super().__init__()
    self.reduce_fn = get_reduction_from_name(reduction)
    self.dim = dim
    self.epsilon = epsilon
    self.log_input = log_input

    if base is not None:
        # Change of base: log_b(x) = ln(x) / ln(b); ln(b) is computed once.
        log_base = torch.log(torch.scalar_tensor(base))

        def _log_in_base(x):
            return torch.log(x) / log_base

        self.log_func = _log_in_base
    else:
        self.log_func = torch.log
def compute_score(self, candidate: str, references: List[str]) -> Tensor:
    """Return the ``meteor_score`` of ``candidate`` against ``references`` as a scalar tensor.

    The ``alpha``, ``beta`` and ``gamma`` settings are taken from the instance.
    """
    meteor_params = {
        'alpha': self.alpha,
        'beta': self.beta,
        'gamma': self.gamma,
    }
    value = meteor_score(references, candidate, **meteor_params)
    return torch.scalar_tensor(value)
def reduce_metrics(self, logging_outputs, criterion):
    """Aggregate character/word error counts and register derived error rates.

    After delegating to the parent implementation, the four counters are
    summed over ``logging_outputs`` and logged; ``"uer"`` and ``"wer"`` are
    registered as derived metrics only when their denominator is positive.
    """
    super().reduce_metrics(logging_outputs, criterion)

    zero = torch.scalar_tensor(0.0)

    def _total(key):
        # Logs that lack the key contribute a zero tensor.
        return sum(log.get(key, zero) for log in logging_outputs)

    def _percentage(errors_key, count_key):
        # Derived metric: errors * 100 / count, or NaN when count is zero.
        def _rate(meters):
            if meters[count_key].sum > 0:
                return meters[errors_key].sum * 100.0 / meters[count_key].sum
            return float("nan")

        return _rate

    num_char_errors = _total("_num_char_errors")
    num_chars = _total("_num_chars")
    num_word_errors = _total("_num_word_errors")
    num_words = _total("_num_words")

    metrics.log_scalar("_num_char_errors", num_char_errors)
    metrics.log_scalar("_num_chars", num_chars)
    metrics.log_scalar("_num_word_errors", num_word_errors)
    metrics.log_scalar("_num_words", num_words)

    if num_chars > 0:
        metrics.log_derived("uer", _percentage("_num_char_errors", "_num_chars"))
    if num_words > 0:
        metrics.log_derived("wer", _percentage("_num_word_errors", "_num_words"))
def __init__(self,
             _dim_in=-1,
             _dim_out=-1,
             _dim_noise_input=10,
             _kernel_size=1,
             _stride=1):
    """Set up the noise distribution and the network that maps noise to a parameter vector.

    Args:
        _dim_in: Input channel count used to size the parameter vector.
        _dim_out: Output channel count used to size the parameter vector.
        _dim_noise_input: Length of the noise vector fed to ``sivi_net``.
        _kernel_size: Kernel side length (the kernel is treated as square).
        _stride: Stored as ``self.stride``.
    """
    super().__init__()
    self.dim_in = _dim_in
    self.dim_out = _dim_out
    self.dim_noise_in = _dim_noise_input
    self.kernel_size = _kernel_size
    self.stride = _stride

    # weight matrix x (mu, logsigma) + bias x (mu, logsigma)
    weight_entries = _dim_in * _dim_out * _kernel_size * _kernel_size * 2
    bias_entries = _dim_out * 2
    self.dim_params = weight_entries + bias_entries

    # Hidden width: the smaller of the output size and the input/output midpoint.
    midpoint = int((_dim_noise_input + self.dim_params) / 2)
    self.num_hidden = np.min([self.dim_params, midpoint])
    self.prior_sigma = torch.scalar_tensor(1.0)

    layers = [
        Linear(self.dim_noise_in, self.num_hidden),
        ReLU(),
        Linear(self.num_hidden, self.num_hidden),
        ReLU(),
        Linear(self.num_hidden, self.dim_params),
    ]
    self.sivi_net = Sequential(*layers)

    # Standard normal noise with one entry per noise dimension.
    noise_shape = (self.dim_noise_in, )
    self.noise_dist = Normal(loc=torch.zeros(noise_shape),
                             scale=torch.ones(noise_shape))
def get_loss(self, tensor_dicts: List[TensorDict]):
    """Sum the weighted loss over every entry of ``tensor_dicts``.

    For each entry, the tensors stored under ``self.input_tensor_key`` and
    ``self.target_tensor_key`` are passed to ``self.loss_function.forward``
    and the result is scaled by ``self.weight``.

    Returns:
        The accumulated loss; a float scalar 0 on ``device`` when the list is empty.
    """
    loss = torch.scalar_tensor(0).float().to(device)
    for entry in tensor_dicts:
        prediction = entry.get(self.input_tensor_key)
        target = entry.get(self.target_tensor_key)
        entry_loss = self.loss_function.forward(prediction, target)
        loss += self.weight * entry_loss
    return loss
def _onnx_heatmaps_to_keypoints(maps, maps_i, roi_map_width, roi_map_height, widths_i, heights_i, offset_x_i, offset_y_i): num_keypoints = torch.scalar_tensor(maps.size(1), dtype=torch.int64) width_correction = widths_i / roi_map_width height_correction = heights_i / roi_map_height roi_map = F.interpolate(maps_i[:, None], size=(int(roi_map_height), int(roi_map_width)), mode="bicubic", align_corners=False)[:, 0] w = torch.scalar_tensor(roi_map.size(2), dtype=torch.int64) pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1) x_int = pos % w y_int = (pos - x_int) // w x = (torch.tensor(0.5, dtype=torch.float32) + x_int.to(dtype=torch.float32) ) * width_correction.to(dtype=torch.float32) y = (torch.tensor(0.5, dtype=torch.float32) + y_int.to(dtype=torch.float32) ) * height_correction.to(dtype=torch.float32) xy_preds_i_0 = x + offset_x_i.to(dtype=torch.float32) xy_preds_i_1 = y + offset_y_i.to(dtype=torch.float32) xy_preds_i_2 = torch.ones(xy_preds_i_1.shape, dtype=torch.float32) xy_preds_i = torch.stack( [ xy_preds_i_0.to(dtype=torch.float32), xy_preds_i_1.to(dtype=torch.float32), xy_preds_i_2.to(dtype=torch.float32), ], 0, ) # TODO: simplify when indexing without rank will be supported by ONNX base = num_keypoints * num_keypoints + num_keypoints + 1 ind = torch.arange(num_keypoints) ind = ind.to(dtype=torch.int64) * base end_scores_i = (roi_map.index_select( 1, y_int.to(dtype=torch.int64)).index_select( 2, x_int.to(dtype=torch.int64)).view(-1).index_select( 0, ind.to(dtype=torch.int64))) return xy_preds_i, end_scores_i
def __init__(
    self,
    std: float = 5.0,
    normalize: bool = True,
):
    """Store the scaled standard deviation as a buffer and keep the normalize flag.

    Args:
        std: Standard deviation, multiplied by ``pi / 100`` before it is
            stored (presumably a percentage of pi - TODO confirm).
        normalize: Stored as ``self.normalize``.
    """
    super(GeodesicGaussian, self).__init__()
    scaled_std = std / 100.0 * np.pi
    self.register_buffer("std", torch.scalar_tensor(scaled_std))
    self.normalize = normalize
def weight_initialization(self, n_features):
    """
    Draw the initial weight vector from a uniform distribution.

    The weights are sampled from U(-limit, limit) with
    limit = 1 / sqrt(n_features) and stored in ``self.w`` as a double tensor
    of shape (n_features, 1).

    :param n_features: Number of features in the data
    :return: None; the weight vector is stored on the instance.
    """
    limit = 1 / torch.sqrt(torch.scalar_tensor(n_features))
    sampler = torch.distributions.uniform.Uniform(-limit, limit)
    sampled = sampler.sample((n_features, 1))
    self.w = sampled.type(torch.DoubleTensor)
def paste_masks_in_image(masks, boxes, img_shape, padding=1):
    # type: (Tensor, Tensor, Tuple[int, int], int)
    """Paste every mask into an image-sized canvas at its box location.

    Masks and boxes are first expanded with ``padding``. While tracing, the
    work is delegated to ``_onnx_paste_masks_in_image_loop``; otherwise each
    mask is pasted with ``paste_mask_in_image`` and the results are stacked.

    Returns:
        A tensor with a singleton dimension inserted at position 1; an empty
        (0, 1, im_h, im_w) tensor when there are no masks.
    """
    masks, scale = expand_masks(masks, padding=padding)
    boxes = expand_boxes(boxes, scale).to(dtype=torch.int64)
    im_h, im_w = img_shape

    if torchvision._is_tracing():
        traced = _onnx_paste_masks_in_image_loop(
            masks, boxes,
            torch.scalar_tensor(im_h, dtype=torch.int64),
            torch.scalar_tensor(im_w, dtype=torch.int64))
        return traced[:, None]

    pasted = [
        paste_mask_in_image(mask[0], box, im_h, im_w)
        for mask, box in zip(masks, boxes)
    ]
    if len(pasted) > 0:
        ret = torch.stack(pasted, dim=0)[:, None]
    else:
        ret = masks.new_empty((0, 1, im_h, im_w))
    return ret
def batch_symmetric_tensor(inputs: Tensor, permutation_group: List[List[int]]):
    """Average every batch element with its permuted copies.

    For each batch element ``x`` the result is
    ``(x + sum(x.permute(sigma) for sigma in permutation_group)) / (len(permutation_group) + 1)``.

    ``inputs`` is left untouched. The previous implementation aliased the
    output to ``inputs``, so the caller's tensor was overwritten and every
    permutation after the first was applied to already-accumulated values.

    Args:
        inputs: Tensor whose first dimension is the batch dimension.
        permutation_group: Permutations of the non-batch dimensions, each
            given as it would be passed to ``inputs[i].permute``.

    Returns:
        A new tensor holding the averaged values.
    """
    symmetric_outputs = inputs
    for sigma in permutation_group:
        # Shift non-negative axes by one to skip the batch dimension;
        # negative axes count from the end and need no shift.
        batched_sigma = [0] + [
            axis + 1 if axis >= 0 else axis for axis in sigma
        ]
        # Out-of-place add: never writes into ``inputs``.
        symmetric_outputs = symmetric_outputs + inputs.permute(batched_sigma)
    return symmetric_outputs / (len(permutation_group) + 1)
def _initial_params(self): # initialize means and betas according to the default values in PhysNet # https://pubs.acs.org/doi/10.1021/acs.jctc.9b00181 start_value = torch.exp( torch.scalar_tensor(-self.cutoff_upper + self.cutoff_lower)) means = torch.linspace(start_value, 1, self.num_rbf) betas = torch.tensor([(2 / self.num_rbf * (1 - start_value))**-2] * self.num_rbf) return means, betas
def forward(self, a: int):
    """Call ``torch.gradient`` on a fixed vector with one of two spacing forms.

    ``a == 0`` passes the spacing as a float64 scalar tensor (value 2);
    ``a == 1`` passes it as a one-element list holding the scalar 1.
    Any other value falls through and returns ``None``.
    """
    values = torch.tensor([4., 1., 1., 16.])
    if a == 0:
        tensor_spacing = torch.scalar_tensor(2., dtype=torch.float64)
        return torch.gradient(values, spacing=tensor_spacing)
    elif a == 1:
        list_spacing = [torch.tensor(1.).item()]
        return torch.gradient(values, spacing=list_spacing)
def forward(self, logits, label):
    """Average ``self.crit`` over the label values, one group of samples per label.

    For every label value ``i`` in ``range(self.NUM_LABELS)`` that occurs in
    ``label``, the criterion is evaluated on the matching samples only and
    contributes ``1 / NUM_LABELS`` of its value. Labels that do not occur
    contribute nothing.

    Returns:
        The accumulated loss, created with the dtype/device of ``logits``.
    """
    assert torch.all(label < self.NUM_LABELS)

    loss = torch.scalar_tensor(0.).to(logits)
    for label_id in range(self.NUM_LABELS):
        selected = label == label_id
        if not torch.any(selected):
            continue
        group_loss = self.crit(logits[selected],
                               label[selected].to(torch.float))
        loss += group_loss / self.NUM_LABELS
    return loss
def test_loss_calculation():
    """Check the MLM and NSP losses of a seeded model on three hand-built samples."""
    print("======MLM & NSP Loss Calculation Test Case======")
    CLS = BytePairEncoding.CLS_token_idx
    SEP = BytePairEncoding.SEP_token_idx
    MSK = BytePairEncoding.MSK_token_idx

    # Seed before building the model so the expected losses are reproducible.
    torch.manual_seed(1234)
    model = MLMandNSPmodel(100)

    # Each sample is (src, mlm, mask, nsp).
    samples = [
        (
            [CLS] + [10, 10, 10, 10, 10, 10] + [SEP] + [20, 20, 20, 20, 20] + [SEP],
            [CLS] + [10, 10, 10, 10, MSK, 10] + [SEP] + [MSK, 20, 20, 15, 20] + [SEP],
            [False, False, True, False, False, True, False,
             False, True, False, False, True, False, False],
            True,
        ),
        (
            [CLS] + [30, 30, 30] + [SEP] + [40, 40, 40, 40] + [SEP],
            [CLS] + [MSK, 30, 30] + [SEP] + [40, 45, 40, 40] + [SEP],
            [False, True, False, True, False,
             False, True, False, False, False],
            False,
        ),
        (
            [CLS] + [10, 20, 30, 40] + [SEP] + [50, 40, 30, 20, 10] + [SEP],
            [CLS] + [10, MSK, 30, 40] + [SEP] + [50, MSK, 30, 25, 10] + [SEP],
            [False, False, True, False, False, False,
             False, True, False, True, False, False],
            True,
        ),
    ]

    src, mlm, mask, nsp = utils.pretrain_collate_fn(samples)
    MLM_loss, NSP_loss = calculate_losses(model, src, mlm, mask, nsp)

    # First test
    expected_mlm = torch.scalar_tensor(5.12392426)
    assert MLM_loss.allclose(expected_mlm, atol=1e-2), \
        "Your MLM loss does not match the expected result"
    print("The first test passed!")

    # Second test
    expected_nsp = torch.scalar_tensor(0.59137219)
    assert NSP_loss.allclose(expected_nsp, atol=1e-2), \
        "Your NSP loss does not match the expected result"
    print("The second test passed!")

    print("All 2 tests passed!")
def inference_start_end(
    start_probs: torch.Tensor,
    end_probs: torch.Tensor,
    context_start_pos: int,
    context_end_pos: int,
):
    """ Inference function for the start and end token position.
    Find the start and end positions of the answer which maximize
    p(start, end | context_start_pos <= start <= end <= context_end_pos)

    Note: assume that p(start) and p(end) are independent.

    Arguments:
    start_probs -- Probability tensor for the start position
                    in shape (sequence_length, )
    end_probs -- Probability tensor for the end position
                    in shape (sequence_length, )
    context_start_pos -- Start index of the context
    context_end_pos -- End index of the context

    Returns:
    (start_pos, end_pos) as Python ints, both inside the context window.
    """
    assert start_probs.sum().allclose(torch.scalar_tensor(1.0))
    assert end_probs.sum().allclose(torch.scalar_tensor(1.0))

    window = slice(context_start_pos, context_end_pos + 1)
    # joint[i, j] = p(start = i) * p(end = j); the upper triangle keeps only
    # pairs with start <= end. torch.outer replaces the deprecated torch.ger.
    joint = torch.triu(torch.outer(start_probs[window], end_probs[window]))

    # Best start for every end position, then the best end position overall.
    best_per_end, best_start_per_end = torch.max(joint, 0)
    _, best_end = torch.max(best_per_end, 0)
    end_offset = best_end.item()
    start_offset = best_start_per_end[end_offset].item()

    return context_start_pos + start_offset, context_start_pos + end_offset
def perplexity(self, inputs):
    """Return ``exp(loss)`` of the language model on ``inputs``.

    A single string is wrapped into a one-element batch. When the encoded
    batch is at most one token long the model is not called and a fixed
    loss of 20.0 is used instead.
    """
    if isinstance(inputs, str):
        inputs = [inputs]

    encoded = self.tokenizer.batch_encode_plus(inputs, pad_to_max_length=True)
    attention_mask = torch.tensor(encoded['attention_mask'],
                                  device=self.device)
    input_ids = torch.tensor(encoded['input_ids'], device=self.device)

    if input_ids.shape[1] > 1:
        model_output = self.lm_model(input_ids,
                                     attention_mask=attention_mask,
                                     labels=input_ids)
        # The first element of the model output is used as the loss.
        loss = model_output[0].squeeze()
    else:
        loss = torch.scalar_tensor(20.0)
    return torch.exp(loss)
def _predict(self, state, **kwargs): """Chooses the first fitting node as the action.""" current_sfc = self.env.request_batch[self.env.sfc_idx] current_vnf = current_sfc.vnfs[self.env.vnf_idx] for node in range(self.env.vnf_backtrack.num_nodes): if self.env.vnf_backtrack.check_vnf_resources(current_vnf, current_sfc.bandwidth_demand, node): return th.scalar_tensor(node, dtype=th.int16) return self.env.vnf_backtrack.num_nodes
def get_std(self) -> Optional[Tensor]:
    """Return the standard deviation of the accumulated items.

    The variance is computed as E[x^2] - E[x]^2 from the running sums and
    clamped at zero, because rounding can make it slightly negative (which
    previously produced NaN). With ``self._unbiased`` set, the result is
    scaled by sqrt(n / (n - 1)); a single sample then has no defined
    unbiased estimate and NaN is returned instead of dividing by zero.

    Returns:
        The standard deviation, or ``None`` when no item was accumulated.
    """
    if self.is_empty():
        return None

    mean = self._items_sum / self._counter
    variance = self._items_sq_sum / self._counter - mean**2
    std = torch.sqrt(torch.clamp(variance, min=0))

    if self._unbiased:
        if self._counter <= 1:
            return torch.full_like(std, float('nan'))
        correction = torch.scalar_tensor(self._counter / (self._counter - 1))
        std = std * correction.sqrt()
    return std
def inference_start_end(start_probs: torch.Tensor, end_probs: torch.Tensor,
                        context_start_pos: int, context_end_pos: int):
    """ Inference function for the start and end token position.
    Find the start and end positions of the answer which maximize
    p(start, end | context_start_pos <= start <= end <= context_end_pos)

    Note: assume that p(start) and p(end) are independent.

    The pair is chosen by maximising the joint probability over all valid
    (start, end) pairs. The earlier greedy search fixed the most likely
    start first (which can miss the best pair), overwrote entries of
    ``start_probs`` and could return tensors instead of ints.

    Arguments:
    start_probs -- Probability tensor for the start position
                    in shape (sequence_length, )
    end_probs -- Probability tensor for the end position
                    in shape (sequence_length, )
    context_start_pos -- Start index of the context
    context_end_pos -- End index of the context

    Returns:
    (start_pos, end_pos) as Python ints, both inside the context window.
    """
    assert start_probs.sum().allclose(torch.scalar_tensor(1.))
    assert end_probs.sum().allclose(torch.scalar_tensor(1.))

    window = slice(context_start_pos, context_end_pos + 1)
    # joint[i, j] = p(start = i) * p(end = j), restricted to the context;
    # the upper triangle keeps only pairs with start <= end.
    joint = torch.triu(torch.outer(start_probs[window], end_probs[window]))

    # Best start for every end position, then the best end position overall.
    best_per_end, best_start_per_end = torch.max(joint, 0)
    _, best_end = torch.max(best_per_end, 0)
    end_offset = best_end.item()
    start_offset = best_start_per_end[end_offset].item()

    start_pos = context_start_pos + start_offset
    end_pos = context_start_pos + end_offset
    return start_pos, end_pos
def _predict(self, state, factor=1, **kwargs): current_sfc = self.env.request_batch[self.env.sfc_idx] positive_reward = current_sfc.bandwidth_demand costs = self.env.vnf_backtrack.costs negative_reward = sum([vnf[0]*costs['cpu']+vnf[1]*costs['memory'] for vnf in current_sfc.vnfs]) if positive_reward < factor*negative_reward: return th.scalar_tensor(self.env.vnf_backtrack.num_nodes, dtype=th.int16) return super()._predict(state, **kwargs)
def reduce_metrics(self, logging_outputs, criterion):
    """Aggregate WER/UER and BLEU statistics across ``logging_outputs``.

    After delegating to the parent implementation:
    - with ``self.cfg.eval_wer``, the character/word counters are summed and
      logged, and ``"uer"`` / ``"wer"`` are registered as derived metrics
      when their denominator is positive;
    - with ``self.cfg.eval_bleu``, the BLEU length/count/total statistics
      are summed and logged, and ``"bleu"`` is registered as a derived
      metric computed by ``sacrebleu.compute_bleu``.
    """
    super().reduce_metrics(logging_outputs, criterion)

    if self.cfg.eval_wer:
        zero = torch.scalar_tensor(0.0)

        def _total(key):
            # Logs that lack the key contribute a zero tensor.
            return sum(log.get(key, zero) for log in logging_outputs)

        def _percentage(errors_key, count_key):
            # Derived metric: errors * 100 / count, or NaN when count is zero.
            def _rate(meters):
                if meters[count_key].sum > 0:
                    return meters[errors_key].sum * 100.0 / meters[count_key].sum
                return float("nan")

            return _rate

        num_char_errors = _total("_num_char_errors")
        num_chars = _total("_num_chars")
        num_word_errors = _total("_num_word_errors")
        num_words = _total("_num_words")

        metrics.log_scalar("_num_char_errors", num_char_errors)
        metrics.log_scalar("_num_chars", num_chars)
        metrics.log_scalar("_num_word_errors", num_word_errors)
        metrics.log_scalar("_num_words", num_words)

        if num_chars > 0:
            metrics.log_derived(
                "uer", _percentage("_num_char_errors", "_num_chars"))
        if num_words > 0:
            metrics.log_derived(
                "wer", _percentage("_num_word_errors", "_num_words"))

    if self.cfg.eval_bleu:
        len_keys = ["_bleu_sys_len", "_bleu_ref_len"]
        count_keys = [f"_bleu_counts_{i}" for i in range(4)]
        total_keys = [f"_bleu_totals_{i}" for i in range(4)]

        for key in len_keys + count_keys + total_keys:
            summed = sum(log.get(key, 0) for log in logging_outputs)
            metrics.log_scalar(key, summed)

        import sacrebleu

        def _bleu(meters):
            result = sacrebleu.compute_bleu(
                correct=[meters[k].sum for k in count_keys],
                total=[meters[k].sum for k in total_keys],
                sys_len=meters["_bleu_sys_len"].sum,
                ref_len=meters["_bleu_ref_len"].sum,
                smooth_method="exp",
            )
            return result.score

        metrics.log_derived("bleu", _bleu)