def accumulate(self, f0_target, f0_pred, seq_len, is_voiced):
    # Mask out both padded frames and unvoiced frames before accumulating.
    sequence_mask = utils.sequence_mask(seq_len, max_len=f0_target.shape[1],
                                        dtype=f0_target.dtype)
    mask = sequence_mask * is_voiced.type(f0_target.dtype)

    # Accumulate the squared difference.
    square_diff = (f0_target - f0_pred) ** 2
    self.sum += torch.sum(square_diff * mask)
    self.count += torch.sum(mask).item()
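All of these snippets lean on utils.sequence_mask, which is not shown. As a point of reference, here is a minimal sketch of what such a helper is assumed to compute, a standard 0/1 padding mask keyed off the per-item lengths; the actual utils implementation may differ in details such as output shape or argument handling:

import torch

def sequence_mask(seq_len, max_len=None, dtype=torch.float32):
    # Sketch only. seq_len is a [batch_size] tensor of valid lengths; the
    # result is a [batch_size, max_len] tensor holding 1 at positions
    # t < seq_len[i] and 0 at padded positions.
    if max_len is None:
        max_len = int(seq_len.max().item())
    positions = torch.arange(max_len, device=seq_len.device)
    return (positions.unsqueeze(0) < seq_len.unsqueeze(1)).to(dtype)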
def accumulate(self, tensor, seq_len=None):
    r"""tensor must have shape [batch_size, seq_len, feat_dim]."""
    if seq_len is None:
        # No lengths given: every element is valid.
        self.sum += torch.sum(tensor)
        self.count += tensor.numel()
    else:
        # Only count frames inside each item's valid length.
        sequence_mask = utils.sequence_mask(seq_len, max_len=tensor.shape[1],
                                            dtype=tensor.dtype)
        self.sum += torch.sum(tensor * sequence_mask)
        self.count += torch.sum(sequence_mask).item()
def accumulate(self, f0_target, f0_pred, is_voiced, seq_len=None):
    StatefulMetric.accumulate(self)
    # Always exclude unvoiced frames; additionally exclude padded frames
    # when sequence lengths are provided.
    mask = is_voiced.type(f0_target.dtype)
    if seq_len is not None:
        sequence_mask = utils.sequence_mask(seq_len, max_len=f0_target.shape[1],
                                            dtype=f0_target.dtype)
        mask *= sequence_mask

    # Accumulate the squared difference.
    square_diff = (f0_target - f0_pred) ** 2
    self.sum += torch.sum(square_diff * mask)
    self.count += torch.sum(mask).item()
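Both F0 variants only accumulate the masked squared error and the count of frames that survive the mask; the final value is presumably the RMSE over voiced, in-range frames. A hypothetical read-out method (the real class's method name is not shown here):

import math

def get_value(self):
    # Hypothetical read-out: RMSE over the frames that were both within
    # the valid length and voiced.
    return math.sqrt(self.sum.item() / self.count)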
def wrapped_loss(predictions, targets, seq_len=None):
    feature_loss = loss_fn(predictions, targets)
    if seq_len is None:
        max_num_frames = feature_loss.shape[1]
        feature_loss = torch.sum(feature_loss, dim=1) / max_num_frames
    else:
        mask = utils.sequence_mask(seq_len, max_len=feature_loss.shape[1],
                                   dtype=feature_loss.dtype)
        num_valid_frames = torch.sum(mask, dim=1)
        feature_loss = torch.sum(feature_loss * mask, dim=1) / num_valid_frames

    # Average across all batch items and all feature dimensions.
    feature_loss = torch.mean(feature_loss)
    return feature_loss
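wrapped_loss closes over loss_fn from its enclosing scope and expects it to return an unreduced, per-frame loss whose second dimension is time, so the [batch_size, max_num_frames] sequence mask can zero out padded frames before averaging. A usage sketch under that assumption, with a hypothetical per-frame MSE standing in for the wrapped loss (shapes are illustrative):

import torch
import torch.nn.functional as F

def frame_mse(predictions, targets):
    # Hypothetical per-frame loss: reduce only the feature dimension so the
    # result is [batch_size, num_frames] and lines up with the sequence mask.
    return F.mse_loss(predictions, targets, reduction='none').mean(dim=-1)

# Assuming wrapped_loss above is in scope with loss_fn bound to frame_mse:
predictions = torch.randn(4, 100, 80)        # [batch, frames, feat_dim]
targets = torch.randn(4, 100, 80)
lengths = torch.tensor([100, 80, 60, 90])    # valid frames per batch item
loss = wrapped_loss(predictions, targets, seq_len=lengths)  # scalar tensor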