def _forward_single(self, features: torch.Tensor, warp: bool = True, mask: bool = True) -> torch.Tensor:
    """
    Apply SpecAugment to a single feature matrix of shape (T, F).

    With probability ``1 - self.p`` the features are returned unchanged;
    otherwise optional time warping is applied first, followed by optional
    frequency and time masking.
    """
    # Bernoulli gate: with probability 1 - p this transform is a no-op.
    if random.random() > self.p:
        return features

    # Time warping is applied only for a valid (>= 1) warp factor.
    if warp and self.time_warp_factor is not None and self.time_warp_factor >= 1:
        features = time_warp(features, factor=self.time_warp_factor)

    if mask:
        from torchaudio.functional import mask_along_axis

        # Masked regions are filled with the global mean of the features.
        fill_value = features.mean()

        # Frequency masks: axis=2 once a leading batch dim is added.
        for _ in range(self.num_feature_masks):
            features = mask_along_axis(
                features.unsqueeze(0),
                mask_param=self.features_mask_size,
                mask_value=fill_value,
                axis=2,
            ).squeeze(0)

        # Time masks: axis=1 once a leading batch dim is added.
        for _ in range(self.num_frame_masks):
            features = mask_along_axis(
                features.unsqueeze(0),
                mask_param=self.frames_mask_size,
                mask_value=fill_value,
                axis=1,
            ).squeeze(0)

    return features
def test_mask_along_axis_preserve(self, shape, mask_param, mask_value, axis):
    """mask_along_axis must leave its input Tensor untouched.

    Repeated 5 times to bound the probability that no masking occurs
    to roughly 1e-10.
    See https://github.com/pytorch/audio/issues/1478
    """
    torch.random.manual_seed(42)
    for _ in range(5):
        original = torch.randn(*shape, dtype=self.dtype, device=self.device)
        backup = original.clone()
        # Return value is intentionally discarded; we only check for in-place mutation.
        F.mask_along_axis(original, mask_param, mask_value, axis)
        self.assertEqual(original, backup)
def test_mask_along_axis(specgram, mask_param, mask_value, axis):
    """Check mask_along_axis masks complete columns and respects mask_param.

    Counts, per batch element, the columns that are entirely equal to
    ``mask_value`` along the non-masked axis; that count must be strictly
    smaller than ``mask_param``. Shape must be preserved.
    """
    mask_specgram = F.mask_along_axis(specgram, mask_param, mask_value, axis)

    other_axis = 1 if axis == 2 else 2
    masked_columns = (mask_specgram == mask_value).sum(other_axis)
    num_masked_columns = (masked_columns == mask_specgram.size(other_axis)).sum()
    # Floor division, consistent with the sibling tests: `/=` performs true
    # division, which changes the comparison semantics and raises on integer
    # tensors in recent PyTorch versions.
    num_masked_columns = torch.div(
        num_masked_columns, mask_specgram.size(0), rounding_mode='floor')

    assert mask_specgram.size() == specgram.size()
    assert num_masked_columns < mask_param
def test_mask_along_axis(self, shape, mask_param, mask_value, axis):
    """Verify mask_along_axis masks fewer than ``mask_param`` full columns per item."""
    torch.random.manual_seed(42)
    specgram = torch.randn(*shape)
    masked = F.mask_along_axis(specgram, mask_param, mask_value, axis)

    # Count columns that became entirely mask_value along the other axis.
    other = 1 if axis == 2 else 2
    hits_per_column = (masked == mask_value).sum(other)
    fully_masked = (hits_per_column == masked.size(other)).sum()
    fully_masked //= masked.size(0)

    assert masked.size() == specgram.size()
    assert fully_masked < mask_param
def forward(self, specgram: Tensor, mask_value: float = 0.) -> Tensor:
    r"""
    Args:
        specgram (Tensor): Tensor of dimension (..., freq, time).
        mask_value (float): Value to assign to the masked columns.

    Returns:
        Tensor: Masked spectrogram of dimensions (..., freq, time).
    """
    # Batched (4-D) input with iid_masks enabled gets an independent mask
    # per example; the axis is shifted by one to skip the batch dimension.
    if self.iid_masks and specgram.dim() == 4:
        return F.mask_along_axis_iid(specgram, self.mask_param, mask_value, self.axis + 1)
    return F.mask_along_axis(specgram, self.mask_param, mask_value, self.axis)
def test_mask_along_axis(self, shape, mask_param, mask_value, axis):
    """Verify mask_along_axis masks fewer than ``mask_param`` full columns per item."""
    torch.random.manual_seed(42)
    specgram = torch.randn(*shape, dtype=self.dtype, device=self.device)
    masked = F.mask_along_axis(specgram, mask_param, mask_value, axis)

    # Count columns that became entirely mask_value along the other axis,
    # then floor-divide by the batch size to get a per-item count.
    other = 1 if axis == 2 else 2
    hits_per_column = (masked == mask_value).sum(other)
    fully_masked = (hits_per_column == masked.size(other)).sum()
    fully_masked = torch.div(fully_masked, masked.size(0), rounding_mode='floor')

    assert masked.size() == specgram.size()
    assert fully_masked < mask_param
def func(tensor):
    """Apply F.mask_along_axis with fixed parameters (mask_param=100, mask_value=30., axis=2)."""
    param = 100
    fill = 30.
    dim = 2
    return F.mask_along_axis(tensor, param, fill, dim)