def get_window(window_type: str, window_length_in_samp: int, device: Optional[torch.device] = None) -> torch.Tensor:
    """Build a non-periodic analysis window and return it as float32.

    Args:
        window_type: One of ``"bartlett"``, ``"blackman"``, ``"hamming"``
            or ``"hann"``.
        window_length_in_samp: Number of samples in the window.
        device: Device on which the window is created (``None`` leaves the
            choice to torch).

    Returns:
        A 1-D ``torch.float32`` tensor of length ``window_length_in_samp``.

    Raises:
        ValueError: If ``window_type`` is not one of the supported names.
    """
    # The window is generated in float64 and only then narrowed to float32:
    # the extra precision is what gives parity with the
    # scipy.signal.windows.get_window implementation.
    if window_type == "bartlett":
        window_f64 = torch.bartlett_window(
            window_length_in_samp, periodic=False, dtype=torch.float64, device=device
        )
    elif window_type == "blackman":
        window_f64 = torch.blackman_window(
            window_length_in_samp, periodic=False, dtype=torch.float64, device=device
        )
    elif window_type == "hamming":
        window_f64 = torch.hamming_window(
            window_length_in_samp, periodic=False, dtype=torch.float64, device=device
        )
    elif window_type == "hann":
        window_f64 = torch.hann_window(
            window_length_in_samp, periodic=False, dtype=torch.float64, device=device
        )
    else:
        raise ValueError(f"Unknown window type: {window_type}")

    return window_f64.to(torch.float32)
def spectral_ops(self):
    """Run a fixed set of torch spectral operators and return their outputs.

    Two random tensors are drawn (a length-10 vector and a 10x8x4x2 tensor)
    and fed to ``torch.stft`` / ``torch.istft`` with ``n_fft=8``; five window
    functions are then generated as float tensors.

    Returns:
        A 7-tuple: (stft, istft, bartlett, blackman, hamming, hann, kaiser).
    """
    # Random inputs are drawn in this order so RNG consumption is unchanged.
    signal = torch.randn(10)
    spectrum = torch.randn(10, 8, 4, 2)

    stft_out = torch.stft(signal, 8)
    istft_out = torch.istft(spectrum, 8)

    bartlett = torch.bartlett_window(2, dtype=torch.float)
    blackman = torch.blackman_window(2, dtype=torch.float)
    hamming = torch.hamming_window(4, dtype=torch.float)
    hann = torch.hann_window(4, dtype=torch.float)
    kaiser = torch.kaiser_window(4, dtype=torch.float)

    return (stft_out, istft_out, bartlett, blackman, hamming, hann, kaiser)
def __init__(self, n_fft=4096, n_hop=1024, center=False, kind_window='hann'):
    """Set up the STFT module and its fixed analysis window.

    Args:
        n_fft (int): Length of the window that is created; stored as
            ``self.n_fft``.
        n_hop (int): Stored as ``self.n_hop``.
        center (bool): Stored as ``self.center``.
        kind_window (str): Which window to build: ``'bartlett'``,
            ``'gaussian'``, ``'hamming'`` or ``'hann'``.

    Raises:
        NotImplementedError: If ``kind_window`` is none of the names above.
    """
    super(STFT, self).__init__()

    # Lazy builders: only the selected window is ever constructed.
    window_builders = (
        ('bartlett', lambda: torch.bartlett_window(n_fft)),
        # NOTE(review): 1 / sqrt(pi) is passed as the second argument of the
        # project helper gaussian_window; its meaning is defined there.
        ('gaussian', lambda: gaussian_window(n_fft, 1 / sqrt(pi))),
        ('hamming', lambda: torch.hamming_window(n_fft)),
        ('hann', lambda: torch.hann_window(n_fft)),
    )
    for name, build in window_builders:
        if kind_window == name:
            window = build()
            break
    else:
        raise NotImplementedError

    # Registered as a parameter with gradients disabled.
    self.window = nn.Parameter(window, requires_grad=False)

    self.n_fft = n_fft
    self.n_hop = n_hop
    self.center = center
def _create_fb_matrix(self, n_fft):
    """Create a frequency bin conversion matrix.

    Reads ``self.f_min``, ``self.f_max``, ``self.n_mels`` and ``self.sr``.

    Args:
        n_fft (int): number of rows of the returned matrix (described by the
            original docstring as the number of filter banks from the
            spectrogram)

    Returns:
        torch.Tensor: float tensor of shape ``(n_fft, self.n_mels)``; each
        column holds one Bartlett-window-shaped filter, zeros elsewhere.
    """
    # Band limits converted with 2595 * log10(1 + f / 700); a lower limit of
    # exactly 0 is short-circuited to 0.
    if self.f_min == 0:
        mel_low = 0.
    else:
        mel_low = 2595 * np.log10(1. + (self.f_min / 700))
    mel_high = 2595 * np.log10(1. + (self.f_max / 700))

    # n_mels + 2 equally spaced points, mapped back through the inverse formula.
    mel_points = torch.linspace(mel_low, mel_high, self.n_mels + 2)
    hz_points = (700 * (10**(mel_points / 2595) - 1))

    # Row index associated with each point.
    bin_edges = torch.floor(((n_fft - 1) * 2) * hz_points / self.sr).long().tolist()

    filterbank = torch.zeros(n_fft, self.n_mels, dtype=torch.float)
    # Filter k spans from edge k up to (excluding) edge k + 2.
    for column, (start, stop) in enumerate(zip(bin_edges[:-2], bin_edges[2:])):
        filterbank[start:stop, column] = torch.bartlett_window(stop - start)
    return filterbank
def _apply_probability_distribution(waveform, density_function="TPDF"):
    # type: (Tensor, str) -> Tensor
    r"""Apply a probability distribution function on a waveform.

    Triangular probability density function (TPDF) dither noise has a
    triangular distribution; values in the center of the range have a higher
    probability of occurring.

    Rectangular probability density function (RPDF) dither noise has a
    uniform distribution; any value in the specified range has the same
    probability of occurring.

    Gaussian probability density function (GPDF) has a normal distribution.
    The relationship of probabilities of results follows a bell-shaped,
    or Gaussian curve, typical of dither generated by analog sources.

    The input tensor is never modified.

    Args:
        waveform (torch.Tensor): Tensor of audio of dimension (..., time);
            leading dimensions are flattened internally and restored on return
        density_function (string): The density function of a continuous
            random variable (Default: `TPDF`)
            Options: Triangular Probability Density Function - `TPDF`
                     Rectangular Probability Density Function - `RPDF`
                     Gaussian Probability Density Function - `GPDF`
            Any other value is treated as `TPDF`.

    Returns:
        torch.Tensor: waveform dithered with the selected density function,
        same shape as the input
    """
    shape = waveform.size()
    waveform = waveform.reshape(-1, shape[-1])

    # Exclusive upper bounds handed to torch.randint below. randint rejects a
    # bound of 0, so every draw is guarded and falls back to index 0.
    channel_size = waveform.size()[0] - 1
    time_size = waveform.size()[-1] - 1

    random_channel = int(torch.randint(channel_size, [1, ]).item()) if channel_size > 0 else 0
    random_time = int(torch.randint(time_size, [1, ]).item()) if time_size > 0 else 0

    number_of_bits = 16
    up_scaling = 2 ** (number_of_bits - 1) - 2
    signal_scaled = waveform * up_scaling
    down_scaling = 2 ** (number_of_bits - 1)

    if density_function == "RPDF":
        RPDF = waveform[random_channel][random_time] - 0.5

        signal_scaled_dis = signal_scaled + RPDF
    elif density_function == "GPDF":
        # TODO Replace by distribution code once
        # https://github.com/pytorch/pytorch/issues/29843 is resolved
        # gaussian = torch.distributions.normal.Normal(torch.mean(waveform, -1), 1).sample()

        num_rand_variables = 6

        # Indexing yields a view into `waveform`; clone it so accumulating
        # the random samples cannot write back into the caller's tensor.
        gaussian = waveform[random_channel][random_time].clone()
        for ws in num_rand_variables * [time_size]:
            # Same guards as for random_channel / random_time above: mono or
            # single-sample input would otherwise make randint raise.
            rand_chan = int(torch.randint(channel_size, [1, ]).item()) if channel_size > 0 else 0
            rand_time = int(torch.randint(ws, [1, ]).item()) if ws > 0 else 0
            gaussian = gaussian + waveform[rand_chan][rand_time]

        signal_scaled_dis = signal_scaled + gaussian
    else:
        # One Bartlett (triangular) window across the time axis, repeated for
        # every flattened channel.
        TPDF = torch.bartlett_window(time_size + 1)
        TPDF = TPDF.repeat((channel_size + 1), 1)
        signal_scaled_dis = signal_scaled + TPDF

    quantised_signal_scaled = torch.round(signal_scaled_dis)
    quantised_signal = quantised_signal_scaled / down_scaling
    return quantised_signal.reshape(shape[:-1] + quantised_signal.shape[-1:])