Ejemplo n.º 1
0
def get_window(window_type: str,
               window_length_in_samp: int,
               device: Optional[torch.device] = None) -> torch.Tensor:
    """Return a symmetric (non-periodic) window as a float32 tensor.

    Args:
        window_type: one of ``"bartlett"``, ``"blackman"``, ``"hamming"``
            or ``"hann"``.
        window_length_in_samp: number of samples in the window.
        device: device on which the window is created (``None`` leaves the
            choice to torch).

    Returns:
        A 1-D ``torch.float32`` tensor with ``window_length_in_samp`` entries.

    Raises:
        ValueError: if ``window_type`` is not one of the supported names.
    """
    # The window is evaluated in float64 and only then cast down; the extra
    # precision is what gives parity with scipy.signal.windows.get_window.
    high_precision = torch.float64

    if window_type == "bartlett":
        window = torch.bartlett_window(
            window_length_in_samp, periodic=False, dtype=high_precision, device=device)
    elif window_type == "blackman":
        window = torch.blackman_window(
            window_length_in_samp, periodic=False, dtype=high_precision, device=device)
    elif window_type == "hamming":
        window = torch.hamming_window(
            window_length_in_samp, periodic=False, dtype=high_precision, device=device)
    elif window_type == "hann":
        window = torch.hann_window(
            window_length_in_samp, periodic=False, dtype=high_precision, device=device)
    else:
        raise ValueError(f"Unknown window type: {window_type}")

    return window.to(torch.float32)
Ejemplo n.º 2
0
 def spectral_ops(self):
     """Call a fixed set of torch spectral ops and return their results.

     Two random tensors are drawn (a length-10 vector and a 10x8x4x2
     tensor); the first is passed to ``torch.stft`` and the second to
     ``torch.istft``, both with ``n_fft=8``.  Five window functions are
     then evaluated with fixed lengths.

     Returns:
         tuple: ``(stft, istft, bartlett, blackman, hamming, hann, kaiser)``
         results, in that order.
     """
     # NOTE(review): these stft/istft calls use the positional, real-valued
     # form; newer PyTorch releases may require ``return_complex`` / complex
     # input -- confirm against the PyTorch version this file targets.
     signal = torch.randn(10)
     spectrum = torch.randn(10, 8, 4, 2)

     stft_result = torch.stft(signal, 8)
     istft_result = torch.istft(spectrum, 8)

     bartlett = torch.bartlett_window(2, dtype=torch.float)
     blackman = torch.blackman_window(2, dtype=torch.float)
     hamming = torch.hamming_window(4, dtype=torch.float)
     hann = torch.hann_window(4, dtype=torch.float)
     kaiser = torch.kaiser_window(4, dtype=torch.float)

     return (
         stft_result,
         istft_result,
         bartlett,
         blackman,
         hamming,
         hann,
         kaiser,
     )
Ejemplo n.º 3
0
    def __init__(self,
                 n_fft=4096,
                 n_hop=1024,
                 center=False,
                 kind_window='hann'):
        """Store the STFT settings and build the analysis window.

        Args:
            n_fft: window length passed to the window function; stored as
                ``self.n_fft``.
            n_hop: stored as ``self.n_hop``.
            center: stored as ``self.center``.
            kind_window: window name, one of ``'bartlett'``, ``'gaussian'``,
                ``'hamming'`` or ``'hann'``.

        Raises:
            NotImplementedError: if ``kind_window`` is none of the names above.
        """
        super(STFT, self).__init__()

        # Builders are wrapped in lambdas so that only the selected window
        # function is actually evaluated.
        window_builders = (
            ('bartlett', lambda: torch.bartlett_window(n_fft)),
            ('gaussian', lambda: gaussian_window(n_fft, 1 / sqrt(pi))),
            ('hamming', lambda: torch.hamming_window(n_fft)),
            ('hann', lambda: torch.hann_window(n_fft)),
        )
        for name, build in window_builders:
            if kind_window == name:
                window = build()
                break
        else:
            raise NotImplementedError

        # Kept as a Parameter with gradients disabled, so the window is
        # registered on the module but not trained.
        self.window = nn.Parameter(window, requires_grad=False)

        self.n_fft = n_fft
        self.n_hop = n_hop
        self.center = center
Ejemplo n.º 4
0
    def _create_fb_matrix(self, n_fft):
        """Build the matrix that maps frequency bins onto mel bands.

        ``self.n_mels + 2`` points are spaced evenly on the mel scale between
        ``self.f_min`` and ``self.f_max``, converted back to Hz and then to
        bin indices using ``n_fft`` and ``self.sr``.  Column ``k`` of the
        result holds a Bartlett (triangular) window spanning the bin indices
        of points ``k`` and ``k + 2``; every other entry is zero.

        Args:
            n_fft (int): number of rows of the returned matrix; also used to
                scale the Hz-to-bin conversion.

        Returns:
            torch.Tensor: float tensor of shape ``(n_fft, self.n_mels)``.
        """

        def hz_to_mel(hz):
            return 2595 * np.log10(1. + (hz / 700))

        mel_low = 0. if self.f_min == 0 else hz_to_mel(self.f_min)
        mel_high = hz_to_mel(self.f_max)

        # Evenly spaced mel points, mapped back to Hz.
        mel_points = torch.linspace(mel_low, mel_high, self.n_mels + 2)
        hz_points = (700 * (10**(mel_points / 2595) - 1))

        bin_edges = torch.floor(((n_fft - 1) * 2) * hz_points / self.sr).long()

        filterbank = torch.zeros(n_fft, self.n_mels, dtype=torch.float)
        for band in range(self.n_mels):
            start = bin_edges[band].item()
            stop = bin_edges[band + 2].item()
            filterbank[start:stop, band] = torch.bartlett_window(stop - start)
        return filterbank
Ejemplo n.º 5
0
def _apply_probability_distribution(waveform, density_function="TPDF"):
    # type: (Tensor, str) -> Tensor
    r"""Add dither noise to a waveform and quantise it to a 16-bit grid.

    Triangular probability density function (TPDF) dither noise has a
    triangular distribution; values in the center of the range have a higher
    probability of occurring.

    Rectangular probability density function (RPDF) dither noise has a
    uniform distribution; any value in the specified range has the same
    probability of occurring.

    Gaussian probability density function (GPDF) has a normal distribution.
    The relationship of probabilities of results follows a bell-shaped,
    or Gaussian curve, typical of dither generated by analog sources.

    The input tensor is never modified.

    Args:
        waveform (torch.Tensor): Tensor of audio of dimension (..., time);
            all leading dimensions are flattened into channels internally.
        density_function (str): The density function of a
           continuous random variable (Default: `TPDF`)
           Options: Triangular Probability Density Function - `TPDF`
                    Rectangular Probability Density Function - `RPDF`
                    Gaussian Probability Density Function - `GPDF`
           Any other value is treated as `TPDF`.
    Returns:
        torch.Tensor: waveform dithered with the selected density function,
        rounded on the scaled grid and rescaled; same shape as the input.
    """
    shape = waveform.size()
    waveform = waveform.reshape(-1, shape[-1])

    # Both sizes are "count minus one".
    channel_size = waveform.size()[0] - 1
    time_size = waveform.size()[-1] - 1

    # NOTE: torch.randint's upper bound is exclusive, so the last channel and
    # the last sample are never drawn.  Left as is so seeded results for
    # existing callers do not change.
    random_channel = int(torch.randint(channel_size, [1, ]).item()) if channel_size > 0 else 0
    random_time = int(torch.randint(time_size, [1, ]).item()) if time_size > 0 else 0

    number_of_bits = 16
    up_scaling = 2 ** (number_of_bits - 1) - 2
    signal_scaled = waveform * up_scaling
    down_scaling = 2 ** (number_of_bits - 1)

    if density_function == "RPDF":
        RPDF = waveform[random_channel][random_time] - 0.5

        signal_scaled_dis = signal_scaled + RPDF
    elif density_function == "GPDF":
        # TODO Replace by distribution code once
        # https://github.com/pytorch/pytorch/issues/29843 is resolved
        # gaussian = torch.distributions.normal.Normal(torch.mean(waveform, -1), 1).sample()

        num_rand_variables = 6

        # Indexing yields a view into ``waveform``; clone it so the running
        # sum below cannot write back into the caller's tensor.
        gaussian = waveform[random_channel][random_time].clone()
        for _ in range(num_rand_variables):
            # torch.randint rejects an empty range, so fall back to index 0
            # for single-channel or single-sample input.
            rand_chan = int(torch.randint(channel_size, [1, ]).item()) if channel_size > 0 else 0
            rand_time = int(torch.randint(time_size, [1, ]).item()) if time_size > 0 else 0
            gaussian = gaussian + waveform[rand_chan][rand_time]

        signal_scaled_dis = signal_scaled + gaussian
    else:
        # Build the window on the waveform's device so the addition below
        # also works for non-CPU input.
        TPDF = torch.bartlett_window(time_size + 1, device=waveform.device)
        TPDF = TPDF.repeat((channel_size + 1), 1)
        signal_scaled_dis = signal_scaled + TPDF

    quantised_signal_scaled = torch.round(signal_scaled_dis)
    quantised_signal = quantised_signal_scaled / down_scaling
    return quantised_signal.reshape(shape[:-1] + quantised_signal.shape[-1:])