Exemple #1
0
def test_filterbank(device):
    """Exercise ``Filterbank`` on ``device``: tracing, silence, and batching.

    Four things are verified:

    * the module can be traced by ``torch.jit.trace``;
    * an all-zero input yields -100 everywhere;
    * ``_amplitude_to_DB`` maps a single zero amplitude to -100;
    * processing two inputs one by one matches processing them as a batch
      (summed absolute difference below 8e-05 per item).
    """
    from speechbrain.processing.features import Filterbank

    fbank_module = Filterbank().to(device)
    in_shape = [10, 101, 201]

    # Tracing must succeed and return a truthy traced module.
    traced = torch.jit.trace(fbank_module, torch.ones(in_shape, device=device))
    assert traced

    # All-zero input: every output value is expected to be exactly -100.
    silent_out = fbank_module(torch.zeros(in_shape, device=device))
    assert torch.equal(silent_out, torch.full_like(silent_out, -100))

    # Calling the dB conversion directly on a zero amplitude gives -100 too.
    zero_amplitude = torch.zeros([1, 1, 1], device=device)
    minus_hundred = torch.Tensor([[[-100]]]).to(device)
    assert torch.equal(
        fbank_module._amplitude_to_DB(zero_amplitude), minus_hundred
    )

    # Item-by-item results must agree with the batched result. The first
    # item is scaled by 10 so the two items have different ranges.
    item_a = torch.rand([1, 101, 201], device=device) * 10
    item_b = torch.rand([1, 101, 201], device=device)
    both = torch.cat([item_a, item_b], dim=0)
    separate_outs = (fbank_module(item_a), fbank_module(item_b))
    batched_out = fbank_module(both)
    for idx, single_out in enumerate(separate_outs):
        deviation = torch.sum(torch.abs(single_out[0] - batched_out[idx]))
        assert deviation < 8e-05
Exemple #2
0
    def __init__(
        self,
        deltas=False,
        context=False,
        requires_grad=False,
        sample_rate=16000,
        n_fft=400,
        n_mels=40,
        filter_shape="triangular",
        param_change_factor=1.0,
        param_rand_factor=0.0,
        left_frames=5,
        right_frames=5,
    ):
        """Build the STFT -> filterbank (-> deltas -> context) components.

        Arguments
        ---------
        deltas : bool
            Stored as ``self.deltas``; presumably toggles the use of
            ``self.compute_deltas`` in the forward pass (not visible here).
        context : bool
            Stored as ``self.context``; presumably toggles the use of
            ``self.context_window`` in the forward pass (not visible here).
        requires_grad : bool
            Stored as ``self.requires_grad``. The filterbank is created with
            ``freeze=not requires_grad``.
        sample_rate : int
            Passed to both ``STFT`` and ``Filterbank``; the upper edge of the
            filterbank is set to ``sample_rate / 2``.
        n_fft : int
            Passed to both ``STFT`` and ``Filterbank``.
        n_mels : int
            Passed to ``Filterbank`` and used as ``input_size`` for ``Deltas``.
        filter_shape : str
            Forwarded to ``Filterbank``.
        param_change_factor : float
            Forwarded to ``Filterbank``.
        param_rand_factor : float
            Forwarded to ``Filterbank``.
        left_frames : int
            Forwarded to ``ContextWindow``.
        right_frames : int
            Forwarded to ``ContextWindow``.
        """
        super().__init__()
        self.deltas = deltas
        self.context = context
        self.requires_grad = requires_grad

        self.compute_STFT = STFT(sample_rate=sample_rate, n_fft=n_fft)
        self.compute_fbanks = Filterbank(
            # Keep the filterbank's sampling rate in sync with the STFT and
            # with f_max below; without it, a non-default ``sample_rate``
            # would be combined with the filterbank's own default rate.
            sample_rate=sample_rate,
            n_fft=n_fft,
            n_mels=n_mels,
            f_min=0,
            f_max=sample_rate / 2,
            freeze=not requires_grad,
            filter_shape=filter_shape,
            param_change_factor=param_change_factor,
            param_rand_factor=param_rand_factor,
        )
        self.compute_deltas = Deltas(input_size=n_mels)
        self.context_window = ContextWindow(
            left_frames=left_frames,
            right_frames=right_frames,
        )
Exemple #3
0
def test_filterbank():
    """Smoke test: a default ``Filterbank`` can be traced by TorchScript."""
    from speechbrain.processing.features import Filterbank

    fbank_module = Filterbank()
    example_input = torch.ones([10, 101, 201])

    # ``torch.jit.trace`` must succeed and hand back a truthy traced module.
    traced = torch.jit.trace(fbank_module, example_input)
    assert traced
Exemple #4
0
def test_features_multimic(device):
    """Check that a duplicated extra channel reproduces the 3-D features.

    The same random input is run once as a 3-D tensor and once stacked twice
    along a new trailing axis. Index 0 of the trailing axis of the second
    output must match the first output.
    """
    from speechbrain.processing.features import Filterbank

    compute_fbanks = Filterbank().to(device)
    inputs = torch.rand([10, 101, 201], device=device)
    output = compute_fbanks(inputs)

    # Two identical copies stacked on a new last axis.
    inputs_ch2 = torch.stack((inputs, inputs), -1)
    output_ch2 = compute_fbanks(inputs_ch2)[..., 0]

    # Explicit shape check: element-wise comparison would otherwise
    # broadcast silently over mismatched but compatible shapes.
    assert output.shape == output_ch2.shape

    # Element-wise closeness instead of a signed sum: with a signed sum,
    # positive and negative errors cancel and any negative total passes.
    assert torch.allclose(output, output_ch2, atol=1e-05)
Exemple #5
0
    def __init__(
        self,
        deltas=True,
        context=True,
        requires_grad=False,
        sample_rate=16000,
        f_min=0,
        f_max=None,
        n_fft=400,
        n_mels=23,
        n_mfcc=20,
        filter_shape="triangular",
        param_change_factor=1.0,
        param_rand_factor=0.0,
        left_frames=5,
        right_frames=5,
        win_length=25,
        hop_length=10,
    ):
        """Build the STFT -> filterbank -> DCT (-> deltas -> context) parts.

        Arguments
        ---------
        deltas, context : bool
            Stored on the instance; presumably they toggle the delta and
            context-window stages in the forward pass (not visible here).
        requires_grad : bool
            Stored on the instance; the filterbank gets
            ``freeze=not requires_grad``.
        sample_rate : int
            Passed to ``STFT`` and ``Filterbank``.
        f_min : float
            Passed to ``Filterbank``.
        f_max : float or None
            Passed to ``Filterbank``; ``None`` is replaced by
            ``sample_rate / 2``.
        n_fft : int
            Passed to ``STFT`` and ``Filterbank``.
        n_mels : int
            Passed to ``Filterbank`` and used as ``input_size`` of ``DCT``.
        n_mfcc : int
            Used as ``n_out`` of ``DCT`` and ``input_size`` of ``Deltas``.
        filter_shape, param_change_factor, param_rand_factor
            Forwarded unchanged to ``Filterbank``.
        left_frames, right_frames : int
            Forwarded to ``ContextWindow``.
        win_length, hop_length
            Forwarded to ``STFT``.
        """
        super().__init__()
        self.deltas = deltas
        self.context = context
        self.requires_grad = requires_grad

        # Fall back to half the sampling rate when no upper edge is given.
        upper_edge = sample_rate / 2 if f_max is None else f_max

        stft_options = dict(
            sample_rate=sample_rate,
            n_fft=n_fft,
            win_length=win_length,
            hop_length=hop_length,
        )
        fbank_options = dict(
            sample_rate=sample_rate,
            n_fft=n_fft,
            n_mels=n_mels,
            f_min=f_min,
            f_max=upper_edge,
            freeze=not requires_grad,
            filter_shape=filter_shape,
            param_change_factor=param_change_factor,
            param_rand_factor=param_rand_factor,
        )

        # Components are created and attached in pipeline order.
        self.compute_STFT = STFT(**stft_options)
        self.compute_fbanks = Filterbank(**fbank_options)
        self.compute_dct = DCT(input_size=n_mels, n_out=n_mfcc)
        self.compute_deltas = Deltas(input_size=n_mfcc)
        self.context_window = ContextWindow(
            left_frames=left_frames, right_frames=right_frames
        )