Ejemplo n.º 1
0
    def __init__(self, config: dict):
        """Create the spectrum and CMVN stages and sanity-check the CMVN size.

        Args:
            config: Settings object forwarded unchanged to the parent
                constructor, to ``Spectrum`` and to ``CMVN``. Although it is
                annotated as ``dict``, it is read through attribute access
                (``config.type``, ``config.channel``, ...).
                NOTE(review): presumably an HParams-like object; confirm
                against callers.

        Raises:
            AssertionError: When ``config.type`` is 'MelSpectrum', global CMVN
                is enabled on the CMVN stage, and
                ``filterbank_channel_count * channel`` differs from
                ``len(config.global_mean)``.
        """
        super().__init__(config)
        self.spect = Spectrum(config)
        self.cmvn = CMVN(config)

        # With global CMVN the statistics need one entry per feature dimension.
        if config.type == 'MelSpectrum' and self.cmvn.global_cmvn:
            feature_dim = config.filterbank_channel_count * config.channel
            cmvn_dim = len(config.global_mean)
            mismatch = 'Error, feature dim {} is not equals to cmvn dim {}'
            assert feature_dim == cmvn_dim, mismatch.format(
                feature_dim, cmvn_dim)
Ejemplo n.º 2
0
    def __init__(self, config: dict):
        """Create the spectrum and CMVN stages, check CMVN size, print config.

        Args:
            config: Settings object forwarded unchanged to the parent
                constructor, to ``Spectrum`` and to ``CMVN``. Although it is
                annotated as ``dict``, it is read through attribute access
                (``config.type``, ``config.channel``, ...).
                NOTE(review): presumably an HParams-like object; confirm
                against callers.

        Raises:
            AssertionError: When ``config.type`` is "Fbank", global CMVN is
                enabled on the CMVN stage, and
                ``filterbank_channel_count * channel`` differs from
                ``len(config.global_mean)``.
        """
        super().__init__(config)
        self.spect = Spectrum(config)
        self.cmvn = CMVN(config)

        # With global CMVN the statistics need one entry per feature dimension.
        if config.type == "Fbank" and self.cmvn.global_cmvn:
            feature_dim = config.filterbank_channel_count * config.channel
            cmvn_dim = len(config.global_mean)
            mismatch = "Error, feature dim {} is not equals to cmvn dim {}"
            assert feature_dim == cmvn_dim, mismatch.format(
                feature_dim, cmvn_dim)

        # Always echo the stored configuration to stdout.
        # NOTE(review): self.config is presumably set by the parent
        # constructor; confirm.
        print("Fbank params: ", self.config)
Ejemplo n.º 3
0
    def test_spectrum(self):
        """Run Spectrum on two example wav files and verify its output.

        For both files the output tensor must have rank 2. For the file bound
        to ``wav_path_16k`` the first two rows and five columns are also
        compared against reference values (rtol = atol = 1e-05).
        The wav files are looked up under the ``MAIN_ROOT`` environment
        variable.
        """
        main_root = Path(os.environ["MAIN_ROOT"])
        wav_path_16k = str(main_root.joinpath("examples/sm1_cln.wav"))
        wav_path_8k = str(main_root.joinpath("examples/english.wav"))

        # Reference values for the top-left 2x5 corner of the 16k output.
        expected_head = np.array([
            [9.819611, 2.84503, 3.660894, 2.7779, 1.212233],
            [9.328745, 2.553949, 3.276319, 3.000918, 2.499342],
        ])

        def evaluate(tensor):
            # Eager tensors expose .numpy(); otherwise evaluate in the session.
            if tf.executing_eagerly():
                return tensor.numpy()
            return tensor.eval()

        with self.session():
            for wav_file in (wav_path_8k, wav_path_16k):
                read_wav = ReadWav.params().instantiate()
                input_data, sample_rate = read_wav(wav_file)

                spectrum_config = {"window_length": 0.025, "dither": 0.0}
                spectrum = Spectrum.params(spectrum_config).instantiate()
                spectrum_test = spectrum(input_data, sample_rate)

                self.assertEqual(evaluate(tf.rank(spectrum_test)), 2)

                # Values are only pinned for the 16k file.
                if wav_file != wav_path_16k:
                    continue
                self.assertAllClose(
                    evaluate(spectrum_test)[0:2, 0:5],
                    expected_head,
                    rtol=1e-05,
                    atol=1e-05,
                )
Ejemplo n.º 4
0
    def params(cls, config=None):
        """Build the Fbank hyperparameters, optionally overridden by config.

        The result starts from ``Spectrum.params`` (requested with
        ``output_type`` = 1 and ``is_fbank`` = True) and then registers the
        defaults below.

        Args:
            config: Optional overrides passed to ``hparams.parse(config, True)``
                once every default has been registered. ``None`` keeps the
                defaults. Defaults registered here:

                upper_frequency_limit: 0
                lower_frequency_limit: 60
                filterbank_channel_count: 40
                delta_delta: False
                order: 2
                window: 2

        Returns:
            The HParams object. Its ``type`` is always set to "Fbank" after
            parsing, and ``channel`` is ``order + 1`` when ``delta_delta`` is
            truthy, otherwise 1.
        """
        hparams = HParams(cls=cls)

        # Spectrum settings are appended before the fbank-specific ones.
        spectrum_params = Spectrum.params({"output_type": 1, "is_fbank": True})
        hparams.append(spectrum_params)

        # (name, default) pairs, registered in this exact order.
        defaults = (
            # fbank
            ("upper_frequency_limit", 0),
            ("lower_frequency_limit", 60),
            ("filterbank_channel_count", 40),
            # delta
            ("delta_delta", False),
            ("order", 2),
            ("window", 2),
        )
        for name, default in defaults:
            hparams.add_hparam(name, default)

        if config is not None:
            hparams.parse(config, True)

        # Assigned after parsing, so a value from config cannot replace it.
        hparams.type = "Fbank"

        # One channel normally; order + 1 channels when deltas are requested.
        hparams.add_hparam("channel", 1)
        if hparams.delta_delta:
            hparams.channel = hparams.order + 1

        return hparams
Ejemplo n.º 5
0
    def params(cls, config=None):
        """
        Build the MelSpectrum hyperparameters, optionally overridden by config.

        :param config: optional overrides passed to ``hparams.parse(config, True)``
                after all defaults are registered; ``None`` keeps the defaults.

                Spectrum options (obtained from ``Spectrum.params``).
                NOTE(review): their semantics, and any default not passed
                explicitly below, are owned by ``Spectrum.params`` -- confirm there.
                --window_length             : Window length in seconds. (float)
                --frame_length              : Hop length in seconds. (float)
                --snip_edges                : Controls whether a trailing frame shorter
                                              than window_length is cut off or padded.
                --raw_energy                : If 1, compute frame energy before preemphasis
                                              and windowing. If 2, compute it after. (int)
                --preEph_coeff              : Coefficient for use in frame-signal preemphasis.
                                              (float, requested here as 0.0)
                --window_type               : Type of window
                                              ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
                                              (string, requested here as "hann")
                --remove_dc_offset          : Subtract mean from waveform on each frame.
                                              (bool, requested here as False)
                --is_fbank                  : If true, compute power spectrum without frame
                                              energy. If false, use the frame energy instead
                                              of the square of the constant component of the
                                              signal. (bool, requested here as True)
                --output_type               : If 1, return power spectrum. If 2, return
                                              log-power spectrum. If 3, return magnitude
                                              spectrum. (int, requested here as 3)
                --dither                    : Dithering constant (0.0 means no dither).
                                              (float, requested here as 0.0)

                Options registered by this method:
                --upper_frequency_limit     : High cutoff frequency for mel bins (if <= 0,
                                              offset from Nyquist). (default = 0)
                --lower_frequency_limit     : Low cutoff frequency for mel bins. (default = 60)
                --filterbank_channel_count  : Number of triangular mel-frequency bins.
                                              (default = 40)
                --sample_rate               : (default = -1)
                --delta_delta               : When truthy, ``channel`` becomes order + 1.
                                              (default = False)
                --order                     : (default = 2)
                --window                    : (default = 2)
        :return: An object of class HParams, which is a set of hyperparameters as
                name-value pairs. Its ``type`` is always 'MelSpectrum' and ``channel``
                is 1 unless ``delta_delta`` is truthy.
        """

        hparams = HParams(cls=cls)

        # Spectrum settings are appended before the mel-specific ones.
        spectrum_overrides = {
            'output_type': 3,
            'is_fbank': True,
            'preEph_coeff': 0.0,
            'window_type': 'hann',
            'dither': 0.0,
            'remove_dc_offset': False
        }
        hparams.append(Spectrum.params(spectrum_overrides))

        # (name, default) pairs, registered in this exact order.
        defaults = (
            # mel_spectrum
            ('upper_frequency_limit', 0),
            ('lower_frequency_limit', 60),
            ('filterbank_channel_count', 40),
            ('sample_rate', -1),
            # delta
            ('delta_delta', False),
            ('order', 2),
            ('window', 2),
        )
        for name, default in defaults:
            hparams.add_hparam(name, default)

        if config is not None:
            hparams.parse(config, True)

        # Assigned after parsing, so a value from config cannot replace it.
        hparams.type = 'MelSpectrum'

        # One channel normally; order + 1 channels when deltas are requested.
        hparams.add_hparam('channel', 1)
        if hparams.delta_delta:
            hparams.channel = hparams.order + 1

        return hparams
Ejemplo n.º 6
0
    def params(cls, config=None):
        """Build the Fbank hyperparameters, optionally overridden by config.

        The result starts from ``Spectrum.params`` (requested with
        ``output_type`` = 1 and ``is_fbank`` = True) and then registers the
        fbank and delta defaults listed below.

        Args:
            config: Optional overrides passed to ``hparams.parse(config, True)``
                once every default has been registered. ``None`` keeps the
                defaults.

            Spectrum options (obtained from ``Spectrum.params``).
            NOTE(review): their semantics, and any default not passed
            explicitly here, are owned by ``Spectrum.params`` -- confirm there.

            'window_length': Window length in seconds. (float)
            'frame_length': Hop length in seconds. (float)
            'snip_edges': Controls whether a trailing frame shorter than
                          window_length is cut off or padded. (int)
            'preEph_coeff': Coefficient for use in frame-signal preemphasis.
                            (float)
            'window_type': Type of window
                           ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria"). (string)
            'remove_dc_offset': Subtract mean from waveform on each frame. (bool)
            'is_fbank': If true, compute power spectrum without frame energy.
                        If false, use the frame energy instead of the square
                        of the constant component of the signal.
                        (bool, requested here as True)
            'output_type': If 1, return power spectrum. If 2, return log-power
                           spectrum. (int, requested here as 1)
            'dither': Dithering constant (0.0 means no dither). (float)

            Options registered by this method:

            'upper_frequency_limit': High cutoff frequency for mel bins
                                     (if <= 0, offset from Nyquist). (default = 0)
            'lower_frequency_limit': Low cutoff frequency for mel bins.
                                     (default = 60)
            'filterbank_channel_count': Number of triangular mel-frequency bins.
                                        (default = 40)
            'is_log10': If true, use log10 for fbank. If false, use loge.
                        (bool, default = False)
            'delta_delta': When truthy, ``channel`` becomes order + 1.
                           (default = False)
            'order': (default = 2)
            'window': (default = 2)

        Note:
            Return an object of class HParams, which is a set of hyperparameters
            as name-value pairs. Its ``type`` is always "Fbank" and ``channel``
            is 1 unless ``delta_delta`` is truthy.
        """

        hparams = HParams(cls=cls)

        # Spectrum settings are appended before the fbank-specific ones.
        spectrum_params = Spectrum.params({"output_type": 1, "is_fbank": True})
        hparams.append(spectrum_params)

        # (name, default) pairs, registered in this exact order.
        defaults = (
            # fbank
            ("upper_frequency_limit", 0),
            ("lower_frequency_limit", 60),
            ("filterbank_channel_count", 40),
            ('is_log10', False),
            # delta
            ("delta_delta", False),
            ("order", 2),
            ("window", 2),
        )
        for name, default in defaults:
            hparams.add_hparam(name, default)

        if config is not None:
            hparams.parse(config, True)

        # Assigned after parsing, so a value from config cannot replace it.
        hparams.type = "Fbank"

        # One channel normally; order + 1 channels when deltas are requested.
        hparams.add_hparam("channel", 1)
        if hparams.delta_delta:
            hparams.channel = hparams.order + 1

        return hparams