Ejemplo n.º 1
0
    def test_hparams(self):
        """Exercise HParams attribute, item, JSON, copy and equality access."""
        hp = HParams(cls=self.__class__, name='fbank', n_mels=40)
        # Remove 'cls' so only the plain name/value pairs are compared below.
        hp.del_hparam('cls')
        self.assertEqual(hp.name, 'fbank')
        self.assertEqual(hp.n_mels, 40)
        expected_values = {'name': 'fbank', 'n_mels': 40}
        self.assertDictEqual(hp.values(), expected_values)

        # Add, overwrite and read back a new hyperparameter.
        hp.add_hparam('sr', 8000)
        self.assertEqual(hp.sr, 8000)

        hp.set_hparam('sr', 16000)
        self.assertEqual(hp.sr, 16000)
        self.assertEqual(hp.get('sr'), 16000)

        # After deleting 'sr' the JSON dump holds the two original entries.
        hp.del_hparam('sr')
        expected_json = '{"name": "fbank", "n_mels": 40}'
        self.assertJsonEqual(hp.to_json(), expected_json)

        # Membership test and item-style read access.
        has_name = 'name' in hp
        self.assertEqual(has_name, True)
        self.assertEqual(hp['name'], 'fbank')
        self.assertEqual(hp['n_mels'], 40)

        # Item-style write access.
        hp['n_mels'] = 80
        self.assertEqual(hp['n_mels'], 80)

        # A deep copy compares equal until one of its values is changed.
        hp_copy = copy.deepcopy(hp)
        self.assertEqual(hp == hp_copy, True)
        self.assertEqual(hp != hp_copy, False)

        hp_copy['name'] = 'MFCC'
        self.assertEqual(hp == hp_copy, False)
        self.assertEqual(hp != hp_copy, True)
Ejemplo n.º 2
0
    def params(cls, config=None):
        """
        Build the default hyperparameters, optionally overridden by config.

        :param config: optional overrides; when it is not None it is handed to
            hparams.override_from_dict(). Five hyperparameters are registered:
            --window_length      : Window length in seconds. (float, default = 0.025)
            --frame_length       : Hop length in seconds. (float, default = 0.010)
            --ceps_subband_num   : Number of Ceps_subband. (int, default = 13)
            --tag_ceps_mean_norm : Flag of tag_ceps_mean_norm. (bool, default = True)
            --sample_rate        : Waveform data sample frequency (must match the
                                   waveform file, if specified there).
                                   (int, default = 16000)
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        # (name, default) pairs, registered in this order.
        defaults = (
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('ceps_subband_num', 13),
            ('tag_ceps_mean_norm', True),
            ('sample_rate', 16000),
        )

        hparams = HParams(cls=cls)
        for name, value in defaults:
            hparams.add_hparam(name, value)

        if config is None:
            return hparams

        hparams.override_from_dict(config)
        return hparams
Ejemplo n.º 3
0
    def params(cls, config=None):
        """
        Build the default hyperparameters, optionally overridden by config.

        :param config: optional overrides; when it is not None it is handed to
            hparams.override_from_dict(). Seven hyperparameters are registered:
            upper_frequency_limit(float, default=4000.0),
            lower_frequency_limit(float, default=20.0),
            filterbank_channel_count(float, default=40.0),
            window_length(float, default=0.025),
            frame_length(float, default=0.010),
            output_type(int, default=2),
            sample_rate(float, default=16000.0).
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        # (name, default) pairs, registered in this order.
        defaults = (
            ('upper_frequency_limit', 4000.0),
            ('lower_frequency_limit', 20.0),
            ('filterbank_channel_count', 40.0),
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('output_type', 2),
            ('sample_rate', 16000.0),
        )

        hparams = HParams(cls=cls)
        for name, value in defaults:
            hparams.add_hparam(name, value)

        if config is None:
            return hparams

        hparams.override_from_dict(config)
        return hparams
Ejemplo n.º 4
0
  def params(cls, config=None):
    """
    Build the default hyperparameters, optionally overridden by config.

    :param config: optional overrides; when it is not None it is handed to
        hparams.override_from_dict(). Five hyperparameters are registered:
        --window_length    : Window length in seconds. (float, default = 0.025)
        --frame_length     : Hop length in seconds. (float, default = 0.010)
        --snip_edges       : If True, the last frame (shorter than window_length)
                             will be cutoff. If False, 1 // 2 frame_length data will
                             be padded to data. (bool, default = True)
        --remove_dc_offset : Subtract mean from waveform on each frame.
                             (bool, default = True)
        --sample_rate      : Waveform data sample frequency (must match the waveform
                             file, if specified there). (int, default = 16000)
    :return: An object of class HParams, which is a set of hyperparameters as
        name-value pairs.
    """
    # (name, default) pairs, registered in this order.
    defaults = (
        ('window_length', 0.025),
        ('frame_length', 0.010),
        ('snip_edges', True),
        ('remove_dc_offset', True),
        ('sample_rate', 16000),
    )

    hparams = HParams(cls=cls)
    for name, value in defaults:
      hparams.add_hparam(name, value)

    if config is None:
      return hparams

    hparams.override_from_dict(config)
    return hparams
Ejemplo n.º 5
0
  def params(cls, config=None):
    """
    Build the default hyperparameters, optionally overridden by config.

    :param config: optional overrides; when it is not None it is handed to
        hparams.override_from_dict(). Five hyperparameters are registered:
        window_length(float, default=0.025),
        frame_length(float, default=0.010),
        ceps_subband_num(int, default=13),
        tag_ceps_mean_norm(bool, default=True),
        sample_rate(float, default=16000.0).
    :return: An object of class HParams, which is a set of hyperparameters as
        name-value pairs.
    """
    # (name, default) pairs, registered in this order.
    defaults = (
        ('window_length', 0.025),
        ('frame_length', 0.010),
        ('ceps_subband_num', 13),
        ('tag_ceps_mean_norm', True),
        ('sample_rate', 16000.0),
    )

    hparams = HParams(cls=cls)
    for name, value in defaults:
      hparams.add_hparam(name, value)

    if config is None:
      return hparams

    hparams.override_from_dict(config)
    return hparams
Ejemplo n.º 6
0
    def params(cls, config: dict = None):
        """
        Build the default hyperparameters, optionally overridden by config.

        :param config: optional overrides; when it is not None it is handed to
            hp.override_from_dict(). One hyperparameter is registered:
            embedding_size(int, default=512).
        :return: The HParams object holding the hyperparameters.
        """
        hp = HParams(cls=cls)
        hp.add_hparam('embedding_size', 512)

        if config is None:
            return hp

        hp.override_from_dict(config)
        return hp
Ejemplo n.º 7
0
    def params(cls, config=None):
        """
        Build the default hyperparameters, optionally overridden by config.

        :param config: optional overrides; when it is not None it is handed to
            hparams.override_from_dict(). Four hyperparameters are registered:
            --window_length : Window length in seconds. (float, default = 0.025)
            --frame_length  : Hop length in seconds. (float, default = 0.010)
            --plp_order     : Plp order. (int, default = 12)
            --sample_rate   : Waveform data sample frequency (must match the
                              waveform file, if specified there).
                              (int, default = 16000)
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        # (name, default) pairs, registered in this order.
        defaults = (
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('plp_order', 12),
            ('sample_rate', 16000),
        )

        hparams = HParams(cls=cls)
        for name, value in defaults:
            hparams.add_hparam(name, value)

        if config is None:
            return hparams

        hparams.override_from_dict(config)
        return hparams
Ejemplo n.º 8
0
  def params(cls, config=None):
    """
    Build the default hyperparameters, optionally overridden by config.

    :param config: optional overrides; when it is not None it is handed to
        hparams.override_from_dict(). One hyperparameter is registered:
        sample_rate(float, default=16000.0).
    :return: An object of class HParams, which is a set of hyperparameters as
        name-value pairs.
    """
    hparams = HParams(cls=cls)
    hparams.add_hparam('sample_rate', 16000.0)

    if config is None:
      return hparams

    hparams.override_from_dict(config)
    return hparams
Ejemplo n.º 9
0
  def params(cls, config=None):
    """
    Build the default hyperparameters, optionally overridden by config.

    :param config: optional overrides; when it is not None it is handed to
        hparams.override_from_dict(). Two hyperparameters are registered:
        audio_channels(int, default=1), sample_rate(int, default=16000).
    :return: An object of class HParams, which is a set of hyperparameters as
        name-value pairs.
    """
    hparams = HParams(cls=cls)
    hparams.add_hparam('audio_channels', 1)
    hparams.add_hparam('sample_rate', 16000)

    if config is None:
      return hparams

    hparams.override_from_dict(config)
    return hparams
Ejemplo n.º 10
0
    def params(cls, config=None):
        """
        Build the default hyperparameters, optionally overridden by config.

        :param config: optional overrides; when it is not None it is handed to
            hparams.override_from_dict(). Four hyperparameters are registered:
            window_length(float, default=0.025),
            frame_length(float, default=0.010),
            plp_order(int, default=12),
            sample_rate(float, default=16000.0).
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        # (name, default) pairs, registered in this order.
        defaults = (
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('plp_order', 12),
            ('sample_rate', 16000.0),
        )

        hparams = HParams(cls=cls)
        for name, value in defaults:
            hparams.add_hparam(name, value)

        if config is None:
            return hparams

        hparams.override_from_dict(config)
        return hparams
Ejemplo n.º 11
0
    def params(cls, config=None):
        """
        Build the default hyperparameters, optionally overridden by config.

        :param config: optional overrides; when it is not None it is handed to
            hparams.override_from_dict(). Fourteen hyperparameters are
            registered:
            upper_frequency_limit(float, default=8000.0),
            lower_frequency_limit(float, default=20.0),
            filterbank_channel_count(float, default=23.0),
            window_length(float, default=0.025),
            frame_length(float, default=0.010),
            output_type(int, default=1),
            sample_rate(int, default=16000),
            snip_edges(int, default=2),
            raw_energy(int, default=1),
            preeph_coeff(float, default=0.97),
            window_type(string, default='povey'),
            remove_dc_offset(bool, default=True),
            is_fbank(bool, default=True),
            thres_autoc(float, default=0.3).
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        # (name, default) pairs, registered in this order.
        defaults = (
            ('upper_frequency_limit', 8000.0),
            ('lower_frequency_limit', 20.0),
            ('filterbank_channel_count', 23.0),
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('output_type', 1),
            ('sample_rate', 16000),
            ('snip_edges', 2),
            ('raw_energy', 1),
            ('preeph_coeff', 0.97),
            ('window_type', 'povey'),
            ('remove_dc_offset', True),
            ('is_fbank', True),
            ('thres_autoc', 0.3),
        )

        hparams = HParams(cls=cls)
        for name, value in defaults:
            hparams.add_hparam(name, value)

        if config is None:
            return hparams

        hparams.override_from_dict(config)
        return hparams
Ejemplo n.º 12
0
    def params(cls, config=None):
        """
        Build the default hyperparameters, optionally overridden by config.

        :param config: optional overrides; when it is not None it is handed to
            hparams.override_from_dict(). Sixteen hyperparameters are registered:
            --upper_frequency_limit    : High cutoff frequency for mel bins (if < 0,
                                         offset from Nyquist). (float, default = 0.0)
            --lower_frequency_limit    : Low cutoff frequency for mel bins.
                                         (float, default = 20.0)
            --filterbank_channel_count : Number of triangular mel-frequency bins.
                                         (float, default = 23.0)
            --window_length            : Window length in seconds.
                                         (float, default = 0.025)
            --frame_length             : Hop length in seconds.
                                         (float, default = 0.010)
            --output_type              : If 1, return power spectrum. If 2, return
                                         log-power spectrum. (int, default = 1)
            --sample_rate              : Sample frequency of waveform data.
                                         (int, default = 16000)
            --snip_edges               : Selects between cutting off the last frame
                                         (shorter than window_length) and padding
                                         1 // 2 frame_length data to data.
                                         (default = True)
            --raw_energy               : If 1, compute frame energy before
                                         preemphasis and windowing. If 2, compute
                                         frame energy after preemphasis and
                                         windowing. (int, default = 1)
            --preeph_coeff             : Coefficient for use in frame-signal
                                         preemphasis. (float, default = 0.97)
            --window_type              : Type of window
                                         ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
                                         (string, default = "povey")
            --remove_dc_offset         : Subtract mean from waveform on each frame.
                                         (bool, default = True)
            --is_fbank                 : If true, compute power spectrum without
                                         frame energy. If false, using the frame
                                         energy instead of the square of the
                                         constant component of the signal.
                                         (bool, default = True)
            --cepstral_lifter          : Constant that controls scaling of MFCCs.
                                         (float, default = 22.0)
            --coefficient_count        : Number of cepstra in MFCC computation.
                                         (int, default = 13)
            --use_energy               : (bool, default = True)
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        # (name, default) pairs, registered in this order.
        defaults = (
            ('upper_frequency_limit', 0.0),
            ('lower_frequency_limit', 20.0),
            ('filterbank_channel_count', 23.0),
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('output_type', 1),
            ('sample_rate', 16000),
            ('snip_edges', True),
            ('raw_energy', 1),
            ('preeph_coeff', 0.97),
            ('window_type', 'povey'),
            ('remove_dc_offset', True),
            ('is_fbank', True),
            ('cepstral_lifter', 22.0),
            ('coefficient_count', 13),
            ('use_energy', True),
        )

        hparams = HParams(cls=cls)
        for name, value in defaults:
            hparams.add_hparam(name, value)

        if config is None:
            return hparams

        hparams.override_from_dict(config)
        return hparams
Ejemplo n.º 13
0
def speech_params(sr=16000,
                  bins=40,
                  dither=True,
                  use_delta_deltas=True,
                  cmvn=False,
                  cmvn_path=''):
    """
    Build the HParams holding the speech feature settings.

    :param sr: stored as audio_sample_rate; sr / 2.0 is stored as
        audio_upper_edge_hertz.
    :param bins: stored as audio_num_mel_bins.
    :param dither: when truthy, audio_dither is 1.0 / np.iinfo(np.int16).max,
        otherwise 0.0.
    :param use_delta_deltas: stored as audio_add_delta_deltas.
    :param cmvn: stored as audio_global_cmvn.
    :param cmvn_path: stored as audio_cmvn_path.
    :return: The populated HParams object.
    """
    p = HParams()
    dither_amount = 1.0 / np.iinfo(np.int16).max if dither else 0.0
    # (name, value) pairs, registered in this order.
    settings = (
        ("audio_sample_rate", sr),
        ("audio_channels", 1),
        ("audio_preemphasis", 0.97),
        ("audio_dither", dither_amount),
        ("audio_frame_length", 25.0),
        ("audio_frame_step", 10.0),
        ("audio_lower_edge_hertz", 20.0),
        ("audio_upper_edge_hertz", sr / 2.0),
        ("audio_num_mel_bins", bins),
        ("audio_add_delta_deltas", use_delta_deltas),
        ("num_zeropad_frames", 0),
        ("audio_global_cmvn", cmvn),
        ("audio_cmvn_path", cmvn_path),
    )
    for key, value in settings:
        p.add_hparam(key, value)
    return p
Ejemplo n.º 14
0
    def params(cls, config=None):
        """
        Build the default hyperparameters, optionally overridden by config.

        :param config: optional overrides; when it is not None it is handed to
            hparams.override_from_dict(). Ten hyperparameters are registered:
            --window_length    : Window length in seconds. (float, default = 0.025)
            --frame_length     : Hop length in seconds. (float, default = 0.010)
            --output_type      : If 1, return power spectrum. If 2, return
                                 log-power spectrum. (int, default = 2)
            --sample_rate      : Sample frequency of waveform data.
                                 (int, default = 16000)
            --snip_edges       : If 1, the last frame (shorter than window_length)
                                 will be cutoff. If 2, 1 // 2 frame_length data will
                                 be padded to data. (int, default = 2)
            --raw_energy       : If 1, compute frame energy before preemphasis and
                                 windowing. If 2, compute frame energy after
                                 preemphasis and windowing. (int, default = 1)
            --preeph_coeff     : Coefficient for use in frame-signal preemphasis.
                                 (float, default = 0.97)
            --window_type      : Type of window
                                 ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
                                 (string, default = "povey")
            --remove_dc_offset : Subtract mean from waveform on each frame.
                                 (bool, default = True)
            --is_fbank         : If true, compute power spectrum without frame
                                 energy. If false, using the frame energy instead
                                 of the square of the constant component of the
                                 signal. (bool, default = False)
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        # (name, default) pairs, registered in this order.
        defaults = (
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('output_type', 2),
            ('sample_rate', 16000),
            ('snip_edges', 2),
            ('raw_energy', 1),
            ('preeph_coeff', 0.97),
            ('window_type', 'povey'),
            ('remove_dc_offset', True),
            ('is_fbank', False),
        )

        hparams = HParams(cls=cls)
        for name, value in defaults:
            hparams.add_hparam(name, value)

        if config is None:
            return hparams

        hparams.override_from_dict(config)
        return hparams
Ejemplo n.º 15
0
    def params(cls, config=None):
        """
        Build the default hyperparameters, optionally overridden by config.

        :param config: optional overrides; when it is not None it is handed to
            hparams.override_from_dict(). Fourteen hyperparameters are
            registered:
            --window_length            : Window length in seconds.
                                         (float, default = 0.025)
            --snip_edges               : If True, the last frame (shorter than
                                         window_length) will be cutoff. If False,
                                         1 // 2 frame_length data will be padded
                                         to data. (bool, default = True)
            --raw_energy               : If 1, compute frame energy before
                                         preemphasis and windowing. If 2, compute
                                         frame energy after preemphasis and
                                         windowing. (int, default = 1)
            --frame_length             : Hop length in seconds.
                                         (float, default = 0.010)
            --output_type              : If 1, return power spectrum. If 2, return
                                         log-power spectrum. If 3, return magnitude
                                         spectrum. (int, default = 3)
            --is_fbank                 : If true, compute power spectrum without
                                         frame energy. If false, using the frame
                                         energy instead of the square of the
                                         constant component of the signal.
                                         (bool, default = True)
            --preeph_coeff             : Coefficient for use in frame-signal
                                         preemphasis. (float, default = 0.0)
            --window_type              : Type of window
                                         ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
                                         (string, default = "hann")
            --dither                   : Dithering constant (0.0 means no dither).
                                         (float, default = 0.0)
            --remove_dc_offset         : Subtract mean from waveform on each frame.
                                         (bool, default = False)
            --upper_frequency_limit    : High cutoff frequency for mel bins
                                         (if <= 0, offset from Nyquist).
                                         (int, default = 0)
            --lower_frequency_limit    : Low cutoff frequency for mel bins.
                                         (int, default = 60)
            --filterbank_channel_count : Number of triangular mel-frequency bins.
                                         (int, default = 40)
            --sample_rate              : (int, default = 16000)
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        hparams = HParams(cls=cls)

        # (name, default) pairs, registered in this order.
        defaults = (
            ('window_length', 0.025),
            ('snip_edges', True),
            ('raw_energy', 1),
            ('frame_length', 0.010),
            ('output_type', 3),
            ('is_fbank', True),
            ('preeph_coeff', 0.0),
            ('window_type', 'hann'),
            ('dither', 0.0),
            ('remove_dc_offset', False),
            ('upper_frequency_limit', 0),
            ('lower_frequency_limit', 60),
            ('filterbank_channel_count', 40),
            ('sample_rate', 16000),
        )
        for name, value in defaults:
            hparams.add_hparam(name, value)

        # Apply caller overrides. Previously config was accepted but silently
        # ignored, unlike every sibling params() implementation.
        if config is not None:
            hparams.override_from_dict(config)

        return hparams
Ejemplo n.º 16
0
    def params(cls, config=None):
        """
        Build the default hyperparameters, optionally overridden by config.

        :param config: optional overrides; when it is not None it is handed to
            hparams.override_from_dict(). Forty-four hyperparameters are
            registered:
            --sample_rate                : Samplerate of the signal we working with.
                                           (int, default = 16000)
            --snip_edges                 : If true, the last frame (shorter than
                                           window_length) will be cutoff. If false,
                                           1 // 2 frame_length data will be padded
                                           to data. (bool, default = True)
            --preemph_coeff              : Coefficient for use in signal preemphasis
                                           (deprecated). (float, default = 0.0)
            --dither                     : Dithering constant (0.0 means no dither).
                                           (float, default = 0.0)
            --min_f0                     : min. F0 to search for (Hz).
                                           (float, default = 50.0)
            --max_f0                     : max. F0 to search for (Hz).
                                           (float, default = 400.0)
            --soft_min_f0                : Minimum f0, applied in soft way, must not
                                           exceed min_f0. (float, default = 10.0)
            --penalty_factor             : cost factor for F0 change.
                                           (float, default = 0.1)
            --lowpass_cutoff             : cutoff frequency for LowPass filter (Hz).
                                           (float, default = 1000.0)
            --resample_freq              : Frequency that we down-sample the signal
                                           to. Must be more than twice
                                           lowpass_cutoff. (float, default = 4000.0)
            --delta_pitch                : Smallest relative change in pitch that
                                           our algorithm measures.
                                           (float, default = 0.005)
            --nccf_ballast               : Increasing this factor reduces NCCF for
                                           quiet frames. (float, default = 7000.0)
            --lowpass_filter_width       : Integer that determines filter width of
                                           lowpass filter, more gives sharper
                                           filter. (int, default = 1)
            --upsample_filter_width      : Integer that determines filter width when
                                           upsampling NCCF. (int, default = 5)
            --max_frames_latency         : Maximum number of frames of latency that
                                           we allow pitch tracking to introduce into
                                           the feature processing (affects output
                                           only if frames_per_chunk > 0 and
                                           simulate_first_pass_online is true).
                                           (int, default = 0)
            --frames_per_chunk           : Only relevant for offline pitch
                                           extraction (e.g.
                                           compute-kaldi-pitch-feats), you can set
                                           it to a small nonzero value, such as 10,
                                           for better feature compatibility with
                                           online decoding (affects energy
                                           normalization in the algorithm).
                                           (int, default = 0)
            --simulate_first_pass_online : If true, compute-kaldi-pitch-feats will
                                           output features that correspond to what
                                           an online decoder would see in the first
                                           pass of decoding -- not the final version
                                           of the features, which is the default.
                                           Relevant if frames_per_chunk > 0.
                                           (bool, default = False)
            --recompute_frame            : Only relevant for online pitch
                                           extraction, or for compatibility with
                                           online pitch extraction. A non-critical
                                           parameter; the frame at which we
                                           recompute some of the forward pointers,
                                           after revising our estimate of the signal
                                           energy. Relevant if frames_per_chunk > 0.
                                           (int, default = 500)
            --nccf_ballast_online        : This is useful mainly for debug; it
                                           affects how the NCCF ballast is computed.
                                           (bool, default = False)
            --upper_frequency_limit      : High cutoff frequency for mel bins
                                           (if <= 0, offset from Nyquist).
                                           (int, default = 0)
            --lower_frequency_limit      : Low cutoff frequency for mel bins.
                                           (float, default = 20.0)
            --filterbank_channel_count   : Number of triangular mel-frequency bins.
                                           (float, default = 23.0)
            --window_length              : Window length in seconds.
                                           (float, default = 0.025)
            --frame_length               : Hop length in seconds.
                                           (float, default = 0.010)
            --output_type                : If 1, return power spectrum. If 2, return
                                           log-power spectrum. (int, default = 1)
            --raw_energy                 : If 1, compute frame energy before
                                           preemphasis and windowing. If 2, compute
                                           frame energy after preemphasis and
                                           windowing. (int, default = 1)
            --preeph_coeff               : Coefficient for use in frame-signal
                                           preemphasis. (float, default = 0.97)
            --window_type                : Type of window
                                           ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
                                           (string, default = "povey")
            --remove_dc_offset           : Subtract mean from waveform on each
                                           frame. (bool, default = True)
            --is_fbank                   : If true, compute power spectrum without
                                           frame energy. If false, using the frame
                                           energy instead of the square of the
                                           constant component of the signal.
                                           (bool, default = True)
            --is_log10                   : (bool, default = False)
            --pitch_scale                : (float, default = 2.0)
            --pov_offset                 : (float, default = 0.0)
            --pov_scale                  : (float, default = 2.0)
            --delta_pitch_scale          : (float, default = 10.0)
            --delta_pitch_noise_stddev   : (float, default = 0.005)
            --normalization_left_context : (int, default = 75)
            --normalization_right_context: (int, default = 75)
            --delta_window               : (int, default = 2)
            --delay                      : (int, default = 0)
            --add_pov_feature            : (bool, default = True)
            --add_normalized_log_pitch   : (bool, default = True)
            --add_delta_pitch            : (bool, default = True)
            --add_raw_log_pitch          : (bool, default = False)
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        hparams = HParams(cls=cls)

        # (name, default) pairs, registered in this order. Note that both
        # 'preemph_coeff' and 'preeph_coeff' are distinct keys.
        defaults = (
            ('sample_rate', 16000),
            ('snip_edges', True),
            ('preemph_coeff', 0.0),
            ('dither', 0.0),
            ('min_f0', 50.0),
            ('max_f0', 400.0),
            ('soft_min_f0', 10.0),
            ('penalty_factor', 0.1),
            ('lowpass_cutoff', 1000.0),
            ('resample_freq', 4000.0),
            ('delta_pitch', 0.005),
            ('nccf_ballast', 7000.0),
            ('lowpass_filter_width', 1),
            ('upsample_filter_width', 5),
            ('max_frames_latency', 0),
            ('frames_per_chunk', 0),
            ('simulate_first_pass_online', False),
            ('recompute_frame', 500),
            ('nccf_ballast_online', False),
            ('upper_frequency_limit', 0),
            ('lower_frequency_limit', 20.0),
            ('filterbank_channel_count', 23.0),
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('output_type', 1),
            ('raw_energy', 1),
            ('preeph_coeff', 0.97),
            ('window_type', 'povey'),
            ('remove_dc_offset', True),
            ('is_fbank', True),
            ('is_log10', False),
            ('pitch_scale', 2.0),
            ('pov_offset', 0.0),
            ('pov_scale', 2.0),
            ('delta_pitch_scale', 10.0),
            ('delta_pitch_noise_stddev', 0.005),
            ('normalization_left_context', 75),
            ('normalization_right_context', 75),
            ('delta_window', 2),
            ('delay', 0),
            ('add_pov_feature', True),
            ('add_normalized_log_pitch', True),
            ('add_delta_pitch', True),
            ('add_raw_log_pitch', False),
        )
        for name, value in defaults:
            hparams.add_hparam(name, value)

        if config is None:
            return hparams

        hparams.override_from_dict(config)
        return hparams
Ejemplo n.º 17
0
  def params(cls, config=None):
    """
    Build the default hyperparameters and apply optional overrides.

    :param config: optional overrides handed to ``hparams.override_from_dict``;
        when None every default below is kept. Nine keys are registered:
            --sample_rate      : Sample frequency of waveform data. (int, default = 16000)
            --if_add_rir       : If true, add rir to audio data. (bool, default = False)
            --rir_filelist     : FileList path of rir. (string, default = 'rirlist.scp')
            --if_add_noise     : If true, add random noise to audio data. (bool, default = False)
            --snr_min          : Minimum SNR adds to signal. (int, default = 0)
            --snr_max          : Maximum SNR adds to signal. (int, default = 30)
            --noise_filelist   : FileList path of noise. (string, default = 'noiselist.scp')
            --if_add_aecres    : If true, add aecres to audio data. (bool, default = False)
            --aecres_filelist  : FileList path of aecres. (string, default = 'aecreslist.scp')
    :return: An object of class HParams, which is a set of hyperparameters as name-value pairs.
    """
    # (name, default) pairs; they are registered in exactly this order.
    defaults = (
        ('sample_rate', 16000),
        ('if_add_rir', False),
        ('if_add_noise', False),
        ('rir_filelist', 'rirlist.scp'),
        ('noise_filelist', 'noiselist.scp'),
        ('snr_min', 0),
        ('snr_max', 30),
        ('if_add_aecres', False),
        ('aecres_filelist', 'aecreslist.scp'),
    )

    hparams = HParams(cls=cls)
    for key, value in defaults:
      hparams.add_hparam(key, value)

    # Caller-supplied values win over the defaults registered above.
    if config is not None:
      hparams.override_from_dict(config)
    return hparams
Ejemplo n.º 18
0
    def params(cls, config=None):
        """
        Build the default pitch hyperparameters and apply optional overrides.

        The names below are the keys actually registered on the HParams
        object (underscore style); these are the names ``config`` must use.

        :param config: optional overrides handed to
            ``hparams.override_from_dict``; when None every default is kept.

          Pitch extraction keys:
            window_length       : Frame length in seconds. (float, default = 0.025)
            frame_length        : Frame shift in seconds. (float, default = 0.010)
            sample_rate         : Waveform data sample frequency (must match the
                                  waveform file, if specified there).
                                  (int, default = 16000)
            snip_edges          : If false, the incomplete frames near the ending
                                  edge won't be snipped, so that the number of
                                  frames is the file size divided by the
                                  frame shift. (bool, default = True)
            preemph_coeff       : Coefficient for use in signal preemphasis
                                  (deprecated). (float, default = 0.0)
            min_f0              : min. F0 to search for (Hz). (float, default = 50.0)
            max_f0              : max. F0 to search for (Hz). (float, default = 400.0)
            soft_min_f0         : Minimum f0, applied in soft way, must not
                                  exceed min_f0. (float, default = 10.0)
            penalty_factor      : Cost factor for F0 change. (float, default = 0.1)
            lowpass_cutoff      : Cutoff frequency for LowPass filter (Hz).
                                  (float, default = 1000.0)
            resample_freq       : Frequency that we down-sample the signal to.
                                  Must be more than twice lowpass_cutoff.
                                  (float, default = 4000.0)
            delta_pitch         : Smallest relative change in pitch that the
                                  algorithm measures. (float, default = 0.005)
            nccf_ballast        : Increasing this factor reduces NCCF for quiet
                                  frames. (float, default = 7000.0)
            lowpass_filter_width  : Integer that determines filter width of
                                  lowpass filter, more gives sharper filter.
                                  (int, default = 1)
            upsample_filter_width : Integer that determines filter width when
                                  upsampling NCCF. (int, default = 5)
            max_frames_latency  : Maximum number of frames of latency that pitch
                                  tracking may introduce into the feature
                                  processing (affects output only if
                                  frames_per_chunk > 0 and
                                  simulate_first_pass_online is true).
                                  (int, default = 0)
            frames_per_chunk    : Only relevant for offline pitch extraction; can
                                  be set to a small nonzero value, such as 10,
                                  for better feature compatibility with online
                                  decoding. (int, default = 0)
            simulate_first_pass_online : If true, output features that correspond
                                  to what an online decoder would see in the
                                  first pass of decoding. Relevant if
                                  frames_per_chunk > 0. (bool, default = False)
            recompute_frame     : Frame at which some of the forward pointers are
                                  recomputed after revising the estimate of the
                                  signal energy. Relevant if
                                  frames_per_chunk > 0. (int, default = 500)
            nccf_ballast_online : Mainly useful for debug; affects how the NCCF
                                  ballast is computed. (bool, default = False)

          Pitch post-processing keys:
            pitch_scale         : Scaling factor for the final normalized
                                  log-pitch value. (float, default = 2.0)
            pov_offset          : Offset added to the POV feature.
                                  (float, default = 0.0)
            pov_scale           : Scaling factor for final POV (probability of
                                  voicing) feature. (float, default = 2.0)
            delta_pitch_scale   : Term to scale the final delta log-pitch
                                  feature. (float, default = 10.0)
            delta_pitch_noise_stddev : Standard deviation for noise added to the
                                  delta log-pitch (before scaling).
                                  (float, default = 0.005)
            normalization_left_context  : Left-context (in frames) for moving
                                  window normalization. (int, default = 75)
            normalization_right_context : Right-context (in frames) for moving
                                  window normalization. (int, default = 75)
            delta_window        : Number of frames on each side of central
                                  frame, to use for delta window.
                                  (int, default = 2)
            delay               : Number of frames by which the pitch
                                  information is delayed. (int, default = 0)
            add_pov_feature     : If true, the warped NCCF is added to output
                                  features. (bool, default = True)
            add_normalized_log_pitch : Not described in the original notes;
                                  presumably adds the normalized log-pitch to
                                  the output features - TODO confirm.
                                  (bool, default = True)
            add_delta_pitch     : If true, time derivative of log-pitch is added
                                  to output features. (bool, default = True)
            add_raw_log_pitch   : If true, log(pitch) is added to output
                                  features. (bool, default = False)
        :return: An object of class HParams, which is a set of hyperparameters
            as name-value pairs.
        """
        # (name, default) pairs; registration order is preserved exactly.
        defaults = (
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('sample_rate', 16000),
            ('snip_edges', True),
            ('preemph_coeff', 0.0),
            ('min_f0', 50.0),
            ('max_f0', 400.0),
            ('soft_min_f0', 10.0),
            ('penalty_factor', 0.1),
            ('lowpass_cutoff', 1000.0),
            ('resample_freq', 4000.0),
            ('delta_pitch', 0.005),
            ('nccf_ballast', 7000.0),
            ('lowpass_filter_width', 1),
            ('upsample_filter_width', 5),
            ('max_frames_latency', 0),
            ('frames_per_chunk', 0),
            ('simulate_first_pass_online', False),
            ('recompute_frame', 500),
            ('nccf_ballast_online', False),
            ('pitch_scale', 2.0),
            ('pov_offset', 0.0),
            ('pov_scale', 2.0),
            ('delta_pitch_scale', 10.0),
            ('delta_pitch_noise_stddev', 0.005),
            ('normalization_left_context', 75),
            ('normalization_right_context', 75),
            ('delta_window', 2),
            ('delay', 0),
            ('add_pov_feature', True),
            ('add_normalized_log_pitch', True),
            ('add_delta_pitch', True),
            ('add_raw_log_pitch', False),
        )

        hparams = HParams(cls=cls)
        for key, value in defaults:
            hparams.add_hparam(key, value)

        # Caller-supplied values win over the defaults registered above.
        if config is not None:
            hparams.override_from_dict(config)
        return hparams
Ejemplo n.º 19
0
    def params(cls, config=None):
        """
        Build the default hyperparameters and apply optional overrides.

        :param config: optional overrides handed to
            ``hparams.override_from_dict``; when None every default is kept.
            Seven keys are registered:
                --norm_means   : Flag of norm_means. (bool, default=True)
                --norm_vars    : Flag of norm_vars. (bool, default=False)
                --utt2spk      : Use for speaker CMVN. (string, default=None)
                --spk2utt      : Rspecifier for speaker to utterance-list map.
                                 (string, default=None)
                --reverse      : Flag of reverse. (bool, default=False)
                --std_floor    : Floor to std. (float, default=1.0e-20)
                --filetype     : Type of input file. (string, default='mat')
        :return: An object of class HParams holding the name-value pairs above.
        """
        # (name, default) pairs; they are registered in exactly this order.
        defaults = (
            ('norm_means', True),
            ('norm_vars', False),
            ('utt2spk', None),
            ('spk2utt', None),
            ('reverse', False),
            ('std_floor', 1.0e-20),
            ('filetype', 'mat'),
        )

        hparams = HParams(cls=cls)
        for key, value in defaults:
            hparams.add_hparam(key, value)

        # Caller-supplied values win over the defaults registered above.
        if config is not None:
            hparams.override_from_dict(config)
        return hparams
Ejemplo n.º 20
0
    def params(cls, config=None):
        """
        Build the default hyperparameters and apply optional overrides.

        Registered keys and their defaults: norm_means=True, norm_vars=False,
        utt2spk=None, spk2utt=None, reverse=False, std_floor=1.0e-20,
        filetype='mat'.

        :param config: optional overrides handed to
            ``hparams.override_from_dict``; when None every default is kept.
        :return: An object of class HParams holding the name-value pairs above.
        """
        hparams = HParams(cls=cls)

        # Register each default in a fixed order before any override is applied.
        for key, value in (
                ('norm_means', True),
                ('norm_vars', False),
                ('utt2spk', None),
                ('spk2utt', None),
                ('reverse', False),
                ('std_floor', 1.0e-20),
                ('filetype', 'mat'),
        ):
            hparams.add_hparam(key, value)

        if config is None:
            return hparams

        hparams.override_from_dict(config)
        return hparams
Ejemplo n.º 21
0
  def params(cls, config=None):
    """
    Build the default pitch hyperparameters and apply optional overrides.

    The names below are the keys actually registered on the HParams object
    (underscore style); these are the names ``config`` must use.

    :param config: optional overrides handed to ``hparams.override_from_dict``;
        when None every default is kept. Twenty keys are registered:
      window_length         : Registered as 0.025 (the original notes give the
                              frame length as 25 milliseconds, so presumably
                              seconds - TODO confirm). (float)
      frame_length          : Registered as 0.010 (the original notes give the
                              frame shift as 10 milliseconds, so presumably
                              seconds - TODO confirm). (float)
      sample_rate           : Waveform data sample frequency (must match the
                              waveform file, if specified there).
                              (int, default = 16000)
      snip_edges            : If false, the incomplete frames near the ending
                              edge won't be snipped, so that the number of frames
                              is the file size divided by the frame shift.
                              (bool, default = True)
      preemph_coeff         : Coefficient for use in signal preemphasis
                              (deprecated). (float, default = 0.0)
      min_f0                : min. F0 to search for (Hz). (float, default = 50.0)
      max_f0                : max. F0 to search for (Hz). (float, default = 400.0)
      soft_min_f0           : Minimum f0, applied in soft way, must not exceed
                              min_f0. (float, default = 10.0)
      penalty_factor        : Cost factor for F0 change. (float, default = 0.1)
      lowpass_cutoff        : Cutoff frequency for LowPass filter (Hz).
                              (float, default = 1000.0)
      resample_freq         : Frequency that we down-sample the signal to. Must
                              be more than twice lowpass_cutoff.
                              (float, default = 4000.0)
      delta_pitch           : Smallest relative change in pitch that the
                              algorithm measures. (float, default = 0.005)
      nccf_ballast          : Increasing this factor reduces NCCF for quiet
                              frames. (float, default = 7000.0)
      lowpass_filter_width  : Integer that determines filter width of lowpass
                              filter, more gives sharper filter.
                              (int, default = 1)
      upsample_filter_width : Integer that determines filter width when
                              upsampling NCCF. (int, default = 5)
      max_frames_latency    : Maximum number of frames of latency that pitch
                              tracking may introduce into the feature processing
                              (affects output only if frames_per_chunk > 0 and
                              simulate_first_pass_online is true).
                              (int, default = 0)
      frames_per_chunk      : Only relevant for offline pitch extraction; can be
                              set to a small nonzero value, such as 10, for
                              better feature compatibility with online decoding.
                              (int, default = 0)
      simulate_first_pass_online : If true, output features that correspond to
                              what an online decoder would see in the first pass
                              of decoding. Relevant if frames_per_chunk > 0.
                              (bool, default = False)
      recompute_frame       : Frame at which some of the forward pointers are
                              recomputed after revising the estimate of the
                              signal energy. Relevant if frames_per_chunk > 0.
                              (int, default = 500)
      nccf_ballast_online   : Mainly useful for debug; affects how the NCCF
                              ballast is computed. (bool, default = False)
    :return: An object of class HParams, which is a set of hyperparameters as
        name-value pairs.
    """
    # (name, default) pairs; registration order is preserved exactly.
    defaults = (
        ('window_length', 0.025),
        ('frame_length', 0.010),
        ('sample_rate', 16000),
        ('snip_edges', True),
        ('preemph_coeff', 0.0),
        ('min_f0', 50.0),
        ('max_f0', 400.0),
        ('soft_min_f0', 10.0),
        ('penalty_factor', 0.1),
        ('lowpass_cutoff', 1000.0),
        ('resample_freq', 4000.0),
        ('delta_pitch', 0.005),
        ('nccf_ballast', 7000.0),
        ('lowpass_filter_width', 1),
        ('upsample_filter_width', 5),
        ('max_frames_latency', 0),
        ('frames_per_chunk', 0),
        ('simulate_first_pass_online', False),
        ('recompute_frame', 500),
        ('nccf_ballast_online', False),
    )

    hparams = HParams(cls=cls)
    for key, value in defaults:
      hparams.add_hparam(key, value)

    # Caller-supplied values win over the defaults registered above.
    if config is not None:
      hparams.override_from_dict(config)
    return hparams
Ejemplo n.º 22
0
    def params(cls, config=None):
        """
        Build the default hyperparameters and apply optional overrides.

        Twenty-nine keys are registered, with these defaults:

          sample_rate=16000, snip_edges=True, preemph_coeff=0.0, min_f0=50.0,
          max_f0=400.0, soft_min_f0=10.0, penalty_factor=0.1,
          lowpass_cutoff=1000.0, resample_freq=4000.0, delta_pitch=0.005,
          nccf_ballast=7000.0, lowpass_filter_width=1, upsample_filter_width=5,
          max_frames_latency=0, frames_per_chunk=0,
          simulate_first_pass_online=False, recompute_frame=500,
          nccf_ballast_online=False, upper_frequency_limit=0,
          lower_frequency_limit=20.0, filterbank_channel_count=23.0,
          window_length=0.025, frame_length=0.010, output_type=1,
          raw_energy=1, preeph_coeff=0.97, window_type='povey',
          remove_dc_offset=True, is_fbank=True.

        NOTE: ``preemph_coeff`` (0.0) and ``preeph_coeff`` (0.97, spelled
        without the "m") are two distinct keys; both spellings are registered
        as-is, so overrides must use the exact spelling of the key they target.

        :param config: optional overrides handed to
            ``hparams.override_from_dict``; when None every default is kept.
        :return: An object of class HParams, which is a set of hyperparameters
            as name-value pairs.
        """
        # (name, default) pairs; registration order is preserved exactly.
        defaults = (
            ('sample_rate', 16000),
            ('snip_edges', True),
            ('preemph_coeff', 0.0),
            ('min_f0', 50.0),
            ('max_f0', 400.0),
            ('soft_min_f0', 10.0),
            ('penalty_factor', 0.1),
            ('lowpass_cutoff', 1000.0),
            ('resample_freq', 4000.0),
            ('delta_pitch', 0.005),
            ('nccf_ballast', 7000.0),
            ('lowpass_filter_width', 1),
            ('upsample_filter_width', 5),
            ('max_frames_latency', 0),
            ('frames_per_chunk', 0),
            ('simulate_first_pass_online', False),
            ('recompute_frame', 500),
            ('nccf_ballast_online', False),
            ('upper_frequency_limit', 0),
            ('lower_frequency_limit', 20.0),
            ('filterbank_channel_count', 23.0),
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('output_type', 1),
            ('raw_energy', 1),
            ('preeph_coeff', 0.97),
            ('window_type', 'povey'),
            ('remove_dc_offset', True),
            ('is_fbank', True),
        )

        hparams = HParams(cls=cls)
        for key, value in defaults:
            hparams.add_hparam(key, value)

        # Caller-supplied values win over the defaults registered above.
        if config is not None:
            hparams.override_from_dict(config)
        return hparams