Python HParams Examples

Programming Language: Python

Namespace/Package Name: transform.utils.hparam

Class/Type: HParams

Examples at hotexamples.com: 9

Python HParams - 9 examples found. These are the top-rated real-world Python examples of transform.utils.hparam.HParams extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

HParams(9)

add_hparam(9)

parse(8)

append(4)

channel(2)

type(2)

global_variance(1)

override_from_dict(1)

Example #1

Show file

    def params(cls, config=None):
        """
        Build the hyperparameter set for this op.

        :param config: optional overrides, handed to ``HParams.parse`` when it is
                not None. Besides 'type' (set to 'ReadWav'), one hyperparameter
                is registered: audio_channels (int, default = 1).
        :return: An object of class HParams holding the name-value pairs.
        """
        defaults = (
            ('type', 'ReadWav'),
            ('audio_channels', 1),
        )

        result = HParams(cls=cls)
        for key, value in defaults:
            result.add_hparam(key, value)

        # Without a config the defaults are returned untouched.
        if config is None:
            return result

        result.parse(config, True)
        return result

Example #2

Show file

    def params(cls, config=None):
        """
        Build the hyperparameter set for this op.

        :param config: optional overrides; when it is not None it is passed to
                ``HParams.override_from_dict``. The only hyperparameter
                registered here is sample_rate (int, default = 16000).
        :return: An object of class HParams holding the name-value pairs.
        """
        result = HParams(cls=cls)
        result.add_hparam('sample_rate', 16000)

        # Without a config the default is returned untouched.
        if config is None:
            return result

        result.override_from_dict(config)
        return result

Example #3

Show file

File: fbank.py Project: wgfi110/athena-transform

    def params(cls, config=None):
        """
        Build the hyperparameter set for the Fbank op.

        The set starts from ``Spectrum.params({'output_type': 1, 'is_fbank': True})``
        and is then extended with the filterbank and delta entries listed below.

        :param config: optional overrides, passed to ``HParams.parse`` when not None.
            Entries described for the Spectrum part (NOTE(review): their defaults
            are decided by Spectrum.params, confirm there):
                window_length     : Window length in seconds.
                frame_length      : Hop length in seconds.
                snip_edges        : If True, the last frame (shorter than
                                    window_length) will be cut off. If False,
                                    1 // 2 frame_length data will be padded to data.
                raw_energy        : If 1, compute frame energy before preemphasis
                                    and windowing. If 2, compute frame energy after
                                    preemphasis and windowing.
                preEph_coeff      : Coefficient for use in frame-signal preemphasis.
                window_type       : Type of window
                                    ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
                remove_dc_offset  : Subtract mean from waveform on each frame.
                is_fbank          : If true, compute power spectrum without frame
                                    energy. If false, use the frame energy instead
                                    of the square of the constant component of the
                                    signal. Requested as True here.
                output_type       : If 1, return power spectrum. If 2, return
                                    log-power spectrum. Requested as 1 here.
                dither            : Dithering constant (0.0 means no dither).
            Entries registered by this method:
                upper_frequency_limit    : High cutoff frequency for mel bins
                                           (if <= 0, offset from Nyquist).
                                           (default = 0)
                lower_frequency_limit    : Low cutoff frequency for mel bins.
                                           (default = 60)
                filterbank_channel_count : Number of triangular mel-frequency bins.
                                           (default = 40)
                delta_delta              : (bool, default = False)
                order                    : (int, default = 2)
                window                   : (int, default = 2)
        :return: An object of class HParams. After parsing, its 'type' is set to
            'Fbank' and a 'channel' entry is added: 1, or order + 1 when
            delta_delta is truthy.
        """
        result = HParams(cls=cls)

        # spectrum
        spectrum_overrides = {'output_type': 1, 'is_fbank': True}
        result.append(Spectrum.params(spectrum_overrides))

        own_defaults = (
            # fbank
            ('upper_frequency_limit', 0),
            ('lower_frequency_limit', 60),
            ('filterbank_channel_count', 40),
            # delta
            ('delta_delta', False),
            ('order', 2),
            ('window', 2),
        )
        for key, value in own_defaults:
            result.add_hparam(key, value)

        if config is not None:
            result.parse(config, True)

        result.type = 'Fbank'

        # 'channel' depends on the (possibly overridden) delta settings, so it
        # is registered only once the config has been parsed.
        result.add_hparam('channel', 1)
        if not result.delta_delta:
            return result

        result.channel = result.order + 1
        return result

Example #4

Show file

    def params(cls, config=None):
        """
        Build the hyperparameter set for this op.

        :param config: optional overrides, passed to ``HParams.parse`` when not None.
            Four hyperparameters are registered:
              window_length     : Window length in seconds. (float, default = 0.025)
              frame_length      : Hop length in seconds. (float, default = 0.010)
              snip_edges        : If True, the last frame (shorter than
                                  window_length) will be cut off. If False,
                                  1 // 2 frame_length data will be padded to data.
                                  (bool, default = True)
              remove_dc_offset  : Subtract mean from waveform on each frame.
                                  (bool, default = True)
        :return: An object of class HParams holding the name-value pairs.
        """
        defaults = (
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('snip_edges', True),
            ('remove_dc_offset', True),
        )

        result = HParams(cls=cls)
        for key, value in defaults:
            result.add_hparam(key, value)

        # Without a config the defaults are returned untouched.
        if config is None:
            return result

        result.parse(config, True)
        return result

Example #5

Show file

    def params(cls, config=None):
        """
        Build the pitch-related hyperparameter set.

        Names below are the ones actually registered on the HParams object
        (underscored), with the defaults used by this method.

        :param config: optional overrides, passed to ``HParams.parse`` when not None.
              window_length               : Frame length in seconds. (float, default = 0.025)
              frame_length                : Frame shift in seconds. (float, default = 0.010)
              snip_edges                  : If this is set to false, the incomplete frames
                                            near the ending edge won't be snipped, so that
                                            the number of frames is the file size divided by
                                            the frame shift. This makes different types of
                                            features give the same number of frames.
                                            (bool, default = True)
              preemph_coeff               : Coefficient for use in signal preemphasis
                                            (deprecated). (float, default = 0.0)
              min_f0                      : Min. F0 to search for (Hz). (float, default = 50.0)
              max_f0                      : Max. F0 to search for (Hz). (float, default = 400.0)
              soft_min_f0                 : Minimum f0, applied in a soft way, must not
                                            exceed min_f0. (float, default = 10.0)
              penalty_factor              : Cost factor for F0 change. (float, default = 0.1)
              lowpass_cutoff              : Cutoff frequency for the low-pass filter (Hz).
                                            (float, default = 1000.0)
              resample_freq               : Frequency that the signal is down-sampled to.
                                            Must be more than twice lowpass_cutoff.
                                            (float, default = 4000.0)
              delta_pitch                 : Smallest relative change in pitch that the
                                            algorithm measures. (float, default = 0.005)
              nccf_ballast                : Increasing this factor reduces NCCF for quiet
                                            frames. (float, default = 7000.0)
              lowpass_filter_width        : Integer that determines the filter width of the
                                            low-pass filter; more gives a sharper filter.
                                            (int, default = 1)
              upsample_filter_width       : Integer that determines the filter width when
                                            upsampling NCCF. (int, default = 5)
              max_frames_latency          : Maximum number of frames of latency that pitch
                                            tracking is allowed to introduce into the
                                            feature processing (affects output only if
                                            frames_per_chunk > 0 and
                                            simulate_first_pass_online is true).
                                            (int, default = 0)
              frames_per_chunk            : Only relevant for offline pitch extraction
                                            (e.g. compute-kaldi-pitch-feats); it can be set
                                            to a small nonzero value, such as 10, for better
                                            feature compatibility with online decoding
                                            (affects energy normalization in the algorithm).
                                            (int, default = 0)
              simulate_first_pass_online  : If true, compute-kaldi-pitch-feats will output
                                            features that correspond to what an online
                                            decoder would see in the first pass of decoding,
                                            not the final version of the features, which is
                                            the default. Relevant if frames_per_chunk > 0.
                                            (bool, default = False)
              recompute_frame             : Only relevant for online pitch extraction, or
                                            for compatibility with online pitch extraction.
                                            A non-critical parameter; the frame at which
                                            some of the forward pointers are recomputed,
                                            after revising the estimate of the signal
                                            energy. Relevant if frames_per_chunk > 0.
                                            (int, default = 500)
              nccf_ballast_online         : Useful mainly for debug; it affects how the NCCF
                                            ballast is computed. (bool, default = False)

              pitch_scale                 : Scaling factor for the final normalized
                                            log-pitch value. (float, default = 2.0)
              pov_offset                  : Can be used to add an offset to the POV feature.
                                            Intended for use in online decoding as a
                                            substitute for CMN. (float, default = 0.0)
              pov_scale                   : Scaling factor for the final POV (probability of
                                            voicing) feature. (float, default = 2.0)
              delta_pitch_scale           : Term to scale the final delta log-pitch feature.
                                            (float, default = 10.0)
              delta_pitch_noise_stddev    : Standard deviation for noise added to the delta
                                            log-pitch (before scaling); should be about the
                                            same as the delta_pitch option to pitch
                                            creation. The purpose is to get rid of peaks in
                                            the delta-pitch caused by discretization of
                                            pitch values. (float, default = 0.005)
              normalization_left_context  : Left-context (in frames) for moving window
                                            normalization. (int, default = 75)
              normalization_right_context : Right-context (in frames) for moving window
                                            normalization. (int, default = 75)
              delta_window                : Number of frames on each side of the central
                                            frame, to use for the delta window.
                                            (int, default = 2)
              delay                       : Number of frames by which the pitch information
                                            is delayed. (int, default = 0)
              add_pov_feature             : If true, the warped NCCF is added to output
                                            features. (bool, default = True)
              add_normalized_log_pitch    : (bool, default = True)
              add_delta_pitch             : If true, time derivative of log-pitch is added
                                            to output features. (bool, default = True)
              add_raw_log_pitch           : If true, log(pitch) is added to output features.
                                            (bool, default = False)
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        # (name, default) pairs, kept in the order they are registered.
        defaults = (
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('snip_edges', True),
            ('preemph_coeff', 0.0),
            ('min_f0', 50.0),
            ('max_f0', 400.0),
            ('soft_min_f0', 10.0),
            ('penalty_factor', 0.1),
            ('lowpass_cutoff', 1000.0),
            ('resample_freq', 4000.0),
            ('delta_pitch', 0.005),
            ('nccf_ballast', 7000.0),
            ('lowpass_filter_width', 1),
            ('upsample_filter_width', 5),
            ('max_frames_latency', 0),
            ('frames_per_chunk', 0),
            ('simulate_first_pass_online', False),
            ('recompute_frame', 500),
            ('nccf_ballast_online', False),

            ('pitch_scale', 2.0),
            ('pov_offset', 0.0),
            ('pov_scale', 2.0),
            ('delta_pitch_scale', 10.0),
            ('delta_pitch_noise_stddev', 0.005),
            ('normalization_left_context', 75),
            ('normalization_right_context', 75),
            ('delta_window', 2),
            ('delay', 0),
            ('add_pov_feature', True),
            ('add_normalized_log_pitch', True),
            ('add_delta_pitch', True),
            ('add_raw_log_pitch', False),
        )

        result = HParams(cls=cls)
        for key, value in defaults:
            result.add_hparam(key, value)

        # Without a config the defaults are returned untouched.
        if config is None:
            return result

        result.parse(config, True)
        return result

Example #6

Show file

File: spectrum.py Project: wgfi110/athena-transform

    def params(cls, config=None):
        """
        Build the hyperparameter set for the Spectrum op.

        :param config: optional overrides, passed to ``HParams.parse`` when not None.
            Ten hyperparameters are registered by this method:
              window_length     : Window length in seconds. (float, default = 0.025)
              frame_length      : Hop length in seconds. (float, default = 0.010)
              output_type       : If 1, return power spectrum. If 2, return log-power
                                  spectrum. (int, default = 2)
              snip_edges        : If True, the last frame (shorter than window_length)
                                  will be cut off. If False, 1 // 2 frame_length data
                                  will be padded to data. (bool, default = True)
              raw_energy        : If 1, compute frame energy before preemphasis and
                                  windowing. If 2, compute frame energy after
                                  preemphasis and windowing. (int, default = 1)
              preEph_coeff      : Coefficient for use in frame-signal preemphasis.
                                  (float, default = 0.97)
              window_type       : Type of window
                                  ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
                                  (string, default = "povey")
              remove_dc_offset  : Subtract mean from waveform on each frame.
                                  (bool, default = True)
              is_fbank          : If true, compute power spectrum without frame energy.
                                  If false, use the frame energy instead of the square
                                  of the constant component of the signal.
                                  (bool, default = False)
              dither            : Dithering constant (0.0 means no dither), meant to
                                  add robustness to training. (float, default = 0.0)
            The entries of ``CMVN.params()`` are appended after these.
        :return: An object of class HParams; its 'type' is set to 'Spectrum' after
            the config has been parsed.
        """
        # (name, default) pairs, kept in the order they are registered.
        defaults = (
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('output_type', 2),
            ('snip_edges', True),
            ('raw_energy', 1),
            ('preEph_coeff', 0.97),
            ('window_type', 'povey'),
            ('remove_dc_offset', True),
            ('is_fbank', False),
            ('dither', 0.0),
        )

        result = HParams(cls=cls)
        for key, value in defaults:
            result.add_hparam(key, value)

        # cmvn
        result.append(CMVN.params())

        if config is not None:
            result.parse(config, True)

        # Set last so neither the appended params nor the config decide 'type'.
        result.type = 'Spectrum'
        return result

Example #7

Show file

    def params(cls, config=None):
        """
        Build the combined filterbank + pitch hyperparameter set.

        The set starts from ``CMVN.params()`` and is extended with the entries
        below. Names are the ones actually registered on the HParams object, with
        the defaults used by this method.

        :param config: optional overrides, passed to ``HParams.parse`` when not None.
            Delta entries:
              delta_delta                 : (bool, default = False)
              order                       : (int, default = 2)
              window                      : (int, default = 2)
              channel                     : (int, default = 1); set to order + 1
                                            after parsing when delta_delta is truthy.
            Spectrum / filterbank entries:
              window_length               : Window length in seconds.
                                            (float, default = 0.025)
              frame_length                : Hop length in seconds.
                                            (float, default = 0.010)
              snip_edges                  : Controls whether the last frame (shorter
                                            than window_length) is cut off or
                                            1 // 2 frame_length data is padded to data.
                                            (bool, default = True)
              raw_energy                  : If 1, compute frame energy before
                                            preemphasis and windowing. If 2, compute
                                            frame energy after preemphasis and
                                            windowing. (int, default = 1)
              preEph_coeff                : Coefficient for use in frame-signal
                                            preemphasis. (float, default = 0.97)
              window_type                 : Type of window
                                            ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
                                            (string, default = "povey")
              remove_dc_offset            : Subtract mean from waveform on each frame.
                                            (bool, default = True)
              is_fbank                    : If true, compute power spectrum without
                                            frame energy. If false, use the frame
                                            energy instead of the square of the
                                            constant component of the signal.
                                            (bool, default = True)
              output_type                 : If 1, return power spectrum. If 2, return
                                            log-power spectrum. (int, default = 1)
              upper_frequency_limit       : High cutoff frequency for mel bins
                                            (if <= 0, offset from Nyquist).
                                            (default = 0)
              lower_frequency_limit       : Low cutoff frequency for mel bins.
                                            (float, default = 20.0)
              filterbank_channel_count    : Number of triangular mel-frequency bins.
                                            (float, default = 80.0)
              dither                      : Dithering constant (0.0 means no dither),
                                            meant to add robustness to training.
                                            (float, default = 0.0)
            Pitch entries:
              preemph_coeff               : Coefficient for use in signal preemphasis
                                            (deprecated). (float, default = 0.0)
              min_f0                      : Min. F0 to search for (Hz).
                                            (float, default = 50.0)
              max_f0                      : Max. F0 to search for (Hz).
                                            (float, default = 400.0)
              soft_min_f0                 : Minimum f0, applied in a soft way, must not
                                            exceed min_f0. (float, default = 10.0)
              penalty_factor              : Cost factor for F0 change.
                                            (float, default = 0.1)
              lowpass_cutoff              : Cutoff frequency for the low-pass filter
                                            (Hz). (float, default = 1000.0)
              resample_freq               : Frequency that the signal is down-sampled
                                            to. Must be more than twice lowpass_cutoff.
                                            (float, default = 4000.0)
              delta_pitch                 : Smallest relative change in pitch that the
                                            algorithm measures. (float, default = 0.005)
              nccf_ballast                : Increasing this factor reduces NCCF for
                                            quiet frames. (float, default = 7000.0)
              lowpass_filter_width        : Integer that determines the filter width of
                                            the low-pass filter; more gives a sharper
                                            filter. (int, default = 1)
              upsample_filter_width       : Integer that determines the filter width
                                            when upsampling NCCF. (int, default = 5)
              max_frames_latency          : Maximum number of frames of latency that
                                            pitch tracking is allowed to introduce into
                                            the feature processing (affects output only
                                            if frames_per_chunk > 0 and
                                            simulate_first_pass_online is true).
                                            (int, default = 0)
              frames_per_chunk            : Only relevant for offline pitch extraction
                                            (e.g. compute-kaldi-pitch-feats); it can be
                                            set to a small nonzero value, such as 10,
                                            for better feature compatibility with
                                            online decoding (affects energy
                                            normalization in the algorithm).
                                            (int, default = 0)
              simulate_first_pass_online  : If true, compute-kaldi-pitch-feats will
                                            output features that correspond to what an
                                            online decoder would see in the first pass
                                            of decoding, not the final version of the
                                            features, which is the default. Relevant if
                                            frames_per_chunk > 0.
                                            (bool, default = False)
              recompute_frame             : Only relevant for online pitch extraction,
                                            or for compatibility with online pitch
                                            extraction. A non-critical parameter; the
                                            frame at which some of the forward pointers
                                            are recomputed, after revising the estimate
                                            of the signal energy. Relevant if
                                            frames_per_chunk > 0. (int, default = 500)
              nccf_ballast_online         : Useful mainly for debug; it affects how the
                                            NCCF ballast is computed.
                                            (bool, default = False)
              pitch_scale                 : Scaling factor for the final normalized
                                            log-pitch value. (float, default = 2.0)
              pov_offset                  : Can be used to add an offset to the POV
                                            feature. Intended for use in online
                                            decoding as a substitute for CMN.
                                            (float, default = 0.0)
              pov_scale                   : Scaling factor for the final POV
                                            (probability of voicing) feature.
                                            (float, default = 2.0)
              delta_pitch_scale           : Term to scale the final delta log-pitch
                                            feature. (float, default = 10.0)
              delta_pitch_noise_stddev    : Standard deviation for noise added to the
                                            delta log-pitch (before scaling); should be
                                            about the same as the delta_pitch option to
                                            pitch creation. The purpose is to get rid
                                            of peaks in the delta-pitch caused by
                                            discretization of pitch values.
                                            (float, default = 0.005)
              normalization_left_context  : Left-context (in frames) for moving window
                                            normalization. (int, default = 75)
              normalization_right_context : Right-context (in frames) for moving window
                                            normalization. (int, default = 75)
              delta_window                : Number of frames on each side of the
                                            central frame, to use for the delta window.
                                            (int, default = 2)
              delay                       : Number of frames by which the pitch
                                            information is delayed. (int, default = 0)
              add_pov_feature             : If true, the warped NCCF is added to output
                                            features. (bool, default = True)
              add_normalized_log_pitch    : (bool, default = True)
              add_delta_pitch             : If true, time derivative of log-pitch is
                                            added to output features.
                                            (bool, default = True)
              add_raw_log_pitch           : If true, log(pitch) is added to output
                                            features. (bool, default = False)
        :return: An object of class HParams, which is a set of hyperparameters as
            name-value pairs.
        """
        result = HParams(cls=cls)
        result.append(CMVN.params())

        # (name, default) pairs, kept in the order they have always been registered.
        defaults = (
            # delta
            ('delta_delta', False),
            ('order', 2),
            ('window', 2),
            ('channel', 1),

            ('snip_edges', True),
            ('preemph_coeff', 0.0),
            ('min_f0', 50.0),
            ('max_f0', 400.0),
            ('dither', 0.0),
            ('soft_min_f0', 10.0),
            ('penalty_factor', 0.1),
            ('lowpass_cutoff', 1000.0),
            ('resample_freq', 4000.0),
            ('delta_pitch', 0.005),
            ('nccf_ballast', 7000.0),
            ('lowpass_filter_width', 1),
            ('upsample_filter_width', 5),
            ('max_frames_latency', 0),
            ('frames_per_chunk', 0),
            ('simulate_first_pass_online', False),
            ('recompute_frame', 500),
            ('nccf_ballast_online', False),
            ('upper_frequency_limit', 0),
            ('lower_frequency_limit', 20.0),
            ('filterbank_channel_count', 80.0),
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('output_type', 1),
            ('raw_energy', 1),
            ('preEph_coeff', 0.97),
            ('window_type', 'povey'),
            ('remove_dc_offset', True),
            ('is_fbank', True),

            ('pitch_scale', 2.0),
            ('pov_offset', 0.0),
            ('pov_scale', 2.0),
            ('delta_pitch_scale', 10.0),
            ('delta_pitch_noise_stddev', 0.005),
            ('normalization_left_context', 75),
            ('normalization_right_context', 75),
            ('delta_window', 2),
            ('delay', 0),
            ('add_pov_feature', True),
            ('add_normalized_log_pitch', True),
            ('add_delta_pitch', True),
            ('add_raw_log_pitch', False),
        )
        for key, value in defaults:
            result.add_hparam(key, value)

        if config is not None:
            result.parse(config, True)

        # Derive the channel count only after the config has been applied. The
        # check used to run right after delta_delta was registered as False, so
        # it could never fire and a delta_delta override left channel at 1.
        if result.delta_delta:
            result.channel = result.order + 1

        return result

Example #8

Show file

File: cmvn.py Project: wgfi110/athena-transform

    def params(cls, config=None):
        """
        Build the CMVN hyperparameters.
        :param config: optional overrides, parsed after the defaults are set.
                It may carry four parameters:
                --type              : Type of operation. (string, default = 'CMVN')
                --global_mean       : Global mean of features. (list, default = [0.0])
                --global_variance   : Global variance of features. (list, default = [1.0])
                --local_cmvn        : If true, local cmvn will be done on features. (bool, default = False)
        :return: An object of class HParams. Its global_variance entry holds
                sqrt(global_variance) + 1e-6 as a list, not the raw variance.
        """

        hparams = HParams(cls=cls)

        # Registration order is kept identical to the declaration order below.
        defaults = (
            ('type', 'CMVN'),
            ('global_mean', [0.0]),
            ('global_variance', [1.0]),
            ('local_cmvn', False),
        )
        for name, value in defaults:
            hparams.add_hparam(name, value)

        if config is not None:
            hparams.parse(config, True)

        # Mean and variance must describe the same number of entries.
        assert len(hparams.global_mean) == len(hparams.global_variance), (
            'Error, global_mean length {} is not equals to global_variance length {}'
            .format(len(hparams.global_mean), len(hparams.global_variance)))

        # Store the square root of the variance, offset by a small epsilon,
        # converted back to a plain Python list.
        root = np.sqrt(hparams.global_variance)
        hparams.global_variance = (root + 1e-6).tolist()
        return hparams

Example #9

Show file

    def params(cls, config=None):
        """
        Build the default hyperparameters and apply any overrides from config.
        :param config: optional overrides for the parameters listed below.
            --window_length             : Window length in seconds. (float, default = 0.025)
            --frame_length              : Hop length in seconds. (float, default = 0.010)
            --snip_edges                : If 1, the last frame (shorter than window_length) will
                                          be cutoff. If 2, 1 // 2 frame_length data will be padded
                                          to data. (int, default = 1)
            --raw_energy                : If 1, compute frame energy before preemphasis and
                                          windowing. If 2, compute frame energy after
                                          preemphasis and windowing. (int, default = 1)
            --preEph_coeff              : Coefficient for use in frame-signal preemphasis.
                                          (float, default = 0.97)
            --window_type               : Type of window ("hamm"|"hann"|"povey"|"rect"|"blac"|"tria").
                                          (string, default = "povey")
            --remove_dc_offset          : Subtract mean from waveform on each frame.
                                          (bool, default = True)
            --is_fbank                  : If true, compute power spectrum without frame energy. If
                                          false, using the frame energy instead of the square of the
                                          constant component of the signal. (bool, default = True)
            --output_type               : If 1, return power spectrum. If 2, return log-power
                                          spectrum. (int, default = 1)
            --upper_frequency_limit     : High cutoff frequency for mel bins (if < 0, offset from
                                          Nyquist). (float, default = 0)
            --lower_frequency_limit     : Low cutoff frequency for mel bins. (float, default = 20)
            --filterbank_channel_count  : Number of triangular mel-frequency bins.
                                          (float, default = 23)
            --coefficient_count         : Number of cepstra in MFCC computation.
                                          (int, default = 13)
            --cepstral_lifter           : Constant that controls scaling of MFCCs.
                                          (float, default = 22)
            --use_energy                : Use energy (not C0) in MFCC computation.
                                          (bool, default = True)
            Also registered with defaults: dither (0.0), delta_delta (False),
            order (2), window (2) and channel (1).
        :return: An object of class HParams, which is a set of hyperparameters as name-value pairs.
        """

        # (name, default) pairs, listed in the order they are registered.
        defaults = (
            ('upper_frequency_limit', 0.0),
            ('lower_frequency_limit', 20.0),
            ('filterbank_channel_count', 23.0),
            ('window_length', 0.025),
            ('frame_length', 0.010),
            ('output_type', 1),
            ('snip_edges', 1),
            ('raw_energy', 1),
            ('preEph_coeff', 0.97),
            ('window_type', 'povey'),
            ('remove_dc_offset', True),
            ('is_fbank', True),
            ('cepstral_lifter', 22.0),
            ('coefficient_count', 13),
            ('use_energy', True),
            ('dither', 0.0),
            ('delta_delta', False),
            ('order', 2),
            ('window', 2),
            ('channel', 1),
        )

        hparams = HParams(cls=cls)
        for name, value in defaults:
            hparams.add_hparam(name, value)

        # Append the CMVN default hyperparameters before reading any overrides.
        hparams.append(CMVN.params())

        if config is None:
            return hparams

        hparams.parse(config, True)
        return hparams