Python get Examples, hparams.hparams.get Python Examples

Example #1

0

Show file

File: data_feed.py Project: xbsdsongnan/tacotron

    def __init__(self, coordinator, in_dir, logger):
        '''
            Loads and shuffles the training metadata, caches the batching
            hyper parameters and builds the TensorFlow placeholders and
            FIFO queue that the feeder uses

            Param:
                coordinator: stored on the instance unchanged
                in_dir: directory that contains train.txt
                logger: handed to load_metadata and stored on the instance
        '''
        super(DataFeeder, self).__init__()
        self._coordinator = coordinator
        self._in_dir = in_dir
        self._logger = logger

        # Read the metadata once and randomise the order of the samples.
        metadata_path = os.path.join(in_dir, 'train.txt')
        self._metadata = load_metadata(metadata_path, self._logger)
        random.shuffle(self._metadata)
        self._num_samples = len(self._metadata)
        self._cursor = 0  # position of the next sample

        # Keep the hyper parameters that are read later as attributes.
        self._hparams = hparams
        self.batch_size = hparams.get('batch_size')
        self.superbatch_size = hparams.get('superbatch_size')
        self.outputs_per_step = hparams.get('outputs_per_step')

        # One placeholder per queue component, created in queue order.
        inputs = tf.placeholder(tf.int32, [None, None], 'inputs')
        input_lengths = tf.placeholder(tf.int32, [None], 'input_lengths')
        mel_shape = [None, None, hparams.get('num_mels')]
        mel_targets = tf.placeholder(tf.float32, mel_shape, 'mel_targets')
        linear_shape = [None, None, hparams.get('num_freq')]
        linear_targets = tf.placeholder(tf.float32, linear_shape,
                                        'linear_targets')
        self._placeholders = [
            inputs, input_lengths, mel_targets, linear_targets
        ]

        # FIFO queue of capacity 8 whose component dtypes mirror the
        # placeholders above; the current batch is dequeued from it.
        component_dtypes = [tf.int32, tf.int32, tf.float32, tf.float32]
        input_queue = tf.FIFOQueue(8, component_dtypes, name='input_queue')
        self._enqueue_operation = input_queue.enqueue(self._placeholders)
        self.current_batch = Batch(input_queue.dequeue(), prep=False)
        self.current_batch.set_shapes(self._placeholders)

Example #2

0

Show file

def _build_mel_basis():
    '''
        Creates a filterbank matrix to combine FFT bins into
        mel-frequency bins

        Output:
            The value returned by librosa.filters.mel for the
            sample_rate, n_fft and num_mels hyper parameters
    '''
    # Pass every argument by keyword: newer librosa releases declare
    # sr and n_fft keyword-only, while older releases accept the same
    # keyword names, so this form works with both.
    return librosa.filters.mel(sr=hparams.get('sample_rate'),
                               n_fft=hparams.get('n_fft'),
                               n_mels=hparams.get('num_mels'))

Example #3

0

Show file

def spectrogram_tensorflow_inv(spect):
    '''
        Builds the TensorFlow graph that converts a spectrogram to a
        waveform

        Unlike spectrogram_inv, this does NOT invert the preemphasis.
        The caller should call inv_preemphasis on the output after
        running the graph.
    '''
    # Undo the normalization and the reference-level shift
    level_db = _denormalize_tensorflow(spect) + hparams.get('ref_level_db')
    # Back from dB to amplitude
    amplitude = _db_to_amp_tensorflow(level_db)
    # Raise to the 'power' hyper parameter and run Griffin-Lim
    sharpened = tf.pow(amplitude, hparams.get('power'))
    return _griffin_lim_tensorflow(sharpened)

Example #4

0

Show file

def _normalize_inv(S):
    '''
        Input
        S: Spectrogram

        Unwinds the normalization function applied
        to the spectrogram. This is used in synthesizing
    '''
    return (np.clip(S, 0, 1) *
            -float(hparams.get('min_level_db'))) + hparams.get('min_level_db')

Example #5

0

Show file

def _normalize(S):
    '''
        Input
        S: Spectrogram

        Returns a normalized version of the spectrogram.
        Since we don't care about absolute volume and only
        care about relatve volume, we pin the spectrogram frequency

    '''
    return np.clip((S - hparams.get('min_level_db')) /
                   -float(hparams.get('min_level_db')), 0, 1)

Example #6

0

Show file

def spectrogram_inv(spect):
    '''
        Input
        spect: A linear spectrogram

        Converts a spectrogram back to a waveform with the
        Griffin-Lim algorithm. This is used in synthesizing
    '''
    # Undo the normalization and the reference-level shift
    level_db = _normalize_inv(spect) + hparams.get('ref_level_db')
    # Back from dB to amplitude
    amplitude = _db_to_amp(level_db)
    # Estimate the waveform from the amplitudes raised to 'power'
    waveform = _griffin_lim(amplitude**hparams.get('power'))
    # Finally undo the pre-emphasis filter
    return pre_emphasis_inv(waveform)

Example #7

0

Show file

File: batch.py Project: xbsdsongnan/tacotron

 def _prepare_batch(self, outputs_per_step=hparams.get('outputs_per_step')):
     '''
         Prepares both inputs and targets for inference by calling
         _prepare_inputs and then _prepare_targets

         Param:
             outputs_per_step: not referenced in this method's body.
             Its default is read from hparams once, when the function
             is defined, not on every call
     '''
     # Inputs first, then targets
     self._prepare_inputs()
     self._prepare_targets()

Example #8

0

Show file

File: batch.py Project: xbsdsongnan/tacotron

 def get_embedds(self):
     '''
         Looks up self._inputs in the float32 variable named
         'embedding' and returns the result of the lookup

         The variable has one row per entry of chars and
         embedded_depth columns, and is initialized from a truncated
         normal distribution with stddev 0.5
     '''
     table_shape = [len(chars), hparams.get('embedded_depth')]
     table_init = tf.truncated_normal_initializer(stddev=0.5)
     table = tf.get_variable('embedding',
                             table_shape,
                             dtype=tf.float32,
                             initializer=table_init)
     return tf.nn.embedding_lookup(table, self._inputs)

Example #9

0

Show file

def _stft_params():
    '''
        Output
        Given the hyper parameters, return the needed
        parameters for the lirosa STFT method
    
        n_fft: The FFT window size or the num
        hop_length: The number of audio frames between STFT columns
        win_length: Each frame of audio is windowed, where each window
        will be of length win_length and then zero-padded to match up with n_fft
    '''
    n_fft = hparams.get('n_fft')
    hop_length = int(
        hparams.get('frame_shift_ms') / 1000 * hparams.get('sample_rate'))
    win_length = int(
        hparams.get('frame_length_ms') / 1000 * hparams.get('sample_rate'))
    return n_fft, hop_length, win_length

Example #10

0

Show file

File: batch.py Project: xbsdsongnan/tacotron

def round_up(x):
    '''
        Rounds x up to the nearest multiple of the outputs_per_step
        hyper parameter

        Param:
            x: an integer

        Output:
            x itself when it is already a multiple of
            outputs_per_step, otherwise the next larger multiple
    '''
    step = hparams.get('outputs_per_step')
    overshoot = x % step
    # Already aligned: nothing to add
    return x if overshoot == 0 else x + step - overshoot

Example #11

0

Show file

def find_endpoint(wav, threshold_db=-40, min_silence_sec=0.8):
    '''
        Input
        wav: a sequence of samples
        threshold_db: level, converted with _db_to_amp, below which a
        window counts as silent
        min_silence_sec: length of the inspected window in seconds

        Output
        Slides a window of min_silence_sec over wav in steps of a
        quarter window. For the first window whose largest sample is
        below the threshold, returns the window start plus one step.
        Returns len(wav) when no such window exists

        NOTE(review): the check uses the signed maximum, not the
        maximum absolute value - confirm this is intended
    '''
    window = int(hparams.get('sample_rate') * min_silence_sec)
    step = int(window / 4)
    silence_level = _db_to_amp(threshold_db)
    candidate_starts = range(step, len(wav) - window, step)
    return next((start + step for start in candidate_starts
                 if np.max(wav[start:start + window]) < silence_level),
                len(wav))

Example #12

0

Show file

File: data_feed.py Project: xbsdsongnan/tacotron

def load_metadata(path, logger):
    '''
        Reads the metadata file at the given path and logs how many
        examples and hours of audio it describes

        Param:
            path: file with one '|'-separated record per line, where
            the third field is parsed as an integer frame count
            logger: object whose log method receives the summary

        Output:
            A list with one list of string fields per line
    '''
    with open(path, encoding='utf-8') as f:
        metadata = []
        total_frames = 0
        for line in f:
            fields = line.strip().split('|')
            metadata.append(fields)
            total_frames += int(fields[2])
        # frames * ms per frame -> hours
        ms_per_hour = 3600 * 1000
        hours = total_frames * hparams.get('frame_shift_ms') / ms_per_hour
        summary = 'Loaded metadata for %d examples (%.2f hours)' % (
            len(metadata), hours)
        logger.log(summary)
    return metadata

Example #13

0

Show file

def _griffin_lim_tensorflow(S):
    '''
        TensorFlow implementation of Griffin-Lim
        Based on https://github.com/Kyubyong/tensorflow-exercises/blob/master/Audio_Processing.ipynb

        Runs griffin_lim_iters refinement rounds inside the
        'griffinlim' variable scope and returns the estimated signal
        with the batch axis removed again
    '''
    with tf.variable_scope('griffinlim'):
        # The stft and istft helpers operate on a batch of
        # spectrograms, so wrap S in a batch of size 1
        batched = tf.expand_dims(S, 0)
        magnitude = tf.identity(tf.cast(batched, dtype=tf.complex64))
        waveform = _istft_tensorflow(magnitude)
        for _ in range(hparams.get('griffin_lim_iters')):
            estimate = _stft_tensorflow(waveform)
            # Divide by the magnitude (floored at 1e-8) to keep only
            # the phase of the estimate
            floor = tf.cast(tf.maximum(1e-8, tf.abs(estimate)), tf.complex64)
            phase = estimate / floor
            waveform = _istft_tensorflow(magnitude * phase)
        return tf.squeeze(waveform, 0)

Example #14

0

Show file

def _griffin_lim(spect):
    '''
        Input
        spect: A spectrogram

        Apply the Griffin-Lim Algorithm (GLA) on the spectrogram
        to estimate the signal that has been STFTed

        Output
        The signal returned by _stft_inv after griffin_lim_iters
        refinement rounds
    '''
    # Start from random phases, one per spectrogram entry
    angles = np.exp(2j * np.pi * np.random.rand(*spect.shape))
    # np.complex was only an alias of the builtin complex and has been
    # removed from NumPy; np.complex128 is the dtype it resolved to.
    S_complex = np.abs(spect).astype(np.complex128)
    y = _stft_inv(S_complex * angles)
    for _ in range(hparams.get('griffin_lim_iters')):
        # Keep the magnitudes fixed and take the phase from the STFT
        # of the current signal estimate
        angles = np.exp(1j * np.angle(_stft(y)))
        y = _stft_inv(S_complex * angles)
    return y

Example #15

0

Show file

File: batch.py Project: xbsdsongnan/tacotron

def pad_input(x, length):
    '''
        Given a list or 1d numpy array, x, and an int, length, add
        length - len(x) pad values to the back of it and return a
        numpy vector

        Param:
            x: a list or 1d numpy array
            length: an integer, the size of the returned vector

        Output:
            A padded numpy vector. The pad value is the pad_value
            hyper parameter
    '''
    # len(x) instead of x.shape[0] so that plain lists, which the
    # contract above allows, work as well as numpy arrays
    return np.pad(x, (0, length - len(x)),
                  mode='constant',
                  constant_values=hparams.get('pad_value'))

Example #16

0

Show file

def mel_spectrogram(y):
    '''
        Input
        y: a numpy array representing a sound signal

        Output
        A normalized mel-scaled spectrogram: the magnitude of the STFT
        of the pre-emphasized signal, mapped to mel bins, converted to
        dB, shifted by ref_level_db and normalized
        TODO Thresholding at ref_level_db is never discussed in
        the tacotron paper
    '''
    # Short-time Fourier transform of the pre-emphasized signal
    stft = _stft(pre_emphasis(y))
    # Magnitudes combined into mel-frequency bins
    mel_magnitude = _linspect_to_melspect(np.abs(stft))
    # dB scale relative to ref_level_db
    level_db = _amp_to_db(mel_magnitude) - hparams.get('ref_level_db')
    return _normalize(level_db)

Example #17

0

Show file

def write_metadata(metadata, output_dir):
    '''
        Writes dataset metadata to train.txt in the given output
        directory and prints a summary. Every entry becomes one line
        with its fields joined by '|':
        "{lin spec file name}|{mel spec file name}|{num frames}|{text}"

        Param:
            metadata: a sequence of entries; field 2 is summed as the
            frame count and the length of field 3 is reported as the
            input length. May be empty
            output_dir: directory in which train.txt is created
    '''
    with open(os.path.join(output_dir, 'train.txt'), 'w',
              encoding='utf-8') as f:
        for m in metadata:
            f.write('|'.join([str(x) for x in m]) + '\n')

    # The summary is printed after the file has been closed
    frames = sum(m[2] for m in metadata)
    hours = frames * hparams.get('frame_shift_ms') / (3600 * 1000)
    print('Wrote %d utterances, %d frames (%.2f hours)' %
          (len(metadata), frames, hours))
    # default=0 keeps an empty dataset from raising ValueError after
    # train.txt has already been written
    print('Max input length:  %d' %
          max((len(m[3]) for m in metadata), default=0))
    print('Max output length: %d' % max((m[2] for m in metadata), default=0))

Example #18

0

Show file

File: batch.py Project: xbsdsongnan/tacotron

def pad_target(t, length):
    '''
        Pads a 2d target spectrogram along its first axis

        Given a 2d array whose first axis represents time and whose
        second axis represents frequency, and an integer, length,
        appends length - t.shape[0] rows of the pad_value hyper
        parameter to the back of the time axis

        Param:
            t: a numpy 2d array
            length: an integer

        Output:
            A padded 2d numpy array
    '''
    missing_frames = length - t.shape[0]
    # Pad only at the end of the first axis; leave the second untouched
    pad_widths = [(0, missing_frames), (0, 0)]
    fill = hparams.get('pad_value')
    return np.pad(t, pad_widths, mode='constant', constant_values=fill)

Example #19

0

Show file

def pre_emphasis(x):
    '''
        Input
        x: a numpy array representing a sound signal

        Output
        Applies a pre-emphasis filter on the signal to amplify
        the high frequencies. Given an input signal x, the emphasized
        signal y is described by

            y(t) = x(t) - a*x(t-1),

        where a is the preemphasis hyper parameter.

        This is done with lfilter where lfilter(b, a, x) implements
        a[0]*y[n] = b[0]*x[n] + b[1]*x[n-1] + ... + b[M]*x[n-M]
                  - a[1]*y[n-1] - ... - a[N]*y[n-N]
        so b = [1, -a] and a = [1] give the formula above
    '''
    coefficient = float(hparams.get('preemphasis'))
    numerator = [1, -coefficient]
    denominator = [1]
    return signal.lfilter(numerator, denominator, x)

Example #20

0

Show file

def spectrogram(y):
    '''
        Input
        y: a numpy array representing a sound signal

        Output
        A normalized linear-scale spectrogram: the magnitude of the
        STFT of the pre-emphasized signal, converted to dB, shifted
        by ref_level_db and normalized
        TODO Thresholding at ref_level_db is never discussed in
        the tacotron paper
    '''
    # Short-time Fourier transform of the pre-emphasized signal
    stft = _stft(pre_emphasis(y))
    magnitude = np.abs(stft)
    # dB scale relative to ref_level_db
    level_db = _amp_to_db(magnitude) - hparams.get('ref_level_db')
    # Normalize as the last step
    return _normalize(level_db)

Example #21

0

Show file

def load_wav(path):
    '''
        Loads a single waveform file from disk at the given path,
        using the sample_rate hyper parameter as the sampling rate

        Output
        The first element of what librosa.core.load returns
    '''
    sample_rate = hparams.get('sample_rate')
    loaded = librosa.core.load(path, sr=sample_rate)
    return loaded[0]

Example #22

0

Show file

def _denormalize_tensorflow(S):
    '''
        TensorFlow counterpart of _normalize_inv: clips S to [0, 1],
        multiplies it by -min_level_db and shifts it by min_level_db
    '''
    floor_db = hparams.get('min_level_db')
    clipped = tf.clip_by_value(S, 0, 1)
    return (clipped * -float(floor_db)) + floor_db

Example #23

0

Show file

def pre_emphasis_inv(x):
    '''
        Rewinds the pre emphasis filter by running lfilter with the
        numerator and denominator of pre_emphasis swapped, i.e.

            y(t) = x(t) + a*y(t-1),

        where a is the preemphasis hyper parameter. This is used
        in synthesizing
    '''
    coefficient = float(hparams.get('preemphasis'))
    numerator = [1]
    denominator = [1, -coefficient]
    return signal.lfilter(numerator, denominator, x)

Example #24

0

Show file

def save_wav(wav, path):
    '''
        Scales the waveform so that its largest absolute sample maps
        to 32767 and writes it to path as 16-bit integers at the
        sample_rate hyper parameter

        Input
        wav: a numpy array representing a sound signal; it is left
        unmodified
        path: where the file is written
    '''
    # The peak is floored at 0.01 so that a (near-)silent signal is
    # not divided by zero or amplified without bound
    peak = max(0.01, np.max(np.abs(wav)))
    # Scale into a new array instead of using *=, which would overwrite
    # the caller's samples and fail on integer arrays
    scaled = wav * (32767 / peak)
    # NOTE(review): librosa.output is not available in newer librosa
    # releases - confirm the librosa version this project pins
    librosa.output.write_wav(path, scaled.astype(np.int16),
                             hparams.get('sample_rate'))