Python TacotronSTFT Beispiele

Programmiersprache: Python

Namespace / Paketname: mellotron.layers

Klasse / Typ: TacotronSTFT

Beispiele auf hotexamples.com: 4

Python TacotronSTFT - 4 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für mellotron.layers.TacotronSTFT, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um deren Qualität zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

TacotronSTFT(2)

griffin_lim(1)

mel_spectrogram(1)

Beispiel #1

Datei anzeigen

 def __init__(self, training_files, segment_length, filter_length,
              hop_length, win_length, sampling_rate, mel_fmin, mel_fmax):
     """Collect the audio file list and build the STFT front end.

     Args:
         training_files: Path handed to ``files_to_list`` when it names an
             existing file; otherwise the audio list starts out empty.
         segment_length: Stored on the instance as ``segment_length``.
         filter_length: Forwarded to ``TacotronSTFT``.
         hop_length: Forwarded to ``TacotronSTFT``.
         win_length: Forwarded to ``TacotronSTFT``.
         sampling_rate: Forwarded to ``TacotronSTFT`` and stored on the
             instance as ``sampling_rate``.
         mel_fmin: Forwarded to ``TacotronSTFT``.
         mel_fmax: Forwarded to ``TacotronSTFT``.
     """
     listing_exists = os.path.isfile(str(training_files))
     self.audio_files = files_to_list(training_files) if listing_exists else []

     # Re-seeding the global RNG with a fixed value makes the in-place
     # shuffle below produce the same order on every construction.
     random.seed(1234)
     random.shuffle(self.audio_files)

     stft_config = dict(
         filter_length=filter_length,
         hop_length=hop_length,
         win_length=win_length,
         sampling_rate=sampling_rate,
         mel_fmin=mel_fmin,
         mel_fmax=mel_fmax,
     )
     self.stft = TacotronSTFT(**stft_config)
     self.segment_length = segment_length
     self.sampling_rate = sampling_rate

Beispiel #2

Datei anzeigen

class Mel2Samp(torch.utils.data.Dataset):
    """
    Dataset that yields ``(mel, audio)`` pairs: a fixed-length audio segment
    together with the mel spectrogram computed from that same segment.
    """

    def __init__(self, training_files, segment_length, filter_length,
                 hop_length, win_length, sampling_rate, mel_fmin, mel_fmax):
        """Collect the audio file list and build the STFT front end.

        Args:
            training_files: Path handed to ``files_to_list`` when it names
                an existing file; otherwise the dataset starts out empty.
            segment_length: Length (along dimension 0 of the loaded audio)
                of every segment returned by ``__getitem__``.
            filter_length: Forwarded to ``TacotronSTFT``.
            hop_length: Forwarded to ``TacotronSTFT``.
            win_length: Forwarded to ``TacotronSTFT``.
            sampling_rate: Forwarded to ``TacotronSTFT``; also the rate
                every loaded file must report.
            mel_fmin: Forwarded to ``TacotronSTFT``.
            mel_fmax: Forwarded to ``TacotronSTFT``.
        """
        have_listing = os.path.isfile(str(training_files))
        self.audio_files = files_to_list(training_files) if have_listing else []

        # Fixed seed on the global RNG so the in-place shuffle is repeatable.
        random.seed(1234)
        random.shuffle(self.audio_files)

        self.stft = TacotronSTFT(
            filter_length=filter_length,
            hop_length=hop_length,
            win_length=win_length,
            sampling_rate=sampling_rate,
            mel_fmin=mel_fmin,
            mel_fmax=mel_fmax,
        )
        self.segment_length = segment_length
        self.sampling_rate = sampling_rate

    def get_mel(self, audio):
        """Return ``self.stft.mel_spectrogram`` of ``audio``.

        A leading dimension of size one is added before the STFT call and
        squeezed away from its result. The audio is passed through without
        amplitude scaling (division by MAX_WAV_VALUE is disabled).
        """
        batched = torch.autograd.Variable(audio.unsqueeze(0),
                                          requires_grad=False)
        return torch.squeeze(self.stft.mel_spectrogram(batched), 0)

    def __getitem__(self, index):
        """Load file ``index`` and return a ``(mel, audio)`` tuple.

        Audio at least ``segment_length`` long is cropped at a random
        offset; shorter audio is padded at the end up to ``segment_length``.

        Raises:
            ValueError: If the file's sampling rate differs from
                ``self.sampling_rate``.
        """
        path = self.audio_files[index]
        audio, sampling_rate = load_wav_to_torch(path)
        if sampling_rate != self.sampling_rate:
            raise ValueError("{} SR doesn't match target {} SR".format(
                sampling_rate, self.sampling_rate))

        n_samples = audio.size(0)
        if n_samples < self.segment_length:
            # Too short: constant-pad the tail up to the target length.
            shortfall = self.segment_length - n_samples
            audio = torch.nn.functional.pad(
                audio, (0, shortfall), 'constant').data
        else:
            # Long enough: take a random window of the target length.
            start = random.randint(0, n_samples - self.segment_length)
            audio = audio[start:start + self.segment_length]

        return (self.get_mel(audio), audio)

    def __len__(self):
        """Return the number of audio files in the dataset."""
        return len(self.audio_files)

Beispiel #3

Datei anzeigen

import unidecode
import yaml
import librosa

from waveglow import inference as waveglow
from melgan import inference as melgan
from mellotron import inference as mellotron
from utils.argutils import locals2dict

from mellotron.layers import TacotronSTFT
from mellotron.hparams import create_hparams

# Use the Griffin-Lim vocoder.
# The hyper-parameters are created once at import time and used to build a
# module-level TacotronSTFT instance.
_hparams = create_hparams()
# Arguments are passed positionally, so their order must match the
# TacotronSTFT constructor signature.
_stft = TacotronSTFT(_hparams.filter_length, _hparams.hop_length,
                     _hparams.win_length, _hparams.n_mel_channels,
                     _hparams.sampling_rate, _hparams.mel_fmin,
                     _hparams.mel_fmax)

# NOTE(review): presumably 0 means WaveGlow is not used and the Griffin-Lim
# path is taken instead -- confirm at the use sites.
_use_waveglow = 0

# Run on the GPU when CUDA is available, otherwise on the CPU.
_device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Matches one or more consecutive whitespace characters or any of
# \ / : * ? " < > | '
# NOTE(review): presumably used to sanitize file names -- verify at call sites.
filename_formatter_re = re.compile(r'[\s\\/:*?"<>|\']+')


def plot_mel_alignment_gate_audio(mel,
                                  alignment,
                                  gate,
                                  audio,
                                  figsize=(16, 16)):
    """Create a figure with four stacked subplots sized by ``figsize``.

    NOTE(review): only the opening statements of this function are visible
    here; ``mel``, ``alignment``, ``gate`` and ``audio`` are not used in the
    visible part and are presumably drawn on one subplot each -- confirm
    against the full source.
    """
    # 4 rows x 1 column of axes.
    fig, axes = plt.subplots(4, 1, figsize=figsize)
    axes = axes.flatten()

Beispiel #4

Datei anzeigen

Datei: sdk_api.py Projekt: waws520waws/ttskit

import torch
import aukit
import tqdm
import requests

from waveglow import inference as waveglow
from mellotron import inference as mellotron
from mellotron.layers import TacotronSTFT
from mellotron.hparams import create_hparams

# Absolute path of the directory containing this file; every resource path
# below is resolved relative to it.
_home_dir = os.path.dirname(os.path.abspath(__file__))

# Use the Griffin-Lim vocoder.
# The hyper-parameters are created once at import time and used to build a
# module-level TacotronSTFT instance.
_hparams = create_hparams()
# Arguments are passed positionally, so their order must match the
# TacotronSTFT constructor signature.
_stft = TacotronSTFT(_hparams.filter_length, _hparams.hop_length,
                     _hparams.win_length, _hparams.n_mel_channels,
                     _hparams.sampling_rate, _hparams.mel_fmin,
                     _hparams.mel_fmax)

# NOTE(review): presumably 0 means WaveGlow is not used and the Griffin-Lim
# path is taken instead -- confirm at the use sites.
_use_waveglow = 0
# Run on the GPU when CUDA is available, otherwise on the CPU.
_device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Files under <this directory>/resource/model.
_mellotron_path = os.path.join(_home_dir, 'resource', 'model',
                               'mellotron.kuangdd-rtvc.pt')
_waveglow_path = os.path.join(_home_dir, 'resource', 'model',
                              'waveglow.kuangdd.pt')
_ge2e_path = os.path.join(_home_dir, 'resource', 'model', 'ge2e.kuangdd.pt')
_mellotron_hparams_path = os.path.join(_home_dir, 'resource', 'model',
                                       'mellotron_hparams.json')
# Tar archives directly under <this directory>/resource.
_reference_audio_tar_path = os.path.join(_home_dir, 'resource',
                                         'reference_audio.tar')
_audio_tar_path = os.path.join(_home_dir, 'resource', 'audio.tar')