Code Example #1
File: module.py Project: zfzf1990/PaddleHub
    def _initialize(self):
        """
        Initialize with the necessary elements.
        """
        self.tts_checkpoint_path = os.path.join(self.directory, "assets",
                                                "tts", "step-162000")
        self.waveflow_checkpoint_path = os.path.join(self.directory, "assets",
                                                     "vocoder", "step-2000000")
        self.waveflow_config_path = os.path.join(self.directory, "assets",
                                                 "vocoder",
                                                 "waveflow_ljspeech.yaml")

        tts_config_path = os.path.join(self.directory, "assets", "tts",
                                       "ljspeech.yaml")
        with open(tts_config_path) as f:
            self.tts_config = yaml.load(f, Loader=yaml.Loader)
        with fluid.dygraph.guard(fluid.CPUPlace()):
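            # Build TTS.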
            self.tts_model = FastSpeechModel(
                self.tts_config['network'],
                num_mels=self.tts_config['audio']['num_mels'])
            io.load_parameters(model=self.tts_model,
                               checkpoint_path=self.tts_checkpoint_path)

            # Build vocoder.
            args = AttrDict()
            args.config = self.waveflow_config_path
            args.use_fp16 = False
            self.waveflow_config = io.add_yaml_config_to_args(args)
            self.waveflow = WaveFlowModule(self.waveflow_config)
            io.load_parameters(model=self.waveflow,
                               checkpoint_path=self.waveflow_checkpoint_path)
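For reference, the AttrDict used to assemble the vocoder arguments is typically just a dict whose keys double as attributes, so that assignments like args.config = ... work. A minimal sketch, not necessarily Parakeet's exact implementation:

class AttrDict(dict):
    """A dict whose items can also be read and written as attributes."""

    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError as e:
            raise AttributeError(key) from e

    def __setattr__(self, key, value):
        self[key] = value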
Code Example #2
    def build(self, training=True):
        """Initialize the model.

        Args:
            training (bool, optional): Whether the model is built for training or inference.
                Defaults to True.

        Returns:
            int: The iteration of the restored checkpoint.
        """
        config = self.config
        dataset = LJSpeech(config, self.nranks, self.rank)
        self.trainloader = dataset.trainloader
        self.validloader = dataset.validloader

        waveflow = WaveFlowModule(config)

        # Dry run once to create and initialize all necessary parameters.
        audio = dg.to_variable(np.random.randn(1, 16000).astype(self.dtype))
        mel = dg.to_variable(
            np.random.randn(1, config.mel_bands, 63).astype(self.dtype))
        waveflow(audio, mel)

        if training:
            optimizer = fluid.optimizer.AdamOptimizer(
                learning_rate=config.learning_rate,
                parameter_list=waveflow.parameters())

            # Load parameters.
            iteration = io.load_parameters(model=waveflow,
                                           optimizer=optimizer,
                                           checkpoint_dir=self.checkpoint_dir,
                                           iteration=config.iteration,
                                           checkpoint_path=config.checkpoint)
            print("Rank {}: checkpoint loaded.".format(self.rank))

            # Data parallelism.
            if self.parallel:
                strategy = dg.parallel.prepare_context()
                waveflow = dg.parallel.DataParallel(waveflow, strategy)

            self.waveflow = waveflow
            self.optimizer = optimizer
            self.criterion = WaveFlowLoss(config.sigma)

        else:
            # Load parameters.
            iteration = io.load_parameters(model=waveflow,
                                           checkpoint_dir=self.checkpoint_dir,
                                           iteration=config.iteration,
                                           checkpoint_path=config.checkpoint)
            print("Rank {}: checkpoint loaded.".format(self.rank))

            for layer in waveflow.sublayers():
                if isinstance(layer, weight_norm.WeightNormWrapper):
                    layer.remove_weight_norm()

            self.waveflow = waveflow

        return iteration
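Since build() wires up trainloader, waveflow, criterion, and optimizer, a single training step inside this class could look like the sketch below. The output structure consumed by WaveFlowLoss is an assumption based on the dry-run call above; treat this as schematic, not the project's actual loop.

    def train_step_sketch(self):
        """Schematic single training step; assumed interfaces, not the project's loop."""
        audio, mel = next(iter(self.trainloader))
        outputs = self.waveflow(audio, mel)   # same call signature as the dry run
        loss = self.criterion(outputs)        # assumes WaveFlowLoss takes the raw outputs
        loss.backward()
        self.optimizer.minimize(loss)
        self.waveflow.clear_gradients()       # reset gradients before the next step
        return loss.numpy()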
Code Example #3
    def __init__(self, config_path, checkpoint_path):
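        # Parse the YAML config and expose each key as an attribute on a Namespace.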
        with open(config_path, 'rt') as f:
            config = ruamel.yaml.safe_load(f)
        ns = argparse.Namespace()
        for k, v in config.items():
            setattr(ns, k, v)
        ns.use_fp16 = False

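        # Build the vocoder and restore its weights from the checkpoint.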
        self.model = WaveFlowModule(ns)
        io.load_parameters(self.model, checkpoint_path=checkpoint_path)
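This snippet omits its class header; assuming it belongs to a small wrapper class (called WaveFlowVocoder here, a hypothetical name), it could be combined with the synthesize() call from Code Example #5:

# Hypothetical usage; the class name, paths, and sigma value are placeholders.
with fluid.dygraph.guard(fluid.CPUPlace()):
    vocoder = WaveFlowVocoder("waveflow_ljspeech.yaml", "vocoder/step-2000000")
    # mel: (batch, mel_bands, frames), as in the other examples
    wav = vocoder.model.synthesize(mel, sigma=1.0)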
Code Example #4
File: module.py Project: zfzf1990/PaddleHub
    def _initialize(self):
        """
        Initialize with the necessary elements.
        """
        self.tts_checkpoint_path = os.path.join(self.directory, "assets",
                                                "tts", "step-120000")
        self.waveflow_checkpoint_path = os.path.join(self.directory, "assets",
                                                     "vocoder", "step-2000000")
        self.waveflow_config_path = os.path.join(self.directory, "assets",
                                                 "vocoder",
                                                 "waveflow_ljspeech.yaml")

        tts_config_path = os.path.join(self.directory, "assets", "tts",
                                       "ljspeech.yaml")
        with open(tts_config_path) as f:
            self.tts_config = yaml.load(f, Loader=yaml.Loader)

        # The maximum audio length used during synthesis.
        self.max_len = 1000
        # The stop-token threshold that determines whether spectrum generation should stop at a given time step.
        self.stop_threshold = 0.5

        with fluid.dygraph.guard(fluid.CPUPlace()):
            # Build TTS.
            with fluid.unique_name.guard():
                network_cfg = self.tts_config['network']
                self.tts_model = TransformerTTSModel(
                    network_cfg['embedding_size'], network_cfg['hidden_size'],
                    network_cfg['encoder_num_head'],
                    network_cfg['encoder_n_layers'],
                    self.tts_config['audio']['num_mels'],
                    network_cfg['outputs_per_step'],
                    network_cfg['decoder_num_head'],
                    network_cfg['decoder_n_layers'])
                io.load_parameters(model=self.tts_model,
                                   checkpoint_path=self.tts_checkpoint_path)

            # Build vocoder.
            args = AttrDict()
            args.config = self.waveflow_config_path
            args.use_fp16 = False
            self.waveflow_config = io.add_yaml_config_to_args(args)
            self.waveflow = WaveFlowModule(self.waveflow_config)
            io.load_parameters(model=self.waveflow,
                               checkpoint_path=self.waveflow_checkpoint_path)
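max_len and stop_threshold bound the autoregressive decode: generation ends once the predicted stop probability crosses the threshold or the frame budget runs out. A self-contained sketch of that control flow, illustrative only and not Parakeet's decoder:

import numpy as np

def decode_until_stop(step_fn, max_len=1000, stop_threshold=0.5):
    """Run one decoder step at a time until the stop token fires or max_len is hit.

    step_fn is a stand-in for a single autoregressive step; it must return
    (mel_frame, stop_probability).
    """
    frames = []
    for _ in range(max_len):
        mel_frame, stop_prob = step_fn()
        frames.append(mel_frame)
        if stop_prob > stop_threshold:
            break  # the model predicts the spectrum is complete
    return np.stack(frames)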
Code Example #5
def synthesis_with_waveflow(mel_output, args, checkpoint, place):
    fluid.enable_dygraph(place)
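    # Point args at the vocoder config and merge its YAML settings into args.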
    args.config = args.config_vocoder
    args.use_fp16 = False
    config = io.add_yaml_config_to_args(args)

    mel_spectrogram = fluid.layers.transpose(mel_output, [0, 2, 1])

    # Build model.
    waveflow = WaveFlowModule(config)
    io.load_parameters(model=waveflow, checkpoint_path=checkpoint)
    for layer in waveflow.sublayers():
        if isinstance(layer, weight_norm.WeightNormWrapper):
            layer.remove_weight_norm()

    # Run model inference.
    wav = waveflow.synthesize(mel_spectrogram, sigma=config.sigma)
    return wav.numpy()[0]
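A call site for this helper might save the returned waveform with scipy; the checkpoint path and the 22050 Hz LJSpeech sample rate below are assumptions:

# Hypothetical call site; adjust paths and args to your setup.
from scipy.io.wavfile import write

wav = synthesis_with_waveflow(mel_output, args, "vocoder/step-2000000",
                              fluid.CPUPlace())
write("synthesis.wav", 22050, wav)  # LJSpeech models are typically 22050 Hz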