Example #1
    def test_forward_attention(
        self,
    ):
        (
            chars_seq,
            chars_seq_lengths,
            mel_spec,
            mel_postnet_spec,
            mel_lengths,
            stop_targets,
            speaker_ids,
        ) = self.generate_dummy_inputs()

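        # mark every frame past each mel length as a stop frame (1.0)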
        for idx in mel_lengths:
            stop_targets[:, int(idx.item()) :, 0] = 1.0

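        # regroup stop targets in blocks of the reduction factor c.r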
        stop_targets = stop_targets.view(chars_seq.shape[0], stop_targets.size(1) // c.r, -1)
        stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze()

        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, forward_attn=True)
        # training pass
        output = model(chars_seq, chars_seq_lengths, mel_spec, training=True)

        # check model output shapes
        assert np.all(output[0].shape == mel_spec.shape)
        assert np.all(output[1].shape == mel_spec.shape)
        assert output[2].shape[2] == chars_seq.shape[1]
        assert output[2].shape[1] == (mel_spec.shape[1] // model.decoder.r)
        assert output[3].shape[1] == (mel_spec.shape[1] // model.decoder.r)

        # inference pass
        output = model(chars_seq, training=False)
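
The helper generate_dummy_inputs() referenced above is not shown in this snippet. The sketch below is one plausible version, assuming torch and tf are imported and a config object c exposes c.audio['num_mels']; the batch size, shapes, and value ranges are illustrative assumptions, not the project's actual helper.

    @staticmethod
    def generate_dummy_inputs():
        # random character ids and per-sequence lengths for a batch of 8
        chars_seq = torch.randint(0, 24, (8, 128)).long()
        chars_seq_lengths = torch.sort(torch.randint(100, 128, (8,)).long(), descending=True)[0]
        # random mel targets, mel lengths, stop targets and speaker ids
        mel_spec = torch.rand(8, 30, c.audio["num_mels"])
        mel_postnet_spec = torch.rand(8, 30, c.audio["num_mels"])
        mel_lengths = torch.randint(20, 30, (8,)).long()
        stop_targets = torch.zeros(8, 30, 1).float()
        speaker_ids = torch.randint(0, 5, (8,)).long()
        # the TF model consumes tf tensors, so convert the inputs it actually receives
        chars_seq = tf.convert_to_tensor(chars_seq.numpy())
        chars_seq_lengths = tf.convert_to_tensor(chars_seq_lengths.numpy())
        mel_spec = tf.convert_to_tensor(mel_spec.numpy())
        return chars_seq, chars_seq_lengths, mel_spec, mel_postnet_spec, mel_lengths, stop_targets, speaker_ids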
Example #2
    def test_tflite_conversion(
        self,
    ):  # pylint:disable=no-self-use
        model = Tacotron2(
            num_chars=24,
            num_speakers=0,
            r=3,
            out_channels=80,
            decoder_output_dim=80,
            attn_type="original",
            attn_win=False,
            attn_norm="sigmoid",
            prenet_type="original",
            prenet_dropout=True,
            forward_attn=False,
            trans_agent=False,
            forward_attn_mask=False,
            location_attn=True,
            attn_K=0,
            separate_stopnet=True,
            bidirectional_decoder=False,
            enable_tflite=True,
        )
        model.build_inference()
        convert_tacotron2_to_tflite(model, output_path="test_tacotron2.tflite", experimental_converter=True)
        # init tflite model
        tflite_model = load_tflite_model("test_tacotron2.tflite")
        # fake input
        inputs = tf.random.uniform([1, 4], maxval=10, dtype=tf.int32)  # pylint:disable=unexpected-keyword-arg
        # get input and output details
        input_details = tflite_model.get_input_details()
        output_details = tflite_model.get_output_details()
        # reshape the input tensor for the new input shape
        tflite_model.resize_tensor_input(
            input_details[0]["index"], inputs.shape
        )  # pylint:disable=unexpected-keyword-arg
        tflite_model.allocate_tensors()
        detail = input_details[0]
        tflite_model.set_tensor(detail["index"], inputs)
        # run the tflite model
        tflite_model.invoke()
        # collect outputs
        decoder_output = tflite_model.get_tensor(output_details[0]["index"])
        postnet_output = tflite_model.get_tensor(output_details[1]["index"])
        # remove the tflite binary
        os.remove("test_tacotron2.tflite")
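
Neither convert_tacotron2_to_tflite nor load_tflite_model is defined in this snippet; both come from the project's TFLite utilities. As a rough sketch, and assuming the loader does nothing more than wrap the standard TensorFlow Lite interpreter, it could look like this:

def load_tflite_model(tflite_path):
    # build an interpreter over the serialized .tflite file and pre-allocate its tensors
    tflite_model = tf.lite.Interpreter(model_path=tflite_path)
    tflite_model.allocate_tensors()
    return tflite_model

Note that the test above calls allocate_tensors() again after resize_tensor_input(), because resizing an input invalidates the previously allocated buffers.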
Example #3
model = setup_model(num_chars, num_speakers, c)
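# restore the trained PyTorch weights from the checkpoint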
checkpoint = torch.load(args.torch_model_path,
                        map_location=torch.device('cpu'))
state_dict = checkpoint['model']
model.load_state_dict(state_dict)

# init tf model
model_tf = Tacotron2(num_chars=num_chars,
                     num_speakers=num_speakers,
                     r=model.decoder.r,
                     postnet_output_dim=c.audio['num_mels'],
                     decoder_output_dim=c.audio['num_mels'],
                     attn_type=c.attention_type,
                     attn_win=c.windowing,
                     attn_norm=c.attention_norm,
                     prenet_type=c.prenet_type,
                     prenet_dropout=c.prenet_dropout,
                     forward_attn=c.use_forward_attn,
                     trans_agent=c.transition_agent,
                     forward_attn_mask=c.forward_attn_mask,
                     location_attn=c.location_attn,
                     attn_K=c.attention_heads,
                     separate_stopnet=c.separate_stopnet,
                     bidirectional_decoder=c.bidirectional_decoder)

# set the initial layer mapping - these layers are not captured by the heuristic approach below
# TODO: set layer names so that this manual mapping can be removed
common_sufix = '/.ATTRIBUTES/VARIABLE_VALUE'
var_map = [
    ('embedding/embeddings:0', 'embedding.weight'),
    ('encoder/lstm/forward_lstm/lstm_cell_1/kernel:0',