Code example #1
File: alignment.py  Project: ssumin6/fastspeech
def get_tacotron2_alignment_test(text_seq):
    hparams = hp_tacotron2.create_hparams()
    hparams.sampling_rate = hp.sample_rate

    # Restore a trained Tacotron 2 checkpoint and put the model in half-precision eval mode.
    checkpoint_path = os.path.join("Tacotron2",
                                   os.path.join("outdir", "checkpoint_51000"))

    tacotron2 = train_tacotron2.load_model(hparams)
    tacotron2.load_state_dict(torch.load(checkpoint_path)["state_dict"])
    _ = tacotron2.cuda().eval().half()

    # Convert the input text to a sequence of symbol ids and add a batch dimension.
    sequence = np.array(text_to_sequence(text_seq,
                                         hp.text_cleaners))[None, :]
    print("sequence size", np.shape(sequence))

    sequence = torch.autograd.Variable(
        torch.from_numpy(sequence)).cuda().long()

    # Tacotron 2 inference returns the mel spectrogram before and after the
    # post-net, the stop-token (gate) outputs, and the attention alignment.
    mel, mel_postnet, _, alignment = tacotron2.inference(sequence)

    # Invert the post-net mel to a waveform and save it under the input text's name.
    wav = audio.inv_mel_spectrogram(mel_postnet.float().data.cpu().numpy()[0])
    file_name = text_seq.replace(" ", "_")

    audio.save_wav(wav, "%s.wav" % file_name)

    alignment = alignment.float().data.cpu().numpy()[0]
    print("alignment size", np.shape(alignment))

    get_D(alignment)

    return alignment
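
The helper get_D is called above but not defined in these snippets. Below is a minimal sketch of how FastSpeech-style duration extraction from an attention matrix is typically done; the body is an assumption, not the project's confirmed implementation. Each decoder frame is assigned to its most-attended input token, and the per-token frame counts become the durations.

import numpy as np

def get_D(alignment):
    # alignment is assumed to have shape (decoder_frames, encoder_steps):
    # one attention distribution over the input tokens per mel frame.
    D = np.zeros(alignment.shape[1], dtype=np.int64)
    for frame in alignment:
        # The most-attended input token "owns" this frame.
        D[np.argmax(frame)] += 1
    # D[i] is the number of mel frames assigned to input token i.
    return D
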
Code example #2
File: alignment.py  Project: scpark20/FastSpeech
def get_tacotron2_alignment_test(text_seq):
    hparams = hp_tacotron2.create_hparams()
    hparams.sampling_rate = hp.sample_rate

    checkpoint_path = os.path.join("Tacotron2", os.path.join(
        "pre_trained_model", "tacotron2_statedict.pt"))

    tacotron2 = train_tacotron2.load_model(hparams)
    tacotron2.load_state_dict(torch.load(checkpoint_path)["state_dict"])
    _ = tacotron2.cuda().eval().half()

    sequence = np.array(text_to_sequence(text_seq, hp.text_cleaners))[None, :]
    print("sequence size", np.shape(sequence))

    sequence = torch.autograd.Variable(
        torch.from_numpy(sequence)).cuda().long()

    mel, mel_postnet, _, alignment = tacotron2.inference(sequence)

    plot_data((mel.float().data.cpu().numpy()[0],
               mel_postnet.float().data.cpu().numpy()[0],
               alignment.float().data.cpu().numpy()[0].T))

    alignment = alignment.float().data.cpu().numpy()[0]
    print("alignment size", np.shape(alignment))

    get_D(alignment)

    return alignment
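
plot_data is another project-level helper that these snippets rely on but do not show. A minimal sketch, assuming a matplotlib implementation that simply draws each array side by side (the signature and styling are guesses):

import matplotlib.pyplot as plt

def plot_data(data, figsize=(16, 4)):
    # data is a tuple of 2-D arrays, e.g. (mel, mel_postnet, alignment.T).
    fig, axes = plt.subplots(1, len(data), figsize=figsize)
    for ax, array in zip(axes, data):
        ax.imshow(array, aspect='auto', origin='lower', interpolation='none')
    plt.show()
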
Code example #3
File: alignment.py  Project: scpark20/FastSpeech
def get_tacotron2():
    hparams = hp_tacotron2.create_hparams()
    hparams.sampling_rate = hp.sample_rate

    checkpoint_path = os.path.join("Tacotron2", os.path.join(
        "pre_trained_model", "tacotron2_statedict.pt"))

    tacotron2 = train_tacotron2.load_model(hparams)
    tacotron2.load_state_dict(torch.load(checkpoint_path)["state_dict"])
    _ = tacotron2.cuda().eval().half()

    return tacotron2
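
With the loader factored out, the model can be restored once and reused for many sentences. A short usage sketch, assuming the same hp, text_to_sequence, and get_D helpers as in the examples above (the sample sentences are illustrative only):

tacotron2 = get_tacotron2()

for text_seq in ["Hello world.", "FastSpeech needs per-phoneme durations."]:
    sequence = np.array(text_to_sequence(text_seq, hp.text_cleaners))[None, :]
    sequence = torch.from_numpy(sequence).cuda().long()
    # inference returns mel, post-net mel, gate outputs and the attention matrix.
    mel, mel_postnet, _, alignment = tacotron2.inference(sequence)
    get_D(alignment.float().data.cpu().numpy()[0])
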
Code example #4
def get_tacotron2_alignment_test(text_seq):
    hparams = hp_tacotron2.create_hparams()
    hparams.sampling_rate = hp.sample_rate

    checkpoint_path = os.path.join("Tacotron2", os.path.join(
        "pre_trained_model", "tacotron2_statedict.pt"))

    tacotron2 = train_tacotron2.load_model(hparams)
    tacotron2.load_state_dict(torch.load(checkpoint_path)["state_dict"])
    _ = tacotron2.cuda().eval().half()

    sequence = np.array(text_to_sequence(text_seq, hp.text_cleaners))[None, :]
    print("sequence size", np.shape(sequence))

    sequence = torch.autograd.Variable(
        torch.from_numpy(sequence)).cuda().long()

    mel, mel_postnet, _, alignment = tacotron2.inference(sequence)

    # Optional debugging: plot the spectrograms and alignment, or invert the
    # post-net mel to a test waveform.
    # plot_data((mel.float().data.cpu().numpy()[0],
    #            mel_postnet.float().data.cpu().numpy()[0],
    #            alignment.float().data.cpu().numpy()[0].T))
    # wav = audio.inv_mel_spectrogram(mel_postnet.float().data.cpu().numpy()[0].T)
    # audio.save_wav(wav, "test.wav")

    alignment = alignment.float().data.cpu().numpy()[0]
    print("alignment size", np.shape(alignment))

    get_D(alignment)

    return alignment
Code example #5
    def loadTacotron2(self):
        checkpoint_path = os.path.join("TrainedModels", "MelGenerator",
                                       "tacotron2_cassie_lee_morris_80.pt")

        hparams = create_hparams()
        hparams.sampling_rate = self.tacotron2SampleRate
        hparams.max_decoder_steps = 3000
        hparams.gate_threshold = 0.25

        self.tacotron2 = load_model(hparams)
        self.tacotron2.load_state_dict(
            torch.load(checkpoint_path,
                       map_location=self.deviceType)['state_dict'])

        if self.deviceType.type == "cpu":
            self.tacotron2.cpu().eval()
        else:
            self.tacotron2.cuda().eval()
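
Example #5 only restores the checkpoint. A companion inference method might look like the sketch below; the method name, the english_cleaners setting, and the use of text_to_sequence are assumptions based on the standard Tacotron 2 text frontend rather than this project's actual code.

    def textToMel(self, text):
        # Hypothetical helper: turn text into a mel spectrogram with the loaded model.
        sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
        sequence = torch.from_numpy(sequence).to(self.deviceType).long()

        with torch.no_grad():
            mel, mel_postnet, _, alignment = self.tacotron2.inference(sequence)
        return mel_postnet
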
Code example #6
    parser.add_argument('--n_gpus',
                        type=int,
                        default=1,
                        required=False,
                        help='number of gpus')
    parser.add_argument('--rank',
                        type=int,
                        default=0,
                        required=False,
                        help='rank of current gpu')
    parser.add_argument('--group_name',
                        type=str,
                        default='group_name',
                        required=False,
                        help='Distributed group name')
    parser.add_argument('--hparams',
                        type=str,
                        required=False,
                        help='comma separated name=value pairs')

    args = parser.parse_args()
    hparams = create_hparams(args.hparams)

    torch.backends.cudnn.enabled = hparams.cudnn_enabled
    torch.backends.cudnn.benchmark = hparams.cudnn_benchmark

    print("FP16 Run:", hparams.fp16_run)
    print("Dynamic Loss Scaling:", hparams.dynamic_loss_scaling)
    print("Distributed Run:", hparams.distributed_run)
    print("cuDNN Enabled:", hparams.cudnn_enabled)
    print("cuDNN Benchmark:", hparams.cudnn_benchmark)

    train(args.output_directory, args.log_directory, args.checkpoint_path,
          args.warm_start, args.n_gpus, args.rank, args.group_name, hparams)
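
In this variant the --hparams string is forwarded to create_hparams, which lets individual defaults be overridden from the command line. A rough illustration of the mechanism, assuming a stock NVIDIA-style create_hparams that parses comma-separated name=value pairs (the specific values are examples only):

# Overrides arrive as a comma-separated string and are applied on top of the
# defaults defined in the project's hparams module.
hparams = create_hparams("batch_size=32,fp16_run=True")
print(hparams.batch_size, hparams.fp16_run)
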
Code example #7
    parser.add_argument('--n_gpus',
                        type=int,
                        default=1,
                        required=False,
                        help='number of gpus')
    parser.add_argument('--rank',
                        type=int,
                        default=0,
                        required=False,
                        help='rank of current gpu')
    parser.add_argument('--group_name',
                        type=str,
                        default='group_name',
                        required=False,
                        help='Distributed group name')
    parser.add_argument('--hparams',
                        type=str,
                        required=False,
                        help='comma separated name=value pairs')

    args = parser.parse_args()
    hparams = create_hparams()

    torch.backends.cudnn.enabled = hparams.cudnn_enabled
    torch.backends.cudnn.benchmark = hparams.cudnn_benchmark

    print("FP16 Run:", hparams.fp16_run)
    print("Dynamic Loss Scaling:", hparams.dynamic_loss_scaling)
    print("Distributed Run:", hparams.distributed_run)
    print("cuDNN Enabled:", hparams.cudnn_enabled)
    print("cuDNN Benchmark:", hparams.cudnn_benchmark)

    train(args.output_directory, args.log_directory, args.checkpoint_path,
          args.warm_start, args.n_gpus, args.rank, args.group_name, hparams)