def get_tacotron2_alignment_test(text_seq):
    """Run Tacotron2 on ``text_seq``, save the audio and return the alignment.

    Loads the checkpoint at ``Tacotron2/outdir/checkpoint_51000``, runs
    half-precision inference on the GPU, inverts the post-net mel
    spectrogram to a waveform and writes it to ``"<text>.wav"`` (spaces in
    the text replaced by underscores) in the current directory. The
    alignment is also handed to ``get_D`` before being returned.

    Args:
        text_seq: Input text (a ``str``; ``str.replace`` is called on it).

    Returns:
        The alignment of the first (only) batch item as a float32 NumPy
        array.
    """
    hparams = hp_tacotron2.create_hparams()
    hparams.sampling_rate = hp.sample_rate

    checkpoint_path = os.path.join("Tacotron2", "outdir", "checkpoint_51000")

    tacotron2 = train_tacotron2.load_model(hparams)
    # Deserialize onto the CPU so the checkpoint does not depend on the GPU
    # it was saved from; load_state_dict copies the values into the model.
    # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    tacotron2.load_state_dict(checkpoint["state_dict"])
    tacotron2.cuda().eval().half()

    # [None, :] adds the leading batch axis.
    sequence = np.array(
        text_to_sequence(text_seq, hp.hparams.text_cleaners))[None, :]
    print("sequence size", np.shape(sequence))
    sequence = torch.from_numpy(sequence).cuda().long()

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        _, mel_postnet, _, alignment = tacotron2.inference(sequence)

    wav = audio.inv_mel_spectrogram(mel_postnet.float().cpu().numpy()[0])
    file_name = text_seq.replace(" ", "_")
    audio.save_wav(wav, "%s.wav" % file_name)

    alignment = alignment.float().cpu().numpy()[0]
    print("alignment size", np.shape(alignment))

    get_D(alignment)

    return alignment
def get_tacotron2_alignment_test(text_seq):
    """Run Tacotron2 on ``text_seq``, plot the outputs and return the alignment.

    Loads ``Tacotron2/pre_trained_model/tacotron2_statedict.pt``, runs
    half-precision inference on the GPU, passes the mel, post-net mel and
    transposed alignment of the first batch item to ``plot_data``, then
    hands the alignment to ``get_D`` before returning it.

    Args:
        text_seq: Input text, forwarded to ``text_to_sequence``.

    Returns:
        The alignment of the first (only) batch item as a float32 NumPy
        array.
    """
    hparams = hp_tacotron2.create_hparams()
    hparams.sampling_rate = hp.sample_rate

    checkpoint_path = os.path.join(
        "Tacotron2", "pre_trained_model", "tacotron2_statedict.pt")

    tacotron2 = train_tacotron2.load_model(hparams)
    # Deserialize onto the CPU so the checkpoint does not depend on the GPU
    # it was saved from; load_state_dict copies the values into the model.
    # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    tacotron2.load_state_dict(checkpoint["state_dict"])
    tacotron2.cuda().eval().half()

    # [None, :] adds the leading batch axis.
    sequence = np.array(text_to_sequence(text_seq, hp.text_cleaners))[None, :]
    print("sequence size", np.shape(sequence))
    sequence = torch.from_numpy(sequence).cuda().long()

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        mel, mel_postnet, _, alignment = tacotron2.inference(sequence)

    # Convert each output once and reuse the alignment array below.
    mel = mel.float().cpu().numpy()[0]
    mel_postnet = mel_postnet.float().cpu().numpy()[0]
    alignment = alignment.float().cpu().numpy()[0]

    plot_data((mel, mel_postnet, alignment.T))
    print("alignment size", np.shape(alignment))

    get_D(alignment)

    return alignment
def get_tacotron2():
    """Build the pre-trained Tacotron2 model ready for GPU inference.

    Creates the hyper-parameters (with the sampling rate taken from
    ``hp.sample_rate``), loads the weights stored under the
    ``"state_dict"`` key of
    ``Tacotron2/pre_trained_model/tacotron2_statedict.pt`` and moves the
    model to CUDA in eval mode and half precision.

    Returns:
        The loaded Tacotron2 model.
    """
    hparams = hp_tacotron2.create_hparams()
    hparams.sampling_rate = hp.sample_rate

    checkpoint_path = os.path.join(
        "Tacotron2", "pre_trained_model", "tacotron2_statedict.pt")

    tacotron2 = train_tacotron2.load_model(hparams)
    # Deserialize onto the CPU so the checkpoint does not depend on the GPU
    # it was saved from; load_state_dict copies the values into the model.
    # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    tacotron2.load_state_dict(checkpoint["state_dict"])
    tacotron2.cuda().eval().half()

    return tacotron2
def get_tacotron2_alignment_test(text_seq):
    """Run Tacotron2 on ``text_seq`` and return its attention alignment.

    Loads ``Tacotron2/pre_trained_model/tacotron2_statedict.pt``, runs
    half-precision inference on the GPU and hands the alignment of the
    first batch item to ``get_D`` before returning it.

    Args:
        text_seq: Input text, forwarded to ``text_to_sequence``.

    Returns:
        The alignment of the first (only) batch item as a float32 NumPy
        array.
    """
    hparams = hp_tacotron2.create_hparams()
    hparams.sampling_rate = hp.sample_rate

    checkpoint_path = os.path.join(
        "Tacotron2", "pre_trained_model", "tacotron2_statedict.pt")

    tacotron2 = train_tacotron2.load_model(hparams)
    # Deserialize onto the CPU so the checkpoint does not depend on the GPU
    # it was saved from; load_state_dict copies the values into the model.
    # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    tacotron2.load_state_dict(checkpoint["state_dict"])
    tacotron2.cuda().eval().half()

    # [None, :] adds the leading batch axis.
    sequence = np.array(text_to_sequence(text_seq, hp.text_cleaners))[None, :]
    print("sequence size", np.shape(sequence))
    sequence = torch.from_numpy(sequence).cuda().long()

    # Pure inference: no autograd graph is needed. Only the alignment is
    # used; the mel outputs and the third output are discarded.
    with torch.no_grad():
        _, _, _, alignment = tacotron2.inference(sequence)

    alignment = alignment.float().cpu().numpy()[0]
    print("alignment size", np.shape(alignment))

    get_D(alignment)

    return alignment
def loadTacotron2(self):
    """Create the Tacotron2 mel generator and store it on ``self.tacotron2``.

    Hyper-parameters come from ``create_hparams()`` with the sampling rate
    set from ``self.tacotron2SampleRate``, ``max_decoder_steps`` set to
    3000 and ``gate_threshold`` set to 0.25. Weights are read from the
    ``'state_dict'`` entry of the checkpoint, mapped to ``self.deviceType``.
    The model is then placed on the CPU or on CUDA, depending on
    ``self.deviceType.type``, and switched to eval mode.
    """
    hparams = create_hparams()
    hparams.sampling_rate = self.tacotron2SampleRate
    hparams.max_decoder_steps = 3000
    hparams.gate_threshold = 0.25

    weights_file = os.path.join(
        "TrainedModels", "MelGenerator", "tacotron2_cassie_lee_morris_80.pt")

    # Assign the attribute before loading weights, as the original did.
    self.tacotron2 = model = load_model(hparams)
    saved = torch.load(weights_file, map_location=self.deviceType)
    model.load_state_dict(saved['state_dict'])

    run_on_cpu = self.deviceType.type == "cpu"
    placed = model.cpu() if run_on_cpu else model.cuda()
    placed.eval()
help='number of gpus') parser.add_argument('--rank', type=int, default=0, required=False, help='rank of current gpu') parser.add_argument('--group_name', type=str, default='group_name', required=False, help='Distributed group name') parser.add_argument('--hparams', type=str, required=False, help='comma separated name=value pairs') args = parser.parse_args() hparams = create_hparams(args.hparams) torch.backends.cudnn.enabled = hparams.cudnn_enabled torch.backends.cudnn.benchmark = hparams.cudnn_benchmark print("FP16 Run:", hparams.fp16_run) print("Dynamic Loss Scaling:", hparams.dynamic_loss_scaling) print("Distributed Run:", hparams.distributed_run) print("cuDNN Enabled:", hparams.cudnn_enabled) print("cuDNN Benchmark:", hparams.cudnn_benchmark) train(args.output_directory, args.log_directory, args.checkpoint_path, args.warm_start, args.n_gpus, args.rank, args.group_name, hparams)
help='number of gpus') parser.add_argument('--rank', type=int, default=0, required=False, help='rank of current gpu') parser.add_argument('--group_name', type=str, default='group_name', required=False, help='Distributed group name') parser.add_argument('--hparams', type=str, required=False, help='comma separated name=value pairs') args = parser.parse_args() hparams = create_hparams() torch.backends.cudnn.enabled = hparams.cudnn_enabled torch.backends.cudnn.benchmark = hparams.cudnn_benchmark print("FP16 Run:", hparams.fp16_run) print("Dynamic Loss Scaling:", hparams.dynamic_loss_scaling) print("Distributed Run:", hparams.distributed_run) print("cuDNN Enabled:", hparams.cudnn_enabled) print("cuDNN Benchmark:", hparams.cudnn_benchmark) train(args.output_directory, args.log_directory, args.checkpoint_path, args.warm_start, args.n_gpus, args.rank, args.group_name, hparams)