def generateAudioGroup(original_audio, ref_audios,
                       autovc_checkpoint='checkpoints_fully/autovc_700000.pt',
                       vocoder_checkpoint="../checkpoint_step001000000_ema.pth"):
    """Convert ``original_audio`` using the mean embedding of a reference folder.

    Args:
        original_audio: Source audio, passed to ``makeSpect`` and
            ``get_verification_pytorch_1000``.
        ref_audios: Directory whose entries are each passed to
            ``get_verification_pytorch_1000``; entries for which it returns
            ``None`` are skipped.
        autovc_checkpoint: Path given to ``torch.load``; the loaded object is
            used directly as the conversion model.
        vocoder_checkpoint: Path to a checkpoint whose ``"state_dict"`` entry
            is loaded into the model returned by ``build_model``.

    Returns:
        The value returned by ``wavegen`` for the converted spectrogram, or
        ``None`` when no embedding is available for the source audio or for
        any reference file (mirrors ``generateAudio``).
    """
    def pad_seq(x, base=32):
        """Zero-pad axis 0 of ``x`` up to the next multiple of ``base``."""
        len_out = int(base * ceil(float(x.shape[0]) / base))
        len_pad = len_out - x.shape[0]
        assert len_pad >= 0
        return np.pad(x, ((0, len_pad), (0, 0)), 'constant'), len_pad

    device = 'cuda:0'

    mel_org = makeSpect(original_audio, None)

    # The checkpoint holds the whole pickled model, so no separate Generator
    # instance needs to be built before loading it.
    # NOTE(review): recent PyTorch releases default torch.load to
    # weights_only=True, which rejects fully pickled modules -- confirm the
    # installed version before upgrading.
    G = torch.load(autovc_checkpoint, map_location=torch.device('cuda')).eval()

    x_org, len_pad = pad_seq(mel_org)
    uttr_org = torch.FloatTensor(x_org[np.newaxis, :, :]).to(device)

    emb_org = get_verification_pytorch_1000(original_audio)

    # List the directory once; it is needed both for iteration and for the
    # progress total.
    ref_files = os.listdir(ref_audios)
    total = len(ref_files)
    emb_refs = []
    for index, file_name in enumerate(ref_files, start=1):
        print("{}/{}".format(index, total))
        # os.path.join works whether or not ref_audios ends with a separator.
        emb_ref = get_verification_pytorch_1000(
            os.path.join(ref_audios, file_name), 1)
        if emb_ref is not None:
            emb_refs.append(emb_ref)

    # Without a source embedding or at least one reference embedding there is
    # nothing meaningful to feed the model.
    if emb_org is None or not emb_refs:
        return None

    emb_refs = np.mean(emb_refs, axis=0)

    emb_org = torch.FloatTensor(emb_org).unsqueeze(0).cuda()
    emb_refs = torch.FloatTensor(emb_refs).unsqueeze(0).cuda()

    with torch.no_grad():
        _, x_identic_psnt, _ = G(uttr_org, emb_org, emb_refs)

    # Strip the frames that pad_seq appended.
    if len_pad == 0:
        uttr_trg = x_identic_psnt[0, 0, :, :].cpu().numpy()
    else:
        uttr_trg = x_identic_psnt[0, 0, :-len_pad, :].cpu().numpy()

    device = torch.device("cuda")
    model = build_model().to(device)
    checkpoint = torch.load(vocoder_checkpoint,
                            map_location=torch.device('cuda'))
    model.load_state_dict(checkpoint["state_dict"])

    waveform = wavegen(model, c=uttr_trg)
    return waveform
def generateAudio(original_audio, ref_audio, autovc_checkpoint,
                  vocoder_checkpoint, english=False):
    """Convert ``original_audio`` towards the speaker of ``ref_audio``.

    Args:
        original_audio: Source audio, passed to ``makeSpect`` and
            ``get_verification_pytorch_1000``.
        ref_audio: Reference audio used to compute the target embedding.
        autovc_checkpoint: Path given to ``torch.load``; the loaded object is
            used directly as the conversion model.
        vocoder_checkpoint: Path to a checkpoint whose ``"state_dict"`` entry
            is loaded into the model returned by ``build_model``.
        english: When true the reference embedding comes from
            ``get_verification_eng`` instead of
            ``get_verification_pytorch_1000``.

    Returns:
        The value returned by ``wavegen`` for the converted spectrogram, or
        ``None`` if either embedding could not be computed.
    """
    def pad_seq(x, base=32):
        """Zero-pad axis 0 of ``x`` up to the next multiple of ``base``."""
        len_out = int(base * ceil(float(x.shape[0]) / base))
        len_pad = len_out - x.shape[0]
        assert len_pad >= 0
        return np.pad(x, ((0, len_pad), (0, 0)), 'constant'), len_pad

    device = 'cuda:0'

    mel_org = makeSpect(original_audio, None)

    emb_org = get_verification_pytorch_1000(original_audio)
    if english:
        emb_ref = get_verification_eng(ref_audio)
    else:
        emb_ref = get_verification_pytorch_1000(ref_audio)

    # Bail out before any checkpoint is loaded onto the GPU.
    if emb_org is None or emb_ref is None:
        return None

    # The checkpoint holds the whole pickled model, so no separate Generator
    # instance needs to be built before loading it.
    # NOTE(review): recent PyTorch releases default torch.load to
    # weights_only=True, which rejects fully pickled modules -- confirm the
    # installed version before upgrading.
    G = torch.load(autovc_checkpoint, map_location=torch.device('cuda')).eval()

    x_org, len_pad = pad_seq(mel_org)
    uttr_org = torch.FloatTensor(x_org[np.newaxis, :, :]).to(device)

    emb_org = torch.FloatTensor(emb_org).unsqueeze(0).cuda()
    if english:
        # In this branch the embedding is only cast, not wrapped/unsqueezed.
        emb_ref = emb_ref.type(torch.cuda.FloatTensor)
    else:
        emb_ref = torch.FloatTensor(emb_ref).unsqueeze(0).cuda()

    with torch.no_grad():
        _, x_identic_psnt, _ = G(uttr_org, emb_org, emb_ref)

    # Strip the frames that pad_seq appended.
    if len_pad == 0:
        uttr_trg = x_identic_psnt[0, 0, :, :].cpu().numpy()
    else:
        uttr_trg = x_identic_psnt[0, 0, :-len_pad, :].cpu().numpy()

    device = torch.device("cuda")
    model = build_model().to(device)
    checkpoint = torch.load(vocoder_checkpoint,
                            map_location=torch.device('cuda'))
    model.load_state_dict(checkpoint["state_dict"])

    waveform = wavegen(model, c=uttr_trg)
    return waveform
def __decode__(self):
    """Vocode every spectrogram stored in ``results.pkl`` to a wav file.

    Each entry of the unpickled sequence is indexed as ``(name, spectrogram,
    ...)``. All entries are written to the same path, so the file on disk
    ends up holding the waveform of the last entry.

    Returns:
        The path of the written wav file.

    Raises:
        ValueError: If ``results.pkl`` contains no entries.
    """
    save_path = "audio/download/audio.wav"

    # NOTE: unpickling can execute arbitrary code; only load a results.pkl
    # that this application produced itself.
    with open('results.pkl', 'rb') as handle:
        spect_vc = pickle.load(handle)

    # The vocoder is built and loaded on the CPU.
    model = build_model()
    checkpoint = torch.load("checkpoint_step001000000_ema.pth",
                            map_location=torch.device('cpu'))
    model.load_state_dict(checkpoint["state_dict"])

    written = 0
    for spect in spect_vc:
        name = spect[0]
        c = spect[1]
        print(name)
        waveform = wavegen(model, c=c)
        # NOTE(review): newer librosa releases no longer ship librosa.output;
        # confirm the pinned version still provides write_wav.
        librosa.output.write_wav(save_path, waveform, sr=16000)
        written += 1

    if not written:
        raise ValueError("results.pkl contains no spectrograms to decode")
    return save_path
args = parser.parse_args()

# Command-line supplied paths.
output_path = args.output
src_wav_path, src_emb_path, tgt_emb_path = (
    args.src_wav, args.src_emb, args.tgt_emb)
vocoder_checkpoint_path, autovc_checkpoint_path = args.vocoder, args.autovc

# Not used within this excerpt; presumably the generator's hyper-parameters.
dim_neck, dim_emb, dim_pre, freq = 32, 256, 512, 32

device = torch.device('cpu')

# Build the vocoder and restore its weights on the chosen device.
wavnet = build_model().to(device)
checkpoint = torch.load(vocoder_checkpoint_path, map_location=device)
wavnet.load_state_dict(checkpoint["state_dict"])

# Source waveform plus the two embedding arrays stored on disk.
wav = load_wav(src_wav_path)
emb = np.load(src_emb_path)
emb_tgt = np.load(tgt_emb_path)

# Pad axis 1 of the mel-spectrogram up to the next multiple of 32.
mel = melspectrogram(wav)
pad_len = -mel.shape[1] % 32
mel = torch.FloatTensor(
    np.pad(mel, ((0, 0), (0, pad_len)), mode='constant'))

emb, emb_tgt = torch.FloatTensor(emb), torch.FloatTensor(emb_tgt)
device = 'cuda:0'

# The loaded checkpoint object is itself called as the conversion model.
# Switch it to eval mode so inference does not depend on whatever training
# flag was pickled with it.
g_checkpoint = torch.load(autovc_checkpoint, map_location=torch.device('cuda'))
G = g_checkpoint.eval()

x_org = mel_org
x_org, len_pad = pad_seq(x_org)
uttr_org = torch.FloatTensor(x_org[np.newaxis, :, :]).to(device)

with torch.no_grad():
    _, x_identic_psnt, _ = G(uttr_org, emb_ref)

# Strip the frames that pad_seq appended.
if len_pad == 0:
    uttr_trg = x_identic_psnt[0, 0, :, :].cpu().numpy()
else:
    uttr_trg = x_identic_psnt[0, 0, :-len_pad, :].cpu().numpy()

# Vocoder: build, restore weights, synthesise.
device = torch.device("cuda")
model = build_model().to(device)
checkpoint = torch.load(
    "../drive/MyDrive/MultiSpeaker_Tacotron2/checkpoint_step001000000_ema.pth",
    map_location=torch.device('cuda'))
model.load_state_dict(checkpoint["state_dict"])

waveform = wavegen(model, c=uttr_trg)
sf.write('{}-{}.wav'.format(original_name, ref_name), waveform, 16000,
         subtype='PCM_24')
# Create the output directory; exist_ok avoids the check-then-create race.
os.makedirs(subdir_for_wavs, exist_ok=True)

# These imports must come after the path insertion: hparams and synthesis
# are resolved through the directory added here.
sys.path.insert(
    1, '/homes/bdoc3/my_data/autovc_data'
)  # usually the cwd is priority, so index 1 is good enough for our purposes here
from hparams import hparams
import torch
import librosa
import soundfile as sf
import pickle
from synthesis import build_model
from synthesis import wavegen

# Build the vocoder on the configured device and map the checkpoint tensors
# onto that same device, so loading does not depend on the device the
# checkpoint was saved from.
model = build_model().to(config.device)
checkpoint = torch.load(
    "/homes/bdoc3/my_data/autovc_data/checkpoint_step001000000_ema.pth",
    map_location=config.device)
model.load_state_dict(checkpoint["state_dict"])

counter = 0
_, _, fileList = next(os.walk(config.spmel_dir))

# Collect (file name minus its last four characters, cropped tensor) pairs
# for every file directly inside config.spmel_dir.
numpy_list = []
for numpy_name in fileList:
    spmel = np.load(os.path.join(config.spmel_dir, numpy_name))[:config.len_crop]
    numpy_list.append((numpy_name[:-4], torch.tensor(spmel).to(config.device)))
def load_model():
    """Build the model on ``device`` and restore weights from ``model_path``.

    ``device`` and ``model_path`` are read from the enclosing scope.

    Returns:
        The model returned by ``build_model`` with the checkpoint's
        ``"state_dict"`` entry loaded into it.
    """
    model = build_model().to(device)
    # Map stored tensors onto the target device so a checkpoint saved on a
    # different (possibly unavailable) device still loads.
    checkpoint = torch.load(model_path, map_location=device)
    model.load_state_dict(checkpoint["state_dict"])
    return model
def __init__(self, device="cpu", model_path="checkpoint_step001000000_ema.pth"):
    """Build the model on ``device`` and load weights from ``model_path``.

    Args:
        device: Device the model is moved to; also stored as ``self.device``.
        model_path: Path to a checkpoint containing a ``"state_dict"`` entry.
    """
    self.device = device
    self.model = build_model().to(device)
    # Map stored tensors onto the requested device; without this a
    # checkpoint saved on CUDA cannot be loaded with the default "cpu".
    checkpoint = torch.load(model_path, map_location=device)
    self.model.load_state_dict(checkpoint["state_dict"])