Example #1
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
	eval_dir = os.path.join(output_dir, 'eval')
	log_dir = os.path.join(output_dir, 'logs-eval')

	if args.model in ('Both', 'Tacotron-2'):
		assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir) #mels_dir = wavenet_input_dir
	
	#Create output path if it doesn't exist
	os.makedirs(eval_dir, exist_ok=True)
	os.makedirs(log_dir, exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

	log(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path, hparams)

	
	with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
		for i, text in enumerate(tqdm(sentences)):
			start = time.time()
			mel_filename = synth.synthesize(text, i+1, eval_dir, log_dir, None)

			file.write('{}|{}\n'.format(text, mel_filename))
	log('synthesized mel spectrograms at {}'.format(eval_dir))
	return eval_dir
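For context, a driver along the lines sketched below is what typically calls run_eval: it resolves the newest checkpoint and parses hyperparameter overrides the same way later examples in this collection do. The flag names, the hparams import path, and the sample sentence are illustrative assumptions rather than code from the snippet above.

# Hypothetical driver for the run_eval above; flag names and the hparams import are assumptions.
import argparse
import tensorflow as tf
from hparams import hparams  # assumed import path for the repo's hyperparameters

parser = argparse.ArgumentParser()
parser.add_argument('--model', default='Tacotron')
parser.add_argument('--checkpoint', default='logs-Tacotron/taco_pretrained/')
parser.add_argument('--output_dir', default='tacotron_output/')
parser.add_argument('--mels_dir', default='tacotron_output/eval/')
parser.add_argument('--hparams', default='', help='name=value hyperparameter overrides')
args = parser.parse_args()

# Resolve the latest checkpoint and apply any overrides, as the later examples do.
checkpoint_path = tf.train.get_checkpoint_state(args.checkpoint).model_checkpoint_path
modified_hp = hparams.parse(args.hparams)
run_eval(args, checkpoint_path, args.output_dir, modified_hp, ['This is an illustrative test sentence.'])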
Example #2
def run_synthesis(args, checkpoint_path, output_dir, hparams):
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')

        # Create output path if it doesn't exist
        os.makedirs(synth_dir, exist_ok=True)
    else:
        synth_dir = os.path.join(output_dir, 'natural')

        # Create output path if it doesn't exist
        os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=GTA)
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(
            len(metadata), hours))

    # Set inputs batch wise
    metadata = [
        metadata[i:i + hparams.tacotron_synthesis_batch_size]
        for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)
    ]

    log('Starting Synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        for i, meta in enumerate(tqdm(metadata)):
            texts = [m[5] for m in meta]
            mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
            wav_filenames = [os.path.join(wav_dir, m[0]) for m in meta]
            basenames = [
                os.path.basename(m).replace('.npy', '').replace('mel-', '')
                for m in mel_filenames
            ]
            mel_output_filenames, speaker_ids = synth.synthesize(
                texts, basenames, synth_dir, None, mel_filenames)

            for elems in zip(wav_filenames, mel_filenames,
                             mel_output_filenames, speaker_ids, texts):
                file.write('|'.join([str(x) for x in elems]) + '\n')
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')
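The slicing idiom used above to group the metadata into fixed-size batches recurs throughout these examples; as a standalone sketch in plain Python (the helper name is illustrative):

def make_batches(items, batch_size):
    """Split a list into consecutive chunks of at most batch_size items."""
    return [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

# make_batches(list(range(7)), 3) -> [[0, 1, 2], [3, 4, 5], [6]]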
Example #3
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    #Set inputs batch wise
    sentences = [
        sentences[i:i + hparams.tacotron_synthesis_batch_size] for i in range(
            0, len(sentences), hparams.tacotron_synthesis_batch_size)
    ]

    log('Starting Synthesis')
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, texts in enumerate(tqdm(sentences)):
            start = time.time()
            basenames = [
                'batch_{}_sentence_{}'.format(i, j) for j in range(len(texts))
            ]

            if hparams.tacotron_reference_waveform:
                # mel_reference_filename = ["/home/aperquin/Programmes/Tacotron-2-from-reference/training_data/mels/mel-p225_046.npy"]*len(basenames)
                # mel_reference_filename = ["/home/aperquin/Programmes/Tacotron-2-from-reference/training_data/mels/mel-p226_306.npy"]*len(basenames)
                # mel_reference_filename = ["/home/aperquin/Programmes/Tacotron-2-from-reference/training_data/mels/mel-p297_247.npy"]*len(basenames)
                # mel_reference_filename = ["/home/aperquin/Programmes/Tacotron-2-from-reference/training_data/mels/mel-p376_076.npy"]*len(basenames)
                mel_reference_filename = [args.mel_reference] * len(basenames)
            else:
                mel_reference_filename = None

            mel_filenames, speaker_ids = synth.synthesize(
                texts, basenames, eval_dir, log_dir, None,
                mel_reference_filename)

            for elems in zip(texts, mel_filenames, speaker_ids):
                file.write('|'.join([str(x) for x in elems]) + '\n')
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Example #4
def run_eval(args, checkpoint_path, output_dir, hparams, sentences,
             speaker_labels, language_labels):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    #Set inputs batch wise
    sentences = [
        sentences[i:i + hparams.tacotron_synthesis_batch_size] for i in range(
            0, len(sentences), hparams.tacotron_synthesis_batch_size)
    ]
    speaker_labels = [
        speaker_labels[i:i + hparams.tacotron_synthesis_batch_size] for i in
        range(0, len(speaker_labels), hparams.tacotron_synthesis_batch_size)
    ]
    language_labels = [
        language_labels[i:i + hparams.tacotron_synthesis_batch_size] for i in
        range(0, len(language_labels), hparams.tacotron_synthesis_batch_size)
    ]
    log('Starting Synthesis')
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, texts in enumerate(tqdm(sentences)):
            start = time.time()
            basenames = [
                'batch_{}_sentence_{}'.format(i, j) for j in range(len(texts))
            ]
            mel_filenames, speaker_ids = synth.synthesize(
                texts, speaker_labels[i], language_labels[i], basenames,
                eval_dir, log_dir, None)

            for elems in zip(texts, mel_filenames, speaker_ids):
                file.write('|'.join([str(x) for x in elems]) + '\n')
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Example #5
def run_synthesis(args, checkpoint_path, output_dir, sentences):
	metadata_filename = os.path.join(args.input_dir, 'train.txt')
	print(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path, gta=args.GTA)

	wav = load_wav(args.reference_audio)
	reference_mel = melspectrogram(wav).transpose()

	with open(metadata_filename, encoding='utf-8') as f:
		metadata = [line.strip().split('|') for line in f]
		frame_shift_ms = hparams.hop_size / hparams.sample_rate
		hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
		print('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

	if args.GTA==True:
		synth_dir = os.path.join(output_dir, 'gta')
	else:
		synth_dir = os.path.join(output_dir, 'natural')

	#Create output path if it doesn't exist
	os.makedirs(synth_dir, exist_ok=True)
	os.makedirs(os.path.join(synth_dir, 'wavs/'), exist_ok=True)

	print('starting synthesis')
	with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
		#for i, meta in enumerate(tqdm(metadata)):
			#text = meta[5]
		for i, text in enumerate(tqdm(sentences)):
			mel_output_filename = synth.synthesize(text=text, index=i+1, out_dir=synth_dir, log_dir=None, mel_filename=None, reference_mel=reference_mel)

			mels = np.load(mel_output_filename)
			wav = audio.inv_mel_spectrogram(mels.T)
			audio.save_wav(wav, os.path.join(synth_dir, 'wavs/speech-wav-{:05d}-mel.wav'.format(i+1)))

			with open(os.path.join(synth_dir, 'wavs/speech-wav-{:05d}.txt'.format(i+1)), 'w') as tf:
				tf.write(text)

			if hparams.predict_linear:
				# save wav (linear -> wav)
				# NOTE: `linear` is never defined in this snippet; synth.synthesize would have to
				# return the predicted linear spectrogram for this branch to run as written.
				wav = audio.inv_linear_spectrogram(linear.T)
				audio.save_wav(wav, os.path.join(synth_dir, 'wavs/speech-wav-{:05d}-linear.wav'.format(i+1)))

		#file.write('{}|{}|{}|{}\n'.format(text, mel_filename, mel_output_filename, wav_filename))
	print('synthesized mel spectrograms at {}'.format(synth_dir))
Example #6
def run_synthesis(args, checkpoint_path, output_dir, hparams):
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')

        #Create output path if it doesn't exist
        os.makedirs(synth_dir, exist_ok=True)
    else:
        synth_dir = os.path.join(output_dir, 'natural')

        #Create output path if it doesn't exist
        os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=GTA)
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(
            len(metadata), hours))

    log('starting synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        for i, meta in enumerate(tqdm(metadata)):
            text = meta[5]
            mel_filename = os.path.join(mel_dir, meta[1])
            wav_filename = os.path.join(wav_dir, meta[0])
            basename = os.path.basename(mel_filename).replace('.npy', '').replace('mel-', '')
            mel_output_filename, speaker_id = synth.synthesize(
                text, basename, synth_dir, None, mel_filename)

            file.write('{}|{}|{}|{}|{}\n'.format(wav_filename, mel_filename,
                                                 mel_output_filename,
                                                 speaker_id, text))
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')
Example #7
def run_eval(args, checkpoint_path, output_dir):
    # 	print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path)
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(hparams.sentences)):
            start = time.time()
            mel_filename = synth.synthesize(text, i + 1, eval_dir, log_dir,
                                            None)

            file.write('{}|{}\n'.format(text, mel_filename))
Example #8
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            start = time.time()
            mel_filename, speaker_id = synth.synthesize([text], [i + 1], eval_dir, log_dir, None)

            file.write('{}|{}|{}\n'.format(text, mel_filename[0], speaker_id[0]))

    log('synthesized mel spectrograms at {}'.format(eval_dir))
Example #9
def run_inference(args, checkpoint_path, output_dir, hparams):
    os.makedirs(output_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=False, vae_code_mode='inference')

    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(
            len(metadata), hours))

    #Set inputs batch wise
    metadata = [
        metadata[i:i + hparams.tacotron_synthesis_batch_size]
        for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)
    ]

    log('Starting inference')
    mel_dir = os.path.join(args.input_dir, 'mels')
    all_embeddings = {}

    trange = tqdm(metadata)
    for i, meta in enumerate(trange):
        mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
        latent_embeddings = synth.inference(mel_filenames)

        for mel_filename, latent_embedding in zip(mel_filenames,
                                                  latent_embeddings):
            all_embeddings[os.path.basename(mel_filename)[4:-4]] = latent_embedding

    log('Saving latent embeddings...')
    with open(os.path.join(output_dir, 'latent_embeddings.pkl'), 'wb') as file:
        pickle.dump(all_embeddings, file)

    log('Latent embeddings saved at {}'.format(output_dir))
    return os.path.join(output_dir, 'latent_embeddings.pkl')
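The pickle written above maps an utterance id (the mel filename with its 'mel-' prefix and '.npy' suffix stripped) to its latent embedding, so reading it back is the mirror image; a minimal sketch, assuming the same output_dir as above:

import os
import pickle

# Load the dictionary of latent embeddings saved by run_inference above.
with open(os.path.join(output_dir, 'latent_embeddings.pkl'), 'rb') as f:
    all_embeddings = pickle.load(f)

# Each value is the latent vector inferred for the corresponding utterance.
for utt_id, embedding in all_embeddings.items():
    print(utt_id, getattr(embedding, 'shape', None))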
Example #10
def run_eval(args, checkpoint_path, output_dir):
	print(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path)
	eval_dir = os.path.join(output_dir, 'eval')
	log_dir = os.path.join(output_dir, 'logs-eval')
	wav = load_wav(args.reference_audio)
	reference_mel = melspectrogram(wav).transpose()
	#Create output path if it doesn't exist
	os.makedirs(eval_dir, exist_ok=True)
	os.makedirs(log_dir, exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

	with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
		for i, text in enumerate(tqdm(hparams.sentences)):
			start = time.time()
			mel_filename = synth.synthesize(text, i+1, eval_dir, log_dir, None, reference_mel)

			file.write('{}|{}\n'.format(text, mel_filename))
	print('synthesized mel spectrograms at {}'.format(eval_dir))
Example #11
def run_synthesis_sytle_transfer(args, synth_metadata_filename,
                                 checkpoint_path, output_dir, hparams):

    synth_dir = os.path.join(output_dir, 'natural')

    #Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args, checkpoint_path, hparams)

    texts, basenames, basenames_refs, mel_filenames, \
    mel_ref_filenames_emt, mel_ref_filenames_spk,\
    emt_labels, spk_labels = get_filenames_from_metadata(synth_metadata_filename, args.input_dir, args.flip_spk_emt)

    synth.synthesize(texts,
                     basenames,
                     synth_dir,
                     synth_dir,
                     mel_filenames,
                     mel_ref_filenames_emt=mel_ref_filenames_emt,
                     mel_ref_filenames_spk=mel_ref_filenames_spk,
                     emt_labels_synth=emt_labels,
                     spk_labels_synth=spk_labels)
Example #12
def run_single(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Set inputs batch wise
    sentences = [
        sentences[i:i + hparams.tacotron_synthesis_batch_size] for i in range(
            0, len(sentences), hparams.tacotron_synthesis_batch_size)
    ]
    # sentences=[[sentences]]
    print(sentences)

    log('Starting Synthesis Single')
    for i, texts in enumerate(tqdm(sentences)):
        start = time.time()
        #basenames = ['batch_{:03d}_sentence_{:03d}'.format(i, j) for j in range(len(texts))]
        #mel_filenames, speaker_ids = synth.synthesize(texts, basenames, eval_dir, log_dir, None)
        print(texts, eval_dir, log_dir)
        synth.synthesize(texts, None, eval_dir, log_dir, None)

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Example #13
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
	eval_dir = os.path.join(output_dir, 'eval')
	log_dir = os.path.join(output_dir, 'logs-eval')

	if args.model == 'Tacotron-2':
		assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir) #mels_dir = wavenet_input_dir

	#Create output path if it doesn't exist
	os.makedirs(eval_dir, exist_ok=True)
	os.makedirs(log_dir, exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

	log(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path, hparams, speaker_id=args.speaker_id)

	with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
		for i, text in enumerate(tqdm(sentences)):
			start = time.time()
			if args.speaker_id is not None:
				mel_filename, speaker_id = synth.synthesize([text], [i+1], eval_dir, log_dir, None, speaker_id=[args.speaker_id[i]])
			else:
				mel_filename, speaker_id = synth.synthesize([text], [i+1], eval_dir, log_dir, None, speaker_id=None)

			file.write('{}|{}|{}\n'.format(text, mel_filename[0], speaker_id[0]))
	log('synthesized mel spectrograms at {}'.format(eval_dir))
	return eval_dir
Example #14
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
	eval_dir = os.path.join(output_dir, 'eval')
	log_dir = os.path.join(output_dir, 'logs-eval')

	if args.model == 'Tacotron-2':
		assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

	#Create output path if it doesn't exist
	os.makedirs(eval_dir, exist_ok=True)
	os.makedirs(log_dir, exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

	log(hparams_debug_string())
	synth = Synthesizer()
	
	if args.reference_audio is not None:
		ref_wav = audio.load_wav(args.reference_audio,sr=hparams.sample_rate)
		reference_mel = audio.melspectrogram(ref_wav,hparams).astype(np.float32).T
	else:
		#raise ValueError("Evaluation without reference audio. Please provide path to reference audio.")
		reference_mel = None
	synth.load(checkpoint_path, hparams, reference_mel=reference_mel)

	#Set inputs batch wise
	sentences = [sentences[i: i+hparams.tacotron_synthesis_batch_size] for i in range(0, len(sentences), hparams.tacotron_synthesis_batch_size)]

	
	log('Starting Synthesis')
	with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
		for i, texts in enumerate(tqdm(sentences)):
			start = time.time()
			basenames = ['batch_{:03d}_sentence_{:03d}'.format(i, j) for j in range(len(texts))]
			mel_filenames, speaker_ids = synth.synthesize(texts, basenames, eval_dir, log_dir, None, reference_mel=reference_mel)

			for elems in zip(texts, mel_filenames, speaker_ids):
				file.write('|'.join([str(x) for x in elems]) + '\n')
	log('synthesized mel spectrograms at {}'.format(eval_dir))
	return eval_dir
Example #15
def run_live(args, checkpoint_path, hparams):
    #Log to Terminal without keeping any records in files
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    #Generate fast greeting message
    greetings = 'Hello, Welcome to the Live testing tool. Please type a message and I will try to read it!'
    log(greetings)
    generate_fast(synth, greetings)

    #Interaction loop
    while True:
        try:
            text = input()
            generate_fast(synth, text)

        except KeyboardInterrupt:
            leave = 'Thank you for testing our features. see you soon.'
            log(leave)
            generate_fast(synth, leave)
            sleep(2)
            break
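generate_fast is referenced here but not defined in the snippet. A plausible stand-in, assuming synth.eval(text) returns a raw waveform (as it does in later examples in this collection) and that the repo's audio module and hparams are importable, is sketched below; this is not the repository's actual helper.

import os
import time

def generate_fast(model, text):
    # Assumes model.eval(text) returns a waveform, as synth.eval does elsewhere in this collection.
    wav = model.eval(text)
    out_dir = 'live_wavs'  # illustrative output location
    os.makedirs(out_dir, exist_ok=True)
    audio.save_wav(wav, os.path.join(out_dir, 'live_{}.wav'.format(int(time.time()))), hparams)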
Example #16
def run_live(args, checkpoint_path, hparams):
	#Log to Terminal without keeping any records in files
	log(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path, hparams)

	#Generate fast greeting message
	greetings = 'Hello, Welcome to the Live testing tool. Please type a message and I will try to read it!'
	log(greetings)
	generate_fast(synth, greetings)

	#Interaction loop
	while True:
		try:
			text = input()
			generate_fast(synth, text)

		except KeyboardInterrupt:
			leave = 'Thank you for testing our features. see you soon.'
			log(leave)
			generate_fast(synth, leave)
			sleep(2)
			break
Example #17
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
	eval_dir = os.path.join(output_dir, 'eval')
	log_dir = os.path.join(output_dir, 'logs-eval')

	#Create output path if it doesn't exist
	os.makedirs(eval_dir, exist_ok=True)
	os.makedirs(log_dir, exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

	log(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path, hparams)

	delta_size = hparams.tacotron_synthesis_batch_size if hparams.tacotron_synthesis_batch_size < len(sentences) else len(sentences)
	batch_sentences = [sentences[i: i+hparams.tacotron_synthesis_batch_size] for i in range(0, len(sentences), delta_size)]

	start = time.time()
	for i, batch in enumerate(tqdm(batch_sentences)):
		audio.save_wav(synth.eval(batch), os.path.join(log_dir, 'wavs', 'eval_batch_{:03}.wav'.format(i)), hparams)
	log('\nGenerated batches of size {} in {:.3f} sec'.format(delta_size, time.time() - start))

	return eval_dir
Example #18
def run_synthesis(args, checkpoint_path, output_dir, hparams):
	GTA = (args.GTA == 'True')
	if GTA:
		synth_dir = os.path.join(output_dir, 'gta')

		#Create output path if it doesn't exist
		os.makedirs(synth_dir, exist_ok=True)
	else:
		synth_dir = os.path.join(output_dir, 'natural')

		#Create output path if it doesn't exist
		os.makedirs(synth_dir, exist_ok=True)


	metadata_filename = os.path.join(args.input_dir, 'train.txt')
	log(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path, hparams, gta=GTA)
	with open(metadata_filename, encoding='utf-8') as f:
		metadata = [line.strip().split('|') for line in f]
		frame_shift_ms = hparams.hop_size / hparams.sample_rate
		hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
		log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

	log('starting synthesis')
	mel_dir = os.path.join(args.input_dir, 'mels')
	wav_dir = os.path.join(args.input_dir, 'audio')
	with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
		for i, meta in enumerate(tqdm(metadata)):
			text = meta[5]
			mel_filename = os.path.join(mel_dir, meta[1])
			wav_filename = os.path.join(wav_dir, meta[0])
			mel_output_filename = synth.synthesize(text, i+1, synth_dir, None, mel_filename)

			file.write('{}|{}|{}|{}\n'.format(wav_filename, mel_filename, mel_output_filename, text))
	log('synthesized mel spectrograms at {}'.format(synth_dir))
	return os.path.join(synth_dir, 'map.txt')
Example #19
def embedding_synthesize(args, hparams, checkpoint, ppgs=None, speakers=None):
    output_dir = args.output_dir

    try:
        checkpoint_path = tf.train.get_checkpoint_state(checkpoint).model_checkpoint_path
        # checkpoint_path = '/home/zhaoxt20/vae_tac_myself/exp_multi_2020.4.1_2DPPgs+ref_same_speaker_dif_sentence/pretrained_model/tacotron_model.ckpt-45000'
        log('loaded model at {}'.format(checkpoint_path))
    except:
        raise RuntimeError('Failed to load checkpoint at {}'.format(checkpoint))

    if hparams.tacotron_synthesis_batch_size < hparams.tacotron_num_gpus:
        raise ValueError(
            'Defined synthesis batch size {} is smaller than minimum required {} (num_gpus)! Please verify your synthesis batch size choice.'.format(
                hparams.tacotron_synthesis_batch_size, hparams.tacotron_num_gpus))

    if hparams.tacotron_synthesis_batch_size % hparams.tacotron_num_gpus != 0:
        raise ValueError(
            'Defined synthesis batch size {} is not a multiple of {} (num_gpus)! Please verify your synthesis batch size choice!'.format(
                hparams.tacotron_synthesis_batch_size, hparams.tacotron_num_gpus))

    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, reference_mels=True)

    return run_eval(args, checkpoint_path, output_dir, hparams, synth)
Example #20
def tacotron_synthesize(sentences):
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # ignore warnings https://stackoverflow.com/questions/47068709/
	output_dir = 'A'
	checkpoint_path = tf.train.get_checkpoint_state('trained_model').model_checkpoint_path
	print('####### checkpoint_path', checkpoint_path)
	synth = Synthesizer()
	synth.load(checkpoint_path)

	os.makedirs(output_dir, exist_ok=True)

	for i, text in enumerate(sentences):
		synth.synthesize(text, i + 1, output_dir, None)

	print('Results at: {}'.format(output_dir))
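Calling the function above only requires a list of strings; the sentences below are placeholders:

if __name__ == '__main__':
    tacotron_synthesize([
        'Hello world.',
        'The quick brown fox jumps over the lazy dog.',
    ])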
Example #21
def run_eval(args,
             checkpoint_path,
             output_dir,
             hparams,
             sentences,
             flag_to_wav=False,
             checkpoint_eal=None,
             flag_check=False,
             cmu_dict=None):
    #     import pdb
    #     pdb.set_trace()
    #     sentences = sentences[:3]
    log(hparams_debug_string())

    # use the correct synthesizer for the model type
    if args.variant not in ['tacotron_orig', 'tacotron_bk2orig']:
        cfg = Configuration(hparams.sample_rate, hparams.pml_dimension)
        synth = PMLSynthesizer(cfg)
    else:
        synth = Synthesizer()
    synth.load(checkpoint_path,
               hparams,
               model_name=args.variant,
               checkpoint_eal=checkpoint_eal,
               flag_online=args.online)

    if hparams.use_cmudict: sentences = sentences_2_phones(sentences, cmu_dict)
    #     import pdb; pdb.set_trace()
    #     with open('/home/dawna/tts/qd212/models/tacotron/tests/sentences_asup.txt','w') as f:
    #         f.write("\n".join(sentences))
    #     pdb.set_trace()

    if flag_check:
        _eval_check(synth, args, checkpoint_path, output_dir, hparams,
                    sentences, flag_to_wav, checkpoint_eal)
    else:
        _eval_tgt(synth, args, checkpoint_path, output_dir, hparams, sentences,
                  flag_to_wav, checkpoint_eal)
    return
Example #22
def run_eval(args, checkpoint_path, output_dir, hparams, text, step, cwd):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    #os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    #os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    log('Starting Synthesis')
    synth.synthesize(text, step, eval_dir, log_dir, None, cwd)

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Example #23
def get_style_embeddings(args, checkpoint_path, output_dir, hparams):

    emb_dir = os.path.join(output_dir, 'embeddings')
    os.makedirs(emb_dir, exist_ok=True)
    meta_path = os.path.join(emb_dir, 'meta.tsv')
    emb_emt_path = os.path.join(emb_dir, 'emb_emt.tsv')
    emb_spk_path = os.path.join(emb_dir, 'emb_spk.tsv')

    with open(args.train_filename, encoding='utf-8') as f:
        metadata = [
            line.strip().split('|') for line in f if not (line.startswith('#'))
        ]

    df_meta = get_metadata_df(args.train_filename, args)

    spk_ids = df_meta.spk_label.unique()
    spk_ids_chosen = np.sort(np.random.choice(spk_ids, args.n_spk))

    #make sure first user is in embeddings (zo - the one with emotions)
    # if not(0 in spk_ids_chosen):
    # 	spk_ids_chosen = np.sort(np.append(spk_ids_chosen,0))

    # if args.unpaired:
    # 	chosen_idx = []
    # 	for id in spk_ids_chosen:
    # 		spk_rows = df_meta[df_meta.loc[:, 'spk_label'] == id]
    # 		chosen_idxs  = np.random.choice(spk_rows.index.values, args.n_per_spk)
    # 		for idx in chosen_idxs:
    # 			row = df_meta
    # 			for i in range(4):
    # 				if i ==0:
    #
    #
    # 	df_meta_chosen = df_meta.iloc[np.array(sorted(chosen_idx))]
    #
    # 	mel_filenames = [os.path.join(args.input_dir, row.dataset, 'mels', row.mel_filename) for idx, row in
    # 									 df_meta_chosen.iterrows()]
    #
    #
    # 	texts = list(df_meta_chosen.text)

    chosen_idx = []
    for id in spk_ids_chosen:
        spk_rows = df_meta[df_meta.loc[:, 'spk_label'] == id]
        # if id ==0:
        # 	for emt in range(4):
        # 		emt_rows = spk_rows[spk_rows.loc[:, 'emt_label'] == emt]
        # 		chosen_idx += list(np.random.choice(emt_rows.index.values, args.n_emt))
        # else:
        chosen_idx += list(
            np.random.choice(spk_rows.index.values, args.n_per_spk))

    df_meta_chosen = df_meta.iloc[np.array(sorted(chosen_idx))]

    mel_filenames = [
        os.path.join(args.input_dir, row.dataset, 'mels', row.mel_filename)
        for idx, row in df_meta_chosen.iterrows()
    ]
    texts = list(df_meta_chosen.text)

    synth = Synthesizer()
    synth.load(args, checkpoint_path, hparams)
    print("getting embedding for {} samples".format(len(mel_filenames)))
    emb_emt, emb_spk, emb_mo_emt, emb_mo_spk, emb_cont_emt = synth.synthesize(
        texts,
        None,
        None,
        None,
        mel_filenames,
        mel_ref_filenames_emt=mel_filenames,
        mel_ref_filenames_spk=mel_filenames,
        emb_only=True)

    #SAVE META + EMBEDDING CSVS
    columns_to_keep = [
        'dataset', 'mel_filename', 'mel_frames', 'emt_label', 'spk_label',
        'basename', 'sex'
    ]
    df = df_meta_chosen.loc[:, columns_to_keep]
    df['real'] = 'real'
    df_synth = df.copy()
    df_synth['real'] = 'synth'
    df = pd.concat([df, df_synth])
    df.to_csv(meta_path, sep='\t', index=False)

    # if args.emt_attn:

    # emb_emt = np.vstack((emb_emt, emb_mo_emt))
    emb_spk = np.vstack((emb_spk, emb_mo_spk))

    # pd.DataFrame(emb_emt).to_csv(emb_emt_path,sep='\t',index=False, header=False)
    pd.DataFrame(emb_spk).to_csv(emb_spk_path,
                                 sep='\t',
                                 index=False,
                                 header=False)

    print(len(emb_emt))
    print(emb_emt.shape)
Example #24
parser.add_argument('--host', default="localhost", help='Host of Http service')
parser.add_argument(
    '--name',
    help='Name of logging directory if the two models were trained together.')
args = parser.parse_args()
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
checkpoint = os.path.join('logs-Tacotron', 'taco_' + args.checkpoint)
try:
    checkpoint_path = tf.train.get_checkpoint_state(
        checkpoint).model_checkpoint_path
    log('loaded model at {}'.format(checkpoint_path))
except:
    raise RuntimeError('Failed to load checkpoint at {}'.format(checkpoint))

synth = Synthesizer()
modified_hp = hparams.parse(args.hparams)
synth.load(checkpoint_path, modified_hp)


class Res:
    def on_get(self, req, res):
        res.body = html_body
        res.content_type = "text/html"


class Syn:
    def on_get(self, req, res):
        if not req.params.get('text'):
            raise falcon.HTTPBadRequest()
        log('Synthesize {}'.format(p(req.params.get('text'))))
Example #25
parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint',
                    default='pretrained/',
                    help='Path to model checkpoint')
parser.add_argument(
    '--hparams',
    default='',
    help=
    'Hyperparameter overrides as a comma-separated list of name=value pairs')
parser.add_argument('--port', default=9000, help='Port of Http service')
parser.add_argument('--host', default="localhost", help='Host of Http service')
parser.add_argument(
    '--name',
    help='Name of logging directory if the two models were trained together.')
args = parser.parse_args()
synth = Synthesizer()
modified_hp = hparams.parse(args.hparams)
synth.load(args.checkpoint, modified_hp)


class Res:
    def on_get(self, req, res):
        res.body = html_body
        res.content_type = "text/html"


class Syn:
    def on_get(self, req, res):
        if not req.params.get('text'):
            raise falcon.HTTPBadRequest()
        res.data = synth.eval(p(req.params.get('text')))
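The two handler classes above are typically mounted on a falcon app and served with wsgiref, exactly as a later example in this collection does; a sketch using the host and port flags parsed above (html_body and the p helper are assumed to be defined elsewhere in the original script):

import falcon
from wsgiref import simple_server

api = falcon.API()
api.add_route('/', Res())
api.add_route('/synthesize', Syn())

if __name__ == '__main__':
    simple_server.make_server(args.host, int(args.port), api).serve_forever()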
Example #26
from tacotron.hparams_emmm import hparams, hparams_debug_string
from tacotron.infolog import log
from tacotron.synthesizer import Synthesizer
from tqdm import tqdm
from pypinyin import pinyin, Style
checkpoint_path = os.path.join('taco_model2','tacotron_model.ckpt-100000')
output_dir = os.path.join('taco_output','org')
eval_dir = output_dir
log_dir = os.path.join(output_dir, 'logs-eval')
#Create output path if it doesn't exist
os.makedirs(eval_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)
os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)
log(hparams_debug_string())
synth = Synthesizer()
synth.load(checkpoint_path, hparams)


from asr_model.model_vgg_ctc import SpeechModel
import os
import platform as plat

#The allocation below really has no effect, and I don't know how it should be used; it's giving me a headache.
#Besides, deploying two models at the same time is always a bit troublesome. Sorry, I couldn't even get this onto the CPU; whoever is capable can take care of it.
import keras.backend.tensorflow_backend as KTF
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
KTF.set_session(session)
Example #27
def run_synthesis(args, checkpoint_path, output_dir, hparams):
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')
        log_dir = os.path.join(output_dir, 'logs-gta')

        #Create output path if it doesn't exist
        os.makedirs(synth_dir, exist_ok=True)
        os.makedirs(log_dir, exist_ok=True)
    else:
        synth_dir = os.path.join(output_dir, 'natural')
        log_dir = os.path.join(output_dir, 'logs-natural')
        if args.modify_vae_dim is not None:
            synth_dir += '-modify'
            log_dir += '-modify'
        modify_vae_dim = ([int(dim) for dim in args.modify_vae_dim.split(',')]
                          if args.modify_vae_dim else None)

        #Create output path if it doesn't exist
        os.makedirs(synth_dir, exist_ok=True)
        os.makedirs(log_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    if GTA or args.modify_vae_dim is None:
        synth.load(checkpoint_path, hparams, gta=GTA, vae_code_mode='auto')
    else:
        synth.load(checkpoint_path, hparams, gta=GTA, vae_code_mode='modify')
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(
            len(metadata), hours))

    #Set inputs batch wise
    metadata = [
        metadata[i:i + hparams.tacotron_synthesis_batch_size]
        for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)
    ]

    log('Starting Synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        trange = tqdm(metadata)
        for i, meta in enumerate(trange):
            if GTA or args.modify_vae_dim is None:
                texts = [m[5] for m in meta]
                mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
                wav_filenames = [os.path.join(wav_dir, m[0]) for m in meta]
                basenames = [
                    os.path.basename(m).replace('.npy',
                                                '').replace('mel-', '')
                    for m in mel_filenames
                ]
                mel_output_filenames, speaker_ids = synth.synthesize(
                    texts, basenames, synth_dir, log_dir, mel_filenames)
                log_dir = None
                #save plots and wavs for the first batch only, for human inspection

                for elems in zip(wav_filenames, mel_filenames,
                                 mel_output_filenames, speaker_ids, texts):
                    file.write('|'.join([str(x) for x in elems]) + '\n')
            else:
                scales = [-2, -1, 0, 1, 2]
                for dim in modify_vae_dim:
                    for scale in scales:
                        texts = [m[5] for m in meta]
                        mel_filenames = [
                            os.path.join(mel_dir, m[1]) for m in meta
                        ]
                        wav_filenames = [
                            os.path.join(wav_dir, m[0]) for m in meta
                        ]
                        basenames = [
                            'dim_{}-'.format(dim) +
                            os.path.basename(m).replace('.npy', '').replace(
                                'mel-', '') + '-mu+({}*sigma)'.format(scale)
                            for m in mel_filenames
                        ]
                        mel_output_filenames, speaker_ids = synth.synthesize(
                            texts, basenames, synth_dir, log_dir,
                            mel_filenames, dim, scale)
                        trange.set_postfix({
                            'modified_dim': dim,
                            'value': 'mu+({}*sigma)'.format(scale)
                        })
                        trange.update(1 / len(scales) / len(modify_vae_dim) *
                                      len(trange))
                        trange.refresh()
                        for elems in zip(wav_filenames, mel_filenames,
                                         mel_output_filenames, speaker_ids,
                                         texts):
                            file.write('|'.join(
                                [str(x) for x in elems + (dim, scale)]) + '\n')
                break
                #synthesize spectrograms for the first batch only, for human inspection
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')
Example #28
class UIResource:
    def on_get(self, req, res):
        res.content_type = 'text/html'
        res.body = html_body


class SynthesisResource:
    def on_get(self, req, res):
        if not req.params.get('text'):
            raise falcon.HTTPBadRequest()
        res.data = synthesizer.synthesize(req.params.get('text'))
        res.content_type = 'audio/wav'


synthesizer = Synthesizer()
api = falcon.API()
api.add_route('/synthesize', SynthesisResource())
api.add_route('/', UIResource())

if __name__ == '__main__':
    from wsgiref import simple_server
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint',
                        required=True,
                        help='Full path to model checkpoint')
    parser.add_argument('--port', type=int, default=9000)
    parser.add_argument(
        '--hparams',
        default='',
        help=
Example #29
def synthesize_random(args, checkpoint_path, output_dir, hparams,
                      model_suffix):

    n_emt = 4 if not (args.paired) else 1
    n_txts_per_emotion = 5 if not (args.paired) else 10

    synth_dir = os.path.join(output_dir, 'random', model_suffix, time_string())
    os.makedirs(synth_dir, exist_ok=True)

    synth = Synthesizer()
    synth.load(args, checkpoint_path, hparams)

    meta_save_path = os.path.join(synth_dir, 'meta.csv')

    df = pd.read_csv(
        r'C:\Users\t-mawhit\Documents\code\Tacotron-2\data\zo_jessa_train_test.csv'
    )
    df_train = df[df.train_test == 'train']
    df_test = df[df.train_test == 'test']

    #synthesize 20 random samples from zo and jessa, 5 in each emotion
    #change emotion

    df_test_zo = df_test[df_test.dataset == 'emt4']
    df_test_jessa = df_test[df_test.dataset == 'jessa']

    df_test_use = df_test_jessa if not (
        args.zo) else df_test_zo[df_test_zo.emt_label == 0]

    np.random.seed(2)
    chosen_texts_idxs = np.random.choice(df_test_use.index,
                                         n_txts_per_emotion * n_emt,
                                         replace=False)
    df_test_use_texts_rows = df_test_use.loc[chosen_texts_idxs]
    meta = df_test_use_texts_rows.copy()
    meta['basename'] = ''
    idx = 0

    texts = []
    mel_filenames = []
    mel_ref_filenames_emt = []
    mel_ref_filenames_spk = []
    basenames = []
    basenames_refs = []
    emt_labels = []
    spk_labels = []

    for i in range(n_emt):
        df_test_zo_emt = df_test_zo[df_test_zo.emt_label == i]
        for j in range(n_txts_per_emotion):
            row = df_test_use_texts_rows.iloc[idx]
            texts.append(row.text)
            mel_filenames.append(
                os.path.join(args.input_dir, row.dataset, 'mels',
                             row.mel_filename))

            if args.paired:
                mel_ref_filenames_spk.append(
                    os.path.join(args.input_dir, row.dataset, 'mels',
                                 row.mel_filename))
                mel_ref_filenames_emt.append(
                    os.path.join(args.input_dir, row.dataset, 'mels',
                                 row.mel_filename))
            else:
                row_spk = df_test_use.loc[np.random.choice(df_test_use.index)]
                mel_ref_filenames_spk.append(
                    os.path.join(args.input_dir, row_spk.dataset, 'mels',
                                 row_spk.mel_filename))

                row_emt = df_test_zo_emt.loc[np.random.choice(
                    df_test_zo_emt.index)]
                mel_ref_filenames_emt.append(
                    os.path.join(args.input_dir, row_emt.dataset, 'mels',
                                 row_emt.mel_filename))

            basename = '{}'.format(row.basename.split('.')[0])
            basename_ref = 'e{}'.format(i)

            basenames.append(basename)
            basenames_refs.append(basename_ref)

            emt_label = row_emt.emt_label if not (
                args.paired) else row.emt_label
            spk_label = row_spk.spk_label if not (
                args.paired) else row.spk_label
            emt_labels.append(int(emt_label))
            spk_labels.append(int(spk_label))
            meta.iloc[idx, 8] = emt_label
            meta.iloc[idx, 9] = spk_label
            meta.iloc[idx, 10] = 'mel-{}_{}.npy'.format(basename, basename_ref)

            idx += 1

    meta.to_csv(meta_save_path, index=False)

    print('Starting Synthesis on {} samples'.format(len(mel_filenames)))
    synth.synthesize(texts,
                     basenames,
                     synth_dir,
                     synth_dir,
                     mel_filenames,
                     basenames_refs=basenames_refs,
                     mel_ref_filenames_emt=mel_ref_filenames_emt,
                     mel_ref_filenames_spk=mel_ref_filenames_spk,
                     emt_labels_synth=emt_labels,
                     spk_labels_synth=spk_labels)
Example #30
import argparse

from tacotron.utils import makedirs, str2bool
from tacotron.synthesizer import Synthesizer

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--load_path', required=True)
    parser.add_argument('--sample_path', default="samples")
    parser.add_argument('--text', required=True)
    parser.add_argument('--num_speakers', default=1, type=int)
    parser.add_argument('--speaker_id', default=0, type=int)
    parser.add_argument('--checkpoint_step', default=None, type=int)
    parser.add_argument('--is_korean', default=True, type=str2bool)
    config = parser.parse_args()

    makedirs(config.sample_path)

    synthesizer = Synthesizer()
    synthesizer.load(config.load_path, config.num_speakers,
                     config.checkpoint_step)

    audio = synthesizer.synthesize(texts=[config.text],
                                   base_path=config.sample_path,
                                   speaker_ids=[config.speaker_id],
                                   attention_trim=False,
                                   isKorean=config.is_korean)[0]
Example #31
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')
    if args.modify_vae_dim is not None:
        eval_dir += '-modify'
        log_dir += '-modify'
    modify_vae_dim = ([int(dim) for dim in args.modify_vae_dim.split(',')]
                      if args.modify_vae_dim else None)

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    if args.reference_mel is not None and args.modify_vae_dim is None:
        synth.load(checkpoint_path, hparams, vae_code_mode='auto')
    elif args.reference_mel is not None and args.modify_vae_dim is not None:
        synth.load(checkpoint_path, hparams, vae_code_mode='modify')
    else:
        synth.load(checkpoint_path, hparams, vae_code_mode='feed')

    #Set inputs batch wise
    sentences = [
        sentences[i:i + hparams.tacotron_synthesis_batch_size] for i in range(
            0, len(sentences), hparams.tacotron_synthesis_batch_size)
    ]

    log('Starting Synthesis')
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        trange = tqdm(sentences)
        for i, texts in enumerate(trange):
            if args.modify_vae_dim is None:
                start = time.time()
                basenames = [
                    'batch_{}_sentence_{}'.format(i, j)
                    for j in range(len(texts))
                ]
                if args.reference_mel is not None:
                    mel_filenames = [
                        args.reference_mel for j in range(len(texts))
                    ]
                    mel_output_filenames, speaker_ids = synth.synthesize(
                        texts, basenames, eval_dir, log_dir, mel_filenames)
                else:
                    mel_output_filenames, speaker_ids = synth.synthesize(
                        texts, basenames, eval_dir, log_dir, None)
                log_dir = None
                #save plots and wavs for the first batch only, for human inspection

                for elems in zip(texts, mel_output_filenames, speaker_ids):
                    file.write('|'.join([str(x) for x in elems]) + '\n')
            else:
                scales = [-2, -1, 0, 1, 2]
                for dim in modify_vae_dim:
                    for scale in scales:
                        start = time.time()
                        basenames = [
                            'dim_{}_batch_{}_sentence_{}_mu+({}*sigma)'.format(
                                dim, i, j, scale) for j in range(len(texts))
                        ]
                        if args.reference_mel is not None:
                            mel_filenames = [
                                args.reference_mel for j in range(len(texts))
                            ]
                            mel_output_filenames, speaker_ids = synth.synthesize(
                                texts, basenames, eval_dir, log_dir,
                                mel_filenames, dim, scale)
                        else:
                            mel_output_filenames, speaker_ids = synth.synthesize(
                                texts, basenames, eval_dir, log_dir, None, dim,
                                scale)

                        trange.set_postfix({
                            'modified_dim': dim,
                            'value': 'mu+({}*sigma)'.format(scale)
                        })
                        trange.update(1 / len(scales) / len(modify_vae_dim))
                        trange.refresh()
                        for elems in zip(texts, mel_output_filenames,
                                         speaker_ids):
                            file.write('|'.join(
                                [str(x) for x in elems + (dim, scale)]) + '\n')
                log_dir = None
                #save plots and wavs for the first batch only, for human inspection
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Example #32
import argparse


parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint', required=False, help='Full path to model checkpoint', default="tacotron/tmp/tacotron-20180906/model.ckpt")
parser.add_argument('--text', required=False, help='Text to synthesize', default="Hello World")
parser.add_argument('--output', required=False, help='File path of output', default="HelloWorld.wav")
args = parser.parse_args()


checkpoint = str(args.checkpoint)
text = str(args.text)
output = str(args.output)
print("Checkpoint: " + checkpoint)
print("Text: " + text)
print("Output: " + output)
print("")

print("Loading model...")
synthesizer = Synthesizer()
synthesizer.load(checkpoint)
print("Loading model completed!")
print("")

print("Synthesizing text...")
with open(output, 'wb') as file:
    file.write(synthesizer.synthesize(text))
print("Synthesizing text completed!")
print("")

Example #33
def run_synthesis_multiple(args, checkpoint_path, output_dir, hparams,
                           model_suffix):

    n_spk_per_accent = 2
    n_text_per_spk = 5

    synth_dir = os.path.join(output_dir, 'wavs', model_suffix, time_string())
    os.makedirs(synth_dir, exist_ok=True)

    synth = Synthesizer()
    synth.load(args, checkpoint_path, hparams)

    with open(args.train_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        if args.remove_long_samps:
            len_before = len(metadata)
            metadata = [
                f for f in metadata if not (f[10].endswith('_023.wav'))
            ]
            metadata = [
                f for f in metadata if not (f[10].endswith('_021.wav'))
            ]
            metadata = [f for f in metadata if int(f[6]) < 500]
            print("Removed Long Samples - before: {}, after: {}".format(
                len_before, len(metadata)))

        #only synthesize long samples
        metadata = [f for f in metadata if int(f[6]) > 200]

        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[6]) for x in metadata]) * frame_shift_ms / (3600)
        print('Loaded metadata for {} examples ({:.2f} hours)'.format(
            len(metadata), hours))

    df = pd.DataFrame(metadata,
                      columns=[
                          'dataset', 'audio_filename', 'mel_filename',
                          'linear_filename', 'spk_emb_filename', 'time_steps',
                          'mel_frames', 'text', 'emt_label', 'spk_label',
                          'basename', 'sex'
                      ])
    chosen_accents = ['0', '3']
    assert (len(chosen_accents) <= 2)
    acc_names = [
        'American', 'Australian', 'Canadian', 'English', 'Indian', 'Irish',
        'NewZealand', 'NorthernIrish', 'Scottish', 'SouthAfrican', 'Welsh'
    ]
    df_acc = df[df['emt_label'].isin(chosen_accents)]
    # spk_idxs = sorted(frozenset(df_acc['spk_label'].unique()))
    texts = []
    mel_filenames = []
    mel_ref_filenames_emt = []
    mel_ref_filenames_spk = []
    basenames = []
    basenames_refs = []

    for i, acc in enumerate(chosen_accents):
        df_acc_spks = df_acc[df_acc['emt_label'] == acc]['spk_label'].unique()
        chosen_spks = np.random.choice(df_acc_spks,
                                       n_spk_per_accent,
                                       replace=False)

        for spk in chosen_spks:
            df_spk = df_acc[df_acc['spk_label'] == spk]
            idxs = np.random.choice(df_spk.index,
                                    n_text_per_spk,
                                    replace=False)
            for idx in idxs:
                # for j in range(5):
                for acc_ref in chosen_accents:
                    texts.append(df_acc.loc[idx].text)
                    mel_filename = os.path.join(args.input_dir,
                                                df_acc.loc[idx].dataset,
                                                'mels',
                                                df_acc.loc[idx].mel_filename)
                    mel_filenames.append(mel_filename)
                    mel_ref_filenames_spk.append(mel_filename)
                    basenames.append('{}_{}_{}'.format(
                        df_acc.loc[idx].basename.split('.')[0],
                        acc_names[int(acc)][:2], df_acc.loc[idx].sex))

                    df_other_acc = df_acc[df_acc['emt_label'] == acc_ref]
                    row = df_other_acc.loc[np.random.choice(
                        df_other_acc.index, 1)]
                    mel_ref_filenames_emt.append(
                        os.path.join(args.input_dir, row.dataset.iloc[0],
                                     'mels', row.mel_filename.iloc[0]))
                    basenames_refs.append('{}'.format(acc_names[int(
                        row.emt_label)][:2]))  #,j))

    if args.flip_spk_emt:
        mel_ref_filenames_emt_tmp = mel_ref_filenames_emt
        mel_ref_filenames_emt = mel_ref_filenames_spk
        mel_ref_filenames_spk = mel_ref_filenames_emt_tmp

    print('Starting Synthesis on {} samples'.format(
        len(mel_filenames) // len(chosen_accents)))
    synth.synthesize(texts,
                     basenames,
                     synth_dir,
                     synth_dir,
                     mel_filenames,
                     basenames_refs=basenames_refs,
                     mel_ref_filenames_emt=mel_ref_filenames_emt,
                     mel_ref_filenames_spk=mel_ref_filenames_spk)