Example #1
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
	eval_dir = os.path.join(output_dir, 'eval')
	log_dir = os.path.join(output_dir, 'logs-eval')

	if args.model == 'Tacotron-2':
		assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir) #mels_dir = wavenet_input_dir

	#Create output path if it doesn't exist
	os.makedirs(eval_dir, exist_ok=True)
	os.makedirs(log_dir, exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

	log(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path, hparams, speaker_id=args.speaker_id)

	with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
		for i, text in enumerate(tqdm(sentences)):
			start = time.time()
			if args.speaker_id is not None:
				mel_filename, speaker_id = synth.synthesize([text], [i+1], eval_dir, log_dir, None, speaker_id=[args.speaker_id[i]])
			else:
				mel_filename, speaker_id = synth.synthesize([text], [i+1], eval_dir, log_dir, None, speaker_id=None)

			file.write('{}|{}|{}\n'.format(text, mel_filename[0], speaker_id[0]))
	log('synthesized mel spectrograms at {}'.format(eval_dir))
	return eval_dir
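The driver below is a minimal, hypothetical sketch of how run_eval above might be invoked. Only args.model, args.mels_dir and args.speaker_id are actually read by the function; the checkpoint lookup mirrors the one in example #4 below, and the hparams import assumes a Tacotron-2-style hparams module.

# Hypothetical driver for run_eval; argument names cover only what the function reads.
import argparse
import tensorflow as tf
from hparams import hparams  # assumed repo-level hparams module

parser = argparse.ArgumentParser()
parser.add_argument('--model', default='Tacotron')
parser.add_argument('--mels_dir', default='tacotron_output/eval')
parser.add_argument('--speaker_id', default=None)
args = parser.parse_args()

checkpoint_path = tf.train.get_checkpoint_state('logs-Tacotron/taco_pretrained').model_checkpoint_path
sentences = ['Scientists at the CERN laboratory say they have discovered a new particle.']
run_eval(args, checkpoint_path, 'tacotron_output', hparams, sentences)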
Example #2
def run_synthesis_sytle_transfer(args, synth_metadata_filename,
                                 checkpoint_path, output_dir, hparams):

    synth_dir = os.path.join(output_dir, 'natural')

    #Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args, checkpoint_path, hparams)

    texts, basenames, basenames_refs, mel_filenames, \
    mel_ref_filenames_emt, mel_ref_filenames_spk,\
    emt_labels, spk_labels = get_filenames_from_metadata(synth_metadata_filename, args.input_dir, args.flip_spk_emt)

    synth.synthesize(texts,
                     basenames,
                     synth_dir,
                     synth_dir,
                     mel_filenames,
                     mel_ref_filenames_emt=mel_ref_filenames_emt,
                     mel_ref_filenames_spk=mel_ref_filenames_spk,
                     emt_labels_synth=emt_labels,
                     spk_labels_synth=spk_labels)
Example #3
def run_single(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Set inputs batch wise
    sentences = [
        sentences[i:i + hparams.tacotron_synthesis_batch_size] for i in range(
            0, len(sentences), hparams.tacotron_synthesis_batch_size)
    ]
    # sentences=[[sentences]]
    print(sentences)

    log('Starting Synthesis Single')
    for i, texts in enumerate(tqdm(sentences)):
        start = time.time()
        #basenames = ['batch_{:03d}_sentence_{:03d}'.format(i, j) for j in range(len(texts))]
        #mel_filenames, speaker_ids = synth.synthesize(texts, basenames, eval_dir, log_dir, None)
        print(texts, eval_dir, log_dir)
        synth.synthesize(texts, None, eval_dir, log_dir, None)

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Example #4
def tacotron_synthesize(sentences):
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # ignore warnings https://stackoverflow.com/questions/47068709/
	output_dir = 'A'
	checkpoint_path = tf.train.get_checkpoint_state('trained_model').model_checkpoint_path
	print('####### checkpoint_path', checkpoint_path)
	synth = Synthesizer()
	synth.load(checkpoint_path)

	os.makedirs(output_dir, exist_ok=True)

	for i, text in enumerate(sentences):
		synth.synthesize(text, i + 1, output_dir, None)

	print('Results at: {}'.format(output_dir))
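A usage sketch for the helper above; the sentence is only a placeholder, and the checkpoint is whatever the hard-coded 'trained_model' directory contains.

# The function loads its checkpoint from 'trained_model' and writes outputs to 'A'.
tacotron_synthesize(['Welcome to the text to speech demo.'])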
Example #5
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, reference_mel=args.reference_audio)
    if args.reference_audio is not None:
        ref_wav = audio.load_wav(args.reference_audio)
        reference_mel = audio.melspectrogram(ref_wav).astype(np.float32).T
    else:
        raise ValueError(
            "Evaluation without reference audio. Please provide path to reference audio."
        )
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            start = time.time()
            mel_filename = synth.synthesize(text,
                                            i + 1,
                                            eval_dir,
                                            log_dir,
                                            None,
                                            reference_mel=reference_mel)
            file.write('{}|{}\n'.format(text, mel_filename))

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Example #6
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model in ('Both', 'Tacotron-2'):
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir) #mels_dir = wavenet_input_dir
    
    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            start = time.time()
            mel_filename = synth.synthesize(text, i+1, eval_dir, log_dir, None)

            file.write('{}|{}\n'.format(text, mel_filename))
            npy_data = np.load(mel_filename)
            npy_data = npy_data.reshape((-1,))
            npy_data.tofile("f32_for_lpcnet.f32")
		
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Example #7
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model in ('Both', 'Tacotron-2'):
        assert os.path.normpath(eval_dir) == os.path.normpath(
            args.mels_dir)  # mels_dir = wavenet_input_dir

    # Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            if is_korean_text(text):
                text = normalize_number(text)
                # Split the Hangul text into jamo (consonant/vowel) units.
                text = split_to_jamo(text, hparams.cleaners)
            mel_filename = synth.synthesize(text, i + 1, eval_dir, log_dir,
                                            None)

            file.write('{}|{}\n'.format(text, mel_filename))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Example #8
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
	eval_dir = os.path.join(output_dir, 'eval')
	log_dir = os.path.join(output_dir, 'logs-eval')

	if args.model == 'Tacotron-2':
		assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

	#Create output path if it doesn't exist
	os.makedirs(eval_dir, exist_ok=True)
	os.makedirs(log_dir, exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

	log(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path, hparams)

	#Set inputs batch wise
	sentences = [sentences[i: i+hparams.tacotron_synthesis_batch_size] for i in range(0, len(sentences), hparams.tacotron_synthesis_batch_size)]

	log('Starting Synthesis')
	with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
		for i, texts in enumerate(tqdm(sentences)):
			start = time.time()
			basenames = ['batch_{}_sentence_{}'.format(i, j) for j in range(len(texts))]
			mel_filenames, speaker_ids = synth.synthesize(texts, basenames, eval_dir, log_dir, None)

			for elems in zip(texts, mel_filenames, speaker_ids):
				file.write('|'.join([str(x) for x in elems]) + '\n')
	log('synthesized mel spectrograms at {}'.format(eval_dir))
	return eval_dir
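The list comprehension near the top of the loop above simply chunks the input sentences into fixed-size batches. A tiny standalone illustration (batch size 2 is arbitrary; the real code takes the size from hparams.tacotron_synthesis_batch_size):

batch_size = 2
items = ['a', 'b', 'c', 'd', 'e']
batches = [items[i: i + batch_size] for i in range(0, len(items), batch_size)]
print(batches)  # [['a', 'b'], ['c', 'd'], ['e']]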
Example #9
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
	eval_dir = os.path.join(output_dir, 'eval')
	log_dir = os.path.join(output_dir, 'logs-eval')

	if args.model in ('Both', 'Tacotron-2'):
		assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir) #mels_dir = wavenet_input_dir
	
	#Create output path if it doesn't exist
	os.makedirs(eval_dir, exist_ok=True)
	os.makedirs(log_dir, exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

	log(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path, hparams)

	
	with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
		for i, text in enumerate(tqdm(sentences)):
			start = time.time()
			mel_filename = synth.synthesize(text, i+1, eval_dir, log_dir, None)

			file.write('{}|{}\n'.format(text, mel_filename))
	log('synthesized mel spectrograms at {}'.format(eval_dir))
	return eval_dir
Example #10
def run_synthesis(args, checkpoint_path, output_dir):
	metadata_filename = os.path.join(args.input_dir, 'train.txt')
	print(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path, gta=args.GTA)
	with open(metadata_filename, encoding='utf-8') as f:
		metadata = [line.strip().split('|') for line in f]
		frame_shift_ms = hparams.hop_size / hparams.sample_rate
		hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
		print('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

	if args.GTA==True:
		synth_dir = os.path.join(output_dir, 'gta')
	else:
		synth_dir = os.path.join(output_dir, 'natural')

	#Create output path if it doesn't exist
	os.makedirs(synth_dir, exist_ok=True)

	print('starting synthesis')
	mel_dir = os.path.join(args.input_dir, 'mels')
	wav_dir = os.path.join(args.input_dir, 'audio')
	with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
		for i, meta in enumerate(tqdm(metadata)):
			text = meta[5]
			mel_filename = os.path.join(mel_dir, meta[1])
			wav_filename = os.path.join(wav_dir, meta[0])
			mel_output_filename = synth.synthesize(text, None, i+1, synth_dir, None, mel_filename)

			file.write('{}|{}|{}|{}\n'.format(text, mel_filename, mel_output_filename, wav_filename))
	print('synthesized mel spectrograms at {}'.format(synth_dir))
Example #11
def run_synthesis(args, checkpoint_path, output_dir, hparams):
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')

        #Create output path if it doesn't exist
        os.makedirs(synth_dir, exist_ok=True)
    else:
        synth_dir = os.path.join(output_dir, 'natural')

        #Create output path if it doesn't exist
        os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    print(hparams_debug_string())
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=GTA)
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
        print('Loaded metadata for {} examples ({:.2f} hours)'.format(
            len(metadata), hours))
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(
            len(metadata), hours))

    #Set inputs batch wise
    metadata = [
        metadata[i:i + hparams.tacotron_synthesis_batch_size]
        for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)
    ]

    print('Starting Synthesis')
    log('Starting Synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w',
              encoding="utf-8") as file:
        for i, meta in enumerate(tqdm(metadata)):
            texts = [m[5] for m in meta]
            mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
            wav_filenames = [os.path.join(wav_dir, m[0]) for m in meta]
            basenames = [
                os.path.basename(m).replace('.npy', '').replace('mel-', '')
                for m in mel_filenames
            ]
            mel_output_filenames, speaker_ids = synth.synthesize(
                texts, basenames, synth_dir, None, mel_filenames)

            for elems in zip(wav_filenames, mel_filenames,
                             mel_output_filenames, speaker_ids, texts):
                file.write('|'.join([str(x) for x in elems]) + '\n')
    print('synthesized mel spectrograms at {}'.format(synth_dir))
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')
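Each line of the map.txt returned above is a pipe-separated record written in the order wav_filename | mel_filename | mel_output_filename | speaker_id | text. A hedged sketch of reading it back, for example to gather vocoder inputs; the path is an assumption:

# Parse the map file written by run_synthesis above.
with open('tacotron_output/gta/map.txt', encoding='utf-8') as f:
    for line in f:
        # maxsplit keeps any '|' inside the text field from breaking the unpacking
        wav_path, mel_path, mel_out_path, speaker_id, text = line.strip().split('|', 4)
        # mel_out_path points at the synthesized mel .npy used as vocoder input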
Example #12
def run_eval(args, checkpoint_path, output_dir, hparams, text, step, cwd):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    #os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    #os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    log('Starting Synthesis')
    synth.synthesize(text, step, eval_dir, log_dir, None, cwd)

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Example #13
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    #Set inputs batch wise
    sentences = [
        sentences[i:i + hparams.tacotron_synthesis_batch_size] for i in range(
            0, len(sentences), hparams.tacotron_synthesis_batch_size)
    ]

    log('Starting Synthesis')
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, texts in enumerate(tqdm(sentences)):
            start = time.time()
            basenames = [
                'batch_{}_sentence_{}'.format(i, j) for j in range(len(texts))
            ]

            if hparams.tacotron_reference_waveform:
                # mel_reference_filename = ["/home/aperquin/Programmes/Tacotron-2-from-reference/training_data/mels/mel-p225_046.npy"]*len(basenames)
                # mel_reference_filename = ["/home/aperquin/Programmes/Tacotron-2-from-reference/training_data/mels/mel-p226_306.npy"]*len(basenames)
                # mel_reference_filename = ["/home/aperquin/Programmes/Tacotron-2-from-reference/training_data/mels/mel-p297_247.npy"]*len(basenames)
                # mel_reference_filename = ["/home/aperquin/Programmes/Tacotron-2-from-reference/training_data/mels/mel-p376_076.npy"]*len(basenames)
                mel_reference_filename = [args.mel_reference] * len(basenames)
            else:
                mel_reference_filename = None

            mel_filenames, speaker_ids = synth.synthesize(
                texts, basenames, eval_dir, log_dir, None,
                mel_reference_filename)

            for elems in zip(texts, mel_filenames, speaker_ids):
                file.write('|'.join([str(x) for x in elems]) + '\n')
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Example #14
def run_synthesis(args, checkpoint_path, output_dir, sentences):
	metadata_filename = os.path.join(args.input_dir, 'train.txt')
	print(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path, gta=args.GTA)

	wav = load_wav(args.reference_audio)
	reference_mel = melspectrogram(wav).transpose()

	with open(metadata_filename, encoding='utf-8') as f:
		metadata = [line.strip().split('|') for line in f]
		frame_shift_ms = hparams.hop_size / hparams.sample_rate
		hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
		print('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

	if args.GTA==True:
		synth_dir = os.path.join(output_dir, 'gta')
	else:
		synth_dir = os.path.join(output_dir, 'natural')

	#Create output path if it doesn't exist
	os.makedirs(synth_dir, exist_ok=True)
	os.makedirs(os.path.join(synth_dir, 'wavs/'), exist_ok=True)

	print('starting synthesis')
	with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
		#for i, meta in enumerate(tqdm(metadata)):
			#text = meta[5]
		for i, text in enumerate(tqdm(sentences)):
			mel_output_filename = synth.synthesize(text=text, index=i+1, out_dir=synth_dir, log_dir=None, mel_filename=None, reference_mel=reference_mel)

			mels = np.load(mel_output_filename)
			wav = audio.inv_mel_spectrogram(mels.T)
			audio.save_wav(wav, os.path.join(synth_dir, 'wavs/speech-wav-{:05d}-mel.wav'.format(i+1)))

			with open(os.path.join(synth_dir, 'wavs/speech-wav-{:05d}.txt'.format(i+1)), 'w') as tf:
				tf.write(text)

			if hparams.predict_linear:
				# save wav (linear -> wav)
				# NOTE: `linear` is not defined in this snippet; the linear spectrogram
				# would have to be returned by synth.synthesize for this branch to run.
				wav = audio.inv_linear_spectrogram(linear.T)
				audio.save_wav(wav, os.path.join(synth_dir, 'wavs/speech-wav-{:05d}-linear.wav'.format(i+1)))

		#file.write('{}|{}|{}|{}\n'.format(text, mel_filename, mel_output_filename, wav_filename))
	print('synthesized mel spectrograms at {}'.format(synth_dir))
Example #15
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            start = time.time()
            mel_filename, speaker_id = synth.synthesize([text], [i + 1], eval_dir, log_dir, None)

            file.write('{}|{}|{}\n'.format(text, mel_filename[0], speaker_id[0]))

    log('synthesized mel spectrograms at {}'.format(eval_dir))
Example #16
def run_eval(args, checkpoint_path, output_dir):
    # 	print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path)
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(hparams.sentences)):
            start = time.time()
            mel_filename = synth.synthesize(text, i + 1, eval_dir, log_dir,
                                            None)

            file.write('{}|{}\n'.format(text, mel_filename))
Example #17
def run_eval(args, checkpoint_path, output_dir):
	print(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path)
	eval_dir = os.path.join(output_dir, 'eval')
	log_dir = os.path.join(output_dir, 'logs-eval')
	wav = load_wav(args.reference_audio)
	reference_mel = melspectrogram(wav).transpose()
	#Create output path if it doesn't exist
	os.makedirs(eval_dir, exist_ok=True)
	os.makedirs(log_dir, exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

	with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
		for i, text in enumerate(tqdm(hparams.sentences)):
			start = time.time()
			mel_filename = synth.synthesize(text, i+1, eval_dir, log_dir, None, reference_mel)

			file.write('{}|{}\n'.format(text, mel_filename))
	print('synthesized mel spectrograms at {}'.format(eval_dir))
Example #18
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
	eval_dir = os.path.join(output_dir, 'eval')
	log_dir = os.path.join(output_dir, 'logs-eval')

	if args.model == 'Tacotron-2':
		assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

	#Create output path if it doesn't exist
	os.makedirs(eval_dir, exist_ok=True)
	os.makedirs(log_dir, exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
	os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

	log(hparams_debug_string())
	synth = Synthesizer()
	
	if args.reference_audio is not None:
		ref_wav = audio.load_wav(args.reference_audio,sr=hparams.sample_rate)
		reference_mel = audio.melspectrogram(ref_wav,hparams).astype(np.float32).T
	else:
		#raise ValueError("Evaluation without reference audio. Please provide path to reference audio.")
		reference_mel = None
	synth.load(checkpoint_path, hparams, reference_mel=reference_mel)

	#Set inputs batch wise
	sentences = [sentences[i: i+hparams.tacotron_synthesis_batch_size] for i in range(0, len(sentences), hparams.tacotron_synthesis_batch_size)]

	
	log('Starting Synthesis')
	with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
		for i, texts in enumerate(tqdm(sentences)):
			start = time.time()
			basenames = ['batch_{:03d}_sentence_{:03d}'.format(i, j) for j in range(len(texts))]
			mel_filenames, speaker_ids = synth.synthesize(texts, basenames, eval_dir, log_dir, None, reference_mel=reference_mel)

			for elems in zip(texts, mel_filenames, speaker_ids):
				file.write('|'.join([str(x) for x in elems]) + '\n')
	log('synthesized mel spectrograms at {}'.format(eval_dir))
	return eval_dir
Example #19
def run_synthesis(args, checkpoint_path, output_dir, hparams):
	GTA = (args.GTA == 'True')
	if GTA:
		synth_dir = os.path.join(output_dir, 'gta')

		#Create output path if it doesn't exist
		os.makedirs(synth_dir, exist_ok=True)
	else:
		synth_dir = os.path.join(output_dir, 'natural')

		#Create output path if it doesn't exist
		os.makedirs(synth_dir, exist_ok=True)


	metadata_filename = os.path.join(args.input_dir, 'train.txt')
	log(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path, hparams, gta=GTA)
	with open(metadata_filename, encoding='utf-8') as f:
		metadata = [line.strip().split('|') for line in f]
		frame_shift_ms = hparams.hop_size / hparams.sample_rate
		hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
		log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

	log('starting synthesis')
	mel_dir = os.path.join(args.input_dir, 'mels')
	wav_dir = os.path.join(args.input_dir, 'audio')
	with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
		for i, meta in enumerate(tqdm(metadata)):
			text = meta[5]
			mel_filename = os.path.join(mel_dir, meta[1])
			wav_filename = os.path.join(wav_dir, meta[0])
			mel_output_filename = synth.synthesize(text, i+1, synth_dir, None, mel_filename)

			file.write('{}|{}|{}|{}\n'.format(wav_filename, mel_filename, mel_output_filename, text))
	log('synthesized mel spectrograms at {}'.format(synth_dir))
	return os.path.join(synth_dir, 'map.txt')
Example #20
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')
    if args.modify_vae_dim is not None:
        eval_dir += '-modify'
        log_dir += '-modify'
    modify_vae_dim = [int(dim) for dim in args.modify_vae_dim.split(',')
                      ] if args.modify_vae_dim else None

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    if args.reference_mel is not None and args.modify_vae_dim is None:
        synth.load(checkpoint_path, hparams, vae_code_mode='auto')
    elif args.reference_mel is not None and args.modify_vae_dim is not None:
        synth.load(checkpoint_path, hparams, vae_code_mode='modify')
    else:
        synth.load(checkpoint_path, hparams, vae_code_mode='feed')

    #Set inputs batch wise
    sentences = [
        sentences[i:i + hparams.tacotron_synthesis_batch_size] for i in range(
            0, len(sentences), hparams.tacotron_synthesis_batch_size)
    ]

    log('Starting Synthesis')
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        trange = tqdm(sentences)
        for i, texts in enumerate(trange):
            if args.modify_vae_dim is None:
                start = time.time()
                basenames = [
                    'batch_{}_sentence_{}'.format(i, j)
                    for j in range(len(texts))
                ]
                if args.reference_mel is not None:
                    mel_filenames = [
                        args.reference_mel for j in range(len(texts))
                    ]
                    mel_output_filenames, speaker_ids = synth.synthesize(
                        texts, basenames, eval_dir, log_dir, mel_filenames)
                else:
                    mel_output_filenames, speaker_ids = synth.synthesize(
                        texts, basenames, eval_dir, log_dir, None)
                log_dir = None
                #save plots and wavs for the first batch only, for human inspection

                for elems in zip(texts, mel_output_filenames, speaker_ids):
                    file.write('|'.join([str(x) for x in elems]) + '\n')
            else:
                scales = [-2, -1, 0, 1, 2]
                for dim in modify_vae_dim:
                    for scale in scales:
                        start = time.time()
                        basenames = [
                            'dim_{}_batch_{}_sentence_{}_mu+({}*sigma)'.format(
                                dim, i, j, scale) for j in range(len(texts))
                        ]
                        if args.reference_mel is not None:
                            mel_filenames = [
                                args.reference_mel for j in range(len(texts))
                            ]
                            mel_output_filenames, speaker_ids = synth.synthesize(
                                texts, basenames, eval_dir, log_dir,
                                mel_filenames, dim, scale)
                        else:
                            mel_output_filenames, speaker_ids = synth.synthesize(
                                texts, basenames, eval_dir, log_dir, None, dim,
                                scale)

                        trange.set_postfix({
                            'modified_dim':
                            dim,
                            'value':
                            'mu+({}*sigma)'.format(scale)
                        })
                        trange.update(1 / len(scales) / len(modify_vae_dim))
                        trange.refresh()
                        for elems in zip(texts, mel_output_filenames,
                                         speaker_ids):
                            file.write('|'.join(
                                [str(x) for x in elems + (dim, scale)]) + '\n')
                log_dir = None
                #save plots and wavs for the first batch only, for human inspection
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Example #21
def run_synthesis_multiple(args, checkpoint_path, output_dir, hparams,
                           model_suffix):

    n_spk_per_accent = 2
    n_text_per_spk = 5

    synth_dir = os.path.join(output_dir, 'wavs', model_suffix, time_string())
    os.makedirs(synth_dir, exist_ok=True)

    synth = Synthesizer()
    synth.load(args, checkpoint_path, hparams)

    with open(args.train_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        if args.remove_long_samps:
            len_before = len(metadata)
            metadata = [
                f for f in metadata if not (f[10].endswith('_023.wav'))
            ]
            metadata = [
                f for f in metadata if not (f[10].endswith('_021.wav'))
            ]
            metadata = [f for f in metadata if int(f[6]) < 500]
            print("Removed Long Samples - before: {}, after: {}".format(
                len_before, len(metadata)))

        #only synthesize long samples
        metadata = [f for f in metadata if int(f[6]) > 200]

        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[6]) for x in metadata]) * frame_shift_ms / (3600)
        print('Loaded metadata for {} examples ({:.2f} hours)'.format(
            len(metadata), hours))

    df = pd.DataFrame(metadata,
                      columns=[
                          'dataset', 'audio_filename', 'mel_filename',
                          'linear_filename', 'spk_emb_filename', 'time_steps',
                          'mel_frames', 'text', 'emt_label', 'spk_label',
                          'basename', 'sex'
                      ])
    chosen_accents = ['0', '3']
    assert (len(chosen_accents) <= 2)
    acc_names = [
        'American', 'Australian', 'Canadian', 'English', 'Indian', 'Irish',
        'NewZealand', 'NorthernIrish', 'Scottish', 'SouthAfrican', 'Welsh'
    ]
    df_acc = df[df['emt_label'].isin(chosen_accents)]
    # spk_idxs = sorted(frozenset(df_acc['spk_label'].unique()))
    texts = []
    mel_filenames = []
    mel_ref_filenames_emt = []
    mel_ref_filenames_spk = []
    basenames = []
    basenames_refs = []

    for i, acc in enumerate(chosen_accents):
        df_acc_spks = df_acc[df_acc['emt_label'] == acc]['spk_label'].unique()
        chosen_spks = np.random.choice(df_acc_spks,
                                       n_spk_per_accent,
                                       replace=False)

        for spk in chosen_spks:
            df_spk = df_acc[df_acc['spk_label'] == spk]
            idxs = np.random.choice(df_spk.index,
                                    n_text_per_spk,
                                    replace=False)
            for idx in idxs:
                # for j in range(5):
                for acc_ref in chosen_accents:
                    texts.append(df_acc.loc[idx].text)
                    mel_filename = os.path.join(args.input_dir,
                                                df_acc.loc[idx].dataset,
                                                'mels',
                                                df_acc.loc[idx].mel_filename)
                    mel_filenames.append(mel_filename)
                    mel_ref_filenames_spk.append(mel_filename)
                    basenames.append('{}_{}_{}'.format(
                        df_acc.loc[idx].basename.split('.')[0],
                        acc_names[int(acc)][:2], df_acc.loc[idx].sex))

                    df_other_acc = df_acc[df_acc['emt_label'] == acc_ref]
                    row = df_other_acc.loc[np.random.choice(
                        df_other_acc.index, 1)]
                    mel_ref_filenames_emt.append(
                        os.path.join(args.input_dir, row.dataset.iloc[0],
                                     'mels', row.mel_filename.iloc[0]))
                    basenames_refs.append('{}'.format(acc_names[int(
                        row.emt_label)][:2]))  #,j))

    if args.flip_spk_emt:
        mel_ref_filenames_emt_tmp = mel_ref_filenames_emt
        mel_ref_filenames_emt = mel_ref_filenames_spk
        mel_ref_filenames_spk = mel_ref_filenames_emt_tmp

    print('Starting Synthesis on {} samples'.format(
        len(mel_filenames) // len(chosen_accents)))
    synth.synthesize(texts,
                     basenames,
                     synth_dir,
                     synth_dir,
                     mel_filenames,
                     basenames_refs=basenames_refs,
                     mel_ref_filenames_emt=mel_ref_filenames_emt,
                     mel_ref_filenames_spk=mel_ref_filenames_spk)
Example #22
def get_style_embeddings(args, checkpoint_path, output_dir, hparams):

    emb_dir = os.path.join(output_dir, 'embeddings')
    os.makedirs(emb_dir, exist_ok=True)
    meta_path = os.path.join(emb_dir, 'meta.tsv')
    emb_emt_path = os.path.join(emb_dir, 'emb_emt.tsv')
    emb_spk_path = os.path.join(emb_dir, 'emb_spk.tsv')

    with open(args.train_filename, encoding='utf-8') as f:
        metadata = [
            line.strip().split('|') for line in f if not (line.startswith('#'))
        ]

    df_meta = get_metadata_df(args.train_filename, args)

    spk_ids = df_meta.spk_label.unique()
    spk_ids_chosen = np.sort(np.random.choice(spk_ids, args.n_spk))

    #make sure first user is in embeddings (zo - the one with emotions)
    # if not(0 in spk_ids_chosen):
    # 	spk_ids_chosen = np.sort(np.append(spk_ids_chosen,0))

    # if args.unpaired:
    # 	chosen_idx = []
    # 	for id in spk_ids_chosen:
    # 		spk_rows = df_meta[df_meta.loc[:, 'spk_label'] == id]
    # 		chosen_idxs  = np.random.choice(spk_rows.index.values, args.n_per_spk)
    # 		for idx in chosen_idxs:
    # 			row = df_meta
    # 			for i in range(4):
    # 				if i ==0:
    #
    #
    # 	df_meta_chosen = df_meta.iloc[np.array(sorted(chosen_idx))]
    #
    # 	mel_filenames = [os.path.join(args.input_dir, row.dataset, 'mels', row.mel_filename) for idx, row in
    # 									 df_meta_chosen.iterrows()]
    #
    #
    # 	texts = list(df_meta_chosen.text)

    chosen_idx = []
    for id in spk_ids_chosen:
        spk_rows = df_meta[df_meta.loc[:, 'spk_label'] == id]
        # if id ==0:
        # 	for emt in range(4):
        # 		emt_rows = spk_rows[spk_rows.loc[:, 'emt_label'] == emt]
        # 		chosen_idx += list(np.random.choice(emt_rows.index.values, args.n_emt))
        # else:
        chosen_idx += list(
            np.random.choice(spk_rows.index.values, args.n_per_spk))

    df_meta_chosen = df_meta.iloc[np.array(sorted(chosen_idx))]

    mel_filenames = [
        os.path.join(args.input_dir, row.dataset, 'mels', row.mel_filename)
        for idx, row in df_meta_chosen.iterrows()
    ]
    texts = list(df_meta_chosen.text)

    synth = Synthesizer()
    synth.load(args, checkpoint_path, hparams)
    print("getting embedding for {} samples".format(len(mel_filenames)))
    emb_emt, emb_spk, emb_mo_emt, emb_mo_spk, emb_cont_emt = synth.synthesize(
        texts,
        None,
        None,
        None,
        mel_filenames,
        mel_ref_filenames_emt=mel_filenames,
        mel_ref_filenames_spk=mel_filenames,
        emb_only=True)

    #SAVE META + EMBEDDING CSVS
    columns_to_keep = [
        'dataset', 'mel_filename', 'mel_frames', 'emt_label', 'spk_label',
        'basename', 'sex'
    ]
    df = df_meta_chosen.loc[:, columns_to_keep]
    df['real'] = 'real'
    df_synth = df.copy()
    df_synth['real'] = 'synth'
    df = pd.concat([df, df_synth])
    df.to_csv(meta_path, sep='\t', index=False)

    # if args.emt_attn:

    # emb_emt = np.vstack((emb_emt, emb_mo_emt))
    emb_spk = np.vstack((emb_spk, emb_mo_spk))

    # pd.DataFrame(emb_emt).to_csv(emb_emt_path,sep='\t',index=False, header=False)
    pd.DataFrame(emb_spk).to_csv(emb_spk_path,
                                 sep='\t',
                                 index=False,
                                 header=False)

    print(len(emb_emt))
    print(emb_emt.shape)
Example #23
import argparse


parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint', required=False, help='Full path to model checkpoint', default="tacotron/tmp/tacotron-20180906/model.ckpt")
parser.add_argument('--text', required=False, help='Text to synthesize', default="Hello World")
parser.add_argument('--output', required=False, help='File path of output', default="HelloWorld.wav")
args = parser.parse_args()


checkpoint = str(args.checkpoint)
text = str(args.text)
output = str(args.output)
print("Checkpoint: " + checkpoint)
print("Text: " + text)
print("Output: " + output)
print("")

print("Loading model...")
synthesizer = Synthesizer()
synthesizer.load(checkpoint)
print("Loading model completed!")
print("")

print("Sythesizing text...")
with open(output, 'wb') as file:
    file.write(synthesizer.synthesize(text))
print("Sythesizing text completed!")
print("")

Example #24
def run_synthesis(args, checkpoint_path, output_dir, hparams):
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')
        log_dir = os.path.join(output_dir, 'logs-gta')

        #Create output path if it doesn't exist
        os.makedirs(synth_dir, exist_ok=True)
        os.makedirs(log_dir, exist_ok=True)
    else:
        synth_dir = os.path.join(output_dir, 'natural')
        log_dir = os.path.join(output_dir, 'logs-natural')
        if args.modify_vae_dim is not None:
            synth_dir += '-modify'
            log_dir += '-modify'
        modify_vae_dim = [int(dim) for dim in args.modify_vae_dim.split(',')
                          ] if args.modify_vae_dim else None

        #Create output path if it doesn't exist
        os.makedirs(synth_dir, exist_ok=True)
        os.makedirs(log_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    if GTA or args.modify_vae_dim is None:
        synth.load(checkpoint_path, hparams, gta=GTA, vae_code_mode='auto')
    else:
        synth.load(checkpoint_path, hparams, gta=GTA, vae_code_mode='modify')
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(
            len(metadata), hours))

    #Set inputs batch wise
    metadata = [
        metadata[i:i + hparams.tacotron_synthesis_batch_size]
        for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)
    ]

    log('Starting Synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        trange = tqdm(metadata)
        for i, meta in enumerate(trange):
            if GTA or args.modify_vae_dim is None:
                texts = [m[5] for m in meta]
                mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
                wav_filenames = [os.path.join(wav_dir, m[0]) for m in meta]
                basenames = [
                    os.path.basename(m).replace('.npy',
                                                '').replace('mel-', '')
                    for m in mel_filenames
                ]
                mel_output_filenames, speaker_ids = synth.synthesize(
                    texts, basenames, synth_dir, log_dir, mel_filenames)
                log_dir = None
                #save plots and wavs for the first batch only, for human inspection

                for elems in zip(wav_filenames, mel_filenames,
                                 mel_output_filenames, speaker_ids, texts):
                    file.write('|'.join([str(x) for x in elems]) + '\n')
            else:
                scales = [-2, -1, 0, 1, 2]
                for dim in modify_vae_dim:
                    for scale in scales:
                        texts = [m[5] for m in meta]
                        mel_filenames = [
                            os.path.join(mel_dir, m[1]) for m in meta
                        ]
                        wav_filenames = [
                            os.path.join(wav_dir, m[0]) for m in meta
                        ]
                        basenames = [
                            'dim_{}-'.format(dim) +
                            os.path.basename(m).replace('.npy', '').replace(
                                'mel-', '') + '-mu+({}*sigma)'.format(scale)
                            for m in mel_filenames
                        ]
                        mel_output_filenames, speaker_ids = synth.synthesize(
                            texts, basenames, synth_dir, log_dir,
                            mel_filenames, dim, scale)
                        trange.set_postfix({
                            'modified_dim':
                            dim,
                            'value':
                            'mu+({}*sigma)'.format(scale)
                        })
                        trange.update(1 / len(scales) / len(modify_vae_dim) *
                                      len(trange))
                        trange.refresh()
                        for elems in zip(wav_filenames, mel_filenames,
                                         mel_output_filenames, speaker_ids,
                                         texts):
                            file.write('|'.join(
                                [str(x) for x in elems + (dim, scale)]) + '\n')
                break
                #synthesize spectrograms for the first batch only, for human inspection
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')
Example #25
def run_eval(args, checkpoint_path, output_dir, hparams, ppgs, speakers, Lf0s):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, reference_mels=args.reference_audio)

    if args.reference_audio is not None:
        print('reference_audio:', args.reference_audio)
        ref_wav = load_wav(args.reference_audio.strip(), hparams.sample_rate)
        reference_mel = melspectrogram(ref_wav, hparams).astype(np.float32).T
    else:
        if hparams.use_style_encoder == True:
            print("*******************************")
            print(
                "TODO: add style weights when there is no reference audio. Now we use random weights, "
                + "which may generate unintelligible audio sometimes.")
            print("*******************************")
        else:
            #raise ValueError("You must set the reference audio if you don't want to use GSTs.")
            print("233")

    #Set inputs batch wise
    ppgs = [
        ppgs[i:i + hparams.tacotron_synthesis_batch_size]
        for i in range(0, len(ppgs), hparams.tacotron_synthesis_batch_size)
    ]
    Lf0s = [
        Lf0s[i:i + hparams.tacotron_synthesis_batch_size]
        for i in range(0, len(Lf0s), hparams.tacotron_synthesis_batch_size)
    ]
    if args.reference_audio is not None:
        reference_mels = [reference_mel] * len(ppgs)

    log('Starting Synthesis')
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:

        for i, texts in enumerate(tqdm(ppgs)):
            start = time.time()
            basenames = [
                'batch_{}_sentence_{}'.format(i, j) for j in range(len(texts))
            ]
            if args.reference_audio is not None:
                mel_filenames = synth.synthesize(texts, [speakers[i]],
                                                 basenames, eval_dir, log_dir,
                                                 None, [reference_mels[i]],
                                                 Lf0s[i])
            else:
                mel_filenames = synth.synthesize(texts, [speakers[i]],
                                                 basenames, eval_dir, log_dir,
                                                 None, None, Lf0s[i])

            for elems in zip(texts, mel_filenames, [speakers[i]]):
                file.write('|'.join([str(x) for x in elems]) + '\n')
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Example #26
def synthesize_random(args, checkpoint_path, output_dir, hparams,
                      model_suffix):

    n_emt = 4 if not (args.paired) else 1
    n_txts_per_emotion = 5 if not (args.paired) else 10

    synth_dir = os.path.join(output_dir, 'random', model_suffix, time_string())
    os.makedirs(synth_dir, exist_ok=True)

    synth = Synthesizer()
    synth.load(args, checkpoint_path, hparams)

    meta_save_path = os.path.join(synth_dir, 'meta.csv')

    df = pd.read_csv(
        r'C:\Users\t-mawhit\Documents\code\Tacotron-2\data\zo_jessa_train_test.csv'
    )
    df_train = df[df.train_test == 'train']
    df_test = df[df.train_test == 'test']

    #synthesize 20 random samples from zo and jessa, 5 in each emotion
    #change emotion

    df_test_zo = df_test[df_test.dataset == 'emt4']
    df_test_jessa = df_test[df_test.dataset == 'jessa']

    df_test_use = df_test_jessa if not (
        args.zo) else df_test_zo[df_test_zo.emt_label == 0]

    np.random.seed(2)
    chosen_texts_idxs = np.random.choice(df_test_use.index,
                                         n_txts_per_emotion * n_emt,
                                         replace=False)
    df_test_use_texts_rows = df_test_use.loc[chosen_texts_idxs]
    meta = df_test_use_texts_rows.copy()
    meta['basename'] = ''
    idx = 0

    texts = []
    mel_filenames = []
    mel_ref_filenames_emt = []
    mel_ref_filenames_spk = []
    basenames = []
    basenames_refs = []
    emt_labels = []
    spk_labels = []

    for i in range(n_emt):
        df_test_zo_emt = df_test_zo[df_test_zo.emt_label == i]
        for j in range(n_txts_per_emotion):
            row = df_test_use_texts_rows.iloc[idx]
            texts.append(row.text)
            mel_filenames.append(
                os.path.join(args.input_dir, row.dataset, 'mels',
                             row.mel_filename))

            if args.paired:
                mel_ref_filenames_spk.append(
                    os.path.join(args.input_dir, row.dataset, 'mels',
                                 row.mel_filename))
                mel_ref_filenames_emt.append(
                    os.path.join(args.input_dir, row.dataset, 'mels',
                                 row.mel_filename))
            else:
                row_spk = df_test_use.loc[np.random.choice(df_test_use.index)]
                mel_ref_filenames_spk.append(
                    os.path.join(args.input_dir, row_spk.dataset, 'mels',
                                 row_spk.mel_filename))

                row_emt = df_test_zo_emt.loc[np.random.choice(
                    df_test_zo_emt.index)]
                mel_ref_filenames_emt.append(
                    os.path.join(args.input_dir, row_emt.dataset, 'mels',
                                 row_emt.mel_filename))

            basename = '{}'.format(row.basename.split('.')[0])
            basename_ref = 'e{}'.format(i)

            basenames.append(basename)
            basenames_refs.append(basename_ref)

            emt_label = row_emt.emt_label if not (
                args.paired) else row.emt_label
            spk_label = row_spk.spk_label if not (
                args.paired) else row.spk_label
            emt_labels.append(int(emt_label))
            spk_labels.append(int(spk_label))
            meta.iloc[idx, 8] = emt_label
            meta.iloc[idx, 9] = spk_label
            meta.iloc[idx, 10] = 'mel-{}_{}.npy'.format(basename, basename_ref)

            idx += 1

    meta.to_csv(meta_save_path, index=False)

    print('Starting Synthesis on {} samples'.format(len(mel_filenames)))
    synth.synthesize(texts,
                     basenames,
                     synth_dir,
                     synth_dir,
                     mel_filenames,
                     basenames_refs=basenames_refs,
                     mel_ref_filenames_emt=mel_ref_filenames_emt,
                     mel_ref_filenames_spk=mel_ref_filenames_spk,
                     emt_labels_synth=emt_labels,
                     spk_labels_synth=spk_labels)
Example #27
import argparse

from tacotron.utils import makedirs, str2bool
from tacotron.synthesizer import Synthesizer

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--load_path', required=True)
    parser.add_argument('--sample_path', default="samples")
    parser.add_argument('--text', required=True)
    parser.add_argument('--num_speakers', default=1, type=int)
    parser.add_argument('--speaker_id', default=0, type=int)
    parser.add_argument('--checkpoint_step', default=None, type=int)
    parser.add_argument('--is_korean', default=True, type=str2bool)
    config = parser.parse_args()

    makedirs(config.sample_path)

    synthesizer = Synthesizer()
    synthesizer.load(config.load_path, config.num_speakers,
                     config.checkpoint_step)

    audio = synthesizer.synthesize(texts=[config.text],
                                   base_path=config.sample_path,
                                   speaker_ids=[config.speaker_id],
                                   attention_trim=False,
                                   isKorean=config.is_korean)[0]