Ejemplo n.º 1
0
def run_synthesis(args, checkpoint_path, output_dir, hparams):
    """Run GTA (ground-truth-aligned) or natural synthesis over train.txt.

    Writes one mel per metadata row into the chosen output subdirectory and
    records wav/mel/output/text tuples in a map.txt; returns the map path.
    """
    gta_enabled = (args.GTA == 'True')
    synth_dir = os.path.join(output_dir, 'gta' if gta_enabled else 'natural')
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=gta_enabled)
    with open(metadata_filename, encoding='utf-8') as meta_file:
        metadata = [row.strip().split('|') for row in meta_file]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        # Column 4 holds a per-example frame count; convert to hours.
        hours = sum(int(row[4]) for row in metadata) * frame_shift_ms / 3600
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(
            len(metadata), hours))

    log('starting synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as map_file:
        for index, meta in enumerate(tqdm(metadata)):
            text = meta[5]
            mel_filename = os.path.join(mel_dir, meta[1])
            wav_filename = os.path.join(wav_dir, meta[0])
            mel_output_filename = synth.synthesize(
                text, index + 1, synth_dir, None, mel_filename)
            map_file.write('{}|{}|{}|{}\n'.format(
                wav_filename, mel_filename, mel_output_filename, text))
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')
Ejemplo n.º 2
0
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize mel spectrograms for each evaluation sentence.

    Args:
        args: parsed CLI namespace (reads `model` and `mels_dir`).
        checkpoint_path: Tacotron checkpoint to load.
        output_dir: base directory; 'eval' and 'logs-eval' are created under it.
        hparams: hyperparameter object passed to the synthesizer.
        sentences: iterable of text strings to synthesize.

    Returns:
        The eval directory containing the generated mels and 'map.txt'.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model in ('Both', 'Tacotron-2'):
        # mels_dir = wavenet_input_dir: WaveNet consumes mels from eval_dir.
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            # FIX: removed unused `start = time.time()` — the timing was never read.
            mel_filename = synth.synthesize(text, i + 1, eval_dir, log_dir, None)
            file.write('{}|{}\n'.format(text, mel_filename))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Ejemplo n.º 3
0
def main():
    """Parse CLI options and preprocess the selected dataset."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--base_dir', default=os.path.expanduser('~/tacotron'))
    parser.add_argument('--output', default='training')
    parser.add_argument('--dataset', required=True,
                        choices=['blizzard', 'ljspeech', 'nick'])
    parser.add_argument('--num_workers', type=int, default=cpu_count())
    parser.add_argument(
        '--hparams', default='',
        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    parser.add_argument('--validation_size', type=int, default=0)
    parser.add_argument('--test_size', type=int, default=0)

    args = parser.parse_args()
    hparams.parse(args.hparams)
    log(hparams_debug_string())

    # Dispatch table instead of an if/elif chain; `choices` above guarantees
    # the key is present.
    preprocessors = {
        'blizzard': preprocess_blizzard,
        'ljspeech': preprocess_ljspeech,
        'nick': preprocess_nick,
    }
    preprocessors[args.dataset](args, hparams)
Ejemplo n.º 4
0
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize eval sentences conditioned on a reference audio's mel.

    Raises:
        ValueError: if `args.reference_audio` is not provided.

    Returns:
        The eval directory containing the generated mels and 'map.txt'.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    # NOTE(review): `reference_mel` receives the audio *path* here, apparently
    # used by Synthesizer.load as an enable flag — confirm its signature.
    synth.load(checkpoint_path, hparams, reference_mel=args.reference_audio)
    if args.reference_audio is not None:
        ref_wav = audio.load_wav(args.reference_audio)
        # Transpose so frames are along the first axis.
        reference_mel = audio.melspectrogram(ref_wav).astype(np.float32).T
    else:
        raise ValueError(
            "Evaluation without reference audio. Please provide path to reference audio."
        )
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            # FIX: dropped unused `start = time.time()` — it was never read.
            mel_filename = synth.synthesize(text,
                                            i + 1,
                                            eval_dir,
                                            log_dir,
                                            None,
                                            reference_mel=reference_mel)
            file.write('{}|{}\n'.format(text, mel_filename))

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Ejemplo n.º 5
0
def setup_log(log_path, checkpoint_path, input_path):
    """Initialise infolog for the 'emt4_disc' model and log startup info."""
    infolog.init(log_path, 'emt4_disc', None)
    log('hi')  # NOTE(review): looks like leftover debug output — confirm before removing
    log('Checkpoint path: {}'.format(checkpoint_path))
    log('Loading training data from: {}'.format(input_path))
    log('Using model: {}'.format('emt4_disc'))
    log(hparams_debug_string())
Ejemplo n.º 6
0
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize mel spectrograms for each evaluation sentence.

    Returns:
        The eval directory containing the generated mels and 'map.txt'.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        #mels_dir = wavenet_input_dir: WaveNet consumes mels from eval_dir.
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            # FIX: removed unused `start = time.time()` — never read.
            mel_filename = synth.synthesize(text, i + 1, eval_dir, log_dir,
                                            None)
            file.write('{}|{}\n'.format(text, mel_filename))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Ejemplo n.º 7
0
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize mels for eval sentences and print per-sentence timing.

    Note: the LPCNet feature-to-f32 conversion and the map.txt write below are
    commented out, so no .f32 file and no map entries are actually produced.
    Returns the eval directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model in ('Both', 'Tacotron-2'):
        assert os.path.normpath(eval_dir) == os.path.normpath(
            args.mels_dir)  #mels_dir = wavenet_input_dir

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            start = time.time()
            mel_filename = synth.synthesize(text, i + 1, eval_dir, log_dir,
                                            None)
            #file.write('{}|{}\n'.format(text, mel_filename))
            #npy_data = np.load(mel_filename)
            #npy_data = npy_data.reshape((-1,))
            #npy_data.tofile("f32_for_lpcnet.f32")

            # NOTE(review): the conversion above is disabled, so this message
            # is misleading — confirm whether the .f32 export should be restored.
            print("Features f32 file created for text")
            end = time.time()
            print(">>>>>LPCNet Feature to PCM Conversion time = {}".format(
                end - start))

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def init_tacotron2(args):
    """Initialise a Tacotron synthesizer from `args.taco_checkpoint`.

    Returns:
        (synth, eval_dir, log_dir) — the loaded Synthesizer and its output dirs.

    Raises:
        ValueError: if `args.model` is not 'Tacotron'.
        RuntimeError: if the checkpoint cannot be loaded.
    """
    # t2
    print('\n#####################################')
    if args.model != 'Tacotron':
        # FIX: the original fell through to `return synth, eval_dir, log_dir`
        # with all three names unbound, raising an opaque UnboundLocalError.
        raise ValueError('init_tacotron2 only supports the "Tacotron" model, '
                         'got: {}'.format(args.model))

    print('\nInitialising Tacotron Model...\n')
    t2_hparams = hparams.parse(args.hparams)
    try:
        checkpoint_path = tf.train.get_checkpoint_state(
            args.taco_checkpoint).model_checkpoint_path
        log('loaded model at {}'.format(checkpoint_path))
    except Exception as e:
        # FIX: narrowed the bare `except:` (it also swallowed KeyboardInterrupt)
        # and chained the original cause for debuggability.
        raise RuntimeError('Failed to load checkpoint at {}'.format(
            args.taco_checkpoint)) from e

    output_dir = 'tacotron_' + args.output_dir
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')
    print('eval_dir:', eval_dir)
    print('args.mels_dir:', args.mels_dir)

    # Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, t2_hparams)

    return synth, eval_dir, log_dir
Ejemplo n.º 9
0
def run_eval(args):
  """Synthesize `args.text` to a wav, optionally teacher-forced on mel targets
  and/or conditioned on a reference audio's mel spectrogram (GST path).

  Raises:
      ValueError: when no reference audio is given and `hparams.use_gst` is off.
  """
  print(hparams_debug_string())
  # Teacher forcing is enabled only when explicit mel targets are supplied.
  is_teacher_force = False
  mel_targets = args.mel_targets
  reference_mel = None
  if args.mel_targets is not None:
    is_teacher_force = True
    mel_targets = np.load(args.mel_targets)
  synth = Synthesizer(teacher_forcing_generating=is_teacher_force)
  # NOTE(review): `args.reference_audio` is passed as the second positional
  # argument of load — confirm the Synthesizer signature expects it there.
  synth.load(args.checkpoint, args.reference_audio)
  base_path = get_output_base_path(args.checkpoint)

  if args.reference_audio is not None:
    ref_wav = audio.load_wav(args.reference_audio)
    # Transposed so frames run along the first axis.
    reference_mel = audio.melspectrogram(ref_wav).astype(np.float32).T
    path = '%s_ref-%s.wav' % (base_path, os.path.splitext(os.path.basename(args.reference_audio))[0])
    alignment_path = '%s_ref-%s-align.png' % (base_path, os.path.splitext(os.path.basename(args.reference_audio))[0])
  else:
    if hparams.use_gst:
      # Without a reference, GST falls back to random style weights.
      print("*******************************")
      print("TODO: add style weights when there is no reference audio. Now we use random weights, " + 
             "which may generate unintelligible audio sometimes.")
      print("*******************************")
      path = '%s_ref-randomWeight.wav' % (base_path)
      alignment_path = '%s_ref-%s-align.png' % (base_path, 'randomWeight')
    else:
      raise ValueError("You must set the reference audio if you don't want to use GSTs.")

  with open(path, 'wb') as f:
    print('Synthesizing: %s' % args.text)
    print('Output wav file: %s' % path)
    print('Output alignments: %s' % alignment_path)
    f.write(synth.synthesize(args.text, mel_targets=mel_targets, reference_mel=reference_mel, alignment_path=alignment_path))
Ejemplo n.º 10
0
def run_synthesis(args, checkpoint_path, output_dir, hparams):
    """Synthesize audio waveforms (WaveNet stage) from mel spectrograms.

    In 'Tacotron-2' mode, reads texts/mels/speaker ids from the map.txt the
    Tacotron eval step wrote into `args.mels_dir`; otherwise every .npy file in
    `args.mels_dir` is treated as a mel with `args.speaker_id` speakers.
    """
    log_dir = os.path.join(output_dir, 'plots')
    wav_dir = os.path.join(output_dir, 'wavs')

    #We suppose user will provide correct folder depending on training method
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    if args.model == 'Tacotron-2':
        #If running all Tacotron-2, synthesize audio from evaluated mels
        metadata_filename = os.path.join(args.mels_dir, 'map.txt')
        with open(metadata_filename, encoding='utf-8') as f:
            metadata = [line.strip().split('|') for line in f]
            frame_shift_ms = hparams.hop_size / hparams.sample_rate
            # Last column presumably a frame count — converted to hours here.
            hours = sum([int(x[-1])
                         for x in metadata]) * frame_shift_ms / (3600)
            log('Loaded metadata for {} examples ({:.2f} hours)'.format(
                len(metadata), hours))

        metadata = np.array(metadata)
        # Columns used: 0 = text, 1 = mel file, 2 = speaker id.
        speaker_ids = metadata[:, 2]
        mel_files = metadata[:, 1]
        texts = metadata[:, 0]
    else:
        #else Get all npy files in input_dir (supposing they are mels)
        mel_files = [
            os.path.join(args.mels_dir, f) for f in os.listdir(args.mels_dir)
            if f.split('.')[-1] == 'npy'
        ]
        speaker_ids = args.speaker_id
        texts = None

    log('Starting synthesis! (this will take a while..)')
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(wav_dir, exist_ok=True)

    with open(os.path.join(wav_dir, 'map.txt'), 'w') as file:
        for i, mel_file in enumerate(tqdm(mel_files)):
            mel_spectro = np.load(mel_file)
            if hparams.normalize_for_wavenet:
                #[-max, max] or [0,max]
                T2_output_range = (
                    -hparams.max_abs_value,
                    hparams.max_abs_value) if hparams.symmetric_mels else (
                        0, hparams.max_abs_value)
                #rerange to [0, 1]
                mel_spectro = np.interp(mel_spectro, T2_output_range, (0, 1))

            # NOTE(review): keeps the directory prefix — basename includes the
            # full path minus '.npy'; confirm synth.synthesize expects that.
            basename = mel_file.replace('.npy', '')
            speaker_id = speaker_ids[i]
            audio_file = synth.synthesize(mel_spectro, speaker_id, basename,
                                          wav_dir, log_dir)

            if texts is None:
                file.write('{}|{}\n'.format(mel_file, audio_file))
            else:
                file.write('{}|{}|{}\n'.format(texts[i], mel_file, audio_file))

    log('synthesized audio waveforms at {}'.format(wav_dir))
Ejemplo n.º 11
0
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize eval sentences given as 'speaker_id|text' lines.

    Korean text is number-normalized and decomposed into jamo before
    synthesis. Returns the eval directory containing mels and map.txt.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model in ('Both', 'Tacotron-2'):
        # mels_dir = wavenet_input_dir
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output paths if they don't exist
    for directory in (eval_dir, log_dir,
                      os.path.join(log_dir, 'wavs'),
                      os.path.join(log_dir, 'plots')):
        os.makedirs(directory, exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as map_file:
        for idx, line in enumerate(tqdm(sentences)):
            parts = line.split('|')
            if len(parts) == 1:
                raise ValueError('invalid "speaker_id|text" format')
            speak_id, sentence = parts[0], parts[1]
            if is_korean_text(sentence):
                sentence = normalize_number(sentence)
                # Decompose Hangul syllables into jamo units.
                sentence = split_to_jamo(sentence, hparams.cleaners)
            mel_filename = synth.synthesize(sentence, idx + 1, eval_dir,
                                            log_dir, None, speak_id)
            map_file.write('{}|{}\n'.format(sentence, mel_filename))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Ejemplo n.º 12
0
def run_live(args, checkpoint_path, hparams):
    """Interactive synthesis loop; logs to the terminal only (no record files)."""
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Quick greeting so the user knows the model is ready.
    greetings = 'Hello, Welcome to the Live testing tool. Please type a message and I will try to read it!'
    log(greetings)
    generate_fast(synth, greetings)

    # Read lines until 'quit' or Ctrl-C.
    while True:
        try:
            text = input()
            if text == 'quit':
                break
            if args.speaker_id is None:
                # Pick a random trained speaker when none is pinned.
                speaker_id = random.choice(list(range(1, args.num_speakers)))
            else:
                speaker_id = args.speaker_id
            if text:
                generate_fast(synth, text, speaker_id)
        except KeyboardInterrupt:
            leave = 'Thank you for testing our features. see you soon.'
            log(leave)
            generate_fast(synth, leave)
            sleep(2)
            break
Ejemplo n.º 13
0
def run_synthesis(checkpoint_path, output_dir, hparams):
    """Synthesize wavs and/or speaker embeddings from a WaveNet map.txt.

    `hparams.synth_mode` selects 'all' (wav for dict-listed items, embedding
    for the rest), 'embedding' (embeddings only), or 'wav' (wavs for
    dict-listed items only).
    """
    log_dir = os.path.join(output_dir, 'plots')
    wav_dir = os.path.join(output_dir, 'wavs')
    embed_dir = os.path.join(output_dir, 'embeddings')

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    metadata_filename = os.path.join(hparams.wavenet_synth, 'map.txt')
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = np.array([line.strip().split('|') for line in f])
        # In 'all' mode an optional (start, end) pair restricts the rows.
        # FIX: `is not None` instead of `!= None` (PEP 8 E711).
        if (hparams.synth_mode == "all") and (hparams.synth_idx is not None):
            metadata = metadata[hparams.synth_idx[0]:hparams.synth_idx[1], :]

    # speaker ids from trained speakers list
    speaker_ids = metadata[:, 3]
    print("spk_ids" + str(speaker_ids.shape))
    mel_files = metadata[:, 1]
    print("mel_files" + str(mel_files.shape))

    log('Starting synthesis! (this will take a while..)')
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(wav_dir, exist_ok=True)
    os.makedirs(embed_dir, exist_ok=True)

    synth_dict = load_synthesis_dict()

    for idx, mel_file in enumerate(tqdm(mel_files)):
        print("idx")
        print(idx)
        mel_spectro = [np.load(mel_file)]
        basenames = [os.path.basename(mel_file).replace('.npy', '')]
        speaker_id = [speaker_ids[idx]]
        print("synthesizing {}".format(basenames[0]))

        # NOTE(review): assumes basenames look like 'xxx-key-...' so that
        # split('-')[1] yields the synthesis-dict key — confirm upstream naming.
        if hparams.synth_mode == "all":
            if basenames[0].split('-')[1] in synth_dict.keys():
                print("Synthesizing both wav and embedding")
                synth.synthesize(mel_spectro, speaker_id, basenames, wav_dir, log_dir, embed_dir, embed_only=False)
            else:
                print("Synthesizing embedding only")
                synth.synthesize(mel_spectro, speaker_id, basenames, wav_dir, log_dir, embed_dir, embed_only=True)
        elif hparams.synth_mode == "embedding":
            print("Synthesizing embedding only")
            synth.synthesize(mel_spectro, speaker_id, basenames, wav_dir, log_dir, embed_dir, embed_only=True)
        elif hparams.synth_mode == "wav":
            if basenames[0].split('-')[1] in synth_dict.keys():
                synth.synthesize(mel_spectro, speaker_id, basenames, wav_dir, log_dir, embed_dir, embed_only=False)
        else:
            print("Not supported synth mode.")

    log('synthesized audio waveforms at {}'.format(wav_dir))
Ejemplo n.º 14
0
def prepare_run(args):
    """Parse hparam overrides, configure TF logging, and create the run log dir.

    Returns:
        (log_dir, modified_hparams)
    """
    hp = hparams.parse(args.hparams)
    print(hparams_debug_string())
    # Silence TensorFlow C++ logging below the requested level.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_log_level)
    log_dir = os.path.join(args.base_dir,
                           'logs-{}'.format(args.name or args.model))
    os.makedirs(log_dir, exist_ok=True)
    return log_dir, hp
Ejemplo n.º 15
0
def main():
    """Entry point: build train/validation file lists from selected-key CSVs,
    configure logging, and launch training/evaluation.

    Command-line arguments are parsed by docopt from the module docstring.
    """
    args = docopt(__doc__)
    print("Command line args:\n", args)
    checkpoint_dir = args["--checkpoint-dir"]
    source_data_root = args["--source-data-root"]
    target_data_root = args["--target-data-root"]
    selected_list_dir = args["--selected-list-dir"]
    use_multi_gpu = args["--multi-gpus"]

    # JSON hparams (if given) are applied before the comma-separated overrides.
    if args["--hparam-json-file"]:
        with open(args["--hparam-json-file"]) as f:
            json = "".join(f.readlines())
            hparams.parse_json(json)

    hparams.parse(args["--hparams"])

    training_list = list(load_key_list("train.csv", selected_list_dir))
    validation_list = list(load_key_list("validation.csv", selected_list_dir))

    # Each key maps to one source and one target file via the configured
    # file extensions.
    training_source_files = [
        os.path.join(source_data_root,
                     f"{key}.{hparams.source_file_extension}")
        for key in training_list
    ]
    training_target_files = [
        os.path.join(target_data_root,
                     f"{key}.{hparams.target_file_extension}")
        for key in training_list
    ]
    validation_source_files = [
        os.path.join(source_data_root,
                     f"{key}.{hparams.source_file_extension}")
        for key in validation_list
    ]
    validation_target_files = [
        os.path.join(target_data_root,
                     f"{key}.{hparams.target_file_extension}")
        for key in validation_list
    ]

    print("training source", len(training_source_files))
    print("training target", len(training_target_files))

    # NOTE(review): local name `log` shadows any module-level `log` helper;
    # inside this function it is the "tensorflow" logging.Logger.
    log = logging.getLogger("tensorflow")
    log.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh = logging.FileHandler(hparams.logfile)
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    log.addHandler(fh)
    tf.logging.set_verbosity(tf.logging.INFO)

    tf.logging.info(hparams_debug_string())

    train_and_evaluate(hparams, checkpoint_dir, training_source_files,
                       training_target_files, validation_source_files,
                       validation_target_files, use_multi_gpu)
Ejemplo n.º 16
0
def run_synthesis(args, checkpoint_path, output_dir):
    """Run GTA or natural synthesis over train.txt, optionally expanding words
    to ARPAbet via CMUDict with probability `_p_cmudict`.

    Returns:
        Path to the map.txt written into the synthesis directory.

    Raises:
        Exception: if `hparams.use_cmudict` is set but the dictionary file is
            missing.
    """
    _p_cmudict = 0.5
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')
    else:
        synth_dir = os.path.join(output_dir, 'natural')
    #Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')

    if hparams.use_cmudict:
        cmudict_dir = os.path.dirname(metadata_filename)
        cmudict_path = os.path.join(cmudict_dir, 'cmudict-0.7b')
        if not os.path.isfile(cmudict_path):
            # BUG FIX: the original interpolated `self._datadir`, which does
            # not exist in this module-level function and raised NameError
            # instead of the intended message.
            raise Exception(
                'If use_cmudict=True, you must download cmu dictionary first. '
                +
                'Run shell as:\n wget -P %s http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b'
                % cmudict_dir)
        _cmudict = cmudict.CMUDict(cmudict_path, keep_ambiguous=False)
        log('Loaded CMUDict with %d unambiguous entries' % len(_cmudict))
    else:
        _cmudict = None

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, gta=GTA)

    # Hoisted out of the per-example loop: the pattern is constant.
    _punctuation_re = re.compile(r'([\.,"\-_:]+)')

    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]

        log('starting synthesis')
        mel_dir = os.path.join(args.input_dir, 'mels')
        wav_dir = os.path.join(args.input_dir, 'linear')

        with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
            for i, meta in enumerate(tqdm(metadata)):
                # Pad punctuation with spaces so tokens split cleanly.
                text = _punctuation_re.sub(r' \1 ', meta[3])
                if _cmudict and random.random() < _p_cmudict:
                    text = ' '.join([
                        maybe_get_arpabet(_cmudict, word)
                        for word in text.split(' ')
                    ])
                mel_filename = os.path.join(mel_dir, meta[1])
                wav_filename = os.path.join(wav_dir, meta[0])
                mel_output_filename = synth.synthesize(text, i + 1, synth_dir,
                                                       None, mel_filename)
                file.write('{}|{}|{}|{}\n'.format(wav_filename, mel_filename,
                                                  mel_output_filename, text))
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')
Ejemplo n.º 17
0
def run_eval(args):
    """Synthesize a single sentence and write it next to the checkpoint."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    path = '%s-_%s_.wav' % (base_path, args.sentence)
    print('Synthesizing: %s - %s' % (path, args.sentence))
    # FIX: use a context manager so the handle is flushed and closed even on
    # error — the original leaked the open file handle.
    with open(path, 'wb') as file_handle:
        file_handle.write(synth.synthesize(args.sentence))
Ejemplo n.º 18
0
def run_eval(args, text):
    """Synthesize `text` into '<text>.wav' in the current directory."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    # FIX: removed unused `base_path = get_output_base_path(...)` — the output
    # path is derived solely from the text.
    path = '%s.wav' % (text)
    print('Synthesizing: %s' % path)
    with open(path, 'wb') as f:
        f.write(synth.synthesize(text))
Ejemplo n.º 19
0
def run_eval(args):
    """Synthesize from reference wav `args.ref` into '<refname>-eval.wav'."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    # FIX: removed unused `base_path = get_output_base_path(...)` — the output
    # name is derived from the reference file only.
    with open(
            '{}-eval.wav'.format(args.ref.split('/')[-1].replace('.wav', '')),
            'wb') as f:
        f.write(synth.synthesize(args.ref))
Ejemplo n.º 20
0
def run_eval(args, checkpoint_path):
    """Synthesize every module-level sentence, timing each one."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path)
    for idx, sentence in enumerate(sentences):
        begin = time.time()
        synth.synthesize(sentence, idx, args.output_dir)
        elapsed = time.time() - begin
        print('synthesized sentence n°{} in {:.3f} sec'.format(
            idx + 1, elapsed))
Ejemplo n.º 21
0
def run_eval(args):
  """Write one wav per module-level sentence next to the checkpoint."""
  print(hparams_debug_string())
  synth = Synthesizer()
  synth.load(args.checkpoint)
  base = get_output_base_path(args.checkpoint)
  for idx, sentence in enumerate(sentences):
    target = '%s-%d.wav' % (base, idx)
    print('Synthesizing: %s' % target)
    with open(target, 'wb') as out:
      out.write(synth.synthesize(sentence))
Ejemplo n.º 22
0
def run_eval(args):
    """Synthesize the module-level `sentences` into numbered wav files."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    for number, sentence in enumerate(sentences):
        wav_path = '%s-%d.wav' % (base_path, number)
        print('Synthesizing: %s' % wav_path)
        with open(wav_path, 'wb') as wav_file:
            wav_file.write(synth.synthesize(sentence))
Ejemplo n.º 23
0
def main():
    """Apply optional preset/override hyperparameters, then run synthesis."""
    args = get_args()
    if args.preset is not None:
        # Preset JSON is applied before the comma-separated CLI overrides.
        with open(args.preset) as preset_file:
            hparams.parse_json(preset_file.read())

    modified_hp = hparams.parse(args.hparams)
    print(hparams_debug_string())
    synthesis(args.checkpoint_path, args.local_path, args.global_id,
              args.output_dir, modified_hp)
Ejemplo n.º 24
0
def run_eval(ckpt_dir):
  """Load the latest checkpoint in `ckpt_dir` and synthesize all sentences."""
  print(hparams_debug_string())
  checkpoint = tf.train.get_checkpoint_state(ckpt_dir).model_checkpoint_path
  synth = Synthesizer()
  synth.load(checkpoint)
  base = get_output_base_path(checkpoint)
  for num, sentence in enumerate(sentences):
    out_path = '%s-%03d.wav' % (base, num)
    print('Synthesizing: %s' % out_path)
    with open(out_path, 'wb') as out_file:
      out_file.write(synth.synthesize(sentence))
Ejemplo n.º 25
0
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Batched evaluation synthesis: sentences are grouped into batches of
    `hparams.tacotron_synthesis_batch_size` and synthesized together.

    Returns:
        The eval directory containing the generated mels and 'map.txt'.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        # WaveNet consumes the eval mels, so the two dirs must match.
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    print(hparams_debug_string())
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    #Set inputs batch wise
    sentences = [
        sentences[i:i + hparams.tacotron_synthesis_batch_size] for i in range(
            0, len(sentences), hparams.tacotron_synthesis_batch_size)
    ]

    print('Starting Synthesis')
    log('Starting Synthesis')
    with open(os.path.join(eval_dir, 'map.txt'), 'w',
              encoding="utf-8") as file:
        for i, texts in enumerate(tqdm(sentences)):
            # FIX: dropped unused `start = time.time()` — it was never read.
            basenames = [
                'batch_{}_sentence_{}'.format(i, j) for j in range(len(texts))
            ]
            mel_filenames, speaker_ids = synth.synthesize(
                texts, basenames, eval_dir, log_dir, None)

            for elems in zip(texts, mel_filenames, speaker_ids):
                file.write('|'.join([str(x) for x in elems]) + '\n')
    print('synthesized mel spectrograms at {}'.format(eval_dir))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Ejemplo n.º 26
0
def run_eval(args):
  """Synthesize each sentence conditioned on a reference mel spectrogram."""
  print(hparams_debug_string())
  synth = Synthesizer()
  synth.load(args.checkpoint)
  base = get_output_base_path(args.checkpoint)
  # The reference mel is computed once and reused for every sentence.
  ref_mel = melspectrogram(load_wav(args.reference_audio)).transpose()
  for idx, sentence in enumerate(sentences):
    target = '%s-%d.wav' % (base, idx)
    print('Synthesizing: %s' % target)
    with open(target, 'wb') as out:
      out.write(synth.synthesize(sentence, ref_mel))
Ejemplo n.º 27
0
def run_eval(args):
    """Synthesize sentences, saving both the 16 kHz wav and its feature array."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    for idx, sentence in enumerate(sentences):
        wav_path = '%s-%03d.wav' % (base_path, idx)
        print(' ')
        print('[{:<10}]: {}'.format('processing', wav_path))
        wav, feature = synth.synthesize(sentence)
        sf.write(wav_path, wav, 16000)
        # Features are stored alongside the wav with a .npy extension.
        np.save(wav_path.replace('.wav', '.npy'), feature)
Ejemplo n.º 28
0
def run_synthesis(args, checkpoint_path, output_dir, hparams):
    """Batched GTA/natural synthesis over the training metadata.

    Returns:
        Path to the map.txt written into the synthesis directory.
    """
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')
    else:
        synth_dir = os.path.join(output_dir, 'natural')
    #Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=GTA)
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        # BUG FIX: removed leftover debug `print(metadata[0])`,
        # `print(len(metadata))` and `exit()` — the exit() aborted the process
        # here and made all of the synthesis below unreachable.
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(
            len(metadata), hours))

    #Set inputs batch wise
    metadata = [
        metadata[i:i + hparams.tacotron_synthesis_batch_size]
        for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)
    ]

    log('Starting Synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        for i, meta in enumerate(tqdm(metadata)):
            texts = [m[5] for m in meta]
            mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
            wav_filenames = [os.path.join(wav_dir, m[0]) for m in meta]
            basenames = [
                os.path.basename(m).replace('.npy', '').replace('mel-', '')
                for m in mel_filenames
            ]
            mel_output_filenames, speaker_ids = synth.synthesize(
                texts, basenames, synth_dir, None, mel_filenames)

            for elems in zip(wav_filenames, mel_filenames,
                             mel_output_filenames, speaker_ids, texts):
                file.write('|'.join([str(x) for x in elems]) + '\n')
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')
Ejemplo n.º 29
0
 def init(self):
     """Load hparams and a fixed LJ Tacotron checkpoint for serving."""
     os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
     hparams.parse('')
     print(hparams_debug_string())
     self.voice_choice = 1  # female default
     self.base_dir = os.getcwd()
     # NOTE(review): checkpoint step 40000 is hard-coded — confirm the file
     # exists relative to the working directory at startup.
     checkpoint = os.path.join(self.base_dir, 'LJlogs-tacotron',
                               'model.ckpt-40000')
     self.output_path = os.path.join(self.base_dir, 'static', 'audio',
                                     'output.wav')
     self.synth = Synthesizer()
     self.synth.load(checkpoint)
Ejemplo n.º 30
0
def run_eval(args):
  """Synthesize each sentence in `sentences` to a numbered wav next to the checkpoint.

  Each sentence is first sent to a local preprocessing service at
  127.0.0.1:8080/get_sentence/, and the service's response text is what
  actually gets synthesized.

  Args:
    args: parsed CLI args; uses args.checkpoint (model checkpoint path).
  """
  from urllib.parse import quote
  print(hparams_debug_string())
  synth = Synthesizer()
  synth.load(args.checkpoint)
  base_path = get_output_base_path(args.checkpoint)
  for i, text in enumerate(sentences):
    path = '%s-%03d.wav' % (base_path, i)
    print('Synthesizing: %s' % path)
    # URL-encode the sentence: raw spaces/punctuation previously produced a
    # malformed request URL.
    url = 'http://127.0.0.1:8080/get_sentence/' + quote(text)
    # Fetch before opening the output file so a failed request doesn't leave
    # an empty wav behind.
    processed = requests.get(url).text
    with open(path, 'wb') as f:
      f.write(synth.synthesize(processed))
Ejemplo n.º 31
0
def run_eval(args):
    """Write one numbered wav per sentence in `sentences` under the output base path."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    os.makedirs(base_path, exist_ok=True)
    for idx, sentence in enumerate(sentences, 1):
        out_name = '%s-%04d.wav' % (os.path.basename(base_path), idx)
        out_path = os.path.join(base_path, out_name)
        print('Synthesizing: %s' % out_path)
        # Appends '。。' (full-width stops) to each sentence — presumably as
        # end-of-utterance padding for the model; TODO confirm.
        with open(out_path, 'wb') as wav_file:
            wav_file.write(synth.synthesize(sentence + '。。'))
Ejemplo n.º 32
0
def run_eval(args):
  """Synthesize every sentence in `sentences`, honoring CLI hyperparameter overrides."""
  print(hparams_debug_string())
  synth = Synthesizer()
  # Apply any command-line hyperparameter overrides before loading the model.
  modified_hp = hparams.parse(args.hparams)
  synth.load(args.checkpoint, modified_hp)

  base_path = get_output_base_path(args.checkpoint)
  for idx, sentence in enumerate(sentences):
    wav_path = '%s-%d.wav' % (base_path, idx)
    print('Synthesizing: %s' % wav_path)
    with open(wav_path, 'wb') as out:
      data, wav = synth.eval(sentence)
      out.write(data)
Ejemplo n.º 33
0
def run_live(args, checkpoint_path, hparams):
	"""Interactive console loop: read a line, synthesize it, repeat until Ctrl-C."""
	#Terminal-only logging; nothing is persisted to files here
	log(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path, hparams)

	#Warm up and demonstrate with a fixed greeting
	greetings = 'Hello, Welcome to the Live testing tool. Please type a message and I will try to read it!'
	log(greetings)
	generate_fast(synth, greetings)

	#Read-synthesize loop; Ctrl-C says goodbye and exits
	while True:
		try:
			generate_fast(synth, input())
		except KeyboardInterrupt:
			leave = 'Thank you for testing our features. see you soon.'
			log(leave)
			generate_fast(synth, leave)
			sleep(2)
			break
Ejemplo n.º 34
0
def run_synthesis(args, checkpoint_path, output_dir, hparams):
	"""Run Tacotron over the training metadata and write mel predictions plus a map file.

	Args:
		args: parsed CLI args; uses args.GTA ('True'/'False' string) and args.input_dir.
		checkpoint_path: Tacotron checkpoint to load.
		output_dir: base directory; outputs go to <output_dir>/gta or <output_dir>/natural.
		hparams: hyperparameters; hop_size and sample_rate are used for duration logging.

	Returns:
		Path to the generated map.txt listing wav|mel|predicted-mel|text per line.
	"""
	GTA = (args.GTA == 'True')
	#GTA (ground-truth aligned) outputs go to a separate directory; the
	#makedirs call was previously duplicated in both branches.
	synth_dir = os.path.join(output_dir, 'gta' if GTA else 'natural')

	#Create output path if it doesn't exist
	os.makedirs(synth_dir, exist_ok=True)

	metadata_filename = os.path.join(args.input_dir, 'train.txt')
	log(hparams_debug_string())
	synth = Synthesizer()
	synth.load(checkpoint_path, hparams, gta=GTA)
	with open(metadata_filename, encoding='utf-8') as f:
		metadata = [line.strip().split('|') for line in f]
		frame_shift_ms = hparams.hop_size / hparams.sample_rate
		#Column 4 of each metadata row holds the frame count: frames -> seconds -> hours
		hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
		log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

	log('starting synthesis')
	mel_dir = os.path.join(args.input_dir, 'mels')
	wav_dir = os.path.join(args.input_dir, 'audio')
	with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
		for i, meta in enumerate(tqdm(metadata)):
			text = meta[5]
			mel_filename = os.path.join(mel_dir, meta[1])
			wav_filename = os.path.join(wav_dir, meta[0])
			mel_output_filename = synth.synthesize(text, i+1, synth_dir, None, mel_filename)

			file.write('{}|{}|{}|{}\n'.format(wav_filename, mel_filename, mel_output_filename, text))
	log('synthesized mel spectrograms at {}'.format(synth_dir))
	return os.path.join(synth_dir, 'map.txt')
Ejemplo n.º 35
0
class SynthesisResource:
  """Falcon endpoint that turns a `text` query parameter into wav audio."""

  def on_get(self, req, res):
    text = req.params.get('text')
    # Reject requests that carry no text to synthesize.
    if not text:
      raise falcon.HTTPBadRequest()
    res.data = synthesizer.synthesize(text)
    res.content_type = 'audio/wav'


# Module-level app wiring: a single shared Synthesizer serves all requests.
synthesizer = Synthesizer()
api = falcon.API()
api.add_route('/synthesize', SynthesisResource())
api.add_route('/', UIResource())


if __name__ == '__main__':
  # Standalone mode: parse CLI args and serve with the stdlib WSGI server.
  from wsgiref import simple_server
  parser = argparse.ArgumentParser()
  parser.add_argument('--checkpoint', required=True, help='Full path to model checkpoint')
  parser.add_argument('--port', type=int, default=9000)
  parser.add_argument('--hparams', default='',
    help='Hyperparameter overrides as a comma-separated list of name=value pairs')
  args = parser.parse_args()
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # silence TensorFlow C++ info logs
  hparams.parse(args.hparams)
  print(hparams_debug_string())
  synthesizer.load(args.checkpoint)
  print('Serving on port %d' % args.port)
  simple_server.make_server('0.0.0.0', args.port, api).serve_forever()
else:
  # Imported under an external WSGI server: the checkpoint path must come
  # from the CHECKPOINT environment variable (KeyError if unset).
  synthesizer.load(os.environ['CHECKPOINT'])
Ejemplo n.º 36
0
def train(log_dir, args):
  """Train a Tacotron model; checkpoints, summaries and debug audio go to log_dir.

  Args:
    log_dir: output directory for checkpoints, TensorBoard summaries,
      debug wavs and alignment plots.
    args: parsed CLI args; reads git, base_dir, input, model, restore_step,
      summary_interval and checkpoint_interval.
  """
  commit = get_git_commit() if args.git else 'None'
  checkpoint_path = os.path.join(log_dir, 'model.ckpt')
  input_path = os.path.join(args.base_dir, args.input)
  log('Checkpoint path: %s' % checkpoint_path)
  log('Loading training data from: %s' % input_path)
  log('Using model: %s' % args.model)
  log(hparams_debug_string())

  # Set up DataFeeder:
  coord = tf.train.Coordinator()
  with tf.variable_scope('datafeeder') as scope:
    feeder = DataFeeder(coord, input_path, hparams)

  # Set up model:
  global_step = tf.Variable(0, name='global_step', trainable=False)
  with tf.variable_scope('model') as scope:
    model = create_model(args.model, hparams)
    model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets, feeder.linear_targets)
    model.add_loss()
    model.add_optimizer(global_step)
    stats = add_stats(model)

  # Bookkeeping:
  step = 0
  time_window = ValueWindow(100)  # rolling window of per-step wall time
  loss_window = ValueWindow(100)  # rolling window of training loss
  saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

  # Train!
  with tf.Session() as sess:
    try:
      summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
      sess.run(tf.global_variables_initializer())

      if args.restore_step:
        # Restore from a checkpoint if the user requested it.
        restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
        saver.restore(sess, restore_path)
        log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)
      else:
        log('Starting new training run at commit: %s' % commit, slack=True)

      feeder.start_in_session(sess)

      while not coord.should_stop():
        start_time = time.time()
        step, loss, opt = sess.run([global_step, model.loss, model.optimize])
        time_window.append(time.time() - start_time)
        loss_window.append(loss)
        message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
          step, time_window.average, loss, loss_window.average)
        # Only ping slack on checkpoint steps to limit notification noise.
        log(message, slack=(step % args.checkpoint_interval == 0))

        # Abort early on divergence rather than wasting further steps.
        if loss > 100 or math.isnan(loss):
          log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
          raise Exception('Loss Exploded')

        if step % args.summary_interval == 0:
          log('Writing summary at step: %d' % step)
          summary_writer.add_summary(sess.run(stats), step)

        if step % args.checkpoint_interval == 0:
          log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
          saver.save(sess, checkpoint_path, global_step=step)
          log('Saving audio and alignment...')
          # Sample the first batch element for a qualitative progress check.
          input_seq, spectrogram, alignment = sess.run([
            model.inputs[0], model.linear_outputs[0], model.alignments[0]])
          waveform = audio.inv_spectrogram(spectrogram.T)
          audio.save_wav(waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
          plot.plot_alignment(alignment, os.path.join(log_dir, 'step-%d-align.png' % step),
            info='%s, %s, %s, step=%d, loss=%.5f' % (args.model, commit, time_string(), step, loss))
          log('Input: %s' % sequence_to_text(input_seq))

    except Exception as e:
      log('Exiting due to exception: %s' % e, slack=True)
      traceback.print_exc()
      coord.request_stop(e)
Ejemplo n.º 37
0
def train(log_dir, args, hparams, input_path):
	"""Train a WaveNet vocoder with periodic summaries, checkpoints and eval runs.

	Args:
		log_dir: root output directory for checkpoints, plots and wavs.
		args: parsed CLI args; reads base_dir, model, restore, wavenet_train_steps,
			summary_interval, checkpoint_interval and eval_interval.
		hparams: hyperparameters; wavenet_random_seed seeds the graph.
		input_path: training metadata path, joined onto args.base_dir.
	"""
	save_dir = os.path.join(log_dir, 'wave_pretrained/')
	eval_dir = os.path.join(log_dir, 'eval-dir')
	audio_dir = os.path.join(log_dir, 'wavs')
	plot_dir = os.path.join(log_dir, 'plots')
	wav_dir = os.path.join(log_dir, 'wavs')  # NOTE(review): same path as audio_dir — confirm this is intended
	eval_audio_dir = os.path.join(eval_dir, 'wavs')
	eval_plot_dir = os.path.join(eval_dir, 'plots')
	checkpoint_path = os.path.join(save_dir, 'wavenet_model.ckpt')
	input_path = os.path.join(args.base_dir, input_path)
	os.makedirs(save_dir, exist_ok=True)
	os.makedirs(wav_dir, exist_ok=True)
	os.makedirs(audio_dir, exist_ok=True)
	os.makedirs(plot_dir, exist_ok=True)
	os.makedirs(eval_audio_dir, exist_ok=True)
	os.makedirs(eval_plot_dir, exist_ok=True)

	log('Checkpoint_path: {}'.format(checkpoint_path))
	log('Loading training data from: {}'.format(input_path))
	log('Using model: {}'.format(args.model))
	log(hparams_debug_string())

	#Start by setting a seed for repeatability
	tf.set_random_seed(hparams.wavenet_random_seed)

	#Set up data feeder
	coord = tf.train.Coordinator()
	with tf.variable_scope('datafeeder') as scope:
		feeder = Feeder(coord, input_path, args.base_dir, hparams)

	#Set up model
	global_step = tf.Variable(0, name='global_step', trainable=False)
	model, stats = model_train_mode(args, feeder, hparams, global_step)
	eval_model = model_test_mode(args, feeder, hparams, global_step)

	#book keeping
	step = 0
	time_window = ValueWindow(100)
	loss_window = ValueWindow(100)
	sh_saver = create_shadow_saver(model, global_step)

	log('Wavenet training set to a maximum of {} steps'.format(args.wavenet_train_steps))

	#Allocate GPU memory on demand instead of grabbing it all up front
	config = tf.ConfigProto()
	config.gpu_options.allow_growth = True

	#Train
	with tf.Session(config=config) as sess:
		try:
			summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
			sess.run(tf.global_variables_initializer())

			#saved model restoring
			#Fix: initialize before the restore branch; previously this name was
			#unbound (NameError) when args.restore was False or when
			#get_checkpoint_state raised.
			checkpoint_state = None
			if args.restore:
				#Restore saved model if the user requested it, default = True
				try:
					checkpoint_state = tf.train.get_checkpoint_state(save_dir)
				except tf.errors.OutOfRangeError as e:
					log('Cannot restore checkpoint: {}'.format(e))

			if (checkpoint_state and checkpoint_state.model_checkpoint_path):
				log('Loading checkpoint {}'.format(checkpoint_state.model_checkpoint_path))
				load_averaged_model(sess, sh_saver, checkpoint_state.model_checkpoint_path)

			else:
				if not args.restore:
					log('Starting new training!')
				else:
					log('No model to load at {}'.format(save_dir))

			#initializing feeder
			feeder.start_threads(sess)

			#Training loop
			while not coord.should_stop() and step < args.wavenet_train_steps:
				start_time = time.time()
				step, y_hat, loss, opt = sess.run([global_step, model.y_hat, model.loss, model.optimize])
				time_window.append(time.time() - start_time)
				loss_window.append(loss)

				message = 'Step {:7d} [{:.3f} sec/step, loss={:.5f}, avg_loss={:.5f}]'.format(
					step, time_window.average, loss, loss_window.average)
				log(message, end='\r')

				#Abort on divergence rather than continuing a dead run
				if loss > 100 or np.isnan(loss):
					log('Loss exploded to {:.5f} at step {}'.format(loss, step))
					raise Exception('Loss exploded')

				if step % args.summary_interval == 0:
					log('\nWriting summary at step {}'.format(step))
					summary_writer.add_summary(sess.run(stats), step)

				if step % args.checkpoint_interval == 0:
					save_log(sess, step, model, plot_dir, audio_dir, hparams=hparams)
					save_checkpoint(sess, sh_saver, checkpoint_path, global_step)

				if step % args.eval_interval == 0:
					log('\nEvaluating at step {}'.format(step))
					eval_step(sess, step, eval_model, eval_plot_dir, eval_audio_dir, summary_writer=summary_writer, hparams=model._hparams)

			log('Wavenet training complete after {} global steps'.format(args.wavenet_train_steps))

		except Exception as e:
			log('Exiting due to Exception: {}'.format(e))
Ejemplo n.º 38
0
def train(log_dir, args, hparams):
	"""Train a Tacotron model with periodic eval, summaries and checkpoints.

	Args:
		log_dir: root output directory for checkpoints, plots, wavs and eval data.
		args: parsed CLI args; reads base_dir, tacotron_input, model, restore,
			tacotron_train_steps, summary_interval, eval_interval and
			checkpoint_interval.
		hparams: hyperparameters; tacotron_random_seed seeds the graph and
			predict_linear toggles linear-spectrogram outputs.

	Returns:
		save_dir (the checkpoint directory) on normal completion.
	"""
	save_dir = os.path.join(log_dir, 'taco_pretrained/')
	checkpoint_path = os.path.join(save_dir, 'tacotron_model.ckpt')
	input_path = os.path.join(args.base_dir, args.tacotron_input)
	plot_dir = os.path.join(log_dir, 'plots')
	wav_dir = os.path.join(log_dir, 'wavs')
	mel_dir = os.path.join(log_dir, 'mel-spectrograms')
	eval_dir = os.path.join(log_dir, 'eval-dir')
	eval_plot_dir = os.path.join(eval_dir, 'plots')
	eval_wav_dir = os.path.join(eval_dir, 'wavs')
	os.makedirs(eval_dir, exist_ok=True)
	os.makedirs(plot_dir, exist_ok=True)
	os.makedirs(wav_dir, exist_ok=True)
	os.makedirs(mel_dir, exist_ok=True)
	os.makedirs(eval_plot_dir, exist_ok=True)
	os.makedirs(eval_wav_dir, exist_ok=True)

	if hparams.predict_linear:
		linear_dir = os.path.join(log_dir, 'linear-spectrograms')
		os.makedirs(linear_dir, exist_ok=True)

	log('Checkpoint path: {}'.format(checkpoint_path))
	log('Loading training data from: {}'.format(input_path))
	log('Using model: {}'.format(args.model))
	log(hparams_debug_string())

	#Start by setting a seed for repeatability
	tf.set_random_seed(hparams.tacotron_random_seed)

	#Set up data feeder
	coord = tf.train.Coordinator()
	with tf.variable_scope('datafeeder') as scope:
		feeder = Feeder(coord, input_path, hparams)

	#Set up model:
	global_step = tf.Variable(0, name='global_step', trainable=False)
	model, stats = model_train_mode(args, feeder, hparams, global_step)
	eval_model = model_test_mode(args, feeder, hparams, global_step)

	#Book keeping
	step = 0
	time_window = ValueWindow(100)
	loss_window = ValueWindow(100)
	saver = tf.train.Saver(max_to_keep=5)

	log('Tacotron training set to a maximum of {} steps'.format(args.tacotron_train_steps))

	#Memory allocation on the GPU as needed
	config = tf.ConfigProto()
	config.gpu_options.allow_growth = True

	#Train
	with tf.Session(config=config) as sess:
		try:
			summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
			sess.run(tf.global_variables_initializer())

			#saved model restoring
			#Fix: initialize before the restore branch; previously this name was
			#unbound (NameError) when args.restore was False or when
			#get_checkpoint_state raised.
			checkpoint_state = None
			if args.restore:
				#Restore saved model if the user requested it, Default = True.
				try:
					checkpoint_state = tf.train.get_checkpoint_state(save_dir)
				except tf.errors.OutOfRangeError as e:
					log('Cannot restore checkpoint: {}'.format(e))

			if (checkpoint_state and checkpoint_state.model_checkpoint_path):
				log('Loading checkpoint {}'.format(checkpoint_state.model_checkpoint_path))
				saver.restore(sess, checkpoint_state.model_checkpoint_path)

			else:
				if not args.restore:
					log('Starting new training!')
				else:
					log('No model to load at {}'.format(save_dir))

			#initializing feeder
			feeder.start_threads(sess)

			#Training loop
			while not coord.should_stop() and step < args.tacotron_train_steps:
				start_time = time.time()
				step, loss, opt = sess.run([global_step, model.loss, model.optimize])
				time_window.append(time.time() - start_time)
				loss_window.append(loss)
				message = 'Step {:7d} [{:.3f} sec/step, loss={:.5f}, avg_loss={:.5f}]'.format(
					step, time_window.average, loss, loss_window.average)
				log(message, end='\r')

				if np.isnan(loss):
					log('Loss exploded to {:.5f} at step {}'.format(loss, step))
					raise Exception('Loss exploded')

				if step % args.summary_interval == 0:
					log('\nWriting summary at step {}'.format(step))
					summary_writer.add_summary(sess.run(stats), step)

				if step % args.eval_interval == 0:
					#Run eval and save eval stats
					log('\nRunning evaluation at step {}'.format(step))

					eval_losses = []
					before_losses = []
					after_losses = []
					stop_token_losses = []
					linear_losses = []
					linear_loss = None

					if hparams.predict_linear:
						for i in tqdm(range(feeder.test_steps)):
							eloss, before_loss, after_loss, stop_token_loss, linear_loss, mel_p, mel_t, t_len, align, lin_p = sess.run(
								[eval_model.loss, eval_model.before_loss, eval_model.after_loss,
								eval_model.stop_token_loss, eval_model.linear_loss, eval_model.mel_outputs[0], 
								eval_model.mel_targets[0], eval_model.targets_lengths[0], 
								eval_model.alignments[0], eval_model.linear_outputs[0]])
							eval_losses.append(eloss)
							before_losses.append(before_loss)
							after_losses.append(after_loss)
							stop_token_losses.append(stop_token_loss)
							linear_losses.append(linear_loss)
						linear_loss = sum(linear_losses) / len(linear_losses)

						wav = audio.inv_linear_spectrogram(lin_p.T, hparams)
						audio.save_wav(wav, os.path.join(eval_wav_dir, 'step-{}-eval-waveform-linear.wav'.format(step)), sr=hparams.sample_rate)
					else:
						for i in tqdm(range(feeder.test_steps)):
							eloss, before_loss, after_loss, stop_token_loss, mel_p, mel_t, t_len, align = sess.run(
								[eval_model.loss, eval_model.before_loss, eval_model.after_loss,
								eval_model.stop_token_loss, eval_model.mel_outputs[0], eval_model.mel_targets[0],
								eval_model.targets_lengths[0], eval_model.alignments[0]])
							eval_losses.append(eloss)
							before_losses.append(before_loss)
							after_losses.append(after_loss)
							stop_token_losses.append(stop_token_loss)

					eval_loss = sum(eval_losses) / len(eval_losses)
					before_loss = sum(before_losses) / len(before_losses)
					after_loss = sum(after_losses) / len(after_losses)
					stop_token_loss = sum(stop_token_losses) / len(stop_token_losses)

					log('Saving eval log to {}..'.format(eval_dir))
					#Save some log to monitor model improvement on same unseen sequence
					wav = audio.inv_mel_spectrogram(mel_p.T, hparams)
					audio.save_wav(wav, os.path.join(eval_wav_dir, 'step-{}-eval-waveform-mel.wav'.format(step)), sr=hparams.sample_rate)

					plot.plot_alignment(align, os.path.join(eval_plot_dir, 'step-{}-eval-align.png'.format(step)),
						info='{}, {}, step={}, loss={:.5f}'.format(args.model, time_string(), step, eloss),
						max_len=t_len // hparams.outputs_per_step)
					plot.plot_spectrogram(mel_p, os.path.join(eval_plot_dir, 'step-{}-eval-mel-spectrogram.png'.format(step)),
						info='{}, {}, step={}, loss={:.5}'.format(args.model, time_string(), step, eloss), target_spectrogram=mel_t,
						max_len=t_len)

					log('Eval loss for global step {}: {:.3f}'.format(step, eval_loss))
					log('Writing eval summary!')
					add_eval_stats(summary_writer, step, linear_loss, before_loss, after_loss, stop_token_loss, eval_loss)

				if step % args.checkpoint_interval == 0:
					#Save model and current global step
					saver.save(sess, checkpoint_path, global_step=global_step)

					log('\nSaving alignment, Mel-Spectrograms and griffin-lim inverted waveform..')
					if hparams.predict_linear:
						input_seq, mel_prediction, linear_prediction, alignment, target, target_length = sess.run([
							model.inputs[0],
							model.mel_outputs[0],
							model.linear_outputs[0],
							model.alignments[0],
							model.mel_targets[0],
							model.targets_lengths[0],
							])

						#save predicted linear spectrogram to disk (debug)
						linear_filename = 'linear-prediction-step-{}.npy'.format(step)
						np.save(os.path.join(linear_dir, linear_filename), linear_prediction.T, allow_pickle=False)

						#save griffin lim inverted wav for debug (linear -> wav)
						wav = audio.inv_linear_spectrogram(linear_prediction.T, hparams)
						audio.save_wav(wav, os.path.join(wav_dir, 'step-{}-wave-from-linear.wav'.format(step)), sr=hparams.sample_rate)

					else:
						input_seq, mel_prediction, alignment, target, target_length = sess.run([model.inputs[0],
							model.mel_outputs[0],
							model.alignments[0],
							model.mel_targets[0],
							model.targets_lengths[0],
							])

					#save predicted mel spectrogram to disk (debug)
					mel_filename = 'mel-prediction-step-{}.npy'.format(step)
					np.save(os.path.join(mel_dir, mel_filename), mel_prediction.T, allow_pickle=False)

					#save griffin lim inverted wav for debug (mel -> wav)
					wav = audio.inv_mel_spectrogram(mel_prediction.T, hparams)
					audio.save_wav(wav, os.path.join(wav_dir, 'step-{}-wave-from-mel.wav'.format(step)), sr=hparams.sample_rate)

					#save alignment plot to disk (control purposes)
					plot.plot_alignment(alignment, os.path.join(plot_dir, 'step-{}-align.png'.format(step)),
						info='{}, {}, step={}, loss={:.5f}'.format(args.model, time_string(), step, loss),
						max_len=target_length // hparams.outputs_per_step)
					#save real and predicted mel-spectrogram plot to disk (control purposes)
					plot.plot_spectrogram(mel_prediction, os.path.join(plot_dir, 'step-{}-mel-spectrogram.png'.format(step)),
						info='{}, {}, step={}, loss={:.5}'.format(args.model, time_string(), step, loss), target_spectrogram=target,
						max_len=target_length)
					log('Input at step {}: {}'.format(step, sequence_to_text(input_seq)))

			log('Tacotron training complete after {} global steps!'.format(args.tacotron_train_steps))
			return save_dir

		except Exception as e:
			log('Exiting due to exception: {}'.format(e))
			traceback.print_exc()
			coord.request_stop(e)
Ejemplo n.º 39
0
        features = features[:labels.num_frames()]
        indices = labels.silence_frame_indices()
        features = np.delete(features, indices, axis=0)

        return features.astype(np.float32)


if __name__ == "__main__":
    args = docopt(__doc__)
    print("Command line args:\n", args)
    DATA_ROOT = args["<DATA_ROOT>"]
    max_files = int(args["--max_files"])
    dst_dir = args["--dst_dir"]
    overwrite = args["--overwrite"]

    print("Acoustic", hparams_debug_string(hp_acoustic))
    print("Duration", hparams_debug_string(hp_duration))

    assert hp_acoustic.question_path == hp_duration.question_path
    assert hp_acoustic.use_phone_alignment == hp_duration.use_phone_alignment

    # Features required to train duration model
    # X -> Y
    # X: linguistic
    # Y: duration
    X_duration_source = LinguisticSource(
        DATA_ROOT, max_files,
        add_frame_features=hp_duration.add_frame_features,
        subphone_features=hp_duration.subphone_features)
    Y_duration_source = DurationSource(DATA_ROOT, max_files)