def load(self,
         checkpoint_path,
         num_speakers=2,
         checkpoint_step=None,
         model_name='tacotron'):
        self.num_speakers = num_speakers

        if os.path.isdir(checkpoint_path):
            load_path = checkpoint_path
            checkpoint_path = get_most_recent_checkpoint(
                checkpoint_path, checkpoint_step)
        else:
            load_path = os.path.dirname(checkpoint_path)

        print('Constructing model: %s' % model_name)

        inputs = tf.placeholder(tf.int32, [None, None], 'inputs')
        input_lengths = tf.placeholder(tf.int32, [None], 'input_lengths')

        batch_size = tf.shape(inputs)[0]
        speaker_id = tf.placeholder_with_default(
            tf.zeros([batch_size], dtype=tf.int32), [None], 'speaker_id')

        load_hparams(hparams, load_path)
        with tf.variable_scope('model') as scope:
            self.model = create_model(hparams)

            self.model.initialize(inputs, input_lengths, self.num_speakers,
                                  speaker_id)
            self.wav_output = inv_spectrogram_tensorflow(
                self.model.linear_outputs)

        print('Loading checkpoint: %s' % checkpoint_path)

        sess_config = tf.ConfigProto(allow_soft_placement=True,
                                     intra_op_parallelism_threads=1,
                                     inter_op_parallelism_threads=2)
        sess_config.gpu_options.allow_growth = True

        self.sess = tf.Session(config=sess_config)
        self.sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(self.sess, checkpoint_path)
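
This load method reads like a method on a synthesizer-style class: it builds the inference graph, then restores weights from the given checkpoint. A minimal usage sketch follows; the Synthesizer class name and the directory layout are assumptions for illustration, not taken from the snippet.

# Hypothetical usage; 'Synthesizer' is an assumed class exposing load() above.
synth = Synthesizer()
synth.load('logs/tacotron',       # checkpoint directory or explicit .ckpt path
           num_speakers=2,
           checkpoint_step=None)  # None selects the most recent checkpoint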
Example #2
def train(log_dir, config):
    config.data_paths = config.data_paths

    data_dirs = [os.path.join(data_path, "data")
                 for data_path in config.data_paths]
    num_speakers = len(data_dirs)
    config.num_test = config.num_test_per_speaker * num_speakers

    if num_speakers > 1 and hparams.model_type not in ["deepvoice", "simple"]:
        raise Exception("[!] Unknown model_type for multi-speaker: {}".format(
            hparams.model_type))

    commit = get_git_commit() if config.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')

    #log(' [*] git rev-parse HEAD:\n%s' % get_git_revision_hash())
    log('=' * 50)
    #log(' [*] git diff:\n%s' % get_git_diff())
    log('=' * 50)
    log(' [*] Checkpoint path: %s' % checkpoint_path)
    log(' [*] Loading training data from: %s' % data_dirs)
    log(' [*] Using model: %s' % config.model_dir)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        train_feeder = DataFeeder(coord,
                                  data_dirs,
                                  hparams,
                                  config,
                                  32,  # batches_per_group
                                  data_type='train',
                                  batch_size=hparams.batch_size)
        test_feeder = DataFeeder(coord,
                                 data_dirs,
                                 hparams,
                                 config,
                                 8,  # batches_per_group
                                 data_type='test',
                                 batch_size=config.num_test)

    # Set up model:
    is_randomly_initialized = config.initialize_path is None
    global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.variable_scope('model') as scope:
        model = create_model(hparams)
        model.initialize(train_feeder.inputs,
                         train_feeder.input_lengths,
                         num_speakers,
                         train_feeder.speaker_id,
                         train_feeder.mel_targets,
                         train_feeder.linear_targets,
                         train_feeder.loss_coeff,
                         is_randomly_initialized=is_randomly_initialized)

        model.add_loss()
        model.add_optimizer(global_step)
        train_stats = add_stats(model, scope_name='stats')  # legacy

    with tf.variable_scope('model', reuse=True) as scope:
        test_model = create_model(hparams)
        test_model.initialize(test_feeder.inputs,
                              test_feeder.input_lengths,
                              num_speakers,
                              test_feeder.speaker_id,
                              test_feeder.mel_targets,
                              test_feeder.linear_targets,
                              test_feeder.loss_coeff,
                              rnn_decoder_test_mode=True,
                              is_randomly_initialized=is_randomly_initialized)
        test_model.add_loss()

    test_stats = add_stats(test_model, model, scope_name='test')
    test_stats = tf.summary.merge([test_stats, train_stats])

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=None, keep_checkpoint_every_n_hours=2)

    sess_config = tf.ConfigProto(log_device_placement=False,
                                 allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    # Train!
    with tf.Session(config=sess_config) as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            if config.load_path:
                # Restore from a checkpoint if the user requested it.
                restore_path = get_most_recent_checkpoint(config.model_dir)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' %
                    (restore_path, commit),
                    slack=True)
            elif config.initialize_path:
                restore_path = get_most_recent_checkpoint(
                    config.initialize_path)
                saver.restore(sess, restore_path)
                log('Initialized from checkpoint: %s at commit: %s' %
                    (restore_path, commit),
                    slack=True)

                zero_step_assign = tf.assign(global_step, 0)
                sess.run(zero_step_assign)

                start_step = sess.run(global_step)
                log('=' * 50)
                log(' [*] Global step is reset to {}'.format(start_step))
                log('=' * 50)
            else:
                log('Starting new training run at commit: %s' % commit,
                    slack=True)

            start_step = sess.run(global_step)

            train_feeder.start_in_session(sess, start_step)
            test_feeder.start_in_session(sess, start_step)

            while not coord.should_stop():
                start_time = time.time()
                step, loss, opt = sess.run(
                    [global_step, model.loss_without_coeff, model.optimize],
                    feed_dict=model.get_dummy_feed_dict())

                time_window.append(time.time() - start_time)
                loss_window.append(loss)

                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % config.checkpoint_interval == 0))

                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step),
                        slack=True)
                    raise Exception('Loss Exploded')

                if step % config.summary_interval == 0:
                    log('Writing summary at step: %d' % step)

                    feed_dict = {
                        **model.get_dummy_feed_dict(),
                        **test_model.get_dummy_feed_dict()
                    }
                    summary_writer.add_summary(
                        sess.run(test_stats, feed_dict=feed_dict), step)

                if step % config.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' %
                        (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)

                if step % config.test_interval == 0:
                    log('Saving audio and alignment...')
                    num_test = config.num_test

                    fetches = [
                        model.inputs[:num_test],
                        model.linear_outputs[:num_test],
                        model.alignments[:num_test],
                        test_model.inputs[:num_test],
                        test_model.linear_outputs[:num_test],
                        test_model.alignments[:num_test],
                    ]
                    feed_dict = {
                        **model.get_dummy_feed_dict(),
                        **test_model.get_dummy_feed_dict()
                    }

                    sequences, spectrograms, alignments, \
                            test_sequences, test_spectrograms, test_alignments = \
                                    sess.run(fetches, feed_dict=feed_dict)

                    save_and_plot(sequences[:1], spectrograms[:1],
                                  alignments[:1], log_dir, step, loss, "train")
                    save_and_plot(test_sequences, test_spectrograms,
                                  test_alignments, log_dir, step, loss, "test")

        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
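
Both snippets lean on a helper, get_most_recent_checkpoint, that is not shown. A plausible sketch follows, assuming the TF1 convention of 'model.ckpt-<step>' prefixes with '.index' sidecar files; this is an illustration, not the repository's actual implementation.

import glob
import os

def get_most_recent_checkpoint(checkpoint_dir, checkpoint_step=None):
    # Sketch: list the 'model.ckpt-<step>.index' files TF1 writes and pick
    # the highest step (or the explicitly requested one).
    index_files = glob.glob(os.path.join(checkpoint_dir, '*.ckpt-*.index'))
    steps = sorted(int(p.rsplit('-', 1)[1].replace('.index', ''))
                   for p in index_files)
    step = checkpoint_step if checkpoint_step is not None else steps[-1]
    return os.path.join(checkpoint_dir, 'model.ckpt-{}'.format(step))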
Example #3
def train(log_dir, config):
    config.data_paths = config.data_paths  # parsed command-line data paths; default='datasets/kr_example'

    data_dirs = [os.path.join(data_path, "data")
                 for data_path in config.data_paths]
    num_speakers = len(data_dirs)  # number of speakers: 1 for a single-speaker model, 2+ for multi-speaker
    config.num_test = config.num_test_per_speaker * num_speakers

    if num_speakers > 1 and hparams.model_type not in ["deepvoice", "simple"]:  # multi-speaker training requires model_type "deepvoice" or "simple"
        raise Exception("[!] Unknown model_type for multi-speaker: {}".format(hparams.model_type))

    commit = get_git_commit() if config.git else 'None'  # record the current git commit (only when git is enabled)
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')  # path prefix for the model.ckpt checkpoint files

    log(' [*] git rev-parse HEAD:\n%s' % get_git_revision_hash())  # git log
    log('='*50)  # separator line
    #log(' [*] git diff:\n%s' % get_git_diff())
    log('='*50)  # separator line
    log(' [*] Checkpoint path: %s' % checkpoint_path)  # print the checkpoint path
    log(' [*] Loading training data from: %s' % data_dirs)
    log(' [*] Using model: %s' % config.model_dir)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()  # coordinator for the data-feeder threads
    with tf.variable_scope('datafeeder') as scope:
        train_feeder = DataFeeder(
                coord, data_dirs, hparams, config, 32,
                data_type='train', batch_size=hparams.batch_size)
        # def __init__(self, coordinator, data_dirs, hparams, config, batches_per_group, data_type, batch_size):
        test_feeder = DataFeeder(
                coord, data_dirs, hparams, config, 8,
                data_type='test', batch_size=config.num_test)

    # Set up model:
    is_randomly_initialized = config.initialize_path is None
    global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.variable_scope('model') as scope:
        model = create_model(hparams)  # build the Tacotron model
        model.initialize(
                train_feeder.inputs, train_feeder.input_lengths,
                num_speakers,  train_feeder.speaker_id,
                train_feeder.mel_targets, train_feeder.linear_targets,
                train_feeder.loss_coeff,
                is_randomly_initialized=is_randomly_initialized)

        model.add_loss()
        model.add_optimizer(global_step)
        train_stats = add_stats(model, scope_name='stats') # legacy

    with tf.variable_scope('model', reuse=True) as scope:
        test_model = create_model(hparams)  # build the Tacotron test model (weights shared via reuse=True)
        test_model.initialize(
                test_feeder.inputs, test_feeder.input_lengths,
                num_speakers, test_feeder.speaker_id,
                test_feeder.mel_targets, test_feeder.linear_targets,
                test_feeder.loss_coeff, rnn_decoder_test_mode=True,
                is_randomly_initialized=is_randomly_initialized)
        test_model.add_loss()

    test_stats = add_stats(test_model, model, scope_name='test')  # record losses etc. to TensorBoard; test_model is passed as model, model as model2
    test_stats = tf.summary.merge([test_stats, train_stats])

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)  # moving window over the last 100 values
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=None, keep_checkpoint_every_n_hours=2)  # keep a checkpoint every 2 hours and never delete old ones

    sess_config = tf.ConfigProto(
            log_device_placement=False,  # log_device_placement reports which device each op is assigned to
            allow_soft_placement=True)  # without allow_soft_placement, a missing GPU raises an error
    sess_config.gpu_options.allow_growth=True  # grow GPU memory usage on demand

    # Train!
    with tf.Session(config=sess_config) as sess:  # all ops below run in this session
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)  # write summaries and the graph to log_dir for TensorBoard
            sess.run(tf.global_variables_initializer())  # initialize all variables once the graph is fully defined

            if config.load_path:  # a load path was given: resume training
                # Restore from a checkpoint if the user requested it.
                restore_path = get_most_recent_checkpoint(config.model_dir)  # path of the most recently saved checkpoint
                saver.restore(sess, restore_path)  # restore weights from it
                log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)  # log (also to Slack)
            elif config.initialize_path:  # an initialize path was given: warm-start from it
                restore_path = get_most_recent_checkpoint(config.initialize_path)  # most recent checkpoint under that path
                saver.restore(sess, restore_path)  # restore weights from it
                log('Initialized from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)  # log (also to Slack)

                zero_step_assign = tf.assign(global_step, 0)  # op that resets global_step to 0
                sess.run(zero_step_assign)  # run the reset

                start_step = sess.run(global_step)  # read back the (now zero) global step
                log('='*50)
                log(' [*] Global step is reset to {}'.format(start_step))  # report that training restarts from step 0
                log('='*50)
            else:
                log('Starting new training run at commit: %s' % commit, slack=True)  # no checkpoint requested: start a fresh run

            start_step = sess.run(global_step)  # fetch the step to start from

            train_feeder.start_in_session(sess, start_step)
            test_feeder.start_in_session(sess, start_step)

            while not coord.should_stop():  # loop until the coordinator requests a stop
                start_time = time.time()  # start timing this step (seconds since the Unix epoch)
                step, loss, opt = sess.run(
                        [global_step, model.loss_without_coeff, model.optimize],
                        feed_dict=model.get_dummy_feed_dict())  # run one optimization step; fetch the current step and loss

                time_window.append(time.time() - start_time)
                loss_window.append(loss)

                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                        step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % config.checkpoint_interval == 0))

                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
                    raise Exception('Loss Exploded')

                if step % config.summary_interval == 0:
                    log('Writing summary at step: %d' % step)

                    feed_dict = {
                            **model.get_dummy_feed_dict(),
                            **test_model.get_dummy_feed_dict()
                    }
                    summary_writer.add_summary(sess.run(
                            test_stats, feed_dict=feed_dict), step)

                if step % config.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)

                if step % config.test_interval == 0:
                    log('Saving audio and alignment...')
                    num_test = config.num_test

                    fetches = [
                            model.inputs[:num_test],
                            model.linear_outputs[:num_test],
                            model.alignments[:num_test],
                            test_model.inputs[:num_test],
                            test_model.linear_outputs[:num_test],
                            test_model.alignments[:num_test],
                    ]
                    feed_dict = {
                            **model.get_dummy_feed_dict(),
                            **test_model.get_dummy_feed_dict()
                    }

                    sequences, spectrograms, alignments, \
                            test_sequences, test_spectrograms, test_alignments = \
                                    sess.run(fetches, feed_dict=feed_dict)

                    save_and_plot(sequences[:1], spectrograms[:1], alignments[:1],
                            log_dir, step, loss, "train")
                    save_and_plot(test_sequences, test_spectrograms, test_alignments,
                            log_dir, step, loss, "test")

        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
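
Neither train snippet shows its entry point. A minimal driver sketch follows; every flag name here is inferred from the config attributes the code reads (data_paths, num_test_per_speaker, git, load_path, initialize_path, model_dir, and the *_interval values) and is an assumption, not the repository's actual CLI.

import argparse
import os
import time

# Hypothetical entry point; defaults are illustrative only.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_paths', nargs='+', default=['datasets/kr_example'])
    parser.add_argument('--num_test_per_speaker', type=int, default=2)
    parser.add_argument('--load_path', default=None)
    parser.add_argument('--initialize_path', default=None)
    parser.add_argument('--model_dir', default='logs')
    parser.add_argument('--summary_interval', type=int, default=100)
    parser.add_argument('--checkpoint_interval', type=int, default=1000)
    parser.add_argument('--test_interval', type=int, default=500)
    parser.add_argument('--git', action='store_true')
    config = parser.parse_args()

    log_dir = os.path.join('logs', time.strftime('%Y-%m-%d_%H-%M-%S'))
    os.makedirs(log_dir, exist_ok=True)
    train(log_dir, config)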