def load(self, checkpoint_path, num_speakers=2, checkpoint_step=None, model_name='tacotron'):
    """Build the synthesis graph and restore its weights from a checkpoint.

    Args:
        checkpoint_path: Either a directory or a checkpoint path. For a
            directory, the checkpoint to restore is picked by
            ``get_most_recent_checkpoint(checkpoint_path, checkpoint_step)``
            and hyper-parameters are loaded from that directory. Otherwise
            the value is restored as given and hyper-parameters are loaded
            from its parent directory.
        num_speakers: Speaker count stored on ``self.num_speakers`` and
            passed to ``model.initialize``.
        checkpoint_step: Forwarded to ``get_most_recent_checkpoint``; only
            used when ``checkpoint_path`` is a directory.
        model_name: Only used in the progress message printed here.

    Side effects:
        Sets ``self.num_speakers``, ``self.model``, ``self.wav_output`` and
        ``self.sess``.
    """
    self.num_speakers = num_speakers

    # Work out which directory holds the hyper-parameters and which
    # checkpoint is going to be restored.
    if not os.path.isdir(checkpoint_path):
        hparams_dir = os.path.dirname(checkpoint_path)
    else:
        hparams_dir = checkpoint_path
        checkpoint_path = get_most_recent_checkpoint(
            hparams_dir, checkpoint_step)

    print('Constructing model: %s' % model_name)

    # Graph inputs. speaker_id falls back to a zero vector with one entry
    # per row of `inputs` when it is not fed.
    inputs = tf.placeholder(dtype=tf.int32, shape=[None, None], name='inputs')
    input_lengths = tf.placeholder(
        dtype=tf.int32, shape=[None], name='input_lengths')
    num_rows = tf.shape(inputs)[0]
    default_speaker = tf.zeros([num_rows], dtype=tf.int32)
    speaker_id = tf.placeholder_with_default(
        default_speaker, [None], 'speaker_id')

    # Hyper-parameters must be loaded before the model is created from them.
    load_hparams(hparams, hparams_dir)

    with tf.variable_scope('model'):
        self.model = create_model(hparams)
        self.model.initialize(
            inputs,
            input_lengths,
            self.num_speakers,
            speaker_id,
            rnn_decoder_test_mode=True,
        )
        self.wav_output = inv_spectrogram_tensorflow(
            self.model.linear_outputs, hparams)

    print('Loading checkpoint: %s' % checkpoint_path)

    session_options = tf.ConfigProto(
        allow_soft_placement=True,
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=2,
    )
    session_options.gpu_options.allow_growth = True

    self.sess = tf.Session(config=session_options)
    self.sess.run(tf.global_variables_initializer())

    # Overwrite the freshly initialized variables with the checkpoint values.
    tf.train.Saver().restore(self.sess, checkpoint_path)
def train(log_dir, config):
    """Build the training and evaluation graphs and run the training loop.

    Args:
        log_dir: Directory that receives checkpoints (``model.ckpt-<step>``),
            the summary event files, and whatever ``save_and_plot`` writes.
        config: Run configuration object. Attributes read here:
            ``data_paths``, ``num_test_per_speaker``, ``git``, ``model_dir``,
            ``batch_size``, ``load_path``, ``initialize_path``,
            ``checkpoint_interval``, ``summary_interval`` and
            ``test_interval``. ``num_test`` is written
            (``num_test_per_speaker * len(data_paths)``).

    Raises:
        Exception: If more than one data path is given and
            ``hparams.model_type`` is not ``"deepvoice"`` or ``"simple"``.

    Note:
        Exceptions raised inside the session (including the deliberate
        ``'Loss Exploded'`` one) are logged, printed with a traceback and
        passed to ``coord.request_stop``; they are not re-raised.
    """
    data_dirs = config.data_paths  # e.g. ['datasets/moon']
    num_speakers = len(data_dirs)
    config.num_test = config.num_test_per_speaker * num_speakers

    if num_speakers > 1 and hparams.model_type not in ["deepvoice", "simple"]:
        # Report the value that was actually tested (hparams.model_type);
        # the message previously formatted config.model_type instead.
        raise Exception("[!] Unkown model_type for multi-speaker: {}".format(
            hparams.model_type))

    commit = get_git_commit() if config.git else 'None'
    # e.g. 'logdir-tacotron\\moon_2018-08-28_13-06-42\\model.ckpt'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')

    # hccho: the git revision / git diff log lines were commented out.
    # log(' [*] git recv-parse HEAD:\n%s' % get_git_revision_hash())
    log('=' * 50)
    # log(' [*] dit diff:\n%s' % get_git_diff())
    log('=' * 50)
    log(' [*] Checkpoint path: %s' % checkpoint_path)
    log(' [*] Loading training data from: %s' % data_dirs)
    # e.g. 'logdir-tacotron\\moon_2018-08-28_13-06-42'
    log(' [*] Using model: %s' % config.model_dir)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder'):
        # The feeder attributes consumed below are: inputs, input_lengths,
        # loss_coeff, mel_targets, linear_targets and speaker_id.
        train_feeder = DataFeederTacotron(coord,
                                          data_dirs,
                                          hparams,
                                          config,
                                          32,
                                          data_type='train',
                                          batch_size=config.batch_size)
        test_feeder = DataFeederTacotron(coord,
                                         data_dirs,
                                         hparams,
                                         config,
                                         8,
                                         data_type='test',
                                         batch_size=config.num_test)

    # Set up model:
    is_randomly_initialized = config.initialize_path is None
    global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.variable_scope('model'):
        model = create_model(hparams)
        model.initialize(train_feeder.inputs,
                         train_feeder.input_lengths,
                         num_speakers,
                         train_feeder.speaker_id,
                         train_feeder.mel_targets,
                         train_feeder.linear_targets,
                         train_feeder.loss_coeff,
                         is_randomly_initialized=is_randomly_initialized)
        model.add_loss()
        model.add_optimizer(global_step)
        train_stats = add_stats(model, scope_name='stats')  # legacy

    # The evaluation model re-uses the variables created above.
    with tf.variable_scope('model', reuse=True):
        test_model = create_model(hparams)
        test_model.initialize(test_feeder.inputs,
                              test_feeder.input_lengths,
                              num_speakers,
                              test_feeder.speaker_id,
                              test_feeder.mel_targets,
                              test_feeder.linear_targets,
                              test_feeder.loss_coeff,
                              rnn_decoder_test_mode=True,
                              is_randomly_initialized=is_randomly_initialized)
        test_model.add_loss()

    test_stats = add_stats(test_model, model, scope_name='test')
    test_stats = tf.summary.merge([test_stats, train_stats])

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=None, keep_checkpoint_every_n_hours=2)

    # NOTE(review): this config is built but the session below is opened
    # without it (the configured variant was commented out by the author).
    # Confirm whether `tf.Session(config=sess_config)` should be restored.
    sess_config = tf.ConfigProto(log_device_placement=False,
                                 allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    # Train!
    # with tf.Session(config=sess_config) as sess:
    with tf.Session() as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            if config.load_path:
                # Restore from a checkpoint if the user requested it.
                restore_path = get_most_recent_checkpoint(config.model_dir)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' %
                    (restore_path, commit),
                    slack=True)
            elif config.initialize_path:
                # Warm start: take the weights from another run but restart
                # the step counter at zero.
                restore_path = get_most_recent_checkpoint(
                    config.initialize_path)
                saver.restore(sess, restore_path)
                log('Initialized from checkpoint: %s at commit: %s' %
                    (restore_path, commit),
                    slack=True)

                zero_step_assign = tf.assign(global_step, 0)
                sess.run(zero_step_assign)

                start_step = sess.run(global_step)
                log('=' * 50)
                log(' [*] Global step is reset to {}'.format(start_step))
                log('=' * 50)
            else:
                log('Starting new training run at commit: %s' % commit,
                    slack=True)

            start_step = sess.run(global_step)

            train_feeder.start_in_session(sess, start_step)
            test_feeder.start_in_session(sess, start_step)

            while not coord.should_stop():
                start_time = time.time()
                step, loss, _ = sess.run(
                    [global_step, model.loss_without_coeff, model.optimize],
                    feed_dict=model.get_dummy_feed_dict())

                time_window.append(time.time() - start_time)
                loss_window.append(loss)

                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, time_window.average, loss, loss_window.average)
                # Forward the progress line to slack only on checkpoint steps.
                log(message, slack=(step % config.checkpoint_interval == 0))

                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step),
                        slack=True)
                    raise Exception('Loss Exploded')

                if step % config.summary_interval == 0:
                    log('Writing summary at step: %d' % step)

                    feed_dict = {
                        **model.get_dummy_feed_dict(),
                        **test_model.get_dummy_feed_dict()
                    }
                    summary_writer.add_summary(
                        sess.run(test_stats, feed_dict=feed_dict), step)

                if step % config.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' %
                        (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)

                if step % config.test_interval == 0:
                    log('Saving audio and alignment...')
                    num_test = config.num_test

                    fetches = [
                        model.inputs[:num_test],
                        model.linear_outputs[:num_test],
                        model.alignments[:num_test],
                        test_model.inputs[:num_test],
                        test_model.linear_outputs[:num_test],
                        test_model.alignments[:num_test],
                    ]
                    feed_dict = {
                        **model.get_dummy_feed_dict(),
                        **test_model.get_dummy_feed_dict()
                    }

                    (sequences, spectrograms, alignments, test_sequences,
                     test_spectrograms, test_alignments) = sess.run(
                         fetches, feed_dict=feed_dict)

                    # librosa requires ffmpeg to be installed.
                    # Only the first training sample is saved. Per the
                    # original author's note, spectrograms are
                    # (num_test, 200, 1025) and alignments are
                    # (num_test, encoder_length, decoder_length).
                    save_and_plot(sequences[:1], spectrograms[:1],
                                  alignments[:1], log_dir, step, loss,
                                  "train")
                    save_and_plot(test_sequences, test_spectrograms,
                                  test_alignments, log_dir, step, loss,
                                  "test")

        except Exception as e:
            # Top-level boundary of the training run: report the failure and
            # pass it to the coordinator via request_stop.
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)