def main05(ROOT_PATH, path_loadSession, path_directory, MODEL_NAME):
    logFolderName = 'exp/log/%s' % MODEL_NAME
    logFileName = '%s/prepareDatasets.log' % logFolderName
    log_path = os.path.join(ROOT_PATH, logFolderName)
    makedirs(log_path)
    log_path = os.path.join(ROOT_PATH, logFileName)
    infolog.init(log_path, ROOT_PATH)

    # make5thWaves("%s%s" % (path_loadSession, 5))
    for k in range(5):
        session_ = []
        session = load_session("%s%s" % (path_loadSession, k + 1))
        for idx in range(len(session)):
            session_.append(session[idx])

        dic_ = count_emotion(session_)
        log('=' * 50)
        log('Total Session_%d :' % (k + 1) + " %d" % sum(dic_.values()))
        log(dic_)
        pathName1 = "%s/session%d/" % (path_directory, (k + 1))
        log('=' * 50)
        if save_wavFile(session_, pathName1) == 0:
            log('Saved session_%d wave files successfully.' % (k + 1))
        log('=' * 50)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--base_dir', default='')
    parser.add_argument('--input', default='training/train.txt')
    parser.add_argument('--model', default='Tacotron')
    parser.add_argument('--name', help='Name of the run. Used for logging. Defaults to model name.')
    parser.add_argument('--restore', type=bool, default=True, help='Set this to False to do a fresh training')
    parser.add_argument('--summary_interval', type=int, default=10, help='Steps between running summary ops')
    parser.add_argument('--checkpoint_interval', type=int, default=100, help='Steps between writing checkpoints')
    parser.add_argument('--tf_log_level', type=int, default=1, help='TensorFlow C++ log level.')
    args = parser.parse_args()
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_log_level)

    run_name = args.name or args.model
    log_dir = os.path.join(args.base_dir, 'logs-{}'.format(run_name))
    os.makedirs(log_dir, exist_ok=True)
    infolog.init(os.path.join(log_dir, 'Terminal_train_log'), run_name)
    args.hparams = hparams
    train(log_dir, args)
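# Note (illustrative, not part of the original script): argparse's type=bool converts any
# non-empty string to True, so "--restore False" still yields True. The usual workaround is
# a string-to-bool converter passed as type=..., similar to the str2bool helper imported from
# utils in the other training scripts and the _str_to_bool defined in the WaveNet script below;
# the exact helper in this repo may differ. A minimal sketch:
def str2bool(v):
    if isinstance(v, bool):
        return v
    if v.lower() in ('true', 't', '1', 'yes'):
        return True
    if v.lower() in ('false', 'f', '0', 'no'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got {!r}'.format(v))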
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--log_dir', default='logdir-tacotron2')
    parser.add_argument('--data_paths',
                        default='D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\moon,D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\son')
    # parser.add_argument('--data_paths', default='D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\small1,D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\small2')

    # parser.add_argument('--load_path', default=None)
    parser.add_argument('--load_path',
                        default='logdir-tacotron2/moon+son_2019-03-01_10-35-44')  # takes precedence over 'initialize_path' below
    parser.add_argument('--initialize_path', default=None)  # restore the model from a checkpoint, but start the global step from 0

    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--num_test_per_speaker', type=int, default=2)
    parser.add_argument('--random_seed', type=int, default=123)
    parser.add_argument('--summary_interval', type=int, default=100)
    parser.add_argument('--test_interval', type=int, default=500)
    parser.add_argument('--checkpoint_interval', type=int, default=2000)

    parser.add_argument('--skip_path_filter', type=str2bool, default=False, help='Use only for debugging')
    parser.add_argument('--slack_url', help='Slack webhook URL to get periodic reports.')
    parser.add_argument('--git', action='store_true',
                        help='If set, verify that the client is clean.')  # store_true gives a default of False

    config = parser.parse_args()
    config.data_paths = config.data_paths.split(",")
    setattr(hparams, "num_speakers", len(config.data_paths))

    prepare_dirs(config, hparams)
    log_path = os.path.join(config.model_dir, 'train.log')
    infolog.init(log_path, config.model_dir, config.slack_url)

    tf.set_random_seed(config.random_seed)
    print(config.data_paths)

    if config.load_path is not None and config.initialize_path is not None:
        raise Exception(" [!] Only one of load_path and initialize_path should be set")

    train(config.model_dir, config)
def main():
    parser = argparse.ArgumentParser()  # use the argparse library
    '''
    add_argument registers command-line options: positional and optional arguments.
      Positional argument: an option name without a leading - or --; declaration order determines the expected order on the command line.
      Optional argument: an option name prefixed with - or --.
      Optional arguments may be omitted on the command line; omitting a positional argument raises an error.

    add_argument('option', type=x, default=y, action='store_true', help='hello, python')
      'option'             : name of the command-line option
      type=x               : type of the value received through the option
      default=y            : value used when the option is not given (otherwise it would be None)
      action='store_true'  : the option stores True when present; its default value is False
      help='hello, python' : description shown next to the option by -h / --help
    '''
    parser.add_argument('--log_dir', default='logs')
    parser.add_argument('--data_paths', default='datasets/kr_example')
    parser.add_argument('--load_path', default=None)
    parser.add_argument('--initialize_path', default=None)

    parser.add_argument('--num_test_per_speaker', type=int, default=2)
    parser.add_argument('--random_seed', type=int, default=123)
    parser.add_argument('--summary_interval', type=int, default=100)
    parser.add_argument('--test_interval', type=int, default=500)
    parser.add_argument('--checkpoint_interval', type=int, default=1000)

    parser.add_argument('--skip_path_filter', type=str2bool, default=False,
                        help='Use only for debugging')  # uses the str2bool helper from utils/__init__.py
    parser.add_argument('--slack_url', help='Slack webhook URL to get periodic reports.')
    parser.add_argument('--git', action='store_true', help='If set, verify that the client is clean.')

    config = parser.parse_args()  # parse the command-line arguments
    config.data_paths = config.data_paths.split(",")  # note: split data_paths (default: datasets/kr_example) on commas
    setattr(hparams, "num_speakers", len(config.data_paths))  # set hparams.num_speakers to len(config.data_paths)

    prepare_dirs(config, hparams)

    log_path = os.path.join(config.model_dir, 'train.log')  # join config.model_dir and 'train.log'
    infolog.init(log_path, config.model_dir, config.slack_url)

    tf.set_random_seed(config.random_seed)
    print(config.data_paths)

    if any("krbook" not in data_path for data_path in config.data_paths) and \
            hparams.sample_rate != 20000:
        warning("Detected a non-krbook dataset. You may need to set the sampling rate from {} to 20000.".format(hparams.sample_rate))

    if any('LJ' in data_path for data_path in config.data_paths) and \
            hparams.sample_rate != 22050:
        warning("Detected the LJ Speech dataset. Set the sampling rate from {} to 22050.".format(hparams.sample_rate))

    if config.load_path is not None and config.initialize_path is not None:
        raise Exception(" [!] Only one of load_path and initialize_path should be set")

    train(config.model_dir, config)
def prepare_run(args):
    modified_hp = hparams.parse(args.hparams)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_log_level)
    run_name = args.name or args.model
    log_dir = os.path.join(args.base_dir, 'logs-{}'.format(run_name))
    os.makedirs(log_dir, exist_ok=True)
    infolog.init(os.path.join(log_dir, 'Terminal_train_log'), run_name, args.slack_url)
    return log_dir, modified_hp
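# Illustrative usage of prepare_run() (hypothetical entry point; the argument names simply
# mirror the attributes prepare_run reads: base_dir, hparams, tf_log_level, name, model, slack_url).
def example_main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--base_dir', default='')
    parser.add_argument('--hparams', default='', help='Comma-separated name=value hyperparameter overrides')
    parser.add_argument('--model', default='Tacotron')
    parser.add_argument('--name', help='Name of the run; defaults to the model name')
    parser.add_argument('--tf_log_level', type=int, default=1)
    parser.add_argument('--slack_url', default=None)
    args = parser.parse_args()

    log_dir, modified_hp = prepare_run(args)
    train(log_dir, args, modified_hp)  # hypothetical train() signature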
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--log_dir', default='logs')
    parser.add_argument('--data_paths', default='datasets/LJSpeech_1_0')
    parser.add_argument('--load_path', default=None)
    parser.add_argument('--initialize_path', default=None)
    parser.add_argument('--model', default='tacotron', help='tacotron or tacotron2')

    parser.add_argument('--num_test_per_speaker', type=int, default=2)
    parser.add_argument('--random_seed', type=int, default=123)
    parser.add_argument('--summary_interval', type=int, default=20)
    parser.add_argument('--test_interval', type=int, default=100)
    parser.add_argument('--checkpoint_interval', type=int, default=500)

    parser.add_argument('--skip_path_filter', type=str2bool, default=True, help='Use only for debugging')
    parser.add_argument('--slack_url', help='Slack webhook URL to get periodic reports.')
    parser.add_argument('--git', action='store_true', help='If set, verify that the client is clean.')

    config = parser.parse_args()
    config.data_paths = config.data_paths.split(",")
    setattr(hparams, "num_speakers", len(config.data_paths))

    prepare_dirs(config, hparams)

    log_path = os.path.join(config.model_dir, 'train.log')
    infolog.init(log_path, config.model_dir, config.slack_url)

    tf.set_random_seed(config.random_seed)
    print(config.data_paths)

    if any("krbook" not in data_path for data_path in config.data_paths) and \
            hparams.sample_rate != 20000:
        warning("Detected a non-krbook dataset. You may need to set the sampling rate from {} to 20000.".format(hparams.sample_rate))

    if any('LJ' in data_path for data_path in config.data_paths) and \
            hparams.sample_rate != 22050:
        warning("Detected the LJ Speech dataset. Set the sampling rate from {} to 22050.".format(hparams.sample_rate))

    if config.load_path is not None and config.initialize_path is not None:
        raise Exception(" [!] Only one of load_path and initialize_path should be set")

    train(config.model_dir, config)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--log_dir', default='logdir-tacotron')
    parser.add_argument('--data_paths', default='./data/IU,./data/kss')
    parser.add_argument('--load_path',
                        default='./logdir-tacotron/IU+kss_2019-01-08_03-26-23')  # takes precedence over 'initialize_path' below
    # parser.add_argument('--load_path', default='logdir-tacotron/moon+son_2018-12-25_19-03-21')
    parser.add_argument('--initialize_path', default=None)  # restore the model from a checkpoint, but start the global step from 0

    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--num_test_per_speaker', type=int, default=2)
    parser.add_argument('--random_seed', type=int, default=123)
    parser.add_argument('--summary_interval', type=int, default=100000)
    parser.add_argument('--test_interval', type=int, default=500)
    parser.add_argument('--checkpoint_interval', type=int, default=5000)

    parser.add_argument('--skip_path_filter', type=str2bool, default=False, help='Use only for debugging')
    parser.add_argument('--slack_url', help='Slack webhook URL to get periodic reports.')
    parser.add_argument('--git', action='store_true',
                        help='If set, verify that the client is clean.')  # store_true gives a default of False

    config = parser.parse_args()
    config.data_paths = config.data_paths.split(",")
    setattr(hparams, "num_speakers", len(config.data_paths))

    prepare_dirs(config, hparams)
    log_path = os.path.join(config.model_dir, 'train.log')
    infolog.init(log_path, config.model_dir, config.slack_url)

    tf.set_random_seed(config.random_seed)
    print(config.data_paths)

    if any("krbook" not in data_path for data_path in config.data_paths) and hparams.sample_rate != 20000:
        warning("Detected a non-krbook dataset. You may need to set the sampling rate from {} to 20000.".format(hparams.sample_rate))

    if any('LJ' in data_path for data_path in config.data_paths) and hparams.sample_rate != 22050:
        warning("Detected the LJ Speech dataset. Set the sampling rate from {} to 22050.".format(hparams.sample_rate))

    if config.load_path is not None and config.initialize_path is not None:
        raise Exception(" [!] Only one of load_path and initialize_path should be set")

    # print('===config.model_dir====')
    # print(config.model_dir)
    train(config.model_dir, config)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--log_dir', default='logdir-tacotron2')
    parser.add_argument('--data_paths', default=None)
    parser.add_argument('--load_path', default=None)  # takes precedence over 'initialize_path' below
    parser.add_argument('--initialize_path', default=None)  # restore the model from a checkpoint, but start the global step from 0

    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--num_test_per_speaker', type=int, default=2)
    parser.add_argument('--random_seed', type=int, default=123)
    parser.add_argument('--summary_interval', type=int, default=100)
    parser.add_argument('--test_interval', type=int, default=500)
    parser.add_argument('--checkpoint_interval', type=int, default=2000)

    parser.add_argument('--skip_path_filter', type=str2bool, default=False, help='Use only for debugging')
    parser.add_argument('--lang', default='Korean')
    parser.add_argument('--slack_url', help='Slack webhook URL to get periodic reports.')
    parser.add_argument('--git', action='store_true',
                        help='If set, verify that the client is clean.')  # store_true gives a default of False

    config = parser.parse_args()
    if not config.data_paths:
        raise Exception("data paths are not set")

    config.data_paths = config.data_paths.split(",")
    setattr(default_hparams, "num_speakers", len(config.data_paths))
    if default_hparams.num_speakers == 1:
        default_hparams.set_hparam("model_type", "single")
    else:
        default_hparams.set_hparam("model_type", "multi-speaker")

    prepare_dirs(config, default_hparams)

    log_path = os.path.join(config.model_dir, 'train.log')
    infolog.init(log_path, config.model_dir, config.slack_url)

    tf.set_random_seed(config.random_seed)
    print(config.data_paths)

    if config.load_path is not None and config.initialize_path is not None:
        raise Exception(" [!] Only one of load_path and initialize_path should be set")

    train(config.model_dir, config)
def main(ROOT_PATH, MODIFIED_DATASETS_PATH, MODEL_NAME, nFolders, segmentNum):
    MODEL_PATH = os.path.join(ROOT_PATH, "datasets/IEMOCAP", MODEL_NAME)
    makedirs(MODEL_PATH)

    logFolderName = 'exp/log/%s' % MODEL_NAME
    logFileName = '%s/Extract_Segment_Level_Feats.log' % logFolderName
    log_path = os.path.join(ROOT_PATH, logFolderName)
    makedirs(log_path)
    log_path = os.path.join(ROOT_PATH, logFileName)
    infolog.init(log_path, ROOT_PATH)

    log("Extracting segment-level features......................")
    for idx, subdir in enumerate(os.listdir(MODIFIED_DATASETS_PATH)):
        subdir = os.path.join(MODIFIED_DATASETS_PATH, subdir)
        extract_segment_level_features(subdir, MODEL_PATH, idx, segmentNum)

    log("Saving datasets for Cross Validation...................")
    for idx in range(nFolders):
        save_cross_validation_5folder(idx, nFolders, MODEL_PATH)
def main(ROOT_PATH, MODEL_NAME, nFolders):
    MODEL_PATH = os.path.join(ROOT_PATH, "datasets/IEMOCAP", MODEL_NAME)
    makedirs(MODEL_PATH)

    logFolderName = 'exp/log/%s' % MODEL_NAME
    logFileName = '%s/Extract_Uttrance_Level_Feats.log' % logFolderName
    log_path = os.path.join(ROOT_PATH, logFolderName)
    makedirs(log_path)
    log_path = os.path.join(ROOT_PATH, logFileName)
    infolog.init(log_path, ROOT_PATH)

    acc_stat1 = np.zeros(2)
    for idx in range(nFolders):
        acc_stat1 += extract_utterance_level_features(idx, MODEL_PATH)

    log('=' * 50)
    log('Total Accuracy[ SVM ][ WAR UAR ]')
    log('[XX][ %s ]' % (acc_stat1 / nFolders))
    log('=' * 50)
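# For reference, a minimal sketch of how WAR (weighted average recall, i.e. overall accuracy)
# and UAR (unweighted average recall, the mean of per-class recalls) are commonly computed.
# The actual metric code lives inside extract_utterance_level_features(), which is assumed
# here to return a [WAR, UAR] pair per fold (matching the '[ WAR UAR ]' log header above).
import numpy as np

def war_uar(y_true, y_pred):
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    war = float(np.mean(y_true == y_pred))                  # weighted by class frequency
    recalls = [np.mean(y_pred[y_true == c] == c)            # recall of each emotion class
               for c in np.unique(y_true)]
    uar = float(np.mean(recalls))                           # ignores class imbalance
    return war, uar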
def main(ROOT_PATH, path_loadSession, path_directory, MODEL_NAME):
    logFolderName = 'exp/log/%s' % MODEL_NAME
    logFileName = '%s/prepareDatasets.log' % logFolderName
    log_path = os.path.join(ROOT_PATH, logFolderName)
    makedirs(log_path)
    log_path = os.path.join(ROOT_PATH, logFileName)
    infolog.init(log_path, ROOT_PATH)

    # make5thWaves("%s%s" % (path_loadSession, 5))
    for k in range(5):
        session_M, session_F = [], []
        session = load_session("%s%s" % (path_loadSession, k + 1))
        for idx in range(len(session)):
            if session[idx][2] == 'M':
                session_M.append(session[idx])
            else:
                session_F.append(session[idx])

        dic_M, dic_F = count_emotion(session_M), count_emotion(session_F)
        log('=' * 50)
        log('Total Session_%d_Male :' % (k + 1) + " %d" % sum(dic_M.values()))
        log('Total Session_%d_Female :' % (k + 1) + " %d" % sum(dic_F.values()))
        log(dic_M)
        log(dic_F)

        pathName1 = "%s/session%d_M/" % (path_directory, (k + 1))
        pathName2 = "%s/session%d_F/" % (path_directory, (k + 1))
        log('=' * 50)
        if save_wavFile(session_M, pathName1) == 0:
            log('Saved session_%d_Male wave files successfully.' % (k + 1))
        if save_wavFile(session_F, pathName2) == 0:
            log('Saved session_%d_Female wave files successfully.' % (k + 1))
        log('=' * 50)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', default='0')
    parser.add_argument('--log', '-l', default='')
    parser.add_argument('--restore_step', '-r', default=None)
    parser.add_argument('--tfr_dir', default='bc2013/training/tfrs_with_emo_feature')
    args = parser.parse_args()

    args.model = 'sygst_taco2'
    args.summary_interval = 200
    args.checkpoint_interval = 5000
    # args.summary_interval = 2
    # args.checkpoint_interval = 5

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    log_dir = 'sygst_logs' + ('_' + args.log if args.log else '')
    os.makedirs(log_dir, exist_ok=True)
    tf.set_random_seed(hp.random_seed)
    infolog.init(os.path.join(log_dir, 'train.log'), args.model)
    train(log_dir, args)
def main():
    def _str_to_bool(s):
        """Convert string to bool (in argparse context)."""
        if s.lower() not in ['true', 'false']:
            raise ValueError('Argument needs to be a boolean, got {}'.format(s))
        return {'true': True, 'false': False}[s.lower()]

    parser = argparse.ArgumentParser(description='WaveNet example network')

    DATA_DIRECTORY = './data/kss,./data/son'
    parser.add_argument('--data_dir', type=str, default=DATA_DIRECTORY,
                        help='The directory containing the VCTK corpus.')

    LOGDIR = None
    # LOGDIR = './/logdir-wavenet//train//2018-12-21T22-58-10'
    parser.add_argument('--logdir', type=str, default=LOGDIR,
                        help='Directory in which to store the logging information for TensorBoard. If the model already exists, it will restore the state and will continue training. Cannot use with --logdir_root and --restore_from.')
    parser.add_argument('--logdir_root', type=str, default=None,
                        help='Root directory to place the logging output and generated model. These are stored under the dated subdirectory of --logdir_root. Cannot use with --logdir.')
    parser.add_argument('--restore_from', type=str, default=None,
                        help='Directory in which to restore the model from. This creates the new model under the dated directory in --logdir_root. Cannot use with --logdir.')

    CHECKPOINT_EVERY = 1000  # how often to save a checkpoint
    parser.add_argument('--checkpoint_every', type=int, default=CHECKPOINT_EVERY,
                        help='How many steps to save each checkpoint after. Default: ' + str(CHECKPOINT_EVERY) + '.')

    config = parser.parse_args()  # options that can be supplied on the command line
    config.data_dir = config.data_dir.split(",")

    try:
        directories = validate_directories(config, hparams)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    log_path = os.path.join(logdir, 'train.log')
    infolog.init(log_path, logdir)

    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Create coordinator.
    coord = tf.train.Coordinator()
    num_speakers = len(config.data_dir)

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near zero.
        silence_threshold = hparams.silence_threshold if hparams.silence_threshold > EPSILON else None
        gc_enable = num_speakers > 1

        # The audio reader slices wav files into training inputs: it pads the front with
        # receptive_field samples (or takes them from the previous chunk) and cuts pieces
        # of size (receptive_field + sample_size).
        reader = DataFeederWavenet(
            coord,
            config.data_dir,
            batch_size=hparams.wavenet_batch_size,
            receptive_field=WaveNetModel.calculate_receptive_field(
                hparams.filter_width, hparams.dilations, hparams.scalar_input,
                hparams.initial_filter_width),
            gc_enable=gc_enable)

        if gc_enable:
            audio_batch, lc_batch, gc_id_batch = reader.inputs_wav, reader.local_condition, reader.speaker_id
        else:
            print("didn't work")
            # audio_batch, lc_batch = reader.inputs_wav, local_condition

    # Create network.
    net = WaveNetModel(
        batch_size=hparams.wavenet_batch_size,
        dilations=hparams.dilations,
        filter_width=hparams.filter_width,
        residual_channels=hparams.residual_channels,
        dilation_channels=hparams.dilation_channels,
        quantization_channels=hparams.quantization_channels,
        out_channels=hparams.out_channels,
        skip_channels=hparams.skip_channels,
        use_biases=hparams.use_biases,  # True
        scalar_input=hparams.scalar_input,
        initial_filter_width=hparams.initial_filter_width,
        global_condition_channels=hparams.gc_channels,
        global_condition_cardinality=num_speakers,
        local_condition_channels=hparams.num_mels,
        upsample_factor=hparams.upsample_factor,
        train_mode=True)

    if hparams.l2_regularization_strength == 0:
        hparams.l2_regularization_strength = None

    net.add_loss(input_batch=audio_batch,
                 local_condition=lc_batch,
                 global_condition_batch=gc_id_batch,
                 l2_regularization_strength=hparams.l2_regularization_strength)
    net.add_optimizer(hparams, global_step)

    run_metadata = tf.RunMetadata()

    # Set up session.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))  # log_device_placement=False --> place ops on CPU/GPU automatically
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(),
                           max_to_keep=hparams.max_checkpoints)  # maximum number of checkpoints to keep

    try:
        start_step = load(saver, sess, restore_from)  # load a checkpoint
        if is_overwritten_training or start_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            zero_step_assign = tf.assign(global_step, 0)
            sess.run(zero_step_assign)
    except:
        print("Something went wrong while restoring checkpoint. We will terminate training to avoid accidentally overwriting the previous model.")
        raise

    ###########

    start_step = sess.run(global_step)
    step = last_saved_step = start_step  # initialize step so the store_metadata check works on the first iteration
    try:
        reader.start_in_session(sess, start_step)
        while not coord.should_stop():
            start_time = time.time()
            if hparams.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                log('Storing metadata')
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                step, loss_value, _ = sess.run([global_step, net.loss, net.optimize],
                                               options=run_options, run_metadata=run_metadata)
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                step, loss_value, _ = sess.run([global_step, net.loss, net.optimize])

            duration = time.time() - start_time
            log('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))

            if step % config.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

            if step >= hparams.num_steps:
                # An error message is printed, but stopping here is intended.
                raise Exception('End xxx~~~yyy')

    except Exception as e:
        print('finally')
        # if step > last_saved_step:
        #     save(saver, sess, logdir, step)
        coord.request_stop(e)
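# For intuition, a sketch of how WaveNetModel.calculate_receptive_field() is commonly implemented
# for a stack of dilated causal convolutions. This follows the widely used tensorflow-wavenet
# formula; the exact method in this repo may differ. Each dilated layer adds
# (filter_width - 1) * dilation samples of left context, plus the initial causal (or scalar-input)
# convolution contributes one more filter's worth.
def calculate_receptive_field(filter_width, dilations, scalar_input, initial_filter_width):
    receptive_field = (filter_width - 1) * sum(dilations) + 1
    if scalar_input:
        receptive_field += initial_filter_width - 1
    else:
        receptive_field += filter_width - 1
    return receptive_field

# Example (hypothetical hyperparameters): filter_width=2 with two stacks of dilations
# 1, 2, 4, ..., 512 gives (2 - 1) * 2046 + 1 + 1 = 2048 samples of left context.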
def main():
    def _str_to_bool(s):
        """Convert string to bool (in argparse context)."""
        if s.lower() not in ['true', 'false']:
            raise ValueError('Argument needs to be a boolean, got {}'.format(s))
        return {'true': True, 'false': False}[s.lower()]

    parser = argparse.ArgumentParser(description='WaveNet example network')

    DATA_DIRECTORY = '/home/kjm/Tacotron2-Wavenet-Korean-TTS/data/monika,/home/kjm/Tacotron2-Wavenet-Korean-TTS/data/kss'
    # DATA_DIRECTORY = 'D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\moon'
    parser.add_argument('--data_dir', type=str, default=DATA_DIRECTORY,
                        help='The directory containing the VCTK corpus.')

    # LOGDIR = None
    LOGDIR = './logdir-wavenet/train/2021-03-10T02-58-23'
    parser.add_argument('--logdir', type=str, default=LOGDIR,
                        help='Directory in which to store the logging information for TensorBoard. If the model already exists, it will restore the state and will continue training. Cannot use with --logdir_root and --restore_from.')
    parser.add_argument('--logdir_root', type=str, default=None,
                        help='Root directory to place the logging output and generated model. These are stored under the dated subdirectory of --logdir_root. Cannot use with --logdir.')
    parser.add_argument('--restore_from', type=str, default=None,
                        help='Directory in which to restore the model from. This creates the new model under the dated directory in --logdir_root. Cannot use with --logdir.')

    CHECKPOINT_EVERY = 1000  # how often to save a checkpoint
    parser.add_argument('--checkpoint_every', type=int, default=CHECKPOINT_EVERY,
                        help='How many steps to save each checkpoint after. Default: ' + str(CHECKPOINT_EVERY) + '.')
    parser.add_argument('--eval_every', type=int, default=1000, help='Steps between eval on test data')

    config = parser.parse_args()  # options that can be supplied on the command line
    config.data_dir = config.data_dir.split(",")

    try:
        directories = validate_directories(config, hparams)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    log_path = os.path.join(logdir, 'train.log')
    infolog.init(log_path, logdir)

    global_step = tf.Variable(0, name='global_step', trainable=False)

    if hparams.l2_regularization_strength == 0:
        hparams.l2_regularization_strength = None

    # Create coordinator.
    coord = tf.train.Coordinator()
    num_speakers = len(config.data_dir)

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near zero.
        silence_threshold = hparams.silence_threshold if hparams.silence_threshold > EPSILON else None
        gc_enable = True  # before: num_speakers > 1; now always True

        # The audio reader slices wav files into training inputs: it pads the front with
        # receptive_field samples (or takes them from the previous chunk) and cuts pieces
        # of size (receptive_field + sample_size).
        reader = DataFeederWavenet(coord, config.data_dir, batch_size=hparams.wavenet_batch_size,
                                   gc_enable=gc_enable, test_mode=False)

        # Build a DataFeederWavenet for testing; it fetches exactly one file.
        reader_test = DataFeederWavenet(coord, config.data_dir, batch_size=1,
                                        gc_enable=gc_enable, test_mode=True, queue_size=1)

        audio_batch, lc_batch, gc_id_batch = reader.inputs_wav, reader.local_condition, reader.speaker_id

    # Create train network.
    net = create_network(hparams, hparams.wavenet_batch_size, num_speakers, is_training=True)
    net.add_loss(input_batch=audio_batch,
                 local_condition=lc_batch,
                 global_condition_batch=gc_id_batch,
                 l2_regularization_strength=hparams.l2_regularization_strength,
                 upsample_type=hparams.upsample_type)
    net.add_optimizer(hparams, global_step)

    run_metadata = tf.RunMetadata()

    # Set up session.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))  # log_device_placement=False --> place ops on CPU/GPU automatically
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(),
                           max_to_keep=hparams.max_checkpoints)  # maximum number of checkpoints to keep

    try:
        start_step = load(saver, sess, restore_from)  # load a checkpoint
        if is_overwritten_training or start_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            zero_step_assign = tf.assign(global_step, 0)
            sess.run(zero_step_assign)
            start_step = 0
    except:
        print("Something went wrong while restoring checkpoint. We will terminate training to avoid accidentally overwriting the previous model.")
        raise

    ###########

    reader.start_in_session(sess, start_step)
    reader_test.start_in_session(sess, start_step)

    ################### Create test network. <---- built after restoring the session, because of the queue it creates.
    net_test = create_network(hparams, 1, num_speakers, is_training=False)

    if hparams.scalar_input:
        samples = tf.placeholder(tf.float32, shape=[net_test.batch_size, None])
        waveform = 2 * np.random.rand(net_test.batch_size).reshape(net_test.batch_size, -1) - 1
    else:
        # samples: already mu-law encoded, not yet one-hot encoded. Shape: (batch_size, length)
        samples = tf.placeholder(tf.int32, shape=[net_test.batch_size, None])
        waveform = np.random.randint(hparams.quantization_channels,
                                     size=net_test.batch_size).reshape(net_test.batch_size, -1)

    upsampled_local_condition = tf.placeholder(tf.float32, shape=[net_test.batch_size, hparams.num_mels])
    speaker_id = tf.placeholder(tf.int32, shape=[net_test.batch_size])

    # Fast WaveNet Generation Algorithm (arXiv:1611.09482) applied incrementally.
    next_sample = net_test.predict_proba_incremental(samples, upsampled_local_condition, speaker_id)
    sess.run(net_test.queue_initializer)

    # Three placeholders are used for testing: samples, speaker_id, upsampled_local_condition.
    # Pull one mel-spectrogram for testing. If it were not fixed here, the thread would keep
    # reading new data; reader_test's role ends at this point.
    mel_input_test, speaker_id_test = sess.run([reader_test.local_condition, reader_test.speaker_id])

    with tf.variable_scope('wavenet', reuse=tf.AUTO_REUSE):
        upsampled_local_condition_data = net_test.create_upsample(mel_input_test, upsample_type=hparams.upsample_type)

    # upsampled_local_condition_data_ is fed into the upsampled_local_condition placeholder via feed_dict.
    upsampled_local_condition_data_ = sess.run(upsampled_local_condition_data)
    ######################################################

    start_step = sess.run(global_step)
    step = last_saved_step = start_step
    try:
        while not coord.should_stop():
            start_time = time.time()
            if hparams.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                log('Storing metadata')
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                step, loss_value, _ = sess.run([global_step, net.loss, net.optimize],
                                               options=run_options, run_metadata=run_metadata)
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                step, loss_value, _ = sess.run([global_step, net.loss, net.optimize])

            duration = time.time() - start_time
            log('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))

            if step % config.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

            if step % config.eval_every == 0:
                eval_step(sess, logdir, step, waveform, upsampled_local_condition_data_, speaker_id_test,
                          mel_input_test, samples, speaker_id, upsampled_local_condition, next_sample)

            if step >= hparams.num_steps:
                # An error message is printed, but stopping here is intended.
                raise Exception('End xxx~~~yyy')

    except Exception as e:
        print('finally')
        log('Exiting due to exception: %s' % e, slack=True)
        # if step > last_saved_step:
        #     save(saver, sess, logdir, step)
        traceback.print_exc()
        coord.request_stop(e)
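# The incremental sampling above feeds integer samples that have already been mu-law encoded
# into quantization_channels levels. For reference, a sketch of the standard mu-law companding
# used by WaveNet-style vocoders; the repo's actual helpers (e.g. mu_law_encode) may differ in detail.
import numpy as np

def mu_law_encode(audio, quantization_channels=256):
    """Map float audio in [-1, 1] to integer levels in [0, quantization_channels - 1]."""
    mu = quantization_channels - 1
    audio = np.clip(audio, -1.0, 1.0)
    magnitude = np.log1p(mu * np.abs(audio)) / np.log1p(mu)
    signal = np.sign(audio) * magnitude
    return ((signal + 1) / 2 * mu + 0.5).astype(np.int32)

def mu_law_decode(output, quantization_channels=256):
    """Inverse of mu_law_encode: integer levels back to float audio in [-1, 1]."""
    mu = quantization_channels - 1
    signal = 2 * (output.astype(np.float32) / mu) - 1
    return np.sign(signal) * np.expm1(np.abs(signal) * np.log1p(mu)) / mu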
        '--hparams',
        default='',
        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    parser.add_argument('--data_dir', default='training_data',
                        help='Metadata file which contains the keys of audio and melspec')
    parser.add_argument('--ema_decay', type=float, default=0.9999, help='Moving average decay rate.')
    parser.add_argument('--num_workers', type=int, default=4, help='Number of dataloader workers.')
    parser.add_argument('--resume', type=str, default=None, help='Checkpoint path to resume')
    parser.add_argument('--checkpoint_dir', type=str, default='checkpoints/', help='Directory to save checkpoints.')
    args = parser.parse_args()

    os.makedirs(args.checkpoint_dir, exist_ok=True)
    infolog.init(os.path.join(args.checkpoint_dir, 'train.log'), 'FFTNET')
    hparams.parse(args.hparams)
    train_fn(args)
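# The --ema_decay flag above suggests the trainer keeps an exponential moving average of model
# weights. A hedged PyTorch illustration of what a decay of 0.9999 typically controls; the repo's
# actual EMA implementation is not shown here, so the class and method names below are illustrative.
import torch

class EMA:
    def __init__(self, model, decay=0.9999):
        self.decay = decay
        self.shadow = {name: p.detach().clone()
                       for name, p in model.named_parameters() if p.requires_grad}

    @torch.no_grad()
    def update(self, model):
        for name, p in model.named_parameters():
            if name in self.shadow:
                # shadow <- decay * shadow + (1 - decay) * param
                self.shadow[name].mul_(self.decay).add_(p.detach(), alpha=1 - self.decay)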
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--base_dir', default=os.path.expanduser(os.getcwd()))
    parser.add_argument('--data', default='datasets/slt_arctic_full_data')
    parser.add_argument('--train_model', required=True, choices=['duration', 'acoustic'])
    parser.add_argument('--name', help='Name of the run. Used for logging. Defaults to model name.')
    parser.add_argument('--restore_step', type=int, help='Global step to restore from checkpoint.')

    # Parse command-line arguments
    args = parser.parse_args()
    data_root = os.path.join(args.base_dir, args.data)
    run_name = args.name or args.train_model
    log_dir = os.path.join(data_root, 'logs-{}'.format(run_name))
    os.makedirs(log_dir, exist_ok=True)
    infolog.init(os.path.join(log_dir, 'train.log'), run_name)

    fname_list = ['dataset_ids.pkl', 'X_min.pkl', 'X_max.pkl', 'Y_mean.pkl', 'Y_scale.pkl']
    with ExitStack() as stack:
        f = [stack.enter_context(open(os.path.join(data_root, fname), 'rb')) for fname in fname_list]
        metadata = pickle.load(f[0])
        X_min = pickle.load(f[1])
        X_max = pickle.load(f[2])
        Y_mean = pickle.load(f[3])
        Y_scale = pickle.load(f[4])

    train_set = FeatureDataset(data_root, metadata, X_min, X_max, Y_mean, Y_scale, train=run_name)
    data_loader = torch.utils.data.DataLoader(train_set, collate_fn=dnn_collate, batch_size=hp.batch_size,
                                              shuffle=True, num_workers=0, pin_memory=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device: {}".format(device))

    # Build model, create optimizer
    if args.train_model == 'duration':
        model = DurationModel(hp.duration_linguistic_dim, hp.hidden_size, hp.duration_dim, hp.num_layers).to(device)
    else:
        model = AcousticModel(hp.acoustic_linguistic_dim, hp.hidden_size, hp.acoustic_dim, hp.num_layers).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=hp.init_learning_rate,
                                 betas=(hp.adam_beta1, hp.adam_beta2), eps=hp.adam_eps,
                                 weight_decay=hp.weight_decay, amsgrad=hp.amsgrad)

    # Reload parameters from a checkpoint
    if args.restore_step:
        checkpoint_path = os.path.join(log_dir, 'model.ckpt-{}.pth'.format(args.restore_step))
        model = load_checkpoint(checkpoint_path, model, optimizer, False)
        print("Resuming from checkpoint: {}".format(checkpoint_path))

    # Train loop
    try:
        train_loop(device, model, optimizer, data_loader, log_dir)
    except KeyboardInterrupt:
        print("Interrupted!")
        pass
    finally:
        print("Saving checkpoint....")
        save_checkpoint(device, model, optimizer, global_step, global_epoch, log_dir)