def main05(ROOT_PATH, path_loadSession, path_directory, MODEL_NAME):
    logFolderName = 'exp/log/%s' % MODEL_NAME
    logFileName = '%s/prepareDatasets.log' % logFolderName
    log_path = os.path.join(ROOT_PATH, logFolderName)
    makedirs(log_path)
    log_path = os.path.join(ROOT_PATH, logFileName)
    infolog.init(log_path, ROOT_PATH)

    #make5thWaves("%s%s"%(path_loadSession,5))
    for k in range(5):
        session_ = []
        session = load_session("%s%s" % (path_loadSession, k + 1))
        for idx in range(len(session)):
            session_.append(session[idx])

        dic_ = count_emotion(session_)
        log('=' * 50)
        log('Total Session_%d :' % (k + 1) + " %d" % sum(dic_.values()))
        log(dic_)
        pathName1 = "%s/session%d/" % (path_directory, (k + 1))
        log('=' * 50)
        if save_wavFile(session_, pathName1) == 0:
            log('Saved session_%d wave files successfully.' % (k + 1))
    log('=' * 50)
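
All of the entry points collected here follow the same logging pattern: infolog.init(...) is called once with a log-file path, a run name (or root path), and optionally a Slack webhook URL, after which log(...) prints to the console and appends to that file. The stand-in below is only a minimal sketch written against the call signatures visible in these examples; it is not the infolog module shipped with the Tacotron/WaveNet repositories.

# Minimal illustrative stand-in for the infolog module used throughout these examples.
# It only mirrors the calls seen here: init(path, run_name[, slack_url]) and log(msg[, slack]).
import atexit
from datetime import datetime

_log_file = None
_run_name = None

def init(filename, run_name, slack_url=None):
    global _log_file, _run_name
    _run_name = run_name
    _log_file = open(filename, 'a', encoding='utf-8')
    _log_file.write('\n----- Starting run %s at %s -----\n' % (run_name, datetime.now()))
    atexit.register(_log_file.close)
    # slack_url is accepted for interface compatibility; posting to Slack is omitted in this sketch.

def log(msg, slack=False):
    print(msg)
    if _log_file is not None:
        _log_file.write('[%s] %s\n' % (datetime.now().strftime('%H:%M:%S'), msg))
        _log_file.flush()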
Example 2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--base_dir', default='')
    parser.add_argument('--input', default='training/train.txt')
    parser.add_argument('--model', default='Tacotron')
    parser.add_argument(
        '--name',
        help='Name of the run, Used for logging, Defaults to model name')
    # Note: argparse's type=bool treats any non-empty string (even "False") as True,
    # so pass an empty string, or swap in a str2bool-style converter, to start fresh.
    parser.add_argument('--restore',
                        type=bool,
                        default=True,
                        help='Set this to False to do a fresh training')
    parser.add_argument('--summary_interval',
                        type=int,
                        default=10,
                        help='Steps between running summary ops')
    parser.add_argument('--checkpoint_interval',
                        type=int,
                        default=100,
                        help='Steps between writing checkpoints')
    parser.add_argument('--tf_log_level',
                        type=int,
                        default=1,
                        help='Tensorflow C++ log level.')
    args = parser.parse_args()

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_log_level)
    run_name = args.name or args.model
    log_dir = os.path.join(args.base_dir, 'logs-{}'.format(run_name))
    os.makedirs(log_dir, exist_ok=True)
    infolog.init(os.path.join(log_dir, 'Terminal_train_log'), run_name)
    args.hparams = hparams
    train(log_dir, args)
Example 3
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('--log_dir', default='logdir-tacotron2')

    parser.add_argument(
        '--data_paths',
        default=
        'D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\moon,D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\son'
    )
    #parser.add_argument('--data_paths', default='D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\small1,D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\small2')

    #parser.add_argument('--load_path', default=None)   # takes precedence over 'initialize_path' below
    parser.add_argument(
        '--load_path', default='logdir-tacotron2/moon+son_2019-03-01_10-35-44')

    parser.add_argument(
        '--initialize_path',
        default=None)  # restores the model from a checkpoint, but the global step restarts from 0

    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--num_test_per_speaker', type=int, default=2)
    parser.add_argument('--random_seed', type=int, default=123)
    parser.add_argument('--summary_interval', type=int, default=100)

    parser.add_argument('--test_interval', type=int, default=500)  # 500

    parser.add_argument('--checkpoint_interval', type=int,
                        default=2000)  # 2000
    parser.add_argument('--skip_path_filter',
                        type=str2bool,
                        default=False,
                        help='Use only for debugging')

    parser.add_argument('--slack_url',
                        help='Slack webhook URL to get periodic reports.')
    parser.add_argument(
        '--git',
        action='store_true',
        help='If set, verify that the client is clean.'
    )  # The store_true option automatically creates a default value of False.

    config = parser.parse_args()
    config.data_paths = config.data_paths.split(",")
    setattr(hparams, "num_speakers", len(config.data_paths))

    prepare_dirs(config, hparams)

    log_path = os.path.join(config.model_dir, 'train.log')
    infolog.init(log_path, config.model_dir, config.slack_url)

    tf.set_random_seed(config.random_seed)
    print(config.data_paths)

    if config.load_path is not None and config.initialize_path is not None:
        raise Exception(
            " [!] Only one of load_path and initialize_path should be set")

    train(config.model_dir, config)
Example 4
def main():
    parser = argparse.ArgumentParser()  # use the argparse library
    '''
    add_argument registers a command-line option.

    Positional vs. optional arguments:
    - positional argument: the option name has no leading - or --; the declaration order
      determines the order expected on the command line, and omitting it is an error.
    - optional argument: the option name starts with - or --; it may be omitted on the
      command line.

    add_argument('option name', type=x, default=y, action='store_true', help='hello, python')
    - 'option name': the name you want for the command-line option
    - type=x: the type the option's value is converted to
    - default=y: the value used when the option is not given (otherwise it would be None)
    - action='store_true': the option becomes a flag whose value is True when it is present
      on the command line; its default value is False
    - help='hello, python': the description shown next to the option when -h / --help is used

    (A short standalone example of these behaviours follows this function.)
    '''
    parser.add_argument('--log_dir', default='logs')
    parser.add_argument('--data_paths', default='datasets/kr_example')
    parser.add_argument('--load_path', default=None)
    parser.add_argument('--initialize_path', default=None)

    parser.add_argument('--num_test_per_speaker', type=int, default=2)
    parser.add_argument('--random_seed', type=int, default=123)
    parser.add_argument('--summary_interval', type=int, default=100)
    parser.add_argument('--test_interval', type=int, default=500)
    parser.add_argument('--checkpoint_interval', type=int, default=1000)
    parser.add_argument('--skip_path_filter', type=str2bool, default=False, help='Use only for debugging')
    # uses the str2bool function defined in utils/__init__.py

    parser.add_argument('--slack_url', help='Slack webhook URL to get periodic reports.')
    parser.add_argument('--git', action='store_true', help='If set, verify that the client is clean.')

    config = parser.parse_args()  # parse the command-line arguments
    config.data_paths = config.data_paths.split(",")  # (note!) split data_paths (default: datasets/kr_example) on commas
    setattr(hparams, "num_speakers", len(config.data_paths))  # set hparams.num_speakers to len(config.data_paths)

    prepare_dirs(config, hparams)

    log_path = os.path.join(config.model_dir, 'train.log')  # join config.model_dir and 'train.log' into one path
    infolog.init(log_path, config.model_dir, config.slack_url)

    tf.set_random_seed(config.random_seed)
    print(config.data_paths)

    if any("krbook" not in data_path for data_path in config.data_paths) and \
            hparams.sample_rate != 20000:
        warning("Detect non-krbook dataset. May need to set sampling rate from {} to 20000".\
                format(hparams.sample_rate))
        
    if any('LJ' in data_path for data_path in config.data_paths) and \
           hparams.sample_rate != 22050:
        warning("Detect LJ Speech dataset. Set sampling rate from {} to 22050".\
                format(hparams.sample_rate))

    if config.load_path is not None and config.initialize_path is not None:
        raise Exception(" [!] Only one of load_path and initialize_path should be set")

    train(config.model_dir, config)
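
As a standalone illustration of the argparse behaviour the docstring above describes, and of the kind of str2bool helper these scripts import from utils/__init__.py, here is a short sketch (the helper shown is written for illustration and is not that repository's exact code):

import argparse

def str2bool(s):
    """Sketch of an argparse-friendly boolean converter (the real one lives in utils/__init__.py)."""
    if isinstance(s, bool):
        return s
    if s.lower() in ('true', 't', '1', 'yes'):
        return True
    if s.lower() in ('false', 'f', '0', 'no'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got {!r}'.format(s))

parser = argparse.ArgumentParser()
parser.add_argument('input_path')                                    # positional: required, order matters
parser.add_argument('--batch_size', type=int, default=32)            # optional with a type and a default
parser.add_argument('--git', action='store_true')                    # flag: False unless given
parser.add_argument('--skip_path_filter', type=str2bool, default=False)

args = parser.parse_args(['some/dir', '--skip_path_filter', 'true', '--git'])
print(args.input_path, args.batch_size, args.git, args.skip_path_filter)
# -> some/dir 32 True True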
Example 5
def prepare_run(args):
    modified_hp = hparams.parse(args.hparams)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_log_level)
    run_name = args.name or args.model
    log_dir = os.path.join(args.base_dir, 'logs-{}'.format(run_name))
    os.makedirs(log_dir, exist_ok=True)
    infolog.init(os.path.join(log_dir, 'Terminal_train_log'), run_name,
                 args.slack_url)
    return log_dir, modified_hp
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('--log_dir', default='logs')
    parser.add_argument('--data_paths', default='datasets/LJSpeech_1_0')
    parser.add_argument('--load_path', default=None)
    parser.add_argument('--initialize_path', default=None)

    parser.add_argument('--model',
                        default='tacotron',
                        help='tacotron or tacotron2')
    parser.add_argument('--num_test_per_speaker', type=int, default=2)
    parser.add_argument('--random_seed', type=int, default=123)
    parser.add_argument('--summary_interval', type=int, default=20)
    parser.add_argument('--test_interval', type=int, default=100)
    parser.add_argument('--checkpoint_interval', type=int, default=500)
    parser.add_argument('--skip_path_filter',
                        type=str2bool,
                        default=True,
                        help='Use only for debugging')

    parser.add_argument('--slack_url',
                        help='Slack webhook URL to get periodic reports.')
    parser.add_argument('--git',
                        action='store_true',
                        help='If set, verify that the client is clean.')

    config = parser.parse_args()
    config.data_paths = config.data_paths.split(",")
    setattr(hparams, "num_speakers", len(config.data_paths))

    prepare_dirs(config, hparams)

    log_path = os.path.join(config.model_dir, 'train.log')
    infolog.init(log_path, config.model_dir, config.slack_url)

    tf.set_random_seed(config.random_seed)
    print(config.data_paths)

    if any("krbook" not in data_path for data_path in config.data_paths) and \
            hparams.sample_rate != 20000:
        warning("Detect non-krbook dataset. May need to set sampling rate from {} to 20000". \
                format(hparams.sample_rate))

    if any('LJ' in data_path for data_path in config.data_paths) and \
            hparams.sample_rate != 22050:
        warning("Detect LJ Speech dataset. Set sampling rate from {} to 22050". \
                format(hparams.sample_rate))

    if config.load_path is not None and config.initialize_path is not None:
        raise Exception(
            " [!] Only one of load_path and initialize_path should be set")

    train(config.model_dir, config)
Example 7
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('--log_dir', default='logdir-tacotron')
    
    parser.add_argument('--data_paths', default='./data/IU,./data/kss')
        
    parser.add_argument('--load_path', default='./logdir-tacotron/IU+kss_2019-01-08_03-26-23')   # takes precedence over 'initialize_path' below
    #parser.add_argument('--load_path', default='logdir-tacotron/moon+son_2018-12-25_19-03-21')
    
    
    parser.add_argument('--initialize_path', default=None)   # restores the model from a checkpoint, but the global step restarts from 0

    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--num_test_per_speaker', type=int, default=2)
    parser.add_argument('--random_seed', type=int, default=123)
    parser.add_argument('--summary_interval', type=int, default=100000)
    parser.add_argument('--test_interval', type=int, default=500)  # 500
    parser.add_argument('--checkpoint_interval', type=int, default=5000) # 2000
    parser.add_argument('--skip_path_filter', type=str2bool, default=False, help='Use only for debugging')

    parser.add_argument('--slack_url', help='Slack webhook URL to get periodic reports.')
    parser.add_argument('--git', action='store_true', help='If set, verify that the client is clean.')  # The store_true option automatically creates a default value of False.

    config = parser.parse_args()
    config.data_paths = config.data_paths.split(",")
    setattr(hparams, "num_speakers", len(config.data_paths))

    prepare_dirs(config, hparams)

    log_path = os.path.join(config.model_dir, 'train.log')
    infolog.init(log_path, config.model_dir, config.slack_url)

    tf.set_random_seed(config.random_seed)
    print(config.data_paths)

    if any("krbook" not in data_path for data_path in config.data_paths) and  hparams.sample_rate != 20000:
        warning("Detect non-krbook dataset. May need to set sampling rate from {} to 20000".format(hparams.sample_rate))
        
    if any('LJ' in data_path for data_path in config.data_paths) and  hparams.sample_rate != 22050:
        warning("Detect LJ Speech dataset. Set sampling rate from {} to 22050".format(hparams.sample_rate))

    if config.load_path is not None and config.initialize_path is not None:
        raise Exception(" [!] Only one of load_path and initialize_path should be set")
    #print('===config.model_dir====')
    #print(config.model_dir)
    train(config.model_dir, config)
Example 8
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('--log_dir', default='logdir-tacotron2')
    
    parser.add_argument('--data_paths', default=None)    
    
    parser.add_argument('--load_path', default=None)   # takes precedence over 'initialize_path' below
    parser.add_argument('--initialize_path', default=None)   # restores the model from a checkpoint, but the global step restarts from 0

    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--num_test_per_speaker', type=int, default=2)
    parser.add_argument('--random_seed', type=int, default=123)
    parser.add_argument('--summary_interval', type=int, default=100)
    
    parser.add_argument('--test_interval', type=int, default=500)  # 500
    
    parser.add_argument('--checkpoint_interval', type=int, default=2000) # 2000
    parser.add_argument('--skip_path_filter', type=str2bool, default=False, help='Use only for debugging')
    parser.add_argument('--lang', default='Korean')

    parser.add_argument('--slack_url', help='Slack webhook URL to get periodic reports.')
    parser.add_argument('--git', action='store_true', help='If set, verify that the client is clean.')  # The store_true option automatically creates a default value of False.

    config = parser.parse_args()
    if not config.data_paths:
        raise Exception("data paths are not set")
    config.data_paths = config.data_paths.split(",")
    setattr(default_hparams, "num_speakers", len(config.data_paths))
    if default_hparams.num_speakers == 1:
        default_hparams.set_hparam("model_type", "single")
    else:
        default_hparams.set_hparam("model_type", "multi-speaker")

    prepare_dirs(config, default_hparams)

    log_path = os.path.join(config.model_dir, 'train.log')
    infolog.init(log_path, config.model_dir, config.slack_url)

    tf.set_random_seed(config.random_seed)
    print(config.data_paths)


    if config.load_path is not None and config.initialize_path is not None:
        raise Exception(" [!] Only one of load_path and initialize_path should be set")

    train(config.model_dir, config)
def main(ROOT_PATH,MODIFIED_DATASETS_PATH, MODEL_NAME, nFolders, segmentNum):
    MODEL_PATH = os.path.join(ROOT_PATH, "datasets/IEMOCAP" ,MODEL_NAME)
    makedirs(MODEL_PATH)

    logFolderName = 'exp/log/%s' % MODEL_NAME
    logFileName = '%s/Extract_Segment_Level_Feats.log' % logFolderName
    log_path = os.path.join(ROOT_PATH, logFolderName)
    makedirs(log_path)
    log_path = os.path.join(ROOT_PATH, logFileName)
    infolog.init(log_path, ROOT_PATH)

    log ("Extracting segment-level features......................")
    for idx, subdir in enumerate(os.listdir(MODIFIED_DATASETS_PATH)):
        subdir=os.path.join(MODIFIED_DATASETS_PATH, subdir)
        extract_segment_level_features(subdir, MODEL_PATH, idx, segmentNum)

    log ("Saving datasets for Cross Validation...................")
    for idx in range(nFolders):
        save_cross_validation_5folder(idx, nFolders, MODEL_PATH)  
def main(ROOT_PATH, MODEL_NAME, nFolders):
    MODEL_PATH = os.path.join(ROOT_PATH, "datasets/IEMOCAP", MODEL_NAME)
    makedirs(MODEL_PATH)

    logFolderName = 'exp/log/%s' % MODEL_NAME
    logFileName = '%s/Extract_Utterance_Level_Feats.log' % logFolderName
    log_path = os.path.join(ROOT_PATH, logFolderName)
    makedirs(log_path)
    log_path = os.path.join(ROOT_PATH, logFileName)
    infolog.init(log_path, ROOT_PATH)

    acc_stat1 = np.zeros(2)
    for idx in range(nFolders):
        acc_stat1 += extract_utterance_level_features(idx, MODEL_PATH)

    log('=' * 50)
    log('Total Accuracy[ SVM ][ WAR UAR ]')
    log('[XX][ %s ]' % (acc_stat1 / nFolders))
    log('=' * 50)
def main(ROOT_PATH, path_loadSession, path_directory, MODEL_NAME):
    logFolderName = 'exp/log/%s' % MODEL_NAME
    logFileName = '%s/prepareDatasets.log' % logFolderName
    log_path = os.path.join(ROOT_PATH, logFolderName)
    makedirs(log_path)
    log_path = os.path.join(ROOT_PATH, logFileName)
    infolog.init(log_path, ROOT_PATH)

    #make5thWaves("%s%s"%(path_loadSession,5))
    for k in range(5):
        session_M, session_F = [], []

        session = load_session("%s%s" % (path_loadSession, k + 1))
        for idx in range(len(session)):
            if (session[idx][2] == 'M'):
                session_M.append(session[idx])
            else:
                session_F.append(session[idx])

        dic_M, dic_F = count_emotion(session_M), count_emotion(session_F)
        log('=' * 50)
        log('Total Session_%d_Male :' % (k + 1) + " %d" % sum(dic_M.values()))
        log('Total Session_%d_Female :' % (k + 1) +
            " %d" % sum(dic_F.values()))
        log(dic_M)
        log(dic_F)

        pathName1 = "%s/session%d_M/" % (path_directory, (k + 1))
        pathName2 = "%s/session%d_F/" % (path_directory, (k + 1))
        log('=' * 50)

        if save_wavFile(session_M, pathName1) == 0:
            log('Saved session_%d_Male wave files successfully.' % (k + 1))
        if save_wavFile(session_F, pathName2) == 0:
            log('Saved session_%d_Female wave files successfully.' % (k + 1))
    log('=' * 50)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', default='0')
    parser.add_argument('--log', '-l', default='')
    parser.add_argument('--restore_step', '-r', default=None)
    parser.add_argument('--tfr_dir',
                        default='bc2013/training/tfrs_with_emo_feature')
    args = parser.parse_args()

    args.model = 'sygst_taco2'
    args.summary_interval = 200
    args.checkpoint_interval = 5000
    # args.summary_interval = 2
    # args.checkpoint_interval = 5

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    log_dir = 'sygst_logs' + ('_' + args.log if args.log else '')
    os.makedirs(log_dir, exist_ok=True)

    tf.set_random_seed(hp.random_seed)
    infolog.init(os.path.join(log_dir, 'train.log'), args.model)

    train(log_dir, args)
Example 13
def main():
    def _str_to_bool(s):
        """Convert string to bool (in argparse context)."""
        if s.lower() not in ['true', 'false']:
            raise ValueError(
                'Argument needs to be a boolean, got {}'.format(s))
        return {'true': True, 'false': False}[s.lower()]

    parser = argparse.ArgumentParser(description='WaveNet example network')

    DATA_DIRECTORY = './data/kss,./data/son'
    parser.add_argument('--data_dir',
                        type=str,
                        default=DATA_DIRECTORY,
                        help='The directory containing the VCTK corpus.')

    LOGDIR = None
    #LOGDIR = './/logdir-wavenet//train//2018-12-21T22-58-10'

    parser.add_argument(
        '--logdir',
        type=str,
        default=LOGDIR,
        help=
        'Directory in which to store the logging information for TensorBoard. If the model already exists, it will restore the state and will continue training. Cannot use with --logdir_root and --restore_from.'
    )

    parser.add_argument(
        '--logdir_root',
        type=str,
        default=None,
        help=
        'Root directory to place the logging output and generated model. These are stored under the dated subdirectory of --logdir_root. Cannot use with --logdir.'
    )
    parser.add_argument(
        '--restore_from',
        type=str,
        default=None,
        help=
        'Directory in which to restore the model from. This creates the new model under the dated directory in --logdir_root. Cannot use with --logdir.'
    )

    CHECKPOINT_EVERY = 1000  # how often to save a checkpoint (in steps)
    parser.add_argument(
        '--checkpoint_every',
        type=int,
        default=CHECKPOINT_EVERY,
        help='How many steps to save each checkpoint after. Default: ' +
        str(CHECKPOINT_EVERY) + '.')

    config = parser.parse_args()  # options that can be supplied on the command line
    config.data_dir = config.data_dir.split(",")

    try:
        directories = validate_directories(config, hparams)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    log_path = os.path.join(logdir, 'train.log')
    infolog.init(log_path, logdir)

    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Create coordinator.
    coord = tf.train.Coordinator()
    num_speakers = len(config.data_dir)
    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = hparams.silence_threshold if hparams.silence_threshold > EPSILON else None
        gc_enable = num_speakers > 1

        # The AudioReader slices the wav files into inputs: each slice is padded at the front with receptive_field samples (or takes them from the previous chunk) and is cut to length (receptive_field + sample_size).
        reader = DataFeederWavenet(
            coord,
            config.data_dir,
            batch_size=hparams.wavenet_batch_size,
            receptive_field=WaveNetModel.calculate_receptive_field(
                hparams.filter_width, hparams.dilations, hparams.scalar_input,
                hparams.initial_filter_width),
            gc_enable=gc_enable)
        if gc_enable:
            audio_batch, lc_batch, gc_id_batch = reader.inputs_wav, reader.local_condition, reader.speaker_id
        else:
            # The single-speaker path is not wired up in this example; without it,
            # audio_batch/lc_batch/gc_id_batch would be undefined below.
            #audio_batch, lc_batch = reader.inputs_wav, local_condition
            raise NotImplementedError('This example expects multiple speakers (gc_enable=True).')

    # Create network.
    net = WaveNetModel(
        batch_size=hparams.wavenet_batch_size,
        dilations=hparams.dilations,
        filter_width=hparams.filter_width,
        residual_channels=hparams.residual_channels,
        dilation_channels=hparams.dilation_channels,
        quantization_channels=hparams.quantization_channels,
        out_channels=hparams.out_channels,
        skip_channels=hparams.skip_channels,
        use_biases=hparams.use_biases,  #  True
        scalar_input=hparams.scalar_input,
        initial_filter_width=hparams.initial_filter_width,
        global_condition_channels=hparams.gc_channels,
        global_condition_cardinality=num_speakers,
        local_condition_channels=hparams.num_mels,
        upsample_factor=hparams.upsample_factor,
        train_mode=True)

    if hparams.l2_regularization_strength == 0:
        hparams.l2_regularization_strength = None

    net.add_loss(input_batch=audio_batch,
                 local_condition=lc_batch,
                 global_condition_batch=gc_id_batch,
                 l2_regularization_strength=hparams.l2_regularization_strength)
    net.add_optimizer(hparams, global_step)

    run_metadata = tf.RunMetadata()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)
                      )  # log_device_placement=False --> let TF place ops on CPU/GPU automatically.
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(
        var_list=tf.global_variables(),
        max_to_keep=hparams.max_checkpoints)  # maximum number of checkpoints to keep

    try:
        start_step = load(saver, sess, restore_from)  # checkpoint load
        if is_overwritten_training or start_step is None:
            # The first training step will be saved_global_step + 1,
            # so the global step is reset to 0 for new or overwritten trainings.
            zero_step_assign = tf.assign(global_step, 0)
            sess.run(zero_step_assign)

    except:
        print(
            "Something went wrong while restoring checkpoint. We will terminate training to avoid accidentally overwriting the previous model."
        )
        raise

    ###########

    start_step = sess.run(global_step)
    step = last_saved_step = start_step
    try:
        reader.start_in_session(sess, start_step)
        while not coord.should_stop():

            start_time = time.time()
            if hparams.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                log('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                step, loss_value, _ = sess.run(
                    [global_step, net.loss, net.optimize],
                    options=run_options,
                    run_metadata=run_metadata)

                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                step, loss_value, _ = sess.run(
                    [global_step, net.loss, net.optimize])

            duration = time.time() - start_time
            log('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % config.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

            if step >= hparams.num_steps:
                # an exception message is printed, but stopping here is intentional.
                raise Exception('End xxx~~~yyy')

    except Exception as e:
        print('finally')
        #if step > last_saved_step:
        #    save(saver, sess, logdir, step)

        coord.request_stop(e)
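
The reader above sizes its input windows with WaveNetModel.calculate_receptive_field(...). As a rough sketch of how that quantity is computed in the reference WaveNet implementations (the exact code in this repository may differ), each dilated layer widens the receptive field by (filter_width - 1) * dilation samples, and the input layer adds its own kernel width:

def calculate_receptive_field(filter_width, dilations, scalar_input, initial_filter_width):
    # Each dilated causal convolution adds (filter_width - 1) * dilation samples;
    # start from 1 for the current sample itself.
    receptive_field = (filter_width - 1) * sum(dilations) + 1
    # The input layer contributes its own kernel width: a wide initial filter for
    # scalar (raw waveform) input, otherwise the standard filter width.
    if scalar_input:
        receptive_field += initial_filter_width - 1
    else:
        receptive_field += filter_width - 1
    return receptive_field

# Example: filter_width=2 with dilations 1, 2, 4, ..., 512 repeated twice
dilations = [2 ** i for i in range(10)] * 2
print(calculate_receptive_field(2, dilations, scalar_input=False, initial_filter_width=32))
# -> 2048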
Example 14
def main():
    def _str_to_bool(s):
        """Convert string to bool (in argparse context)."""
        if s.lower() not in ['true', 'false']:
            raise ValueError('Argument needs to be a boolean, got {}'.format(s))
        return {'true': True, 'false': False}[s.lower()]
    
    
    parser = argparse.ArgumentParser(description='WaveNet example network')
    
    DATA_DIRECTORY = '/home/kjm/Tacotron2-Wavenet-Korean-TTS/data/monika,/home/kjm/Tacotron2-Wavenet-Korean-TTS/data/kss'
    #DATA_DIRECTORY =  'D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\moon'
    parser.add_argument('--data_dir', type=str, default=DATA_DIRECTORY, help='The directory containing the VCTK corpus.')


    #LOGDIR = None
    LOGDIR = './logdir-wavenet/train/2021-03-10T02-58-23'

    parser.add_argument('--logdir', type=str, default=LOGDIR,help='Directory in which to store the logging information for TensorBoard. If the model already exists, it will restore the state and will continue training. Cannot use with --logdir_root and --restore_from.')
    
    
    parser.add_argument('--logdir_root', type=str, default=None,help='Root directory to place the logging output and generated model. These are stored under the dated subdirectory of --logdir_root. Cannot use with --logdir.')
    parser.add_argument('--restore_from', type=str, default=None,help='Directory in which to restore the model from. This creates the new model under the dated directory in --logdir_root. Cannot use with --logdir.')
    
    
    CHECKPOINT_EVERY = 1000   # how often to save a checkpoint (in steps)
    parser.add_argument('--checkpoint_every', type=int, default=CHECKPOINT_EVERY,help='How many steps to save each checkpoint after. Default: ' + str(CHECKPOINT_EVERY) + '.')
    
    
    parser.add_argument('--eval_every', type=int, default=1000,help='Steps between eval on test data')
    
   
    
    config = parser.parse_args()  # options that can be supplied on the command line
    config.data_dir = config.data_dir.split(",")
    
    try:
        directories = validate_directories(config,hparams)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from


    log_path = os.path.join(logdir, 'train.log')
    infolog.init(log_path, logdir)


    global_step = tf.Variable(0, name='global_step', trainable=False)

    if hparams.l2_regularization_strength == 0:
        hparams.l2_regularization_strength = None


    # Create coordinator.
    coord = tf.train.Coordinator()
    num_speakers = len(config.data_dir)
    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = hparams.silence_threshold if hparams.silence_threshold > EPSILON else None
        gc_enable = True  # Before: num_speakers > 1    After: always True

        # The AudioReader slices the wav files into inputs: each slice is padded at the front with receptive_field samples (or takes them from the previous chunk) and is cut to length (receptive_field + sample_size).
        reader = DataFeederWavenet(coord, config.data_dir, batch_size=hparams.wavenet_batch_size, gc_enable=gc_enable, test_mode=False)

        # Create one more DataFeederWavenet for testing; it pulls exactly one file.
        reader_test = DataFeederWavenet(coord, config.data_dir, batch_size=1, gc_enable=gc_enable, test_mode=True, queue_size=1)
        
        

        audio_batch, lc_batch, gc_id_batch = reader.inputs_wav, reader.local_condition, reader.speaker_id


    # Create train network.
    net = create_network(hparams,hparams.wavenet_batch_size,num_speakers,is_training=True)
    net.add_loss(input_batch=audio_batch,local_condition=lc_batch, global_condition_batch=gc_id_batch, l2_regularization_strength=hparams.l2_regularization_strength,upsample_type=hparams.upsample_type)
    net.add_optimizer(hparams,global_step)



    run_metadata = tf.RunMetadata()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))  # log_device_placement=False --> let TF place ops on CPU/GPU automatically.
    init = tf.global_variables_initializer()
    sess.run(init)
    
    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=hparams.max_checkpoints)  # maximum number of checkpoints to keep
    
    try:
        start_step = load(saver, sess, restore_from)  # checkpoint load
        if is_overwritten_training or start_step is None:
            # The first training step will be saved_global_step + 1,
            # so the global step is reset to 0 for new or overwritten trainings.
            zero_step_assign = tf.assign(global_step, 0)
            sess.run(zero_step_assign)
            start_step=0
    except:
        print("Something went wrong while restoring checkpoint. We will terminate training to avoid accidentally overwriting the previous model.")
        raise


    ###########

    reader.start_in_session(sess,start_step)
    reader_test.start_in_session(sess,start_step)
    
    ################### Create test network.  <---- built after the session restore, because of the queue creation
    net_test = create_network(hparams,1,num_speakers,is_training=False)
  
    if hparams.scalar_input:
        samples = tf.placeholder(tf.float32,shape=[net_test.batch_size,None])
        waveform = 2*np.random.rand(net_test.batch_size).reshape(net_test.batch_size,-1)-1
        
    else:
        samples = tf.placeholder(tf.int32,shape=[net_test.batch_size,None])  # samples: mu-law encoded values, before one-hot conversion. shape: (batch_size, length)
        waveform = np.random.randint(hparams.quantization_channels,size=net_test.batch_size).reshape(net_test.batch_size,-1)
    upsampled_local_condition = tf.placeholder(tf.float32,shape=[net_test.batch_size,hparams.num_mels])  
    
        

    speaker_id = tf.placeholder(tf.int32,shape=[net_test.batch_size])  
    next_sample = net_test.predict_proba_incremental(samples,upsampled_local_condition,speaker_id)  # applies the Fast Wavenet Generation Algorithm (arXiv:1611.09482)

        
    sess.run(net_test.queue_initializer)
    



    # There are three placeholders for testing: samples, speaker_id, upsampled_local_condition.
    # Fetch one mel-spectrogram for testing and keep it fixed; otherwise the thread keeps reading new data. reader_test's job ends here.

    mel_input_test, speaker_id_test = sess.run([reader_test.local_condition,reader_test.speaker_id])


    with tf.variable_scope('wavenet',reuse=tf.AUTO_REUSE):
        upsampled_local_condition_data = net_test.create_upsample(mel_input_test,upsample_type=hparams.upsample_type)
        upsampled_local_condition_data_ = sess.run(upsampled_local_condition_data)  # upsampled_local_condition_data_ is fed into the upsampled_local_condition placeholder via feed_dict.

    ######################################################
    
    
    start_step = sess.run(global_step)
    step = last_saved_step = start_step
    try:        
        
        while not coord.should_stop():
            
            start_time = time.time()
            if hparams.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                log('Storing metadata')
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                step, loss_value, _ = sess.run([global_step, net.loss, net.optimize],options=run_options,run_metadata=run_metadata)

                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                step, loss_value, _ = sess.run([global_step,net.loss, net.optimize])

            duration = time.time() - start_time
            log('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
            
            
            if step % config.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step
                
                
            if step % config.eval_every == 0:  # config.eval_every
                eval_step(sess,logdir,step,waveform,upsampled_local_condition_data_,speaker_id_test,mel_input_test,samples,speaker_id,upsampled_local_condition,next_sample)
            
            if step >= hparams.num_steps:
                # an exception message is printed, but stopping here is intentional.
                raise Exception('End xxx~~~yyy')
            
    except Exception as e:
        print('finally')
        log('Exiting due to exception: %s' % e, slack=True)
        #if step > last_saved_step:
        #    save(saver, sess, logdir, step)        
        traceback.print_exc()
        coord.request_stop(e)
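
The three placeholders defined above (samples, speaker_id, upsampled_local_condition) together with next_sample are what the eval_step call presumably drives during generation. As a hedged sketch of the usual incremental-generation loop around Fast WaveNet (this is an illustration only, not the repository's eval_step), one sample at a time is fed back in while stepping through the upsampled conditioning frames:

import numpy as np

def generate_waveform(sess, next_sample, samples, speaker_id, upsampled_local_condition,
                      upsampled_lc_data, speaker_id_value, quantization_channels):
    # upsampled_lc_data: (batch=1, time, num_mels) array such as the one produced by create_upsample above.
    generated = [np.random.randint(quantization_channels)]   # arbitrary seed sample
    for t in range(upsampled_lc_data.shape[1]):               # one generated sample per conditioning frame
        probs = sess.run(next_sample, feed_dict={
            samples: np.array(generated[-1:]).reshape(1, -1),       # feed only the newest sample
            upsampled_local_condition: upsampled_lc_data[:, t, :],  # current conditioning frame
            speaker_id: speaker_id_value,
        })
        # draw the next value from the predicted categorical distribution
        generated.append(np.random.choice(quantization_channels, p=np.asarray(probs).ravel()))
    return np.array(generated[1:])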
Example 15
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--hparams',
        default='',
        help=
        'Hyperparameter overrides as a comma-separated list of name=value pairs'
    )
    parser.add_argument(
        '--data_dir',
        default='training_data',
        help='Metadata file which contains the keys of audio and melspec')
    parser.add_argument('--ema_decay',
                        type=float,
                        default=0.9999,
                        help='Moving average decay rate.')
    parser.add_argument('--num_workers',
                        type=int,
                        default=4,
                        help='Number of dataloader workers.')
    parser.add_argument('--resume',
                        type=str,
                        default=None,
                        help='Checkpoint path to resume')
    parser.add_argument('--checkpoint_dir',
                        type=str,
                        default='checkpoints/',
                        help='Directory to save checkpoints.')
    args = parser.parse_args()
    os.makedirs(args.checkpoint_dir, exist_ok=True)
    infolog.init(os.path.join(args.checkpoint_dir, 'train.log'), 'FFTNET')
    hparams.parse(args.hparams)
    train_fn(args)
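
The --hparams help text above describes the override format: a comma-separated list of name=value pairs handed to hparams.parse. The snippet below is a minimal sketch of that kind of parser for simple scalar hyperparameters; it is written for illustration and is not the HParams implementation these repositories actually use:

def parse_hparams_overrides(hparams_obj, overrides):
    """Apply overrides such as "batch_size=16,ema_decay=0.999" to an object with matching attributes."""
    if not overrides:
        return hparams_obj
    for pair in overrides.split(','):
        name, value = pair.split('=', 1)
        current = getattr(hparams_obj, name)               # fails loudly if the hyperparameter doesn't exist
        setattr(hparams_obj, name, type(current)(value))   # keep the original type (int, float, str; booleans need extra care)
    return hparams_obj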
Example 16
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--base_dir', default=os.path.expanduser(os.getcwd()))
    parser.add_argument('--data', default='datasets/slt_arctic_full_data')
    parser.add_argument('--train_model', required=True, choices=['duration', 'acoustic'])
    parser.add_argument('--name', help='Name of the run. Used for logging. Defaults to model name.')
    parser.add_argument('--restore_step', type=int, help='Global step to restore from checkpoint.')

    # Parse command-line arguments
    args = parser.parse_args()
    data_root = os.path.join(args.base_dir, args.data)
    run_name = args.name or args.train_model
    log_dir = os.path.join(data_root, 'logs-{}'.format(run_name))
    os.makedirs(log_dir, exist_ok=True)
    infolog.init(os.path.join(log_dir, 'train.log'), run_name)

    fname_list = ['dataset_ids.pkl', 'X_min.pkl', 'X_max.pkl', 'Y_mean.pkl', 'Y_scale.pkl']
    with ExitStack() as stack:
        f = [stack.enter_context(open(os.path.join(data_root, fname), 'rb')) for fname in fname_list]
        metadata = pickle.load(f[0])
        X_min = pickle.load(f[1])
        X_max = pickle.load(f[2])
        Y_mean = pickle.load(f[3])
        Y_scale = pickle.load(f[4])

    train_set = FeatureDataset(data_root, metadata, X_min, X_max, Y_mean, Y_scale, train=run_name)
    data_loader = torch.utils.data.DataLoader(train_set,
                                              collate_fn=dnn_collate,
                                              batch_size=hp.batch_size,
                                              shuffle=True, num_workers=0, pin_memory=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device: {}".format(device))

    # Build model, create optimizer
    if args.train_model == 'duration':
        model = DurationModel(
            hp.duration_linguistic_dim, hp.hidden_size, hp.duration_dim, hp.num_layers).to(device)
    else:
        model = AcousticModel(
            hp.acoustic_linguistic_dim, hp.hidden_size, hp.acoustic_dim, hp.num_layers).to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=hp.init_learning_rate,
                                 betas=(hp.adam_beta1, hp.adam_beta2),
                                 eps=hp.adam_eps,
                                 weight_decay=hp.weight_decay,
                                 amsgrad=hp.amsgrad)

    # Reload parameters from a checkpoint
    if args.restore_step:
        checkpoint_path = os.path.join(log_dir, 'model.ckpt-{}.pth'.format(args.restore_step))
        model = load_checkpoint(checkpoint_path, model, optimizer, False)
        print("Resuming from checkpoint:{}".format(checkpoint_path))

    # Train loop
    try:
        train_loop(device, model, optimizer, data_loader, log_dir)
    except KeyboardInterrupt:
        print("Interrupted!")
        pass
    finally:
        print("Saving checkpoint....")
        save_checkpoint(device, model, optimizer, global_step, global_epoch, log_dir)
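
The final save_checkpoint call refers to global_step and global_epoch, which are not defined in this snippet; in code organized this way they are usually module-level counters that train_loop advances. The sketch below illustrates that pattern only; the names mirror the call above, but the loop body is an assumption, not the source repository's train_loop:

import torch

# Illustrative module-level counters advanced by the training loop.
global_step = 0
global_epoch = 0

def train_loop(device, model, optimizer, data_loader, log_dir, num_epochs=100):
    global global_step, global_epoch
    criterion = torch.nn.MSELoss()
    model.train()
    for _ in range(num_epochs):
        for x, y in data_loader:                     # dnn_collate is assumed to yield (inputs, targets)
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()
            global_step += 1
        global_epoch += 1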