def test_preprocessing(feature_type):
    """Run feature extraction, statistics calculation and noise shaping.

    Five dummy wav files are written under ``tmp/wav``, features are
    extracted with the extractor selected by ``args.feature_type``,
    statistics are calculated over the ``*.h5`` files found in
    ``args.hdf5dir`` and, unless ``feature_type`` is ``"melspc"``, noise
    shaping is applied to the wav files found in ``args.wavdir``.

    The ``tmp`` directory is removed in a ``finally`` clause, so it is
    cleaned up even when one of the steps raises.

    Args:
        feature_type: Feature type name forwarded to ``make_args``.
            ``"world"`` and ``"melspc"`` select their dedicated extractors;
            any other value falls back to ``melcepstrum_extract``.

    """
    # make arguments
    args = make_args(feature_type=feature_type)

    # prepare dummy wav files
    wavdir = "tmp/wav"
    if not os.path.exists(wavdir):
        os.makedirs(wavdir)

    # "tmp" exists from here on, so the cleanup in ``finally`` is always valid
    try:
        for i in range(5):
            make_dummy_wav(wavdir + "/%d.wav" % i, 8000, args.fs)

        # feature extract
        wav_list = find_files(wavdir, "*.wav")
        if not os.path.exists(args.wavdir):
            os.makedirs(args.wavdir)
        if args.feature_type == "world":
            world_feature_extract(wav_list, args)
        elif args.feature_type == "melspc":
            melspectrogram_extract(wav_list, args)
        else:
            melcepstrum_extract(wav_list, args)

        # calc_stats
        file_list = find_files(args.hdf5dir, "*.h5")
        calc_stats(file_list, args)

        # noise shaping
        if feature_type != "melspc":
            wav_list = find_files(args.wavdir, "*.wav")
            if not os.path.exists(args.outdir):
                os.makedirs(args.outdir)
            # the MLSA coefficients are only written when the stats file
            # does not already contain them
            if not check_hdf5(args.stats, "/mlsa/coef"):
                avg_mcep = read_hdf5(args.stats, args.feature_type + "/mean")
                if args.feature_type == "world":
                    # keep only the slice selected by the mcep dim bounds
                    avg_mcep = avg_mcep[args.mcep_dim_start:args.mcep_dim_end]
                mlsa_coef = convert_mcep_to_mlsa_coef(
                    avg_mcep, args.mag, args.mcep_alpha)
                write_hdf5(args.stats, "/mlsa/coef", mlsa_coef)
                write_hdf5(args.stats, "/mlsa/alpha", args.mcep_alpha)
            noise_shaping(wav_list, args)
    finally:
        # remove
        shutil.rmtree("tmp")
def test_train_generator():
    """Check the batch shapes yielded by ``train_generator``.

    Dummy wav files are written to ``data/wav`` and both ``"melspc"`` and
    ``"world"`` features are extracted from them. For each feature type the
    generator is built for every combination of ``use_upsampling_layer``
    and ``batch_length`` (``10000`` for minibatches, ``None`` for utterance
    batches), and the sizes of the first yielded batch are compared.

    """
    # make dummy wavfiles
    wav_root = "data/wav"
    if not os.path.exists(wav_root):
        os.makedirs(wav_root)
    for idx in range(5):
        make_dummy_wav(wav_root + "/%d.wav" % idx)

    # make features
    feat_args = make_feature_args()
    wav_list = find_files(wav_root, "*.wav")
    if not os.path.exists(feat_args.wavdir):
        os.makedirs(feat_args.wavdir)
    feat_args.feature_type = "melspc"
    melspectrogram_extract(wav_list, feat_args)
    feat_args.feature_type = "world"
    world_feature_extract(wav_list, feat_args)
    feat_list = find_files(feat_args.hdf5dir, "*.h5")

    # (use_upsampling_layer, batch_length) pairs:
    #   minibatch without upsampling layer
    #   utterance batch without upsampling layer
    #   minibatch with upsampling layer
    #   utterance batch with upsampling layer
    cases = (
        (False, 10000),
        (False, None),
        (True, 10000),
        (True, None),
    )

    for ft in ["world", "melspc"]:
        for with_upsampling, batch_length in cases:
            generator_args = make_train_generator_args(
                wav_list=wav_list,
                feat_list=feat_list,
                feature_type=ft,
                use_upsampling_layer=with_upsampling,
                batch_length=batch_length,
                batch_size=5,
            )
            generator = train_generator(**vars(generator_args))
            (x, h), t = next(generator)

            # batch axis: an utterance batch must hold exactly one item
            if batch_length is None:
                assert x.size(0) == t.size(0) == h.size(0) == 1
            else:
                assert x.size(0) == t.size(0) == h.size(0)

            # length axis: with the upsampling layer, h.size(2) is compared
            # after multiplying by the upsampling factor
            if with_upsampling:
                assert x.size(1) == t.size(1) == \
                    h.size(2) * generator_args.upsampling_factor
            else:
                assert x.size(1) == t.size(1) == h.size(2)
def test_decode_generator():
    """Check the batch shapes yielded by ``decode_generator``.

    Dummy wav files are written to ``data/wav`` and both ``"melspc"`` and
    ``"world"`` features are extracted from them. For each feature type the
    generator is built for every combination of ``use_upsampling_layer``
    and ``batch_size`` (``1`` for non-batch, ``5`` for minibatch), and the
    sizes of the first yielded item are compared with the reported number
    of samples.

    """
    # make dummy wavfiles
    wav_root = "data/wav"
    if not os.path.exists(wav_root):
        os.makedirs(wav_root)
    for idx in range(5):
        make_dummy_wav(wav_root + "/%d.wav" % idx)

    # make features
    feat_args = make_feature_args()
    wav_list = find_files(wav_root, "*.wav")
    if not os.path.exists(feat_args.wavdir):
        os.makedirs(feat_args.wavdir)
    feat_args.feature_type = "melspc"
    melspectrogram_extract(wav_list, feat_args)
    feat_args.feature_type = "world"
    world_feature_extract(wav_list, feat_args)
    feat_list = find_files(feat_args.hdf5dir, "*.h5")

    # (use_upsampling_layer, batch_size) pairs:
    #   non-batch without upsampling layer
    #   non-batch with upsampling layer
    #   minibatch without upsampling layer
    #   minibatch with upsampling layer
    cases = (
        (False, 1),
        (True, 1),
        (False, 5),
        (True, 5),
    )

    for ft in ["world", "melspc"]:
        for with_upsampling, batch_size in cases:
            generator_args = make_decode_generator_args(
                feat_list=feat_list,
                feature_type=ft,
                use_upsampling_layer=with_upsampling,
                batch_size=batch_size,
            )
            generator = decode_generator(**vars(generator_args))
            _, (x, h, n_samples) = next(generator)

            if batch_size == 1:
                # non-batch: n_samples is used as a single number
                assert x.size(0) == h.size(0) == 1
                expected_length = n_samples + 1
            else:
                # minibatch: n_samples is a list with one entry per item
                assert x.size(0) == h.size(0) == len(n_samples)
                expected_length = max(n_samples) + 1

            # with the upsampling layer, h.size(2) is compared after
            # multiplying by the upsampling factor
            if with_upsampling:
                assert h.size(2) * generator_args.upsampling_factor == \
                    expected_length
            else:
                assert h.size(2) == expected_length