def extract_f0_and_npow(wavf):
    """Extract F0 and npow from a waveform file.

    Parameters
    ----------
    wavf : str
        File path of the waveform file.

    Returns
    -------
    dict
        Dictionary with the keys ``"f0"`` and ``"npow"``, holding the F0
        array returned by ``FeatureExtractor.analyze`` and the array
        returned by ``FeatureExtractor.npow``.
    """
    # open waveform
    print("Extract: " + wavf)
    fs, x = wavfile.read(wavf)
    # np.float (an alias of the builtin float, i.e. float64) was removed in
    # NumPy 1.24, so the explicit dtype is spelled out.
    x = np.array(x, dtype=np.float64)
    x = low_cut_filter(x, fs, cutoff=70)

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer="world", fs=fs)

    # f0 and npow extraction
    f0, _, _ = feat.analyze(x)
    npow = feat.npow()

    return {"f0": f0, "npow": npow}
def _analyze_world_features(self, x, f0_only=False): feat = FeatureExtractor( analyzer="world", fs=self.conf["fs"], fftl=self.conf["fftl"], shiftms=self.conf["shiftms"], minf0=self.sconf["minf0"], maxf0=self.sconf["maxf0"], ) # analyze world based features self.feats["f0"], self.feats["spc"], self.feats["ap"] = feat.analyze(x) self.feats["uv"], self.feats["cf0"] = convert_continuos_f0( self.feats["f0"]) self.feats["lf0"] = np.log(self.feats["f0"] + EPS) self.feats["lcf0"] = np.log(self.feats["cf0"]) if f0_only: return if self.conf["fftl"] != 256 and self.conf["fs"] > 16000: # NOTE: 256 fft_size sometimes causes errors self.feats["mcep"] = feat.mcep(dim=self.conf["mcep_dim"], alpha=self.conf["mcep_alpha"]) self.feats["npow"] = feat.npow() self.feats["cap"] = feat.codeap() cap = self.feats["cap"] self.feats["ccap"] = np.zeros(cap.shape) self.feats["cap_uv"] = np.zeros(cap.shape) for d in range(self.feats["ccap"].shape[-1]): cap[np.where(cap[:, d] == max(cap[:, d])), d] = 0.0 ( self.feats["cap_uv"][:, d], self.feats["ccap"][:, d], ) = convert_continuos_f0(cap[:, d])
def _analyze_world_features(self, x):
    """Run WORLD analysis on ``x`` and fill ``self.feats`` with the results.

    Entries written: ``f0``, ``spc``, ``ap``, ``mcep``, ``npow``, ``cap``,
    ``ccap``, ``cap_uv``, ``uv``, ``cf0``, ``lf0`` and ``lcf0``.

    Args:
        x: Waveform handed to ``FeatureExtractor.analyze``.
    """
    extractor = FeatureExtractor(
        analyzer="world",
        fs=self.conf["fs"],
        fftl=self.conf["fftl"],
        shiftms=self.conf["shiftms"],
        minf0=self.sconf["minf0"],
        maxf0=self.sconf["maxf0"],
    )

    # basic WORLD features
    f0, spc, ap = extractor.analyze(x)
    self.feats["f0"] = f0
    self.feats["spc"] = spc
    self.feats["ap"] = ap

    # features derived by the extractor from the analysis above
    mcep_dim = self.conf["mcep_dim"]
    mcep_alpha = self.conf["mcep_alpha"]
    self.feats["mcep"] = extractor.mcep(dim=mcep_dim, alpha=mcep_alpha)
    self.feats["npow"] = extractor.npow()
    self.feats["cap"] = extractor.codeap()

    # ``coded_ap`` is the object stored under "cap": the in-place edit in
    # the loop therefore also changes self.feats["cap"].
    coded_ap = self.feats["cap"]
    self.feats["ccap"] = np.zeros(coded_ap.shape)
    self.feats["cap_uv"] = np.zeros(coded_ap.shape)
    n_bands = coded_ap.shape[-1]
    for band in range(n_bands):
        # zero the frames holding the column maximum before conversion
        peak = max(coded_ap[:, band])
        coded_ap[np.where(coded_ap[:, band] == peak), band] = 0.0
        band_uv, band_cont = convert_continuos_f0(coded_ap[:, band])
        self.feats["cap_uv"][:, band] = band_uv
        self.feats["ccap"][:, band] = band_cont

    # F0-derived contours
    uv, cf0 = convert_continuos_f0(self.feats["f0"])
    self.feats["uv"] = uv
    self.feats["cf0"] = cf0
    self.feats["lf0"] = np.log(self.feats["f0"] + EPS)
    self.feats["lcf0"] = np.log(self.feats["cf0"])
def test_spc_and_npow(self):
    """spc and npow must agree on the size of their first axis."""
    wav_path = dirpath + '/data/test16000.wav'
    fs, x = wavfile.read(wav_path)

    extractor = FeatureExtractor(analyzer='world', fs=fs, shiftms=5)
    _, spc, _ = extractor.analyze(x)
    npow = extractor.npow()

    n_spc = spc.shape[0]
    n_npow = npow.shape[0]
    assert n_spc == n_npow
def main(*argv):
    """Create F0 and npow histograms for one speaker.

    Reads the utterance names from ``list_file``, extracts F0 and npow from
    ``<wav_dir>/<name>.wav`` for each of them, and writes
    ``<speaker>_f0histogram.png`` and ``<speaker>_npowhistogram.png`` into
    ``figure_dir`` through ``create_histogram``.

    Parameters
    ----------
    *argv : str
        Command-line arguments (``speaker list_file wav_dir figure_dir``).
        When empty, ``sys.argv[1:]`` is used.
    """
    argv = argv if argv else sys.argv[1:]
    dcp = 'Create histogram for speaker-dependent configure'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir', type=str,
                        help='Directory of wav file')
    parser.add_argument('figure_dir', type=str,
                        help='Directory for figure output')
    args = parser.parse_args(argv)

    # open list file
    with open(args.list_file, 'r') as fp:
        files = fp.readlines()

    f0s = []
    npows = []
    for f in files:
        # open waveform
        f = f.rstrip()
        wavf = os.path.join(args.wav_dir, f + '.wav')
        fs, x = wavfile.read(wavf)
        # np.float (alias of the builtin float, i.e. float64) was removed
        # in NumPy 1.24.
        x = np.array(x, dtype=np.float64)
        print("Extract: " + wavf)

        # construct FeatureExtractor class
        feat = FeatureExtractor(analyzer='world', fs=fs)

        # f0 and npow extraction
        f0, _, _ = feat.analyze(x)
        npow = feat.npow()

        f0s.append(f0)
        npows.append(npow)

    # pool the per-utterance arrays into one flat array per feature
    f0s = np.hstack(f0s).flatten()
    npows = np.hstack(npows).flatten()

    # create a histogram to visualize F0 range of the speaker
    f0histogrampath = os.path.join(args.figure_dir,
                                   args.speaker + '_f0histogram.png')
    create_histogram(f0s, f0histogrampath, range_min=40, range_max=700,
                     step=50, xlabel='Fundamental frequency [Hz]')

    # create a histogram to visualize npow range of the speaker
    npowhistogrampath = os.path.join(args.figure_dir,
                                     args.speaker + '_npowhistogram.png')
    create_histogram(npows, npowhistogrampath, range_min=-70, range_max=20,
                     step=10, xlabel="Frame power [dB]")
def main(*argv):
    """Create F0 and npow histograms for one speaker.

    Reads the utterance names from ``list_file``, extracts F0 and npow from
    ``<wav_dir>/<name>.wav`` for each of them, and writes
    ``<speaker>_f0histogram.png`` and ``<speaker>_npowhistogram.png`` into
    ``figure_dir`` through ``create_histogram``.

    Parameters
    ----------
    *argv : str
        Command-line arguments (``speaker list_file wav_dir figure_dir``).
        When empty, ``sys.argv[1:]`` is used.
    """
    argv = argv if argv else sys.argv[1:]
    dcp = 'Create histogram for speaker-dependent configure'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir', type=str,
                        help='Directory of wav file')
    parser.add_argument('figure_dir', type=str,
                        help='Directory for figure output')
    args = parser.parse_args(argv)

    # open list file
    with open(args.list_file, 'r') as fp:
        files = fp.readlines()

    f0s = []
    npows = []
    for f in files:
        # open waveform
        f = f.rstrip()
        wavf = os.path.join(args.wav_dir, f + '.wav')
        fs, x = wavfile.read(wavf)
        # np.float (alias of the builtin float, i.e. float64) was removed
        # in NumPy 1.24.
        x = np.array(x, dtype=np.float64)
        print("Extract: " + wavf)

        # construct FeatureExtractor class
        feat = FeatureExtractor(analyzer='world', fs=fs)

        # f0 and npow extraction
        f0, _, _ = feat.analyze(x)
        npow = feat.npow()

        f0s.append(f0)
        npows.append(npow)

    # pool the per-utterance arrays into one flat array per feature
    f0s = np.hstack(f0s).flatten()
    npows = np.hstack(npows).flatten()

    # create a histogram to visualize F0 range of the speaker
    f0histogrampath = os.path.join(
        args.figure_dir, args.speaker + '_f0histogram.png')
    create_histogram(f0s, f0histogrampath, range_min=40, range_max=700,
                     step=50, xlabel='Fundamental frequency [Hz]')

    # create a histogram to visualize npow range of the speaker
    npowhistogrampath = os.path.join(
        args.figure_dir, args.speaker + '_npowhistogram.png')
    create_histogram(npows, npowhistogrampath, range_min=-70, range_max=20,
                     step=10, xlabel="Frame power [dB]")
def test_mod_power(self):
    """mod_power must return an array shaped like the converted mcep."""
    wav_path = os.path.join(dirpath, 'data', 'test16000.wav')
    fs, x = wavfile.read(wav_path)

    extractor = FeatureExtractor(analyzer='world', fs=fs, shiftms=5)
    # analyze() runs before mcep(), as in the original test; its return
    # values are not needed here.
    extractor.analyze(x)
    mcep = extractor.mcep(dim=24, alpha=0.42)

    # reference mcep is the analysed one; the "converted" one is a scaled copy
    rmcep = mcep
    cvmcep = mcep * 1.50

    modified_cvmcep = mod_power(cvmcep, rmcep, alpha=0.42)
    assert modified_cvmcep.shape == cvmcep.shape
def test_anasyn_44100(self):
    """Analysis/synthesis round trip on the 44.1 kHz test waveform."""
    fftl = 2048
    shiftms = 5
    alpha = 0.50

    wav_path = dirpath + '/data/test44100.wav'
    fs, x = wavfile.read(wav_path)

    extractor = FeatureExtractor(analyzer='world', fs=fs, shiftms=shiftms,
                                 minf0=100, fftl=fftl)
    f0, spc, ap = extractor.analyze(x)
    mcep = extractor.mcep(dim=40, alpha=alpha)

    # at least one frame must carry a non-zero F0
    nonzero_frames = np.nonzero(f0)[0]
    assert len(nonzero_frames) > 0
    assert spc.shape == ap.shape

    # mcep synthesis
    synthesizer = Synthesizer(fs=fs, fftl=fftl, shiftms=shiftms)
    wav = synthesizer.synthesis(f0, mcep, ap, alpha=alpha)
    nun_check(wav)
def test_anasyn_16000(self):
    """Analysis/synthesis round trip on the 16 kHz test waveform."""
    fftl = 1024
    shiftms = 5
    alpha = 0.42

    wav_path = dirpath + '/data/test16000.wav'
    fs, x = wavfile.read(wav_path)

    extractor = FeatureExtractor(analyzer='world', fs=fs, shiftms=shiftms,
                                 fftl=fftl)
    f0, spc, ap = extractor.analyze(x)
    mcep = extractor.mcep(dim=24, alpha=alpha)

    # at least one frame must carry a non-zero F0
    nonzero_frames = np.nonzero(f0)[0]
    assert len(nonzero_frames) > 0
    assert spc.shape == ap.shape

    # synthesize F0, mcep, ap
    synthesizer = Synthesizer(fs=fs, fftl=fftl, shiftms=shiftms)
    wav = synthesizer.synthesis(f0, mcep, ap, alpha=alpha)
    nun_check(wav)
def test_synthesis_from_codeap(self):
    """Synthesis from spc and an aperiodicity decoded from the coded ap."""
    fftl = 1024
    shiftms = 5

    wav_path = dirpath + '/data/test16000.wav'
    fs, x = wavfile.read(wav_path)

    extractor = FeatureExtractor(analyzer='world', fs=fs, shiftms=shiftms)
    f0, spc, ap = extractor.analyze(x)
    codeap = extractor.codeap()

    # at least one frame must carry a non-zero F0
    nonzero_frames = np.nonzero(f0)[0]
    assert len(nonzero_frames) > 0
    assert spc.shape == ap.shape
    # the coded ap must have as many bands as pyworld reports for this fs
    expected_bands = pyworld.get_num_aperiodicities(fs)
    assert expected_bands == codeap.shape[-1]

    # replace the analysed ap by the one decoded from its coded form
    ap = pyworld.decode_aperiodicity(codeap, fs, fftl)

    synthesizer = Synthesizer(fs=fs, fftl=fftl, shiftms=shiftms)
    wav = synthesizer.synthesis_spc(f0, spc, ap)
    nun_check(wav)
def get_world_features(wavpath, spk, conf, spkr_conf):
    """Extract mcep and F0 from a waveform file with the WORLD analyzer.

    Args:
        wavpath: Path of the waveform file; converted with ``str`` before
            being read.
        spk: Key of the speaker in ``spkr_conf``.
        conf (dict): Configuration; ``conf["feature"]`` supplies ``fs``,
            ``fftl``, ``shiftms``, ``mcep_dim`` and ``mcep_alpha``.
        spkr_conf (dict): Per-speaker configuration;
            ``spkr_conf[spk]`` supplies ``minf0`` and ``maxf0``.

    Returns:
        tuple: ``(mcep, f0)`` as returned by ``FeatureExtractor.mcep`` and
        ``FeatureExtractor.analyze``.
    """
    # NOTE(review): the sampling rate read from the file is not compared
    # with conf["feature"]["fs"]; it is only used for the low-cut filter.
    x, fs = sf.read(str(wavpath))
    # np.float (alias of the builtin float, i.e. float64) was removed in
    # NumPy 1.24.
    x = np.array(x, dtype=np.float64)
    x = low_cut_filter(x, fs, cutoff=70)
    fe = FeatureExtractor(
        analyzer="world",
        fs=conf["feature"]["fs"],
        fftl=conf["feature"]["fftl"],
        shiftms=conf["feature"]["shiftms"],
        minf0=spkr_conf[spk]["minf0"],
        maxf0=spkr_conf[spk]["maxf0"],
    )
    cv_f0, _, _ = fe.analyze(x)
    cv_mcep = fe.mcep(dim=conf["feature"]["mcep_dim"],
                      alpha=conf["feature"]["mcep_alpha"])
    return cv_mcep, cv_f0
def main(*argv):
    """Extract acoustic features for every utterance of one speaker.

    For each name in ``list_file`` whose ``<pair_dir>/h5/<name>.h5`` does
    not exist yet (or always, with ``--overwrite``), the waveform
    ``<wav_dir>/<name>.wav`` is analysed, ``f0``, ``mcep``, ``npow`` and
    ``codeap`` are saved into the h5 file, and an analysis/synthesis
    waveform is written to ``<pair_dir>/anasyn/<name>.wav``.

    Parameters
    ----------
    *argv : str
        Command-line arguments
        (``[--overwrite] speaker ymlf list_file wav_dir pair_dir``).
        When empty, ``sys.argv[1:]`` is used.
    """
    # sys.argv[0] is the program name and must not reach parse_args(),
    # otherwise it is consumed as the first positional argument.
    argv = argv if argv else sys.argv[1:]
    # Options for python
    dcp = 'Extract aoucstic features for the speaker'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('--overwrite', default=False, action='store_true',
                        help='Overwrite h5 file')
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('ymlf', type=str,
                        help='Yml file of the input speaker')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir', type=str,
                        help='Wav file directory of the speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Directory of the speaker pair')
    args = parser.parse_args(argv)

    # read parameters from speaker yml
    sconf = SpeakerYML(args.ymlf)
    h5_dir = os.path.join(args.pair_dir, 'h5')
    anasyn_dir = os.path.join(args.pair_dir, 'anasyn')
    if not os.path.exists(os.path.join(h5_dir, args.speaker)):
        os.makedirs(os.path.join(h5_dir, args.speaker))
    if not os.path.exists(os.path.join(anasyn_dir, args.speaker)):
        os.makedirs(os.path.join(anasyn_dir, args.speaker))

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # open list file
    with open(args.list_file, 'r') as fp:
        for line in fp:
            f = line.rstrip()
            h5f = os.path.join(h5_dir, f + '.h5')

            if (not os.path.exists(h5f)) or args.overwrite:
                wavf = os.path.join(args.wav_dir, f + '.wav')
                fs, x = wavfile.read(wavf)
                # np.float (alias of the builtin float, i.e. float64) was
                # removed in NumPy 1.24.
                x = np.array(x, dtype=np.float64)
                x = low_cut_filter(x, fs, cutoff=70)
                assert fs == sconf.wav_fs

                print("Extract acoustic features: " + wavf)

                # analyze F0, spc, and ap
                f0, spc, ap = feat.analyze(x)
                mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
                npow = feat.npow()
                codeap = feat.codeap()

                # save features into a hdf5 file
                h5 = HDF5(h5f, mode='w')
                h5.save(f0, ext='f0')
                # h5.save(spc, ext='spc')
                # h5.save(ap, ext='ap')
                h5.save(mcep, ext='mcep')
                h5.save(npow, ext='npow')
                h5.save(codeap, ext='codeap')
                h5.close()

                # analysis/synthesis using F0, mcep, and ap
                wav = synthesizer.synthesis(
                    f0,
                    mcep,
                    ap,
                    alpha=sconf.mcep_alpha,
                )
                # keep the samples inside the int16 range before the cast
                wav = np.clip(wav, -32768, 32767)
                anasynf = os.path.join(anasyn_dir, f + '.wav')
                wavfile.write(anasynf, fs, np.array(wav, dtype=np.int16))
            else:
                print("Acoustic features already exist: " + h5f)
def main(*argv):
    """Extract acoustic features for every utterance of one speaker.

    For each name in ``list_file`` whose ``<pair_dir>/h5/<name>.h5`` does
    not exist yet (or always, with ``--overwrite``), the waveform
    ``<wav_dir>/<name>.wav`` is analysed, ``f0``, ``mcep``, ``npow`` and
    ``codeap`` are saved into the h5 file, and an analysis/synthesis
    waveform is written to ``<pair_dir>/anasyn/<name>.wav``.

    Parameters
    ----------
    *argv : str
        Command-line arguments
        (``[--overwrite] speaker ymlf list_file wav_dir pair_dir``).
        When empty, ``sys.argv[1:]`` is used.
    """
    # sys.argv[0] is the program name and must not reach parse_args(),
    # otherwise it is consumed as the first positional argument.
    argv = argv if argv else sys.argv[1:]
    # Options for python
    dcp = 'Extract aoucstic features for the speaker'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('--overwrite', default=False, action='store_true',
                        help='Overwrite h5 file')
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('ymlf', type=str,
                        help='Yml file of the input speaker')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir', type=str,
                        help='Wav file directory of the speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Directory of the speaker pair')
    args = parser.parse_args(argv)

    # read parameters from speaker yml
    sconf = SpeakerYML(args.ymlf)
    h5_dir = os.path.join(args.pair_dir, 'h5')
    anasyn_dir = os.path.join(args.pair_dir, 'anasyn')
    if not os.path.exists(os.path.join(h5_dir, args.speaker)):
        os.makedirs(os.path.join(h5_dir, args.speaker))
    if not os.path.exists(os.path.join(anasyn_dir, args.speaker)):
        os.makedirs(os.path.join(anasyn_dir, args.speaker))

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # open list file
    with open(args.list_file, 'r') as fp:
        for line in fp:
            f = line.rstrip()
            h5f = os.path.join(h5_dir, f + '.h5')

            if (not os.path.exists(h5f)) or args.overwrite:
                wavf = os.path.join(args.wav_dir, f + '.wav')
                fs, x = wavfile.read(wavf)
                # np.float (alias of the builtin float, i.e. float64) was
                # removed in NumPy 1.24.
                x = np.array(x, dtype=np.float64)
                x = low_cut_filter(x, fs, cutoff=70)
                assert fs == sconf.wav_fs

                print("Extract acoustic features: " + wavf)

                # analyze F0, spc, and ap
                f0, spc, ap = feat.analyze(x)
                mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
                npow = feat.npow()
                codeap = feat.codeap()

                # save features into a hdf5 file
                h5 = HDF5(h5f, mode='w')
                h5.save(f0, ext='f0')
                # h5.save(spc, ext='spc')
                # h5.save(ap, ext='ap')
                h5.save(mcep, ext='mcep')
                h5.save(npow, ext='npow')
                h5.save(codeap, ext='codeap')
                h5.close()

                # analysis/synthesis using F0, mcep, and ap
                wav = synthesizer.synthesis(f0,
                                            mcep,
                                            ap,
                                            alpha=sconf.mcep_alpha,
                                            )
                # keep the samples inside the int16 range before the cast
                wav = np.clip(wav, -32768, 32767)
                anasynf = os.path.join(anasyn_dir, f + '.wav')
                wavfile.write(anasynf, fs, np.array(wav, dtype=np.int16))
            else:
                print("Acoustic features already exist: " + h5f)
def main(*argv):
    """Convert the evaluation utterances of a source speaker with a GMM.

    Loads the mcep GMM, the F0 statistics of both speakers and the GV
    statistics from ``pair_dir``; then, for every name in
    ``eval_list_file``, analyses ``<wav_dir>/<name>.wav``, converts F0 and
    mcep, and writes ``<pair_dir>/test/<name>_VC.wav`` (``gmmmode`` None) or
    ``<pair_dir>/test/<name>_DIFFVC.wav`` (``gmmmode`` ``'diff'``).

    Parameters
    ----------
    *argv : str
        Command-line arguments (``[--gmmmode MODE] org tar org_yml pair_yml
        eval_list_file wav_dir pair_dir``).  When empty, ``sys.argv[1:]``
        is used.
    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-gmmmode', '--gmmmode', type=str, default=None,
                        help='mode of the GMM [None, diff, or intra]')
    parser.add_argument('org', type=str,
                        help='Original speaker')
    parser.add_argument('tar', type=str,
                        help='Target speaker')
    parser.add_argument('org_yml', type=str,
                        help='Yml file of the original speaker')
    parser.add_argument('pair_yml', type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('eval_list_file', type=str,
                        help='List file for evaluation')
    parser.add_argument('wav_dir', type=str,
                        help='Directory path of source spekaer')
    parser.add_argument('pair_dir', type=str,
                        help='Directory path of pair directory')
    args = parser.parse_args(argv)

    # read parameters from speaker yml
    sconf = SpeakerYML(args.org_yml)
    pconf = PairYML(args.pair_yml)

    # read GMM for mcep
    mcepgmmpath = os.path.join(args.pair_dir, 'model/GMM_mcep.pkl')
    mcepgmm = GMMConvertor(
        n_mix=pconf.GMM_mcep_n_mix,
        covtype=pconf.GMM_mcep_covtype,
        gmmmode=args.gmmmode,
    )
    # NOTE(review): joblib.load unpickles the file; only load model files
    # from a trusted pair directory.
    param = joblib.load(mcepgmmpath)
    mcepgmm.open_from_param(param)
    print("GMM for mcep conversion mode: {}".format(args.gmmmode))

    # read F0 statistics
    stats_dir = os.path.join(args.pair_dir, 'stats')
    orgstatspath = os.path.join(stats_dir, args.org + '.h5')
    orgstats_h5 = HDF5(orgstatspath, mode='r')
    orgf0stats = orgstats_h5.read(ext='f0stats')
    orgstats_h5.close()

    # read F0 and GV statistics for target
    tarstatspath = os.path.join(stats_dir, args.tar + '.h5')
    tarstats_h5 = HDF5(tarstatspath, mode='r')
    tarf0stats = tarstats_h5.read(ext='f0stats')
    targvstats = tarstats_h5.read(ext='gv')
    tarstats_h5.close()

    # read GV statistics for converted mcep
    cvgvstatspath = os.path.join(args.pair_dir, 'model', 'cvgv.h5')
    cvgvstats_h5 = HDF5(cvgvstatspath, mode='r')
    cvgvstats = cvgvstats_h5.read(ext='cvgv')
    diffcvgvstats = cvgvstats_h5.read(ext='diffcvgv')
    cvgvstats_h5.close()

    mcepgv = GV()
    f0stats = F0statistics()

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # test directory
    test_dir = os.path.join(args.pair_dir, 'test')
    os.makedirs(os.path.join(test_dir, args.org), exist_ok=True)

    # conversion in each evaluation file
    with open(args.eval_list_file, 'r') as fp:
        for line in fp:
            # open wav file
            f = line.rstrip()
            wavf = os.path.join(args.wav_dir, f + '.wav')
            fs, x = wavfile.read(wavf)
            # np.float (alias of the builtin float, i.e. float64) was
            # removed in NumPy 1.24.
            x = x.astype(np.float64)
            x = low_cut_filter(x, fs, cutoff=70)
            assert fs == sconf.wav_fs

            # analyze F0, mcep, and ap
            f0, spc, ap = feat.analyze(x)
            mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
            mcep_0th = mcep[:, 0]

            # convert F0
            cvf0 = f0stats.convert(f0, orgf0stats, tarf0stats)

            # convert mcep (the 0th coefficient is kept from the source)
            cvmcep_wopow = mcepgmm.convert(static_delta(mcep[:, 1:]),
                                           cvtype=pconf.GMM_mcep_cvtype)
            cvmcep = np.c_[mcep_0th, cvmcep_wopow]

            # NOTE(review): for any gmmmode other than None or 'diff'
            # (e.g. 'intra'), neither branch below runs and ``wav`` /
            # ``wavpath`` are unbound when the waveform is written.

            # synthesis VC w/ GV
            if args.gmmmode is None:
                cvmcep_wGV = mcepgv.postfilter(cvmcep,
                                               targvstats,
                                               cvgvstats=cvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1)
                wav = synthesizer.synthesis(
                    cvf0,
                    cvmcep_wGV,
                    ap,
                    rmcep=mcep,
                    alpha=sconf.mcep_alpha,
                )
                wavpath = os.path.join(test_dir, f + '_VC.wav')

            # synthesis DIFFVC w/ GV
            if args.gmmmode == 'diff':
                cvmcep[:, 0] = 0.0
                cvmcep_wGV = mcepgv.postfilter(mcep + cvmcep,
                                               targvstats,
                                               cvgvstats=diffcvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1) - mcep
                wav = synthesizer.synthesis_diff(
                    x,
                    cvmcep_wGV,
                    rmcep=mcep,
                    alpha=sconf.mcep_alpha,
                )
                wavpath = os.path.join(test_dir, f + '_DIFFVC.wav')

            # write waveform
            if not os.path.exists(os.path.join(test_dir, f)):
                os.makedirs(os.path.join(test_dir, f))

            # keep the samples inside the int16 range before the cast
            wav = np.clip(wav, -32768, 32767)
            wavfile.write(wavpath, fs, wav.astype(np.int16))
            print(wavpath)
def melf0_feature_extract(queue, wav_list, config):
    """Mel-spc w/ F0 feature extraction

    For every wav file, the U/V flag, an F0 contour and the log-mel
    filterbank output are concatenated frame by frame and written to an
    hdf5 file under ``"/" + config["feat_type"]``.  The F0 contour is the
    low-pass filtered continuous F0 when ``config['f0_cont']`` is true and
    the raw F0 otherwise.  The process exits with status 1 when a file's
    sampling frequency differs from ``config['sampling_rate']``.

    Args:
        queue (multiprocessing.Queue): the queue to store the file name of utterance
        wav_list (list): list of the wav files
        config (dict): feature extraction config

    """
    # define f0 feature extractor
    feature_extractor = FeatureExtractor(
        analyzer="world",
        fs=config['sampling_rate'],
        shiftms=config['shiftms'],
        minf0=config['minf0'],
        maxf0=config['maxf0'],
        fftl=config['fft_size'])

    # extraction
    for i, wav_name in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)" % (wav_name, i + 1, len(wav_list)))

        # load wavfile
        (x, fs) = sf.read(wav_name)

        # check sampling frequency
        if not fs == config['sampling_rate']:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # extract f0 and uv features
        f0, _, _ = feature_extractor.analyze(x)
        uv, cont_f0 = convert_continuos_f0(f0)
        # frame rate of the F0 sequence, derived from the frame shift in ms
        lpf_fs = int(1.0 / (config['shiftms'] * 0.001))
        cont_f0_lpf = low_pass_filter(cont_f0, lpf_fs, cutoff=20)
        # if the filtered contour contains negative values, retry with a
        # cutoff that doubles on every attempt until none remain
        next_cutoff = 70
        while not (cont_f0_lpf >= [0]).all():
            cont_f0_lpf = low_pass_filter(cont_f0, lpf_fs, cutoff=next_cutoff)
            next_cutoff *= 2

        # extract mel-spc feature
        mel = logmelfilterbank(x, fs,
                               fft_size=config["fft_size"],
                               hop_size=config["hop_size"],
                               win_length=config["win_length"],
                               window=config["window"],
                               num_mels=config["num_mels"],
                               fmin=config["fmin"],
                               fmax=config["fmax"])

        # concatenate, truncating every stream to the shorter frame count
        cont_f0_lpf = np.expand_dims(cont_f0_lpf, axis=-1)
        uv = np.expand_dims(uv, axis=-1)
        minlen = min(uv.shape[0], mel.shape[0])
        # (the original built an extra, identical continuous-F0 array here
        # whose result was always overwritten by the branch below)
        if config['f0_cont']:
            feats = np.concatenate([uv[:minlen, :], cont_f0_lpf[:minlen, :],
                                    mel.astype(np.float32)[:minlen, :]], axis=1)
        else:
            feats = np.concatenate([uv[:minlen, :], f0[:minlen, np.newaxis],
                                    mel.astype(np.float32)[:minlen, :]], axis=1)

        # save feature
        feat_name = path_replace(wav_name, config['indir'],
                                 config['outdir'], extname=config['feature_format'])
        write_hdf5(feat_name, "/%s" % (config["feat_type"]), feats)
        if config['save_f0']:
            write_hdf5(feat_name, "/f0", f0)

    queue.put('Finish')
def world_feature_extract(queue, wav_list, config):
    """Extract WORLD features from a list of wav files.

    For every wav file, the U/V flag, the low-pass filtered continuous F0,
    mcep and coded aperiodicity are concatenated along axis 1 and written
    to an hdf5 file under ``"/" + config["feat_type"]``; ``f0``, ``ap``,
    ``spc`` and ``npow`` are written as well when the matching ``save_*``
    option is set.  ``'Finish'`` is put on ``queue`` once all files are
    processed.

    Args:
        queue (multiprocessing.Queue): the queue to store the file name of utterance
        wav_list (list): list of the wav files
        config (dict): feature extraction config

    """
    # define feature extractor
    extractor = FeatureExtractor(
        analyzer="world",
        fs=config['sampling_rate'],
        shiftms=config['shiftms'],
        minf0=config['minf0'],
        maxf0=config['maxf0'],
        fftl=config['fft_size'],
    )

    # extraction
    for idx, wav_name in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)" % (wav_name, idx + 1, len(wav_list)))

        # load wavfile and apply low cut filter
        fs, x = wavfile.read(wav_name)
        x = np.array(x, dtype=np.float32)
        highpass_cutoff = config['highpass_cutoff']
        if highpass_cutoff != 0:
            x = low_cut_filter(x, fs, cutoff=highpass_cutoff)

        # check sampling frequency
        if fs != config['sampling_rate']:
            logging.error("sampling frequency of %s is not matched." % wav_name)
            sys.exit(1)

        # extract features
        f0, spc, ap = extractor.analyze(x)
        codeap = extractor.codeap()
        mcep = extractor.mcep(dim=config['mcep_dim'], alpha=config['mcep_alpha'])
        npow = extractor.npow()
        uv, cont_f0 = convert_continuos_f0(f0)

        # low-pass the continuous F0; while the result contains negative
        # values, retry with a cutoff that doubles on every attempt
        lpf_fs = int(1.0 / (config['shiftms'] * 0.001))
        cont_f0_lpf = low_pass_filter(cont_f0, lpf_fs, cutoff=20)
        retry_cutoff = 70
        while not (cont_f0_lpf >= [0]).all():
            cont_f0_lpf = low_pass_filter(cont_f0, lpf_fs, cutoff=retry_cutoff)
            retry_cutoff *= 2

        # concatenate
        cont_f0_lpf = np.expand_dims(cont_f0_lpf, axis=-1)
        uv = np.expand_dims(uv, axis=-1)
        streams = [uv, cont_f0_lpf, mcep, codeap]
        feats = np.concatenate(streams, axis=1)

        # save feature
        feat_name = path_replace(wav_name, config['indir'],
                                 config['outdir'], extname=config['feature_format'])
        write_hdf5(feat_name, "/%s" % (config["feat_type"]), feats)

        # optional raw streams, written in the same order as before
        optional_outputs = (
            ('save_f0', "/f0", f0),
            ('save_ap', "/ap", ap),
            ('save_spc', "/spc", spc),
            ('save_npow', "/npow", npow),
        )
        for option, dataset, values in optional_outputs:
            if config[option]:
                write_hdf5(feat_name, dataset, values)

    queue.put('Finish')
def main():
    """Extract acoustic features for the 'converted' or 'target' file set.

    Reads the module-level ``args`` (``file``, ``minf0``, ``maxf0``).  For
    every name in ``./list/<set>.list`` whose ``./<set>/h5f/<name>.h5`` does
    not exist yet, ``./<set>/wav/<name>.wav`` is analysed with fixed
    settings (fs 22050, fftl 1024, 5 ms shift, 34-dim mcep, alpha 0.544),
    ``f0``, ``mcep``, ``npow`` and ``codeap`` are saved into the h5 file,
    and an analysis/synthesis waveform is written to
    ``./<set>/anasyn/<name>.wav``.

    Raises:
        ValueError: If ``args.file`` is neither ``'con'`` nor ``'tar'``.
    """
    # ``subset`` replaces the original local ``file`` so the local no longer
    # shadows a builtin name.
    if args.file == 'con':
        subset = 'converted'
    elif args.file == 'tar':
        subset = 'target'
    else:
        raise ValueError("The file is incorrect")

    feat = FeatureExtractor(analyzer='world',
                            fs=22050,
                            fftl=1024,
                            shiftms=5,
                            minf0=args.minf0,
                            maxf0=args.maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=22050,
                              fftl=1024,
                              shiftms=5)

    # open list file
    with open('./list/' + subset + '.list', 'r') as fp:
        for line in fp:
            f = line.rstrip()
            h5f = os.path.join('./' + subset + '/h5f/', f + '.h5')

            if (not os.path.exists(h5f)):
                wavf = os.path.join('./' + subset + '/wav/', f + '.wav')
                fs, x = wavfile.read(wavf)
                # np.float (alias of the builtin float, i.e. float64) was
                # removed in NumPy 1.24.
                x = np.array(x, dtype=np.float64)
                x = low_cut_filter(x, fs, cutoff=70)
                # NOTE(review): ``fs`` read from the file is not compared
                # with the 22050 Hz the extractor is configured for.

                print("Extract acoustic features: " + wavf)

                # analyze F0, spc, and ap
                f0, spc, ap = feat.analyze(x)
                mcep = feat.mcep(dim=34, alpha=0.544)
                npow = feat.npow()
                codeap = feat.codeap()

                # save features into a hdf5 file
                h5 = HDF5(h5f, mode='w')
                h5.save(f0, ext='f0')
                # h5.save(spc, ext='spc')
                # h5.save(ap, ext='ap')
                h5.save(mcep, ext='mcep')
                h5.save(npow, ext='npow')
                h5.save(codeap, ext='codeap')
                h5.close()

                # analysis/synthesis using F0, mcep, and ap
                wav = synthesizer.synthesis(
                    f0,
                    mcep,
                    ap,
                    alpha=0.544,
                )
                # keep the samples inside the int16 range before the cast
                wav = np.clip(wav, -32768, 32767)
                anasynf = os.path.join('./' + subset + '/anasyn/', f + '.wav')
                wavfile.write(anasynf, fs, np.array(wav, dtype=np.int16))
            else:
                print("Acoustic features already exist: " + h5f)