class FeatureExtractorWorker:
    """Worker that converts recorded audio into acoustic features.

    Waveforms are taken from ``recorded_queue``, analyzed with a
    ``FeatureExtractor`` configured for the ``'world'`` analyzer, and the
    resulting feature object is put on ``feature_queue``.
    """

    def __init__(self, recorded_queue: Queue, feature_queue: Queue,
                 feature_extractor_config: configs.FeatureExtractorConfig):
        """Store the queues and build the extractor from the config.

        Args:
            recorded_queue: Queue that supplies recorded waveforms.
            feature_queue: Queue that receives the extracted features.
            feature_extractor_config: Supplies ``fs``, ``fftl``, ``shiftms``,
                ``minf0`` and ``maxf0`` for the extractor, plus ``mcep_dim``
                and ``mcep_alpha`` used later by :meth:`start`.
        """
        self._recorded_queue = recorded_queue
        self._feature_queue = feature_queue
        self._feat = FeatureExtractor(
            analyzer='world',
            fs=feature_extractor_config.fs,
            fftl=feature_extractor_config.fftl,
            shiftms=feature_extractor_config.shiftms,
            minf0=feature_extractor_config.minf0,
            maxf0=feature_extractor_config.maxf0)
        self._feature_extractor_config = feature_extractor_config

    def start(self):
        """Run the extraction loop; this method never returns."""
        while True:
            # Synchronous processing: wait for the next recording.
            recorded_wav = self._recorded_queue.get()

            # ``numpy.float`` was only an alias of the builtin ``float`` and
            # was removed in NumPy 1.24; ``float64`` is the same dtype.
            recorded_wav = recorded_wav.astype(numpy.float64)

            f0, spc, ap = self._feat.analyze(recorded_wav)
            # ``mcep`` takes no waveform here, so it presumably reuses the
            # result of the ``analyze`` call above -- keep this ordering.
            mcep = self._feat.mcep(
                dim=self._feature_extractor_config.mcep_dim,
                alpha=self._feature_extractor_config.mcep_alpha)

            feature = create_feature_extractor_result(f0, spc, ap, mcep)
            self._feature_queue.put(feature)
def __init__(self, recorded_queue: Queue, feature_queue: Queue,
             feature_extractor_config: configs.FeatureExtractorConfig):
    """Keep references to both queues and build the feature extractor.

    Args:
        recorded_queue: Queue stored as ``self._recorded_queue``.
        feature_queue: Queue stored as ``self._feature_queue``.
        feature_extractor_config: Provides ``fs``, ``fftl``, ``shiftms``,
            ``minf0`` and ``maxf0`` for the ``FeatureExtractor``; the config
            object itself is also kept on the instance.
    """
    config = feature_extractor_config

    # Collect the extractor options first so the constructor call stays short.
    extractor_options = dict(
        analyzer='world',
        fs=config.fs,
        fftl=config.fftl,
        shiftms=config.shiftms,
        minf0=config.minf0,
        maxf0=config.maxf0,
    )

    self._recorded_queue = recorded_queue
    self._feature_queue = feature_queue
    self._feat = FeatureExtractor(**extractor_options)
    self._feature_extractor_config = config
def world_feature_extract(wav_list, args):
    """Extract WORLD feature vectors and save them to HDF5 files.

    For every wav file the feature matrix ``[uv, cont_f0_lpf, mcep, codeap]``
    (concatenated along axis 1) is written to ``/feat_org`` in
    ``<args.hdf5dir>/<basename with ".wav" replaced by ".h5">``.  When
    ``args.save_extended`` is true a time-extended float32 copy is written to
    ``/feat`` as well.  When ``args.highpass_cutoff`` is non-zero the filtered
    waveform is written as int16 into ``args.wavdir``.

    Args:
        wav_list: Sequence of wav file paths.
        args: Namespace providing ``fs``, ``shiftms``, ``minf0``, ``maxf0``,
            ``fftl``, ``mcep_dim``, ``mcep_alpha``, ``highpass_cutoff``,
            ``hdf5dir``, ``wavdir`` and ``save_extended``.

    Exits the process with status 1 if a file's sampling frequency differs
    from ``args.fs``.
    """
    # define feature extractor
    feature_extractor = FeatureExtractor(
        analyzer="world",
        fs=args.fs,
        shiftms=args.shiftms,
        minf0=args.minf0,
        maxf0=args.maxf0,
        fftl=args.fftl)

    total = len(wav_list)
    for i, wav_name in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)" % (wav_name, i + 1, total))

        fs, x = wavfile.read(wav_name)

        # check sampling frequency first so a mismatch aborts before any
        # filtering work is spent on the file
        if fs != args.fs:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # apply low cut filter
        x = np.array(x, dtype=np.float32)
        if args.highpass_cutoff != 0:
            x = low_cut_filter(x, fs, cutoff=args.highpass_cutoff)

        # extract features
        f0, _, _ = feature_extractor.analyze(x)
        uv, cont_f0 = convert_continuos_f0(f0)
        # the F0 contour has one value per frame, i.e. 1000 / shiftms Hz
        cont_f0_lpf = low_pass_filter(
            cont_f0, int(1.0 / (args.shiftms * 0.001)), cutoff=20)
        codeap = feature_extractor.codeap()
        mcep = feature_extractor.mcep(dim=args.mcep_dim, alpha=args.mcep_alpha)

        # concatenate
        cont_f0_lpf = np.expand_dims(cont_f0_lpf, axis=-1)
        uv = np.expand_dims(uv, axis=-1)
        feats = np.concatenate([uv, cont_f0_lpf, mcep, codeap], axis=1)

        # save to hdf5
        hdf5name = args.hdf5dir + "/" + os.path.basename(wav_name).replace(
            ".wav", ".h5")
        write_hdf5(hdf5name, "/feat_org", feats)
        if args.save_extended:
            # extend time resolution
            upsampling_factor = int(args.shiftms * fs * 0.001)
            feats_extended = extend_time(feats, upsampling_factor)
            feats_extended = feats_extended.astype(np.float32)
            write_hdf5(hdf5name, "/feat", feats_extended)

        # overwrite wav file
        if args.highpass_cutoff != 0:
            # Filtering can overshoot the int16 range; saturate instead of
            # letting the cast wrap around into loud clicks.
            # NOTE(review): the int16 cast presumes 16-bit PCM input --
            # confirm before feeding float or 32-bit wav files.
            x_clipped = np.clip(x, -32768, 32767)
            wavfile.write(args.wavdir + "/" + os.path.basename(wav_name),
                          fs, np.int16(x_clipped))
def noise_shaping(wav_list, args):
    """Apply noise shaping to a list of utterances.

    Each waveform is filtered with ``Synthesizer.synthesis_diff`` using a
    time-invariant filter built from the mean mel-cepstrum stored in
    ``args.stats`` (dataset ``/<args.feature_type>/mean``), then low-cut
    filtered at 70 Hz, clipped to the int16 value range and written out.

    Args:
        wav_list: Sequence of utterance ids.  Input and output paths are
            obtained by substituting the literal text ``"feat_id"`` in
            ``args.outdir`` and ``args.writedir`` respectively.
        args: Namespace providing ``fs``, ``shiftms``, ``fftl``, ``stats``,
            ``feature_type``, ``mcep_dim_start``, ``mcep_dim_end``, ``mag``,
            ``inv``, ``mcep_alpha``, ``outdir`` and ``writedir``.

    Exits the process with status 1 if a file's sampling frequency differs
    from ``args.fs``.
    """
    # define feature extractor
    feature_extractor = FeatureExtractor(
        analyzer="world",
        fs=args.fs,
        shiftms=args.shiftms,
        fftl=args.fftl)

    # define synthesizer
    synthesizer = Synthesizer(
        fs=args.fs,
        shiftms=args.shiftms,
        fftl=args.fftl)

    # The per-frame filter does not depend on the utterance, so it is built
    # once on first use instead of being re-read from disk for every file.
    mlsa_base = None

    total = len(wav_list)
    for i, feat_id in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)" % (feat_id, i + 1, total))

        # load wavfile
        wav_filename = args.outdir.replace("feat_id", feat_id)
        fs, x = wavfile.read(wav_filename)
        wav_type = x.dtype
        x = np.array(x, dtype=np.float64)

        # check sampling frequency
        if fs != args.fs:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # extract features (only to get the number of frames)
        f0, _, _ = feature_extractor.analyze(x)
        num_frames = f0.shape[0]

        # load average mcep
        if mlsa_base is None:
            mlsa_base = read_hdf5(
                args.stats, "/%s/mean" % args.feature_type)
            mlsa_base = (mlsa_base[args.mcep_dim_start:args.mcep_dim_end]
                         * args.mag)
            mlsa_base[0] = 0.0  # zero the first coefficient of the slice
            if args.inv:
                # inverse filtering: negate every remaining coefficient
                mlsa_base[1:] = -1.0 * mlsa_base[1:]
        mlsa_coef = np.tile(mlsa_base, [num_frames, 1])

        # synthesis and write
        x_ns = synthesizer.synthesis_diff(x, mlsa_coef, alpha=args.mcep_alpha)
        x_ns = low_cut_filter(x_ns, args.fs, cutoff=70)
        write_name = args.writedir.replace("feat_id", feat_id)

        # NOTE: the output directory is not created here; it must already
        # exist when this function is called.
        wav = np.clip(x_ns, -32768, 32767)
        if wav_type == np.int16:
            wavfile.write(write_name, args.fs, np.int16(wav))
        else:
            # NOTE(review): non-int16 input is written back as the clipped
            # float64 signal, not in its original dtype -- confirm intended.
            wavfile.write(write_name, args.fs, wav)
def world_feature_extract(wav_list, idx, f0_dict, npow_dict):
    """Extract F0 and normalized power for a batch of wav files.

    Args:
        wav_list: Iterable of wav file paths; trailing whitespace (such as
            the newline left by reading a list file) is stripped.
        idx: Key under which this batch's results are stored.
        f0_dict: Mapping that receives ``f0_dict[idx] = [f0, ...]``.
        npow_dict: Mapping that receives ``npow_dict[idx] = [npow, ...]``.

    The results are returned through the two mappings; the function itself
    returns ``None``.
    """
    f0s = []
    npows = []
    for f in wav_list:
        # open waveform
        wavf = f.rstrip()
        fs, x = wavfile.read(wavf)
        # ``np.float`` was only an alias of the builtin ``float`` and was
        # removed in NumPy 1.24; ``float64`` is the same dtype.
        x = np.array(x, dtype=np.float64)
        logging.info("Extract: " + wavf)

        # construct FeatureExtractor class (per file: it takes the file's fs)
        feat = FeatureExtractor(analyzer='world', fs=fs)

        # f0 and npow extraction
        f0, _, _ = feat.analyze(x)
        npow = feat.npow()

        f0s.append(f0)
        npows.append(npow)

    f0_dict[idx] = f0s
    npow_dict[idx] = npows
def world_feature_extract(queue, wav_list, args):
    """Extract WORLD feature vectors and save them to feature files.

    Parameters
    ----------
    queue : multiprocessing.Queue()
        queue that receives the string ``'Finish'`` once every file in
        ``wav_list`` has been handled
    wav_list : list
        list of the wav files
    args :
        feature extract arguments (``fs``, ``shiftms``, ``minf0``, ``maxf0``,
        ``fftl``, ``mcep_dim``, ``mcep_alpha``, ``highpass_cutoff``,
        ``feature_dir``, ``feature_format``, ``overwrite``, ``pow_th`` and
        the ``save_*`` switches)

    Notes
    -----
    The feature matrix ``[uv, cont_f0_lpf, mcep, codeap]`` is written to the
    ``/world`` dataset.  Files that already contain ``/world`` are skipped
    unless ``args.overwrite`` is set.  The process exits with status 1 when a
    file's sampling frequency differs from ``args.fs``; in that case nothing
    is put on ``queue``.
    """
    # define feature extractor
    feature_extractor = FeatureExtractor(
        analyzer="world",
        fs=args.fs,
        shiftms=args.shiftms,
        minf0=args.minf0,
        maxf0=args.maxf0,
        fftl=args.fftl)

    # extraction
    total = len(wav_list)
    for i, wav_name in enumerate(wav_list):
        # Derive the output name.  Without a feature directory every "wav"
        # substring of the path (directory parts included) is replaced.
        if args.feature_dir is None:
            feat_name = wav_name.replace("wav", args.feature_format)
        else:
            feat_name = rootdir_replace(wav_name,
                                        extname=args.feature_format,
                                        newdir=args.feature_dir)
        # NOTE: the directory of feat_name is not created here.

        # check exists
        if check_hdf5(feat_name, "/world"):
            if args.overwrite:
                logging.info("overwrite %s (%d/%d)" % (wav_name, i + 1, total))
            else:
                logging.info("skip %s (%d/%d)" % (wav_name, i + 1, total))
                continue
        else:
            logging.info("now processing %s (%d/%d)" % (wav_name, i + 1, total))

        fs, x = wavfile.read(wav_name)

        # check sampling frequency first so a mismatch aborts before any
        # filtering work is spent on the file
        if fs != args.fs:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # apply low cut filter
        x = np.array(x, dtype=np.float32)
        if args.highpass_cutoff != 0:
            x = low_cut_filter(x, fs, cutoff=args.highpass_cutoff)

        # extract features
        f0, spc, ap = feature_extractor.analyze(x)
        codeap = feature_extractor.codeap()
        mcep = feature_extractor.mcep(dim=args.mcep_dim, alpha=args.mcep_alpha)
        npow = feature_extractor.npow()
        uv, cont_f0 = convert_continuos_f0(f0)

        # the F0 contour has one value per frame, i.e. 1000 / shiftms Hz
        lpf_fs = int(1.0 / (args.shiftms * 0.001))
        cont_f0_lpf = low_pass_filter(cont_f0, lpf_fs, cutoff=20)

        # Retry with a doubling cutoff (70, 140, ...) until the smoothed
        # contour is strictly positive everywhere.
        next_cutoff = 70
        while not (cont_f0_lpf > 0).all():
            logging.info("%s low-pass-filtered [%dHz]"
                         % (feat_name, next_cutoff))
            cont_f0_lpf = low_pass_filter(cont_f0, lpf_fs, cutoff=next_cutoff)
            next_cutoff *= 2

        # concatenate
        cont_f0_lpf = np.expand_dims(cont_f0_lpf, axis=-1)
        uv = np.expand_dims(uv, axis=-1)
        feats = np.concatenate([uv, cont_f0_lpf, mcep, codeap], axis=1)

        # save feature
        write_hdf5(feat_name, "/world", feats)
        if args.save_f0:
            write_hdf5(feat_name, "/f0", f0)
        if args.save_ap:
            write_hdf5(feat_name, "/ap", ap)
        if args.save_spc:
            write_hdf5(feat_name, "/spc", spc)
        if args.save_npow:
            write_hdf5(feat_name, "/npow", npow)
        if args.save_extended:
            # extend time resolution
            upsampling_factor = int(args.shiftms * fs * 0.001)
            feats_extended = extend_time(feats, upsampling_factor)
            feats_extended = feats_extended.astype(np.float32)
            write_hdf5(feat_name, "/world_extend", feats_extended)
        if args.save_vad:
            _, vad_idx = extfrm(mcep, npow, power_threshold=args.pow_th)
            write_hdf5(feat_name, "/vad_idx", vad_idx)

    # tell the consumer of the queue that this worker is done
    queue.put('Finish')
def main():
    """Command-line entry point: apply noise shaping to wav files.

    Reads the wav list from ``--waveforms`` (a directory searched for
    ``*.wav`` or a text file of paths), splits it into ``--n_jobs`` chunks and
    processes each chunk in its own process.  Every output is written to
    ``--writedir`` under the input file's basename.
    """
    parser = argparse.ArgumentParser(
        description="making feature file argsurations.")

    parser.add_argument("--waveforms", default=None,
                        help="directory or list of filename of input wavfile")
    parser.add_argument("--stats", default=None,
                        help="filename of hdf5 format")
    parser.add_argument("--writedir", default=None,
                        help="directory to save preprocessed wav file")
    parser.add_argument("--fs", default=FS, type=int,
                        help="Sampling frequency")
    parser.add_argument("--shiftms", default=SHIFTMS, type=int,
                        help="Frame shift in msec")
    parser.add_argument("--fftl", default=FFTL, type=int,
                        help="FFT length")
    parser.add_argument("--mcep_dim_start", default=MCEP_DIM_START, type=int,
                        help="Start index of mel cepstrum")
    parser.add_argument("--mcep_dim_end", default=MCEP_DIM_END, type=int,
                        help="End index of mel cepstrum")
    parser.add_argument("--mcep_alpha", default=MCEP_ALPHA, type=float,
                        help="Alpha of mel cepstrum")
    parser.add_argument("--mag", default=MAG, type=float,
                        help="magnification of noise shaping")
    parser.add_argument("--verbose", default=1, type=int,
                        help="log message level")
    parser.add_argument('--n_jobs', default=1, type=int,
                        help="number of parallel jobs")
    parser.add_argument('--inv', default=False, type=strtobool,
                        help="if True, inverse filtering will be performed")
    args = parser.parse_args()

    # read list
    if os.path.isdir(args.waveforms):
        file_list = sorted(find_files(args.waveforms, "*.wav"))
    else:
        file_list = read_txt(args.waveforms)

    # define feature extractor
    feature_extractor = FeatureExtractor(
        analyzer="world",
        fs=args.fs,
        shiftms=args.shiftms,
        fftl=args.fftl)

    # define synthesizer
    synthesizer = Synthesizer(
        fs=args.fs,
        shiftms=args.shiftms,
        fftl=args.fftl)

    # check directory existence
    if not os.path.exists(args.writedir):
        os.makedirs(args.writedir)

    def noise_shaping(wav_list):
        """Noise-shape every file in ``wav_list`` (runs in a child process)."""
        # The per-frame filter does not depend on the utterance, so it is
        # built once on first use instead of being re-read for every file.
        mlsa_base = None

        for wav_name in wav_list:
            # load wavfile
            fs, x = wavfile.read(wav_name)
            wav_type = x.dtype
            x = np.array(x, dtype=np.float64)

            # check sampling frequency
            if fs != args.fs:
                print("ERROR: sampling frequency is not matched.")
                sys.exit(1)

            # extract features (only to get the number of frames)
            f0, _, _ = feature_extractor.analyze(x)
            num_frames = f0.shape[0]

            # load average mcep
            if mlsa_base is None:
                mlsa_base = read_hdf5(args.stats, "/mean")
                mlsa_base = (mlsa_base[args.mcep_dim_start:args.mcep_dim_end]
                             * args.mag)
                mlsa_base[0] = 0.0  # zero the first coefficient of the slice
                if args.inv:
                    # inverse filtering: negate every remaining coefficient
                    mlsa_base[1:] = -1.0 * mlsa_base[1:]
            mlsa_coef = np.tile(mlsa_base, [num_frames, 1])

            # synthesis and write
            x_ns = synthesizer.synthesis_diff(
                x, mlsa_coef, alpha=args.mcep_alpha)
            x_ns = low_cut_filter(x_ns, args.fs, cutoff=70)

            # The output path is needed by both branches below; it used to be
            # assigned only in the int16 branch, so any other input dtype
            # failed with UnboundLocalError.
            write_name = args.writedir + "/" + os.path.basename(wav_name)
            if wav_type == np.int16:
                # saturate instead of wrapping around on overshoot
                x_ns = np.clip(x_ns, -32768, 32767)
                wavfile.write(write_name, args.fs, np.int16(x_ns))
            else:
                wavfile.write(write_name, args.fs, x_ns)

    # divide list
    file_lists = np.array_split(file_list, args.n_jobs)
    file_lists = [f_list.tolist() for f_list in file_lists]

    # multi processing
    # NOTE(review): the target is a nested function, which cannot be pickled;
    # this presumably relies on the "fork" start method -- confirm on
    # platforms that default to "spawn".
    processes = []
    for f in file_lists:
        p = mp.Process(target=noise_shaping, args=(f, ))
        p.start()
        processes.append(p)

    # wait for all process
    for p in processes:
        p.join()
def main():
    """Command-line entry point: extract WORLD features from wav files.

    Reads the wav list from ``--waveforms`` (a directory searched for
    ``*.wav`` or a text file of paths), splits it into ``--n_jobs`` chunks and
    processes each chunk in its own process.  For every file the feature
    matrix ``[uv, cont_f0_lpf, mcep, codeap]`` is stored in
    ``<hdf5dir>/<basename with ".wav" replaced by ".h5">`` as ``/feat_org``,
    and its time-extended float32 copy as ``/feat``.  When
    ``--highpass_cutoff`` is non-zero the filtered waveform is written as
    int16 into ``--wavdir``.
    """
    parser = argparse.ArgumentParser(
        description="making feature file argsurations.")

    parser.add_argument("--waveforms", default=None,
                        help="directory or list of filename of input wavfile")
    parser.add_argument("--hdf5dir", default=None,
                        help="directory to save hdf5")
    parser.add_argument("--wavdir", default=None,
                        help="directory to save of preprocessed wav file")
    parser.add_argument("--fs", default=FS, type=int,
                        help="Sampling frequency")
    parser.add_argument("--shiftms", default=SHIFTMS, type=int,
                        help="Frame shift in msec")
    parser.add_argument("--minf0", default=MINF0, type=int,
                        help="minimum f0")
    parser.add_argument("--maxf0", default=MAXF0, type=int,
                        help="maximum f0")
    parser.add_argument("--mcep_dim", default=MCEP_DIM, type=int,
                        help="Dimension of mel cepstrum")
    parser.add_argument("--mcep_alpha", default=MCEP_ALPHA, type=float,
                        help="Alpha of mel cepstrum")
    parser.add_argument("--fftl", default=FFTL, type=int,
                        help="FFT length")
    # The value is passed to low_cut_filter, i.e. a high-pass filter; the
    # help text used to say "lowpass".
    parser.add_argument("--highpass_cutoff", default=HIGHPASS_CUTOFF, type=int,
                        help="Cut off frequency in highpass filter")
    parser.add_argument("--n_jobs", default=10, type=int,
                        help="number of parallel jobs")
    parser.add_argument("--verbose", default=1, type=int,
                        help="log message level")
    args = parser.parse_args()

    # read list
    if os.path.isdir(args.waveforms):
        file_list = sorted(find_files(args.waveforms, "*.wav"))
    else:
        file_list = read_txt(args.waveforms)

    # define feature extractor
    feature_extractor = FeatureExtractor(
        analyzer="world",
        fs=args.fs,
        shiftms=args.shiftms,
        minf0=args.minf0,
        maxf0=args.maxf0,
        fftl=args.fftl)

    # check directory existence
    if not os.path.exists(args.wavdir):
        os.makedirs(args.wavdir)
    if not os.path.exists(args.hdf5dir):
        os.makedirs(args.hdf5dir)

    def feature_extract(wav_list):
        """Extract and store features for ``wav_list`` (child process)."""
        for wav_name in wav_list:
            fs, x = wavfile.read(wav_name)

            # check sampling frequency first so a mismatch aborts before any
            # filtering work is spent on the file
            if fs != args.fs:
                print("ERROR: sampling frequency is not matched.")
                sys.exit(1)

            # apply low cut filter
            x = np.array(x, dtype=np.float32)
            if args.highpass_cutoff != 0:
                x = low_cut_filter(x, fs, cutoff=args.highpass_cutoff)

            # extract features
            f0, _, _ = feature_extractor.analyze(x)
            uv, cont_f0 = convert_continuos_f0(f0)
            # the F0 contour has one value per frame, i.e. 1000 / shiftms Hz
            cont_f0_lpf = low_pass_filter(
                cont_f0, int(1.0 / (args.shiftms * 0.001)), cutoff=20)
            codeap = feature_extractor.codeap()
            mcep = feature_extractor.mcep(
                dim=args.mcep_dim, alpha=args.mcep_alpha)

            # concatenate
            cont_f0_lpf = np.expand_dims(cont_f0_lpf, axis=-1)
            uv = np.expand_dims(uv, axis=-1)
            feats = np.concatenate([uv, cont_f0_lpf, mcep, codeap], axis=1)

            # extend time resolution
            upsampling_factor = int(args.shiftms * fs * 0.001)
            feats_extended = extend_time(feats, upsampling_factor)

            # save to hdf5
            feats_extended = feats_extended.astype(np.float32)
            hdf5name = args.hdf5dir + "/" + os.path.basename(wav_name).replace(
                ".wav", ".h5")
            write_hdf5(hdf5name, "/feat_org", feats)
            write_hdf5(hdf5name, "/feat", feats_extended)

            # overwrite wav file
            if args.highpass_cutoff != 0:
                # Filtering can overshoot the int16 range; saturate instead
                # of letting the cast wrap around into loud clicks.
                # NOTE(review): the int16 cast presumes 16-bit PCM input --
                # confirm before feeding float or 32-bit wav files.
                x_clipped = np.clip(x, -32768, 32767)
                wavfile.write(args.wavdir + "/" + os.path.basename(wav_name),
                              fs, np.int16(x_clipped))

    # divide list
    file_lists = np.array_split(file_list, args.n_jobs)
    file_lists = [f_list.tolist() for f_list in file_lists]

    # multi processing
    # NOTE(review): the target is a nested function, which cannot be pickled;
    # this presumably relies on the "fork" start method -- confirm on
    # platforms that default to "spawn".
    processes = []
    for f in file_lists:
        p = mp.Process(target=feature_extract, args=(f, ))
        p.start()
        processes.append(p)

    # wait for all process
    for p in processes:
        p.join()