def register(self, option_parser: OptionsParser):
    """Load the frame-extraction options from ``option_parser``.

    Every option is fetched with ``option_parser.get(name, default,
    type_function=...)`` and stored on ``self`` under the attribute names
    assigned below.

    Args:
        option_parser: Object queried for each option value.
    """
    # np.int / np.float / np.bool / np.str were plain aliases of the
    # builtins; they were deprecated in NumPy 1.20 and removed in 1.24, so
    # the builtins are passed directly (identical behaviour on old NumPy).
    self.samp_freq = option_parser.get("samp_freq", 16000, type_function=int)
    self.frame_shift = option_parser.get("frame_shift", 10.0, type_function=float)
    self.frame_length = option_parser.get("frame_length", 25.0, type_function=float)
    self.dither = option_parser.get("dither", 1.0, type_function=float)
    self.preemph_coeff = option_parser.get("preemph_coeff", 0.97, type_function=float)
    # Boolean defaults are real bools, like snip_edges below: a non-empty
    # string is truthy, so bool("False") would evaluate to True.
    self.remove_dc_offset = option_parser.get("remove_dc_offset", True, type_function=bool)
    self.window_type = option_parser.get("window_type", "povey", type_function=str)
    self.blackman_coeff = option_parser.get("blackman_coeff", 0.42, type_function=float)
    self.snip_edges = option_parser.get("snip_edges", True, type_function=bool)
    self.allow_downsample = option_parser.get("allow_downsample", False, type_function=bool)
def register(self, option_parser: OptionsParser):
    """Load the sliding-window normalization options from ``option_parser``.

    Reads ``cmn_window``, ``min_window``, ``max_warnings``,
    ``normalize_variance`` and ``center`` and stores each one on ``self``
    under the same name.

    Args:
        option_parser: Object queried for each option value.
    """
    # np.int / np.bool were aliases of the builtins, deprecated in NumPy
    # 1.20 and removed in 1.24; the builtins behave identically.
    self.cmn_window = option_parser.get("cmn_window", 600, type_function=int)
    self.min_window = option_parser.get("min_window", 100, type_function=int)
    self.max_warnings = option_parser.get("max_warnings", 5, type_function=int)
    # Real bool defaults: the string "False" is truthy, so bool("False")
    # would turn these flags on instead of off.
    self.normalize_variance = option_parser.get("normalize_variance", False, type_function=bool)
    self.center = option_parser.get("center", False, type_function=bool)
def register(self, option_parser: OptionsParser):
    """Load the filterbank options from ``option_parser``.

    First delegates to ``self.frame_opts.register`` and
    ``self.mel_opts.register`` with the same parser, then reads the
    options specific to this object and stores them on ``self``.

    Args:
        option_parser: Object queried for each option value.
    """
    self.frame_opts.register(option_parser)
    self.mel_opts.register(option_parser)
    # np.bool / np.float were aliases of the builtins, deprecated in NumPy
    # 1.20 and removed in 1.24; the builtins behave identically.
    self.use_energy = option_parser.get("use_energy", False, type_function=bool)
    self.energy_floor = option_parser.get("energy_floor", 0.0, type_function=float)
    self.raw_energy = option_parser.get("raw_energy", True, type_function=bool)
    self.use_log_fbank = option_parser.get("use_log_fbank", True, type_function=bool)
    self.use_power = option_parser.get("use_power", True, type_function=bool)
def register(self, option_parser: OptionsParser):
    """Load the energy-based VAD options from ``option_parser``.

    Reads ``vad_energy_threshold``, ``vad_energy_mean_scale``,
    ``vad_frames_context`` and ``vad_proportion_threshold`` and stores each
    one on ``self`` under the same name.

    Args:
        option_parser: Object queried for each option value.
    """
    # np.float / np.int were aliases of the builtins, deprecated in NumPy
    # 1.20 and removed in 1.24; the builtins behave identically.
    self.vad_energy_threshold = option_parser.get(
        "vad_energy_threshold", 5.0, type_function=float)
    self.vad_energy_mean_scale = option_parser.get(
        "vad_energy_mean_scale", 0.5, type_function=float)
    self.vad_frames_context = option_parser.get(
        "vad_frames_context", 0, type_function=int)
    self.vad_proportion_threshold = option_parser.get(
        "vad_proportion_threshold", 0.6, type_function=float)
def register(self, option_parser: OptionsParser):
    """Load the cepstral-feature options from ``option_parser``.

    First delegates to ``self.frame_opts.register`` and
    ``self.mel_opts.register`` with the same parser, then reads
    ``num_ceps``, ``use_energy``, ``energy_floor``, ``raw_energy`` and
    ``cepstral_lifter`` and stores each one on ``self``.

    Args:
        option_parser: Object queried for each option value.
    """
    self.frame_opts.register(option_parser)
    self.mel_opts.register(option_parser)
    # np.int / np.bool / np.float were aliases of the builtins, deprecated
    # in NumPy 1.20 and removed in 1.24; the builtins behave identically.
    self.num_ceps = option_parser.get("num_ceps", 13, type_function=int)
    # Boolean defaults are real bools rather than the strings "True":
    # string defaults only work by accident of truthiness.
    self.use_energy = option_parser.get("use_energy", True, type_function=bool)
    self.energy_floor = option_parser.get("energy_floor", 0.0, type_function=float)
    self.raw_energy = option_parser.get("raw_energy", True, type_function=bool)
    self.cepstral_lifter = option_parser.get("cepstral_lifter", 22.0, type_function=float)
def main(args):
    """Apply energy-based voice activity detection to stored features.

    Reads the features listed in ``<args.data_path>/feats.scp``, runs
    ``compute_vad_energy`` on each non-empty feature matrix and writes the
    decisions of every utterance that has at least one voiced frame to
    ``args.save_path``. Summary statistics are logged at the end.

    Args:
        args: Namespace providing ``data_path``, ``save_path``,
            ``config_file`` and ``config_section``.
    """
    data_path = args.data_path
    save_path = args.save_path
    config_file = args.config_file
    config_section = args.config_section

    option_parser = OptionsParser(conf_file=config_file, conf_section=config_section)
    vad_opts = VadEnergyOptions()
    vad_opts.register(option_parser)

    feats_scp = os.path.join(data_path, "feats.scp")
    feature_reader = FeatureReader(feats_scp)
    vad_writer = FeatureWriter(save_path, split_num=1)

    num_done = 0
    num_err = 0
    num_unvoiced = 0
    tot_length = 0.0
    tot_decision = 0.0

    for utt_id, utt_feat in feature_reader:
        if utt_feat.shape[0] == 0:
            logging.warning(f"Empty feature matrix for utterance {utt_id}")
            num_err += 1
            continue

        vad_result = compute_vad_energy(vad_opts, utt_feat)
        vad_sum = np.sum(vad_result)  # computed once and reused below

        # Totals cover every non-empty utterance, voiced or not.
        tot_decision += vad_sum
        tot_length += vad_result.shape[0]

        if vad_sum == 0.0:
            logging.warning(
                f"No frames were judged voiced for utterance {utt_id}")
            num_unvoiced += 1
            continue  # completely unvoiced utterances are not written

        num_done += 1
        vad_writer.write(utt_id, vad_result)

    # NOTE(review): the fbank entry point in this file calls flush() on its
    # FeatureWriter after writing; do the same here so both scripts treat
    # the writer alike. Skipped when nothing was written. Confirm against
    # FeatureWriter that flush() is the intended finalisation step.
    if num_done > 0:
        vad_writer.flush()

    # Separators added: the adjacent string pieces previously ran together
    # ("successfully0 had ...", "was0.5 over100.0 frames").
    logging.info(f"Applied energy based voice activity detection; "
                 f"processed {num_done} utterances successfully; "
                 f"{num_err} had empty features, and "
                 f"{num_unvoiced} were completely unvoiced.")
    if tot_length > 0:
        logging.info(f"Proportion of voiced frames was "
                     f"{float(tot_decision) / float(tot_length)} over "
                     f"{tot_length} frames")
    else:
        # Avoids ZeroDivisionError when there was no input or every
        # feature matrix was empty.
        logging.info("Proportion of voiced frames is undefined: "
                     "no frames were processed")
def main(args):
    """Compute "fbank" features for each waveform and write them out.

    Iterates the ``WavReader`` built from ``args.data_path``, passes each
    waveform and its sample rate to a ``FeatureExtractor`` configured from
    ``args.config_file`` / ``args.config_section``, prints the shape of
    each result and writes it through a ``FeatureWriter`` targeting
    ``args.save_path``. The writer is flushed once all utterances are done.

    Args:
        args: Namespace providing ``data_path``, ``save_path``,
            ``config_file`` and ``config_section``.
    """
    # Read all four settings up front, before anything is constructed.
    data_path, save_path, config_file, config_section = (
        args.data_path,
        args.save_path,
        args.config_file,
        args.config_section,
    )

    wav_reader = WavReader(data_path)
    feature_extractor = FeatureExtractor(
        feature_type="fbank",
        option_parser=OptionsParser(conf_file=config_file,
                                    conf_section=config_section),
    )
    feature_writer = FeatureWriter(save_path, split_num=1)

    for utt_id, wav_and_rate in wav_reader:
        wav, sample_rate = wav_and_rate
        result = feature_extractor.compute_features(wav, sample_rate)
        print(f"utt_id = {utt_id}, result.shape = {result.shape}")
        feature_writer.write(utt_id, result)

    feature_writer.flush()
def register(self, option_parser: OptionsParser):
    """Load the energy options from ``option_parser``.

    First delegates to ``self.frame_opts.register`` with the same parser,
    then reads ``energy_floor`` and ``raw_energy`` and stores them on
    ``self``.

    Args:
        option_parser: Object queried for each option value.
    """
    self.frame_opts.register(option_parser)
    # np.float / np.bool were aliases of the builtins, deprecated in NumPy
    # 1.20 and removed in 1.24; the builtins behave identically. The
    # converter is still passed positionally, as before.
    self.energy_floor = option_parser.get("energy_floor", 0.0, float)
    # Real bool default instead of the string "True".
    self.raw_energy = option_parser.get("raw_energy", True, bool)
def register(self, option_parser: OptionsParser):
    """Load the mel-filterbank options from ``option_parser``.

    Reads ``num_mel_bins`` (stored as ``self.num_bins``), ``low_freq`` and
    ``high_freq``.

    Args:
        option_parser: Object queried for each option value.
    """
    # np.int / np.float were aliases of the builtins, deprecated in NumPy
    # 1.20 and removed in 1.24; the builtins behave identically.
    self.num_bins = option_parser.get("num_mel_bins", 25, type_function=int)
    self.low_freq = option_parser.get("low_freq", 20.0, type_function=float)
    self.high_freq = option_parser.get("high_freq", 7600.0, type_function=float)
def register(self, option_parser: OptionsParser):
    """Load the delta-feature options from ``option_parser``.

    Reads ``delta_order`` (stored as ``self.order``) and ``delta_window``
    (stored as ``self.window``).

    Args:
        option_parser: Object queried for each option value.
    """
    # np.int was an alias of the builtin int, deprecated in NumPy 1.20 and
    # removed in 1.24; the builtin behaves identically.
    self.order = option_parser.get("delta_order", 2, type_function=int)
    self.window = option_parser.get("delta_window", 2, type_function=int)