Example #1
0
 def register(self, option_parser: OptionsParser):
     """Populate the frame-extraction options from ``option_parser``.

     Every attribute is fetched with ``option_parser.get(name, default,
     type_function=...)`` using the option names and defaults listed below.

     Args:
         option_parser: Object whose ``get`` method supplies the values.
     """
     # Builtin types serve as converters: the ``np.int``, ``np.float``,
     # ``np.bool`` and ``np.str`` aliases were removed in NumPy 1.24.
     # Boolean defaults are real bools, because a plain ``bool("False")``
     # conversion of a string default would evaluate to True.
     self.samp_freq = option_parser.get("samp_freq", 16000, type_function=int)
     self.frame_shift = option_parser.get("frame_shift",
                                          10.0,
                                          type_function=float)
     self.frame_length = option_parser.get("frame_length",
                                           25.0,
                                           type_function=float)
     self.dither = option_parser.get("dither", 1.0, type_function=float)
     self.preemph_coeff = option_parser.get("preemph_coeff",
                                            0.97,
                                            type_function=float)
     self.remove_dc_offset = option_parser.get("remove_dc_offset",
                                               True,
                                               type_function=bool)
     self.window_type = option_parser.get("window_type",
                                          "povey",
                                          type_function=str)
     self.blackman_coeff = option_parser.get("blackman_coeff",
                                             0.42,
                                             type_function=float)
     self.snip_edges = option_parser.get("snip_edges",
                                         True,
                                         type_function=bool)
     self.allow_downsample = option_parser.get("allow_downsample",
                                               False,
                                               type_function=bool)
Example #2
0
 def register(self, option_parser: OptionsParser):
     """Populate the ``cmn_window``-family options from ``option_parser``.

     Sets ``cmn_window``, ``min_window``, ``max_warnings``,
     ``normalize_variance`` and ``center`` via ``option_parser.get``.

     Args:
         option_parser: Object whose ``get`` method supplies the values.
     """
     # Builtin ``int``/``bool`` replace the ``np.int``/``np.bool`` aliases
     # removed in NumPy 1.24. The boolean defaults are real bools because a
     # plain ``bool("False")`` conversion would evaluate to True.
     self.cmn_window = option_parser.get("cmn_window", 600, type_function=int)
     self.min_window = option_parser.get("min_window", 100, type_function=int)
     self.max_warnings = option_parser.get("max_warnings", 5, type_function=int)
     self.normalize_variance = option_parser.get("normalize_variance", False, type_function=bool)
     self.center = option_parser.get("center", False, type_function=bool)
Example #3
0
    def register(self, option_parser: OptionsParser):
        """Populate this object's options from ``option_parser``.

        First delegates to ``self.frame_opts.register`` and
        ``self.mel_opts.register``, then reads the energy/log/power flags
        defined on this object via ``option_parser.get``.

        Args:
            option_parser: Object whose ``get`` method supplies the values.
        """
        self.frame_opts.register(option_parser)
        self.mel_opts.register(option_parser)

        # Builtin ``bool``/``float`` replace the ``np.bool``/``np.float``
        # aliases, which were removed in NumPy 1.24.
        self.use_energy = option_parser.get("use_energy", False, type_function=bool)
        self.energy_floor = option_parser.get("energy_floor", 0.0, type_function=float)
        self.raw_energy = option_parser.get("raw_energy", True, type_function=bool)
        self.use_log_fbank = option_parser.get("use_log_fbank", True, type_function=bool)
        self.use_power = option_parser.get("use_power", True, type_function=bool)
Example #4
0
 def register(self, option_parser: OptionsParser):
     """Populate the ``vad_*`` options from ``option_parser``.

     Sets ``vad_energy_threshold``, ``vad_energy_mean_scale``,
     ``vad_frames_context`` and ``vad_proportion_threshold`` via
     ``option_parser.get``.

     Args:
         option_parser: Object whose ``get`` method supplies the values.
     """
     # Builtin ``float``/``int`` replace the ``np.float``/``np.int`` aliases,
     # which were removed in NumPy 1.24.
     self.vad_energy_threshold = option_parser.get("vad_energy_threshold",
                                                   5.0,
                                                   type_function=float)
     self.vad_energy_mean_scale = option_parser.get("vad_energy_mean_scale",
                                                    0.5,
                                                    type_function=float)
     self.vad_frames_context = option_parser.get("vad_frames_context",
                                                 0,
                                                 type_function=int)
     self.vad_proportion_threshold = option_parser.get(
         "vad_proportion_threshold", 0.6, type_function=float)
Example #5
0
    def register(self, option_parser: OptionsParser):
        """Populate this object's options from ``option_parser``.

        First delegates to ``self.frame_opts.register`` and
        ``self.mel_opts.register``, then reads ``num_ceps``, ``use_energy``,
        ``energy_floor``, ``raw_energy`` and ``cepstral_lifter`` via
        ``option_parser.get``.

        Args:
            option_parser: Object whose ``get`` method supplies the values.
        """
        self.frame_opts.register(option_parser)
        self.mel_opts.register(option_parser)

        # Builtin types replace the ``np.int``/``np.float``/``np.bool``
        # aliases removed in NumPy 1.24. Boolean defaults are real bools
        # rather than strings, so the default does not rely on how a string
        # is converted to bool.
        self.num_ceps = option_parser.get("num_ceps", 13, type_function=int)
        self.use_energy = option_parser.get("use_energy",
                                            True,
                                            type_function=bool)
        self.energy_floor = option_parser.get("energy_floor",
                                              0.0,
                                              type_function=float)
        self.raw_energy = option_parser.get("raw_energy",
                                            True,
                                            type_function=bool)
        self.cepstral_lifter = option_parser.get("cepstral_lifter",
                                                 22.0,
                                                 type_function=float)
Example #6
0
def main(args):
    """Run energy-based voice activity detection over a feature set.

    Reads ``feats.scp`` from ``args.data_path``, computes a VAD decision
    vector per utterance with ``compute_vad_energy`` and writes the vectors
    that contain at least one voiced frame to ``args.save_path``. Summary
    statistics are logged at the end.

    Args:
        args: Namespace providing ``data_path``, ``save_path``,
            ``config_file`` and ``config_section``.
    """
    data_path = args.data_path
    save_path = args.save_path
    config_file = args.config_file
    config_section = args.config_section

    option_parser = OptionsParser(conf_file=config_file,
                                  conf_section=config_section)
    vad_opts = VadEnergyOptions()
    vad_opts.register(option_parser)

    feats_scp = os.path.join(data_path, "feats.scp")
    feature_reader = FeatureReader(feats_scp)
    vad_writer = FeatureWriter(save_path, split_num=1)

    num_done = 0
    num_err = 0
    num_unvoiced = 0
    tot_length = 0.0
    tot_decision = 0.0
    for utt_id, utt_feat in feature_reader:
        if utt_feat.shape[0] == 0:
            logging.warning(f"Empty feature matrix for utterance {utt_id}")
            num_err += 1
            continue

        vad_result = compute_vad_energy(vad_opts, utt_feat)
        vad_sum = np.sum(vad_result)

        # Unvoiced utterances still count towards the global statistics.
        tot_decision += vad_sum
        tot_length += vad_result.shape[0]

        if vad_sum == 0.0:
            logging.warning(
                f"No frames were judged voiced for utterance {utt_id}")
            num_unvoiced += 1
            continue

        num_done += 1
        vad_writer.write(utt_id, vad_result)

    # NOTE(review): FeatureWriter exposes flush(), and the feature-extraction
    # entry point calls it after its write loop; do the same here so that
    # any buffered output is not lost. Confirm flush() is safe to call when
    # nothing was written.
    vad_writer.flush()

    logging.info(f"Applied energy based voice activity detection; "
                 f"processed {num_done} utterances successfully; "
                 f"{num_err} had empty features, and "
                 f"{num_unvoiced} were completely unvoiced.")

    # Guard against an empty (or all-empty) input set, which would otherwise
    # raise ZeroDivisionError.
    if tot_length > 0:
        proportion = float(tot_decision) / float(tot_length)
    else:
        proportion = 0.0
    logging.info(f"Proportion of voiced frames was "
                 f"{proportion} over "
                 f"{tot_length} frames")
Example #7
0
def main(args):
    """Extract "fbank" features for every waveform and write them out.

    Iterates the ``WavReader`` built from ``args.data_path``, runs each
    waveform through a ``FeatureExtractor`` configured from
    ``args.config_file`` / ``args.config_section``, prints the resulting
    shape and writes the features to ``args.save_path``.

    Args:
        args: Namespace providing ``data_path``, ``save_path``,
            ``config_file`` and ``config_section``.
    """
    # Read all arguments up front so a missing attribute fails before any
    # reader or writer is constructed.
    data_path, save_path, config_file, config_section = (
        args.data_path,
        args.save_path,
        args.config_file,
        args.config_section,
    )

    reader = WavReader(data_path)
    parser = OptionsParser(conf_file=config_file, conf_section=config_section)
    extractor = FeatureExtractor(feature_type="fbank", option_parser=parser)
    writer = FeatureWriter(save_path, split_num=1)

    for key, (samples, rate) in reader:
        feats = extractor.compute_features(samples, rate)
        print(f"utt_id = {key}, result.shape = {feats.shape}")
        writer.write(key, feats)

    writer.flush()
Example #8
0
 def register(self, option_parser: OptionsParser):
     """Populate this object's options from ``option_parser``.

     Delegates to ``self.frame_opts.register`` and then reads
     ``energy_floor`` and ``raw_energy`` via ``option_parser.get``.

     Args:
         option_parser: Object whose ``get`` method supplies the values.
     """
     self.frame_opts.register(option_parser)
     # Builtin ``float``/``bool`` replace the ``np.float``/``np.bool``
     # aliases removed in NumPy 1.24; the boolean default is a real bool
     # rather than the string "True".
     self.energy_floor = option_parser.get("energy_floor", 0.0, float)
     self.raw_energy = option_parser.get("raw_energy", True, bool)
Example #9
0
 def register(self, option_parser: OptionsParser):
     """Populate ``num_bins``, ``low_freq`` and ``high_freq``.

     Note that ``num_bins`` is read from the option named ``num_mel_bins``.

     Args:
         option_parser: Object whose ``get`` method supplies the values.
     """
     # Builtin ``int``/``float`` replace the ``np.int``/``np.float`` aliases,
     # which were removed in NumPy 1.24.
     self.num_bins = option_parser.get("num_mel_bins", 25, type_function=int)
     self.low_freq = option_parser.get("low_freq", 20.0, type_function=float)
     self.high_freq = option_parser.get("high_freq", 7600.0, type_function=float)
Example #10
0
 def register(self, option_parser: OptionsParser):
     """Populate ``order`` and ``window`` from ``option_parser``.

     The values are read from the options named ``delta_order`` and
     ``delta_window`` respectively.

     Args:
         option_parser: Object whose ``get`` method supplies the values.
     """
     # Builtin ``int`` replaces the ``np.int`` alias removed in NumPy 1.24.
     self.order = option_parser.get("delta_order", 2, type_function=int)
     self.window = option_parser.get("delta_window", 2, type_function=int)