def _get_kaldi_fbank(waveform, sample_rate, n_bins=80) -> Optional[np.ndarray]:
    """Get mel-filter bank features via PyKaldi.

    Args:
        waveform: Audio samples; wrapped in ``kaldi.matrix.Vector`` before
            being handed to the extractor.
        sample_rate: Sampling frequency, stored in
            ``FrameExtractionOptions.samp_freq``.
        n_bins: Number of mel bins, stored in ``MelBanksOptions.num_bins``.

    Returns:
        The result of ``Fbank.compute(...)`` converted with ``.numpy()``, or
        ``None`` when PyKaldi cannot be imported.
    """
    # PyKaldi is an optional dependency. Only the imports are guarded, so an
    # ImportError raised later (during feature computation) is not mistaken
    # for "PyKaldi is not installed" and silently swallowed.
    try:
        from kaldi.feat.mel import MelBanksOptions
        from kaldi.feat.fbank import FbankOptions, Fbank
        from kaldi.feat.window import FrameExtractionOptions
        from kaldi.matrix import Vector
    except ImportError:
        return None

    mel_opts = MelBanksOptions()
    mel_opts.num_bins = n_bins

    frame_opts = FrameExtractionOptions()
    frame_opts.samp_freq = sample_rate

    # Configure the sub-options first, then attach them to the Fbank options.
    opts = FbankOptions()
    opts.mel_opts = mel_opts
    opts.frame_opts = frame_opts

    fbank = Fbank(opts=opts)
    # NOTE(review): the second argument (1.0) is presumably the VTLN warp
    # factor, i.e. no warping -- confirm against the PyKaldi API docs.
    features = fbank.compute(Vector(waveform), 1.0).numpy()
    return features
# NOTE(review): the next line closes a constructor call that opens above this
# view; both keyword arguments come from the 'model_arch' section of config_args.
stride_steps=config_args['model_arch']['stride_steps'], pooling_type=config_args['model_arch']['pooling_type'])

# Restore weights from the checkpoint at config_args['best_model'], mapping
# every tensor onto the CPU, then print the model.
# NOTE(review): torch.load may unpickle arbitrary objects depending on the
# torch version/arguments -- only load checkpoints from trusted sources.
nn_LID_model_DA.load_state_dict(torch.load(config_args['best_model'], map_location=torch.device('cpu')))
print(nn_LID_model_DA)

############## cmn #################
# Load the statistics matrix at config_args['source_cmvn'] and derive the
# offset: row 0 without its last column, divided by the last entry of row 0.
# NOTE(review): presumably Kaldi-style CMVN stats (per-dimension sums with the
# frame count in the last column), which would make this the global mean --
# confirm against how the stats file was produced.
cmvn_stats = kaldiio.load_mat(config_args['source_cmvn'])
mean_stats = cmvn_stats[0,:-1]
count = cmvn_stats[0,-1]
# expand_dims adds a leading axis to the per-dimension vector.
offset = np.expand_dims(mean_stats,0)/count
CMVN = offset

############## Mfcc opts #################
# Frame extraction: 16 kHz sampling frequency, snip_edges enabled.
fopts = FrameExtractionOptions()
fopts.samp_freq = 16000
fopts.snip_edges = True

# Mel filter bank: 40 bins, low_freq 40.
# NOTE(review): Kaldi documents a non-positive high_freq as an offset from the
# Nyquist frequency -- confirm that is the intent of -200.
hires_mb_opts = MelBanksOptions()
hires_mb_opts.low_freq = 40
hires_mb_opts.high_freq = -200
hires_mb_opts.num_bins = 40

# MFCC options combining the frame and mel settings above: 40 cepstra,
# use_energy disabled.
hires_mfcc_opts = MfccOptions()
hires_mfcc_opts.frame_opts = fopts
hires_mfcc_opts.num_ceps = 40
hires_mfcc_opts.mel_opts = hires_mb_opts
hires_mfcc_opts.use_energy = False

# NOTE(review): the body of lid_module lies below this view.
def lid_module(key, audio_file, start, end):
# NOTE(review): these two prints finish a decoding pass that starts above this
# view; `key` and `out` are bound there.
print(key, out["text"], flush=True)
print("-" * 80, flush=True)


# Define feature pipeline in code
def make_feat_pipeline(base, opts=None):
    """Build a function that maps a wave object to normalized delta features.

    Args:
        base: Feature extractor; must provide ``compute_features`` and
            ``dim`` (an ``Mfcc`` instance is passed below).
        opts: Options forwarded to ``compute_deltas``. When ``None`` (the
            default) a fresh ``DeltaFeaturesOptions()`` is created for this
            pipeline, instead of one instance being built at definition time
            and shared by every pipeline.

    Returns:
        A callable ``feat_pipeline(wav)`` returning ``compute_deltas(opts,
        feats)`` for the features extracted from ``wav``.
    """
    if opts is None:
        opts = DeltaFeaturesOptions()

    def feat_pipeline(wav):
        # Only row 0 of wav.data() is used (presumably the first channel --
        # confirm). The trailing 1.0 is presumably the VTLN warp factor.
        feats = base.compute_features(wav.data()[0], wav.samp_freq, 1.0)
        # Statistics are accumulated from this utterance's own features and
        # applied to them; the result is not reassigned, so apply() is relied
        # on to modify `feats` in place.
        cmvn = Cmvn(base.dim())
        cmvn.accumulate(feats)
        cmvn.apply(feats)
        return compute_deltas(opts, feats)

    return feat_pipeline


# MFCC extraction at 16 kHz with downsampling allowed and use_energy disabled.
frame_opts = FrameExtractionOptions()
frame_opts.samp_freq = 16000
frame_opts.allow_downsample = True
mfcc_opts = MfccOptions()
mfcc_opts.use_energy = False
mfcc_opts.frame_opts = frame_opts
feat_pipeline = make_feat_pipeline(Mfcc(mfcc_opts))

# Decode
# `asr` is defined above this view; each entry read from wav.scp is converted
# to features, decoded, and its transcript printed next to its key.
for key, wav in SequentialWaveReader("scp:wav.scp"):
    feats = feat_pipeline(wav)
    out = asr.decode(feats)
    print(key, out["text"], flush=True)
# NOTE(review): this raise is the tail of a branch whose condition lies above
# this view; it is reproduced verbatim.
raise IOError('==> please type in the model to load')
print('==> start testing.')

############### get labels file #################
# Every line of the labels file holds "<label> <index>"; build the forward
# mapping and its inverse.
with open(args.labels) as f:
    lines = f.read().splitlines()
label2idx = {fields[0]: int(fields[1]) for fields in (row.split() for row in lines)}
i2l = {index: label for label, index in label2idx.items()}

############## Mfcc opts #################
# Mel filter bank: 40 bins, low_freq 40.
# NOTE(review): Kaldi documents a non-positive high_freq as an offset from the
# Nyquist frequency -- confirm that is the intent of -200.
hires_mb_opts = MelBanksOptions()
hires_mb_opts.num_bins = 40
hires_mb_opts.low_freq = 40
hires_mb_opts.high_freq = -200

# Frame extraction: 8 kHz sampling frequency, snip_edges disabled.
fopts = FrameExtractionOptions()
fopts.snip_edges = False
fopts.samp_freq = 8000

# The sub-options are attached only after they are fully configured, in case
# the assignment copies them.
hires_mfcc_opts = MfccOptions()
hires_mfcc_opts.num_ceps = 40
hires_mfcc_opts.use_energy = False
hires_mfcc_opts.frame_opts = fopts
hires_mfcc_opts.mel_opts = hires_mb_opts

############## Sliding Window opts #################
# Created with defaults here; any further configuration lies below this view.
sliding_windows_opts = SlidingWindowCmnOptions()