Exemple #1
0
def _get_kaldi_fbank(waveform, sample_rate, n_bins=80) -> Optional[np.ndarray]:
    """Get mel-filter bank features via PyKaldi."""
    try:
        from kaldi.feat.mel import MelBanksOptions
        from kaldi.feat.fbank import FbankOptions, Fbank
        from kaldi.feat.window import FrameExtractionOptions
        from kaldi.matrix import Vector

        mel_opts = MelBanksOptions()
        mel_opts.num_bins = n_bins
        frame_opts = FrameExtractionOptions()
        frame_opts.samp_freq = sample_rate
        opts = FbankOptions()
        opts.mel_opts = mel_opts
        opts.frame_opts = frame_opts
        fbank = Fbank(opts=opts)
        features = fbank.compute(Vector(waveform), 1.0).numpy()
        return features
    except ImportError:
        return None
Exemple #2
0
    stride_steps=config_args['model_arch']['stride_steps'],
    pooling_type=config_args['model_arch']['pooling_type'])

# Restore the trained weights from the checkpoint path in the config, mapping
# all tensors onto the CPU.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source (or consider weights_only=True if the installed torch
# version supports it).
nn_LID_model_DA.load_state_dict(torch.load(config_args['best_model'], map_location=torch.device('cpu')))

# Print the module for a visual check of the instantiated model.
print(nn_LID_model_DA)
############## cmn #################

# Load the CMVN statistics matrix from the path given in the config.
# Presumably this follows the Kaldi CMVN stats layout (row 0 = per-dimension
# sums with the frame count in the last column) -- TODO confirm.
cmvn_stats = kaldiio.load_mat(config_args['source_cmvn'])
mean_stats = cmvn_stats[0,:-1]  # row 0, every column except the last
count = cmvn_stats[0,-1]  # row 0, last column
# Divide by the count and add a leading axis so the result is a single row.
offset = np.expand_dims(mean_stats,0)/count
CMVN = offset

############## Mfcc opts #################
# Frame extraction options: 16 kHz sampling frequency, snip_edges enabled.
fopts = FrameExtractionOptions()
fopts.samp_freq = 16000
fopts.snip_edges = True

# Mel filter-bank options: 40 bins with a 40 Hz lower cutoff.
hires_mb_opts = MelBanksOptions()
hires_mb_opts.low_freq = 40
# NOTE(review): Kaldi documents a non-positive high_freq as an offset from the
# Nyquist frequency (here Nyquist - 200 Hz) -- confirm.
hires_mb_opts.high_freq = -200
hires_mb_opts.num_bins = 40
# MFCC options: 40 cepstral coefficients, energy disabled, built on the frame
# and mel-bank options above.
hires_mfcc_opts = MfccOptions()
hires_mfcc_opts.frame_opts = fopts
hires_mfcc_opts.num_ceps = 40
hires_mfcc_opts.mel_opts = hires_mb_opts
hires_mfcc_opts.use_energy = False


def lid_module(key, audio_file, start, end):
Exemple #3
0
    print(key, out["text"], flush=True)

print("-" * 80, flush=True)


# Define feature pipeline in code
def make_feat_pipeline(base, opts=DeltaFeaturesOptions()):
    """Build a function that maps a wave object to delta-augmented features.

    Args:
        base: Feature extractor; must provide ``compute_features`` and
            ``dim``.
        opts: Delta options forwarded to ``compute_deltas``. The default
            instance is created once, when this function is defined.

    Returns:
        A one-argument callable taking a wave object and returning the result
        of ``compute_deltas`` on the CMVN-processed base features.
    """
    def feat_pipeline(wav):
        # First row of the wave data (presumably the first channel -- confirm).
        features = base.compute_features(wav.data()[0], wav.samp_freq, 1.0)
        # Statistics are accumulated from this input only, then applied to
        # the same features in place.
        normalizer = Cmvn(base.dim())
        normalizer.accumulate(features)
        normalizer.apply(features)
        return compute_deltas(opts, features)

    return feat_pipeline


# Frame extraction options: 16 kHz sampling frequency, with allow_downsample
# enabled.
frame_opts = FrameExtractionOptions()
frame_opts.samp_freq = 16000
frame_opts.allow_downsample = True
# MFCC options with energy disabled, using the frame options above.
mfcc_opts = MfccOptions()
mfcc_opts.use_energy = False
mfcc_opts.frame_opts = frame_opts
# Wrap the MFCC extractor in the pipeline built by make_feat_pipeline, using
# its default delta options.
feat_pipeline = make_feat_pipeline(Mfcc(mfcc_opts))

# Decode
# Iterate over (key, wave) pairs read from the "scp:wav.scp" specifier,
# compute features for each, decode them, and print the key with the text.
for key, wav in SequentialWaveReader("scp:wav.scp"):
    feats = feat_pipeline(wav)
    out = asr.decode(feats)
    # flush=True so each result is emitted as soon as it is available.
    print(key, out["text"], flush=True)
Exemple #4
0
    raise IOError('==> please type in the model to load')

print('==> start testing.')

############### get labels file #################
# Each line of the labels file is split on whitespace: the first field is the
# label and the second its integer index; any further fields are ignored.
with open(args.labels) as f:
    lines = f.read().splitlines()
# Forward map: label string -> integer index.
label2idx = {fields[0]: int(fields[1]) for fields in map(str.split, lines)}
# Reverse map: integer index -> label string.
i2l = {idx: label for label, idx in label2idx.items()}

############## Mfcc opts #################
# Frame extraction options: 8 kHz sampling frequency, snip_edges disabled.
fopts = FrameExtractionOptions()
fopts.samp_freq = 8000
fopts.snip_edges = False

# Mel filter-bank options: 40 bins with a 40 Hz lower cutoff.
hires_mb_opts = MelBanksOptions()
hires_mb_opts.low_freq = 40
# NOTE(review): Kaldi documents a non-positive high_freq as an offset from the
# Nyquist frequency (here Nyquist - 200 Hz) -- confirm.
hires_mb_opts.high_freq = -200
hires_mb_opts.num_bins = 40
# MFCC options: 40 cepstral coefficients, energy disabled, built on the frame
# and mel-bank options above.
hires_mfcc_opts = MfccOptions()
hires_mfcc_opts.frame_opts = fopts
hires_mfcc_opts.num_ceps = 40
hires_mfcc_opts.mel_opts = hires_mb_opts
hires_mfcc_opts.use_energy = False

############## Sliding Window opts #################
# Default-constructed sliding-window CMN options; any further configuration
# happens outside the visible portion of the file.
sliding_windows_opts = SlidingWindowCmnOptions()