def mixture_feat_extractor_test():
    """Smoke-test ``feature.MixtureExtractor`` on the wave file ``wavPath``.

    Chains a stream reader, a frame cutter and a mixture extractor
    (``mixType=["mfcc", "fbank"]``), waits for the extractor to finish, then
    prints the size of its output PIPE followed by the ``data.shape`` of one
    item taken from that PIPE.
    """
    wave_source = stream.StreamReader(
        waveFile=wavPath,
        chunkSize=480,
        simulate=False,
        vaDetector=None,
    )
    frame_cutter = stream.ElementFrameCutter(width=400, shift=160)
    mix_extractor = feature.MixtureExtractor(
        frameDim=400,
        batchSize=100,
        mixType=["mfcc", "fbank"],
        useEnergyForFbank=False,
        useEnergyForMfcc=False,
    )

    # Start the stages in order; each one consumes the previous stage's
    # output PIPE.
    wave_source.start()
    frame_cutter.start(inPIPE=wave_source.outPIPE)
    mix_extractor.start(inPIPE=frame_cutter.outPIPE)

    # Block until the last stage reports completion before inspecting it.
    mix_extractor.wait()

    result_pipe = mix_extractor.outPIPE
    print(result_pipe.size())
    packet = result_pipe.get()
    print(packet.data.shape)
def feat_extractor_test():
    """Smoke-test ``feature.MfccExtractor`` on the wave file ``wavPath``.

    Runs reader -> cutter -> MFCC extractor, waits for the extractor to
    finish and prints the size of its output PIPE.
    """
    wave_source = stream.StreamReader(
        waveFile=wavPath,
        chunkSize=480,
        simulate=False,
        vaDetector=None,
    )
    frame_cutter = stream.ElementFrameCutter(width=400, shift=160)
    mfcc = feature.MfccExtractor(batchSize=100, useEnergy=False)

    # The head of the chain starts without an input; every later stage is
    # started on the output PIPE of the stage before it.
    chain = (wave_source, frame_cutter, mfcc)
    chain[0].start()
    for upstream, downstream in zip(chain, chain[1:]):
        downstream.start(inPIPE=upstream.outPIPE)

    mfcc.wait()
    print(mfcc.outPIPE.size())
def feat_estimator_test():
    """Smoke-test ``decode.AcousticEstimator`` at the end of a full pipeline.

    Runs reader -> cutter -> MFCC extractor -> feature processor ->
    acoustic estimator on the wave file ``wavPath``, waits for the estimator
    to finish and prints the size of its output PIPE.
    """
    wave_source = stream.StreamReader(
        waveFile=wavPath,
        chunkSize=480,
        simulate=False,
        vaDetector=None,
    )
    frame_cutter = stream.ElementFrameCutter(width=400, shift=160)
    mfcc = feature.MfccExtractor(batchSize=100, useEnergy=False)
    feat_processor = feature.FeatureProcessor(
        featDim=13,
        delta=2,
        spliceLeft=10,
        spliceRight=10,
        cmvNormalizer=feature.FrameSlideCMVNormalizer(),
    )

    left_ctx = 5
    right_ctx = 5
    # NOTE(review): 819 == 13 * (2 + 1) * (10 + 1 + 10); presumably the
    # processor's output dimension after deltas and splicing -- confirm.
    am_estimator = decode.AcousticEstimator(
        featDim=819,
        batchSize=100,
        applySoftmax=False,
        applyLog=False,
        leftContext=left_ctx,
        rightContext=right_ctx,
    )

    def strip_context(batch):
        # Stand-in acoustic function: drop the first `left_ctx` and the last
        # `right_ctx` entries along the first axis and return a copy.
        return batch[left_ctx:-right_ctx].copy()

    am_estimator.acoustic_function = strip_context

    # The head of the chain starts without an input; every later stage is
    # started on the output PIPE of the stage before it.
    chain = (wave_source, frame_cutter, mfcc, feat_processor, am_estimator)
    chain[0].start()
    for upstream, downstream in zip(chain, chain[1:]):
        downstream.start(inPIPE=upstream.outPIPE)

    am_estimator.wait()
    print(am_estimator.outPIPE.size())
def cutter_test():
    """Smoke-test ``stream.ElementFrameCutter`` on the wave file ``wavPath``.

    Feeds the reader's output PIPE into the cutter (``width=400``,
    ``shift=160``), waits for the cutter to finish and prints the size of
    its output PIPE.
    """
    wave_source = stream.StreamReader(
        waveFile=wavPath,
        chunkSize=480,
        simulate=False,
        vaDetector=None,
    )
    frame_cutter = stream.ElementFrameCutter(width=400, shift=160)

    wave_source.start()
    frame_cutter.start(inPIPE=wave_source.outPIPE)

    # Block until the cutter reports completion before inspecting its output.
    frame_cutter.wait()
    print(frame_cutter.outPIPE.size())
def feat_processor_test():
    """Smoke-test ``feature.FeatureProcessor`` on the wave file ``wavPath``.

    Runs reader -> cutter -> MFCC extractor -> feature processor, waits for
    the processor to finish, then prints the size of its output PIPE followed
    by the ``data.shape`` of one item taken from that PIPE.
    """
    wave_source = stream.StreamReader(
        waveFile=wavPath,
        chunkSize=480,
        simulate=False,
        vaDetector=None,
    )
    frame_cutter = stream.ElementFrameCutter(width=400, shift=160)
    mfcc = feature.MfccExtractor(batchSize=100, useEnergy=False)
    feat_processor = feature.FeatureProcessor(
        featDim=13,
        delta=2,
        spliceLeft=10,
        spliceRight=10,
        cmvNormalizer=feature.FrameSlideCMVNormalizer(),
    )

    # The head of the chain starts without an input; every later stage is
    # started on the output PIPE of the stage before it.
    chain = (wave_source, frame_cutter, mfcc, feat_processor)
    chain[0].start()
    for upstream, downstream in zip(chain, chain[1:]):
        downstream.start(inPIPE=upstream.outPIPE)

    feat_processor.wait()

    print(feat_processor.outPIPE.size())
    packet = feat_processor.outPIPE.get()
    print(packet.data.shape)
# Resource files located under rootDir: HMM model, decoding graph, word table.
hmm = f"{rootDir}/tri3b_ali_train_clean_5/final.mdl"
HCLG = f"{rootDir}/tri3b/graph_tgsmall/HCLG.fst"
words = f"{rootDir}/tri3b/graph_tgsmall/words.txt"

# Build the acoustic model from (featDim, pdfDim), where pdfDim is read from
# the HMM file, then load its weights from kerasModel.
pdfDim = decode.get_pdf_dim(hmm)
kerasmodel = make_DNN_acoustic_model(featDim, pdfDim)
kerasmodel.load_weights(kerasModel)

# ---------------------------------------------------------------
# Pipeline components
# ---------------------------------------------------------------

# 1. Stream reader over the audio file `waveFile` (simulate=True).
reader = stream.StreamReader(waveFile, simulate=True)

# 2. Frame cutter: width=400, shift=160.
cutter = stream.ElementFrameCutter(width=400, shift=160)

# 3. MFCC feature extraction (frameDim matches the cutter width above).
extractor = feature.MfccExtractor(
    frameDim=400,
    batchSize=100,
    useEnergy=False,
)

# 4. Feature processing: deltas, splicing and CMV normalisation, configured
#    by `delta`, `spliceLeft` and `spliceRight` defined elsewhere.
processor = feature.FeatureProcessor(
    featDim=13,
    batchSize=100,
    delta=delta,
    spliceLeft=spliceLeft,
    spliceRight=spliceRight,
    cmvNormalizer=feature.FrameSlideCMVNormalizer(),
)
# ---------------------------------------------------------------
# Load the DNN acoustic model
# ---------------------------------------------------------------
# pdfDim is read from the HMM file; the model is built from
# (featDim, pdfDim) and its weights are loaded from kerasModel.
pdfDim = decode.get_pdf_dim(hmm)
kerasmodel = make_DNN_acoustic_model(featDim, pdfDim)
kerasmodel.load_weights(kerasModel)

# ---------------------------------------------------------------
# Pipeline components
# ---------------------------------------------------------------

# 1. Stream recorder (intended to capture the realtime microphone stream).
recorder = stream.StreamRecorder()

# 2. Frame cutter: batchSize=50, width=400, shift=160.
cutter = stream.ElementFrameCutter(batchSize=50, width=400, shift=160)

# 3. MFCC feature extraction.
extractor = feature.MfccExtractor(useEnergy=False)

# 4. Feature processing: deltas, splicing and CMV normalisation, configured
#    by `delta`, `spliceLeft` and `spliceRight` defined elsewhere.
processor = feature.MatrixFeatureProcessor(
    delta=delta,
    spliceLeft=spliceLeft,
    spliceRight=spliceRight,
    cmvNormalizer=feature.FrameSlideCMVNormalizer(),
)


# 5. Acoustic probability computer.
def keras_compute(feats):
    """Run ``kerasmodel`` on ``feats`` with ``training=False``.

    Returns the result of calling ``.numpy()`` on the model output.
    """
    model_output = kerasmodel(feats, training=False)
    return model_output.numpy()