def main_deepspeech(args):
    args = parse_args_deep() if args is None else args

    print('Loading model from file %s' % (args.model), file=sys.stderr)
    model_load_start = timer()
    ds = Model(args.model, N_FEATURES, N_CONTEXT, args.alphabet, BEAM_WIDTH)
    model_load_end = timer() - model_load_start
    print('Loaded model in %0.3fs.' % (model_load_end), file=sys.stderr)

    if args.lm and args.trie:
        print('Loading language model from files %s %s' % (args.lm, args.trie), file=sys.stderr)
        lm_load_start = timer()
        ds.enableDecoderWithLM(args.alphabet, args.lm, args.trie, LM_WEIGHT,
                               WORD_COUNT_WEIGHT, VALID_WORD_COUNT_WEIGHT)
        lm_load_end = timer() - lm_load_start
        print('Loaded language model in %0.3fs.' % (lm_load_end), file=sys.stderr)

    fs, audio = wave.read(args.audio)
    # We can assume 16kHz
    audio_length = len(audio) * (1 / 16000)
    assert fs == 16000, "Only 16000Hz input WAV files are supported for now!"

    print('Running inference.', file=sys.stderr)
    inference_start = timer()
    print(ds.stt(audio, fs))
    inference_end = timer() - inference_start
    print('Inference took %0.3fs for %0.3fs audio file.' % (inference_end, audio_length), file=sys.stderr)

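# main_deepspeech relies on module-level names that are not shown here (imports,
# decoder/feature constants, and a parse_args_deep argument parser). Below is a
# minimal sketch of what they might look like, modelled on the DeepSpeech 0.1.x
# example client; the constant values and the parse_args_deep signature are
# assumptions, not part of the original code.
import sys
import argparse
from timeit import default_timer as timer

import scipy.io.wavfile as wave        # wave.read(...) above is scipy's reader, not the stdlib wave module
from deepspeech.model import Model     # DeepSpeech 0.1.x Python API

# Decoder and feature constants as in the DeepSpeech 0.1.x example client (assumed).
BEAM_WIDTH = 500
LM_WEIGHT = 1.75
WORD_COUNT_WEIGHT = 1.00
VALID_WORD_COUNT_WEIGHT = 1.00
N_FEATURES = 26
N_CONTEXT = 9

def parse_args_deep():
    # Hypothetical parser exposing exactly the attributes main_deepspeech reads.
    parser = argparse.ArgumentParser(description='DeepSpeech inference on a WAV file')
    parser.add_argument('model', help='path to the model (protocol buffer binary file)')
    parser.add_argument('alphabet', help='path to the alphabet configuration file')
    parser.add_argument('audio', help='path to the 16kHz WAV file to transcribe')
    parser.add_argument('--lm', default=None, help='path to the language model binary file')
    parser.add_argument('--trie', default=None, help='path to the language model trie')
    return parser.parse_args()
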
def predict(self):
    try:
        # 'filename' is set elsewhere by the file chooser; referencing it raises
        # NameError if no file has been selected yet.
        if filename[-3:] == 'wav':
            fs, wav_data = wave.read(filename)
            print(fs)
            wav_length = len(wav_data)
            print("length of wave:", wav_length)
            print("data of wav(int):", type(wav_data))
            y = wav_data

            # Sliding-window segmentation: 4240-sample windows advanced by 400 samples.
            segment_sample_num = 4240
            segment_move = 400
            start = 0
            end = start + segment_sample_num
            seg_num = 0
            while end < wav_length:
                start = start + segment_move
                end = start + segment_sample_num
                seg_num += 1
            print(seg_num)

            after_segment = [""] * seg_num
            start = 0
            x_test = np.zeros((seg_num, 32, 129))
            for i in range(seg_num):
                after_segment[i] = y[start:start + segment_sample_num + 1]
                start = start + segment_move
                # 256-point Hamming spectrogram with 128-sample overlap -> 129 bins x 32 frames.
                f, t, spectro = sig.spectrogram(after_segment[i], fs, ('hamming'), 256, 128, 256)
                # print(f, t)
                # print(shape(spectro))
                spectro = log(1 + abs(spectro))
                # Transpose from (freq, time) to (time, freq) for the network input.
                for a in range(129):
                    for b in range(32):
                        x_test[i][b][a] = spectro[a][b]
            print(shape(x_test))

            if K.image_data_format() == 'channels_first':
                x_test = x_test.reshape(x_test.shape[0], 1, 32, 129)
            else:
                x_test = x_test.reshape(x_test.shape[0], 32, 129, 1)
            x_test = x_test.astype('float32')
            print('x_test shape:', x_test.shape)
            print(x_test.shape[0], 'test samples')

            preds = self.model.predict(x_test, batch_size=32, verbose=1)
            print(type(preds))
            print(preds)
            print(shape(preds))
            # Sum the per-segment scores and pick the class with the largest total.
            preds = preds.sum(axis=0)
            print(preds)
            pred_sum = preds.tolist()
            condition = pred_sum.index(max(pred_sum))
            if condition == 0:
                showinfo('Prediction', '')
        else:
            showwarning('warning', 'Please select an audio (.wav) file')
    except NameError:
        showwarning('warning', 'Please select a file')
    else:
        # print(predictss)
        showinfo('The answer of prediction')

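# The predict method above assumes module-level imports along these lines (a
# sketch only; the exact imports and GUI toolkit used by the original class are
# not shown, so treat this block as an assumption):
import numpy as np
import scipy.io.wavfile as wave     # wave.read(filename) above is scipy's WAV reader
from scipy import log, shape        # the original likely used 'from scipy import *'
from scipy import signal as sig
from keras import backend as K
from tkinter.messagebox import showinfo, showwarning   # tkMessageBox on Python 2

# The fixed (32, 129) input shape follows from the segmentation parameters: a
# 4241-sample window analysed with a 256-point window and 128-sample overlap
# gives (4241 - 256) // 128 + 1 = 32 time frames and 256 // 2 + 1 = 129
# frequency bins.
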
import wave
import struct

# Open with the stdlib wave module and print the sampling rate.
waveFile = wave.open('test.wav', 'r')
Fs = waveFile.getframerate()
print(Fs)
"""
length = waveFile.getnframes()
for i in range(0, 20):
    waveData = waveFile.readframes(1)
    data = struct.unpack("<h", waveData)
    print(int(data[0]))
"""

import wave
import struct

waveFile = wave.open('test.wav', 'r')
length = waveFile.getnframes()
# Read the first 20 frames one at a time; "<h" assumes 16-bit little-endian mono PCM.
for i in range(0, 20):
    waveData = waveFile.readframes(1)
    data = struct.unpack("<h", waveData)
    print(int(data[0]))

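# For comparison, the same first samples can be read in one call with
# scipy.io.wavfile, which the other snippets here already use; this sketch
# assumes 'test.wav' is 16-bit PCM:
import scipy.io.wavfile as wavfile

fs, samples = wavfile.read('test.wav')
print(fs)
print(samples[:20])
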
from __future__ import print_function
import os
import struct
import numpy as np
import scipy.io as scio
import scipy.io.wavfile as wave   # scipy's WAV reader (shadows the stdlib 'wave' name on purpose)
from scipy import *
from scipy import signal as sig

path = r'H:\RawData2'
mylist = os.listdir(path)
mark = 0
for index, y_name in enumerate(mylist):
    fs, wav_data = wave.read(os.path.join(path, y_name), mmap=False)
    print(fs)
    wav_length = len(wav_data)
    print("length of wave:", wav_length)
    print("data of wav(int):", type(wav_data))
    y = wav_data

    # Sliding-window segmentation: 4240-sample windows advanced by 400 samples.
    segment_sample_num = 4240
    segment_move = 400
    start = 0
    end = start + segment_sample_num
    seg_num = 0
    while end < wav_length:
        start = start + segment_move
        end = start + segment_sample_num
        seg_num += 1

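# The while loop above just counts how many 4240-sample windows fit when the
# start position advances by 400 samples; a closed-form equivalent, written as
# a hypothetical helper that is not part of the original script:
import math

def count_segments(wav_length, segment_sample_num=4240, segment_move=400):
    # Counts offsets k*segment_move (k >= 0) with k*segment_move + segment_sample_num < wav_length.
    if wav_length <= segment_sample_num:
        return 0
    return int(math.ceil((wav_length - segment_sample_num) / float(segment_move)))
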
import os

def playFile(waveFileName):
    # Resolve bare file names relative to the directory this script lives in.
    if os.path.dirname(waveFileName) == '':
        waveFileName = os.path.join(os.path.abspath(os.path.dirname(__file__)), waveFileName)
    # 'play' is an audio playback helper assumed to be defined elsewhere.
    with open(waveFileName, 'rb') as wav_file:
        play(wav_file.read(500 * 1024))