def __init__(self, path=None, serviceAddress='http://172.16.100.29:5000/recognize/', fun_use=False):
    """Prepare the audio source, the recognizer and the two-panel live plot.

    Dynamic audio waveform display: the waveform is shown in real time and a
    discrete Fourier transform is run in real time to analyse the frequency
    domain.

    Args:
        path: Optional path to a wave file. When it names an existing file
            the audio is read from that file; otherwise a PyAudio input
            stream is opened.
        serviceAddress: Address stored on ``self.httpService``.
        fun_use: When true, the ``TimedAnimation`` initialiser is skipped,
            so only the attributes are set up and no animation is started.
    """
    self.httpService = serviceAddress

    if path is None or not os.path.isfile(path):
        # No usable file: capture 16-bit mono audio from an input device.
        self.rate = 16000
        self.chunk = 2000
        audio = pyaudio.PyAudio()
        self.stream = audio.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=self.rate,
            input=True,
            frames_per_buffer=self.chunk,
        )
        self.read = self.stream.read
    else:
        # Read from the wave file; element 2 of getparams() is the frame rate.
        self.stream = wave.open(path)
        self.rate = self.stream.getparams()[2]
        self.chunk = int(self.rate/1000*125)
        self.read = self.stream.readframes

    self.yysb = GatedConv.load("语音识别MASR/pretrained/gated-conv.pth")
    self.data = []
    self.t = np.linspace(0, self.chunk - 1, self.chunk)

    figure = plt.figure(num='Real-time wave')

    # Two rows, one column: first subplot.
    upper_axes = figure.add_subplot(2, 1, 1)
    # Two rows, one column: second subplot.
    lower_axes = figure.add_subplot(2, 1, 2)

    (self.line1,) = upper_axes.plot([], [], lw=2)
    upper_axes.set_xlim(0, self.chunk)
    upper_axes.set_ylim(-6000, 6000)

    (self.line2,) = lower_axes.plot([], [], lw=2)
    lower_axes.set_xlim(0, self.chunk)
    lower_axes.set_ylim(-5000, 5000)

    # Refresh interval in milliseconds: the duration of one chunk.
    interval = int(1000*self.chunk/self.rate)

    if fun_use:
        return
    animation.TimedAnimation.__init__(self, figure, interval=interval, blit=True)
def model_setup(args=None):
    """Build the evaluation model and data loader and install the beam decoder.

    A ``MASRDataset`` is created from ``args.test_index_path``,
    ``args.labels_path`` and ``args.mode`` and wrapped in a
    ``MASRDataLoader``. The model is loaded from ``args.pretrained_path``.
    As a side effect the module-level ``decoder`` is replaced by a
    ``BeamCTCDecoder`` built from the dataset's ``labels_str``.

    Args:
        args: Namespace-like object providing ``test_index_path``,
            ``labels_path``, ``mode``, ``batch_size``, ``num_workers`` and
            ``pretrained_path``. It is also passed to the dataset as
            ``config``.

    Returns:
        A ``(model, dataloader)`` tuple.

    Raises:
        ValueError: If ``args`` is ``None``. The default exists only in the
            signature; every step below needs attributes of ``args``.
    """
    global decoder

    if args is None:
        # Fail with a clear message instead of an AttributeError on
        # ``None.test_index_path`` further down.
        raise ValueError("model_setup() requires an args namespace, got None")

    test_dataset = data.MASRDataset(
        args.test_index_path, args.labels_path, args.mode, config=args
    )
    dataloader = data.MASRDataLoader(
        test_dataset, batch_size=args.batch_size, num_workers=args.num_workers
    )

    model = GatedConv.load(args.pretrained_path)

    # NOTE: the language-model path is hard-coded and not taken from ``args``.
    decoder = BeamCTCDecoder(
        dataloader.dataset.labels_str,
        alpha=0.8,
        beta=0.3,
        lm_path="/root/lm/zh_giga.no_cna_cmn.prune01244.klm",
        cutoff_top_n=40,
        cutoff_prob=1.0,
        beam_width=100,
        num_processes=args.num_workers,
        blank_index=0,
    )
    return model, dataloader
def model_setup(pretrained_path="pretrained/gated-conv.pth",
                alpha=0.8,
                beta=0.3,
                lm_path="/kaggle/working/lm/zh_giga.no_cna_cmn.prune01244.klm",
                cutoff_top_n=40,
                cutoff_prob=1.0,
                beam_width=32,
                num_processes=4,
                blank_index=0,
                vocabulary=None):
    """Load the pretrained model and build a matching CTC beam decoder.

    Args:
        pretrained_path: Checkpoint handed to ``GatedConv.load``.
        alpha, beta, cutoff_top_n, cutoff_prob, beam_width, blank_index:
            Forwarded positionally to ``CTCBeamDecoder``.
        lm_path: Language-model file forwarded to ``CTCBeamDecoder``.
        num_processes: Accepted for interface compatibility but not used;
            see the note in the body.
        vocabulary: When not ``None``, replaces ``model.vocabulary`` before
            the decoder is built.

    Returns:
        A ``(model, decoder)`` tuple.
    """
    # NOTE(review): the caller-supplied ``num_processes`` is always replaced
    # by the module-level ``cpu_num`` -- confirm this is intentional.
    worker_count = cpu_num

    acoustic_model = GatedConv.load(pretrained_path)
    acoustic_model.eval()
    if vocabulary is not None:
        acoustic_model.vocabulary = vocabulary

    decoder_args = (
        acoustic_model.vocabulary,
        lm_path,
        alpha,
        beta,
        cutoff_top_n,
        cutoff_prob,
        beam_width,
        worker_count,
        blank_index,
    )
    beam_decoder = CTCBeamDecoder(*decoder_args)
    return acoustic_model, beam_decoder
def recognize():
    """Handle a JSON request carrying a ``token`` and a ``data`` sequence.

    The request body is read from ``request.json`` and dispatched on
    ``token``:

    * ``'SR'``: ``data`` is passed to the model's ``predict`` and the result
      is returned.
    * ``'FN'``: ``data`` is treated as numbers and a string with their mean
      and median is returned.
    * anything else: an error message with HTTP status 400.

    Returns:
        The response body, or a ``(body, status)`` tuple for an unknown
        token.
    """
    datas = request.json
    token = datas['token']
    receipt_data = list(datas['data'])

    if token == 'SR':
        # The model is loaded from disk on every request.
        model = GatedConv.load("语音识别MASR/pretrained/gated-conv.pth")
        return model.predict(receipt_data)

    if token == 'FN':
        nums = np.array(receipt_data)
        mean = np.mean(nums)
        median = np.median(nums)
        return '平均数:{} 中位数:{}'.format(mean, median)

    # Previously an unknown token fell through and returned None, which is
    # not a valid view response (assuming a Flask view); reject it explicitly.
    return 'unknown token: {}'.format(token), 400
def recognize():
    """Handle a JSON request carrying a ``token`` and a ``data`` sequence.

    The request body is read from ``request.json`` and dispatched on
    ``token``:

    * ``'SR'``: ``data`` is passed to the model's ``predict``; the result is
      printed and returned.
    * ``'FN'``: ``data`` is treated as numbers and a string with their mean
      and median is returned.
    * anything else: an error message with HTTP status 400.

    Returns:
        The response body, or a ``(body, status)`` tuple for an unknown
        token.
    """
    datas = request.json
    token = datas['token']
    receipt_data = list(datas['data'])

    if token == 'SR':
        # The model is loaded from disk on every request.
        model = GatedConv.load("AboutDL/语音识别MASR/pretrained/gated-conv.pth")
        text = model.predict(receipt_data)
        print(text)
        return text

    if token == 'FN':
        nums = np.array(receipt_data)
        mean = np.mean(nums)
        median = np.median(nums)
        return '平均数:{} 中位数:{}'.format(mean,median)

    # Previously an unknown token fell through and returned None, which is
    # not a valid view response (assuming a Flask view); reject it explicitly.
    return 'unknown token: {}'.format(token), 400
import torch
import feature
from models.conv import GatedConv
import torch.nn.functional as F
from ctcdecode import CTCBeamDecoder

# Beam-search settings. They are passed positionally, in this order, to
# CTCBeamDecoder below.
# NOTE(review): alpha/beta are presumably the language-model weight and the
# word bonus -- confirm against the ctcdecode signature.
alpha = 0.8
beta = 0.3
# Language-model file; a relative path, so it is resolved against the
# current working directory.
lm_path = "lm/zh_giga.no_cna_cmn.prune01244.klm"
cutoff_top_n = 40
cutoff_prob = 1.0
beam_width = 32
num_processes = 4
blank_index = 0

# Importing this module loads the pretrained checkpoint (relative path) and
# calls eval() on it.
model = GatedConv.load("pretrained/gated-conv.pth")
model.eval()

# Module-level decoder, built once at import time from the model's
# vocabulary and the settings above.
decoder = CTCBeamDecoder(
    model.vocabulary,
    lm_path,
    alpha,
    beta,
    cutoff_top_n,
    cutoff_prob,
    beam_width,
    num_processes,
    blank_index,
)
import torch
import feature
from models.conv import GatedConv
import torch.nn.functional as F
from ctcdecode import CTCBeamDecoder
# Both the language-model path and the checkpoint path come from the
# project's config module.
from config import lm_path, pretrained_model_path

# Beam-search settings. Together with lm_path they are passed positionally
# to CTCBeamDecoder below.
# NOTE(review): alpha/beta are presumably the language-model weight and the
# word bonus -- confirm against the ctcdecode signature.
alpha = 0.8
beta = 0.3
cutoff_top_n = 40
cutoff_prob = 1.0
beam_width = 32
num_processes = 4
blank_index = 0

# Importing this module loads the configured checkpoint and calls eval() on it.
model = GatedConv.load(pretrained_model_path)
model.eval()

# Module-level decoder, built once at import time from the model's
# vocabulary and the settings above.
decoder = CTCBeamDecoder(
    model.vocabulary,
    lm_path,
    alpha,
    beta,
    cutoff_top_n,
    cutoff_prob,
    beam_width,
    num_processes,
    blank_index,
)
import _init_path
import platform
from models.conv import GatedConv

# When enabled, beamdecode is imported (only for its import-time effects;
# the name itself is not used below).
use_lm = True
if use_lm:
    import beamdecode

# The checkpoint and the sample recording live at different, machine-specific
# locations depending on the operating system.
system_type = platform.system()
if system_type == 'Windows':
    model = GatedConv.load("AboutDL\\语音识别MASR\\pretrained\\gated-conv.pth")
    # NOTE: reading the samples with scipy.io.wavfile.read and passing the
    # array to predict() instead of the path was tried and gave the same
    # result.
    text = model.predict("E:\\打开欢呼比.wav")
elif system_type == 'Linux':
    model = GatedConv.load('AboutDL/语音识别MASR/pretrained/gated-conv.pth')
    text = model.predict(
        "/media/yangjinming/DATA/Dataset/PrimeWords/d/d2/d25104a2-6be0-4950-9ec0-42e8e1303492.wav"
    )
else:
    # Without this branch ``text`` stays unbound on any other platform
    # (e.g. 'Darwin') and the print below fails with a NameError.
    raise RuntimeError("unsupported platform: {!r}".format(system_type))

print("识别结果:", text)
import _init_path
from models.conv import GatedConv

# Checkpoint to start from and the index files handed to fit().
checkpoint_path = "语音识别MASR/pretrained/gated-conv.pth"
train_index, dev_index = "data/train.index", "data/dev.index"

# Load the pretrained model, call to_train() on it (presumably the switch to
# training mode -- confirm in GatedConv), then fit with a batch size of 2.
model = GatedConv.load(checkpoint_path)
model.to_train()
model.fit(train_index, dev_index, train_batch_size=2)
# The ``import _init_path`` bootstrap is disabled in this script.
from models.conv import GatedConv

# Checkpoints previously tried here: "pretrained/gated-conv.pth" and
# "pretrained/model_62.pth".
checkpoint_path = "pretrained2/model_81.pth"
audio_path = "./sample_audio/8_16.wav"

# Load the checkpoint and run recognition on the sample recording.
model = GatedConv.load(checkpoint_path)
text = model.predict(audio_path)

# Output: an empty line, the heading, then the recognized text.
print("", "识别结果:", text, sep="\n")
__mtime__ = '20210318'

import os
from models.conv import GatedConv
from config import pretrained_model_path

# The configured checkpoint path is resolved one directory above the
# current working directory.
checkpoint_path = os.path.join(os.pardir, pretrained_model_path)
audio_path = "../data_aishell/BAC009S0765W0130.wav"

# Load the checkpoint and run recognition on the sample recording.
model = GatedConv.load(checkpoint_path)
text = model.predict(audio_path)

# Output: an empty line, the heading, then the recognized text.
print("", "识别结果:", text, sep="\n")
import torch
import feature
from models.conv import GatedConv
import torch.nn.functional as F
from ctcdecode import CTCBeamDecoder

# Beam-search settings. They are passed positionally, in this order, to
# CTCBeamDecoder below.
# NOTE(review): alpha/beta are presumably the language-model weight and the
# word bonus -- confirm against the ctcdecode signature.
alpha = 0.8
beta = 0.3
# Language-model file; an absolute, machine-specific path.
lm_path = "/home/db/bing/yuyingshibie/masr/lm/zh_giga.no_cna_cmn.prune01244.klm"
cutoff_top_n = 40
cutoff_prob = 1.0
beam_width = 32
num_processes = 4
blank_index = 0

# Importing this module loads the pretrained checkpoint (absolute,
# machine-specific path) and calls eval() on it.
model = GatedConv.load(
    "/home/db/bing/yuyingshibie/masr/pretrained/gated-conv.pth")
model.eval()

# Module-level decoder, built once at import time from the model's
# vocabulary and the settings above.
decoder = CTCBeamDecoder(
    model.vocabulary,
    lm_path,
    alpha,
    beta,
    cutoff_top_n,
    cutoff_prob,
    beam_width,
    num_processes,
    blank_index,
)
import _init_path
from models.conv import GatedConv

# Checkpoint previously used here: "pretrained/gated-conv.pth".
checkpoint_path = "pretrained/model_3.pth"
audio_path = "./sample_audio/test.wav"

# Load the checkpoint and run recognition on the sample recording.
model = GatedConv.load(checkpoint_path)
text = model.predict(audio_path)

# Output: an empty line, the heading, then the recognized text.
print("", "识别结果:", text, sep="\n")
# Search range for the two language-model parameters being tuned.
parser.add_argument('--lm-alpha-from', default=0.0, type=float, help='Language model weight start tuning')
parser.add_argument('--lm-alpha-to', default=3.0, type=float, help='Language model weight end tuning')
parser.add_argument('--lm-beta-from', default=0.0, type=float,
                    help='Language model word bonus (all words) start tuning')
parser.add_argument('--lm-beta-to', default=0.5, type=float,
                    help='Language model word bonus (all words) end tuning')
# These two are candidate counts, so they are parsed as integers. With
# type=float a value given on the command line became e.g. 10.0 while the
# defaults stayed ints.
parser.add_argument('--lm-num-alphas', default=45, type=int, help='Number of alpha candidates for tuning')
parser.add_argument('--lm-num-betas', default=5, type=int, help='Number of beta candidates for tuning')
parser = add_decoder_args(parser)
args = parser.parse_args()

# Tuning LM parameters is meaningless without a language model.
if args.lm_path is None:
    print("error: LM must be provided for tuning")
    sys.exit(1)

model = GatedConv.load(args.model_path)

# SECURITY: allow_pickle=True lets np.load unpickle arbitrary objects; only
# pass files produced by a trusted run.
saved_output = np.load(args.saved_output, allow_pickle=True)


def init(beam_width, blank_index, lm_path):
    """Create the module-level decoders.

    Sets the globals ``decoder`` (a ``BeamCTCDecoder`` over the model's
    vocabulary) and ``ae_decoder`` (a ``GreedyDecoder`` over the same
    vocabulary).

    Args:
        beam_width: Beam width forwarded to ``BeamCTCDecoder``.
        blank_index: Blank index forwarded to ``BeamCTCDecoder``.
        lm_path: Language-model path forwarded to ``BeamCTCDecoder``.
    """
    global decoder, ae_decoder
    decoder = BeamCTCDecoder(model.vocabulary, lm_path=lm_path, beam_width=beam_width,
                             num_processes=args.lm_workers, blank_index=blank_index)
    ae_decoder = GreedyDecoder(model.vocabulary)


def decode_dataset(params):
    # ``params`` is an (lm_alpha, lm_beta) pair; apply it to the shared
    # decoder created by init().
    lm_alpha, lm_beta = params
    global decoder
    decoder._decoder.reset_params(lm_alpha, lm_beta)