예제 #1
0
    def __init__(self, path=None, serviceAddress='http://172.16.100.29:5000/recognize/', fun_use=False):
        """Prepare the audio source, load the GatedConv model and build the figure.

        Args:
            path: Optional path of an audio file. If it names an existing
                file, frames are read from it through ``wave``; otherwise a
                PyAudio input stream is opened instead.
            serviceAddress: URL stored as ``self.httpService``.
            fun_use: When true, ``animation.TimedAnimation.__init__`` is not
                called.
        """
        self.httpService = serviceAddress

        # Original author's note: dynamic waveform display, drawn in real
        # time together with a discrete Fourier transform view of the
        # frequency domain.
        read_from_file = path is not None and os.path.isfile(path)
        if read_from_file:
            wav_reader = wave.open(path)
            self.stream = wav_reader
            # getparams() is (nchannels, sampwidth, framerate, ...), so index
            # 2 is the frame rate.
            self.rate = wav_reader.getparams()[2]
            # Number of frames that span 125 ms at this frame rate.
            self.chunk = int(self.rate/1000*125)
            self.read = wav_reader.readframes
        else:
            # 2000 frames at 16000 frames per second is 125 ms as well.
            self.rate = 16000
            self.chunk = 2000
            audio = pyaudio.PyAudio()
            self.stream = audio.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=self.rate,
                input=True,
                frames_per_buffer=self.chunk,
            )
            self.read = self.stream.read

        self.yysb = GatedConv.load("语音识别MASR/pretrained/gated-conv.pth")

        self.data = []

        figure = plt.figure(num='Real-time wave')
        top_axes = figure.add_subplot(2, 1, 1)  # two rows, one column: first subplot
        bottom_axes = figure.add_subplot(2, 1, 2)  # two rows, one column: second subplot

        # One x coordinate per frame of a chunk.
        self.t = np.linspace(0, self.chunk - 1, self.chunk)

        self.line1, = top_axes.plot([], [], lw=2)
        top_axes.set_xlim(0, self.chunk)
        top_axes.set_ylim(-6000, 6000)

        self.line2, = bottom_axes.plot([], [], lw=2)
        bottom_axes.set_xlim(0, self.chunk)
        bottom_axes.set_ylim(-5000, 5000)

        # Refresh period in milliseconds: the duration of one chunk.
        refresh_ms = int(1000*self.chunk/self.rate)
        if fun_use:
            return
        animation.TimedAnimation.__init__(self, figure, interval=refresh_ms, blit=True)
예제 #2
0
def model_setup(args=None):
    """Build the test data loader, load the model and create the global decoder.

    Args:
        args: Configuration object. This function reads ``test_index_path``,
            ``labels_path``, ``mode``, ``batch_size``, ``num_workers`` and
            ``pretrained_path`` from it, and also hands the whole object to
            ``data.MASRDataset`` as ``config``.

    Returns:
        A ``(model, dataloader)`` tuple.

    Raises:
        ValueError: If ``args`` is ``None``. The default exists only so the
            parameter stays optional in the signature; no configuration can
            be derived without it.

    Side effects:
        Rebinds the module-level name ``decoder`` to a new ``BeamCTCDecoder``.
    """
    if args is None:
        # Fail early with a clear message instead of an attribute error on
        # None a few lines further down.
        raise ValueError("model_setup() requires a configuration object, got None")

    test_dataset = data.MASRDataset(args.test_index_path,
                                    args.labels_path,
                                    args.mode,
                                    config=args)
    dataloader = data.MASRDataLoader(test_dataset,
                                     batch_size=args.batch_size,
                                     num_workers=args.num_workers)

    model = GatedConv.load(args.pretrained_path)

    # The decoder is published as a module global rather than returned.
    global decoder
    decoder = BeamCTCDecoder(
        dataloader.dataset.labels_str,
        alpha=0.8,
        beta=0.3,
        lm_path="/root/lm/zh_giga.no_cna_cmn.prune01244.klm",
        cutoff_top_n=40,
        cutoff_prob=1.0,
        beam_width=100,
        num_processes=args.num_workers,
        blank_index=0,
    )

    return model, dataloader
예제 #3
0
def model_setup(pretrained_path="pretrained/gated-conv.pth",
                alpha=0.8,
                beta=0.3,
                lm_path="/kaggle/working/lm/zh_giga.no_cna_cmn.prune01244.klm",
                cutoff_top_n=40,
                cutoff_prob=1.0,
                beam_width=32,
                num_processes=4,
                blank_index=0,
                vocabulary=None):
    """Load a GatedConv model in eval mode and build a CTCBeamDecoder for it.

    Args:
        pretrained_path: Checkpoint path given to ``GatedConv.load``.
        alpha, beta, lm_path, cutoff_top_n, cutoff_prob, beam_width,
        blank_index: Forwarded positionally to ``CTCBeamDecoder``.
        num_processes: Accepted but not used; see the note below.
        vocabulary: If not ``None``, replaces ``model.vocabulary`` before the
            decoder is built.

    Returns:
        A ``(model, decoder)`` tuple.
    """
    # NOTE(review): the caller-supplied ``num_processes`` is discarded here and
    # the module-level ``cpu_num`` is used instead. Confirm this is intended.
    num_processes = cpu_num

    model = GatedConv.load(pretrained_path)
    model.eval()
    if vocabulary is not None:
        model.vocabulary = vocabulary

    # Same positional order as the CTCBeamDecoder call expects.
    decoder_args = (
        model.vocabulary,
        lm_path,
        alpha,
        beta,
        cutoff_top_n,
        cutoff_prob,
        beam_width,
        num_processes,
        blank_index,
    )
    return model, CTCBeamDecoder(*decoder_args)
예제 #4
0
def recognize():
    """Serve one request whose JSON body carries ``token`` and ``data``.

    Supported tokens:
        'SR': ``data`` is passed to ``GatedConv.predict`` and the predicted
            text is returned.
        'FN': the mean and median of ``data`` are computed with numpy and
            returned as a formatted string.

    Returns:
        The response text on success. For a body that is not a JSON object
        with both keys, or for any other token, a ``(message, 400)`` tuple
        is returned instead of falling through and returning ``None``.
        NOTE(review): the tuple form assumes a Flask-style view; confirm
        against the framework that registers this handler.
    """
    datas = request.json
    if not isinstance(datas, dict) or 'token' not in datas or 'data' not in datas:
        return 'request body must be a JSON object with "token" and "data"', 400

    token = datas['token']
    receipt_data = list(datas['data'])

    if token == 'SR':
        # NOTE(review): the checkpoint is loaded again on every 'SR' request.
        model = GatedConv.load("语音识别MASR/pretrained/gated-conv.pth")
        return model.predict(receipt_data)

    if token == 'FN':
        nums = np.array(receipt_data)
        mean = np.mean(nums)
        median = np.median(nums)
        return '平均数:{}   中位数:{}'.format(mean, median)

    return 'unsupported token: {!r}'.format(token), 400
예제 #5
0
def recognize():
    """Serve one request whose JSON body carries ``token`` and ``data``.

    Supported tokens:
        'SR': ``data`` is passed to ``GatedConv.predict``; the predicted text
            is printed and returned.
        'FN': the mean and median of ``data`` are computed with numpy and
            returned as a formatted string.

    Returns:
        The response text on success. For a body that is not a JSON object
        with both keys, or for any other token, a ``(message, 400)`` tuple
        is returned instead of falling through and returning ``None``.
        NOTE(review): the tuple form assumes a Flask-style view; confirm
        against the framework that registers this handler.
    """
    datas = request.json
    # Alternative left by the author: parse the raw body by hand with
    # json.loads(request.get_data().decode("utf-8")).
    if not isinstance(datas, dict) or 'token' not in datas or 'data' not in datas:
        return 'request body must be a JSON object with "token" and "data"', 400

    token = datas['token']
    receipt_data = list(datas['data'])

    if token == 'SR':
        # NOTE(review): the checkpoint is loaded again on every 'SR' request.
        model = GatedConv.load("AboutDL/语音识别MASR/pretrained/gated-conv.pth")
        text = model.predict(receipt_data)
        print(text)
        return text

    if token == 'FN':
        nums = np.array(receipt_data)
        mean = np.mean(nums)
        median = np.median(nums)
        return '平均数:{}   中位数:{}'.format(mean,median)

    return 'unsupported token: {!r}'.format(token), 400
예제 #6
0
import torch
import feature
from models.conv import GatedConv
import torch.nn.functional as F
from ctcdecode import CTCBeamDecoder

# Decoder settings, handed positionally to CTCBeamDecoder below.
# alpha/beta are presumably the language-model weight and word bonus
# (TODO confirm against the ctcdecode documentation).
lm_path = "lm/zh_giga.no_cna_cmn.prune01244.klm"
alpha, beta = 0.8, 0.3
cutoff_top_n, cutoff_prob = 40, 1.0
beam_width, num_processes = 32, 4
blank_index = 0

# Load the pretrained network and switch it to eval mode.
model = GatedConv.load("pretrained/gated-conv.pth")
model.eval()

# The decoder uses the vocabulary stored on the loaded model.
decoder = CTCBeamDecoder(
    model.vocabulary, lm_path,
    alpha, beta,
    cutoff_top_n, cutoff_prob,
    beam_width, num_processes,
    blank_index,
)

예제 #7
0
import torch
import feature
from models.conv import GatedConv
import torch.nn.functional as F
from ctcdecode import CTCBeamDecoder
from config import lm_path, pretrained_model_path

# Decoder settings; the language-model path and the checkpoint path both come
# from the config module.
alpha, beta = 0.8, 0.3
cutoff_top_n = 40
cutoff_prob = 1.0
beam_width, num_processes, blank_index = 32, 4, 0

# Load the pretrained network and switch it to eval mode.
model = GatedConv.load(pretrained_model_path)
model.eval()

# Positional arguments in the order the CTCBeamDecoder call expects.
_decoder_args = (
    model.vocabulary,
    lm_path,
    alpha,
    beta,
    cutoff_top_n,
    cutoff_prob,
    beam_width,
    num_processes,
    blank_index,
)
decoder = CTCBeamDecoder(*_decoder_args)

예제 #8
0
import _init_path
import platform
from models.conv import GatedConv

# beamdecode is imported only while use_lm is true; nothing below refers to
# it by name.
use_lm = True
if use_lm:
    import beamdecode

# The checkpoint and the sample recording live at machine-specific paths, so
# they are chosen from the reported operating system.
system_type = platform.system()
if system_type == 'Windows':
    model = GatedConv.load("AboutDL\\语音识别MASR\\pretrained\\gated-conv.pth")
    # Author's note: reading the samples with scipy.io.wavfile.read and
    # passing them to predict() gave the same result as passing the path.
    text = model.predict("E:\\打开欢呼比.wav")
elif system_type == 'Linux':
    model = GatedConv.load('AboutDL/语音识别MASR/pretrained/gated-conv.pth')
    text = model.predict(
        "/media/yangjinming/DATA/Dataset/PrimeWords/d/d2/d25104a2-6be0-4950-9ec0-42e8e1303492.wav"
    )
else:
    # platform.system() can also report e.g. 'Darwin' or ''. Without this
    # branch `text` would be unbound and the print below would fail with a
    # NameError that hides the real cause.
    raise RuntimeError(
        "no model/audio paths configured for platform {!r}".format(system_type)
    )

print("识别结果:", text)
예제 #9
0
import _init_path
from models.conv import GatedConv

# Locations of the starting checkpoint and of the two index files given to fit().
PRETRAINED_PATH = "语音识别MASR/pretrained/gated-conv.pth"
TRAIN_INDEX = "data/train.index"
DEV_INDEX = "data/dev.index"

# Load the checkpoint, switch the model to training mode, then fit it.
model = GatedConv.load(PRETRAINED_PATH)
model.to_train()
model.fit(TRAIN_INDEX, DEV_INDEX, train_batch_size=2)
예제 #10
0
# import _init_path
from models.conv import GatedConv

# Alternative checkpoints the author left commented out:
#   pretrained/gated-conv.pth
#   pretrained/model_62.pth
model = GatedConv.load("pretrained2/model_81.pth")

# Run recognition on the sample recording.
text = model.predict("./sample_audio/8_16.wav")

# Blank line, heading, then the recognized text, one per line.
for line in ("", "识别结果:", text):
    print(line)
예제 #11
0
__mtime__ = '20210318'
import os
from models.conv import GatedConv
from config import pretrained_model_path

# The configured checkpoint path is resolved against the parent directory.
checkpoint_path = os.path.join('..', pretrained_model_path)
model = GatedConv.load(checkpoint_path)

# Run recognition on one recording.
text = model.predict("../data_aishell/BAC009S0765W0130.wav")

# Blank line, heading, then the recognized text, one per line.
for line in ("", "识别结果:", text):
    print(line)
예제 #12
0
import torch
import feature
from models.conv import GatedConv
import torch.nn.functional as F
from ctcdecode import CTCBeamDecoder

# Decoder settings, handed positionally to CTCBeamDecoder below. Both the
# language model and the checkpoint are referenced by absolute paths.
lm_path = "/home/db/bing/yuyingshibie/masr/lm/zh_giga.no_cna_cmn.prune01244.klm"
alpha, beta = 0.8, 0.3
cutoff_top_n, cutoff_prob = 40, 1.0
beam_width = 32
num_processes = 4
blank_index = 0

# Load the pretrained network and switch it to eval mode.
model = GatedConv.load("/home/db/bing/yuyingshibie/masr/pretrained/gated-conv.pth")
model.eval()

# The decoder uses the vocabulary stored on the loaded model.
decoder = CTCBeamDecoder(
    model.vocabulary, lm_path, alpha, beta, cutoff_top_n,
    cutoff_prob, beam_width, num_processes, blank_index,
)

예제 #13
0
import _init_path
from models.conv import GatedConv

# Alternative checkpoint the author left commented out: pretrained/gated-conv.pth
CHECKPOINT_PATH = "pretrained/model_3.pth"
SAMPLE_AUDIO = "./sample_audio/test.wav"

model = GatedConv.load(CHECKPOINT_PATH)

# Run recognition on the sample recording.
text = model.predict(SAMPLE_AUDIO)

# Blank line, heading, then the recognized text, one per line.
for line in ("", "识别结果:", text):
    print(line)
예제 #14
0
# Search range for the two language-model parameters being tuned.
parser.add_argument('--lm-alpha-from', default=0.0, type=float, help='Language model weight start tuning')
parser.add_argument('--lm-alpha-to', default=3.0, type=float, help='Language model weight end tuning')
parser.add_argument('--lm-beta-from', default=0.0, type=float,
					help='Language model word bonus (all words) start tuning')
parser.add_argument('--lm-beta-to', default=0.5, type=float,
					help='Language model word bonus (all words) end tuning')
# These two are counts of candidates, so they are parsed as integers; a float
# such as 45.0 is not a valid number of grid points.
parser.add_argument('--lm-num-alphas', default=45, type=int, help='Number of alpha candidates for tuning')
parser.add_argument('--lm-num-betas', default=5, type=int, help='Number of beta candidates for tuning')
parser = add_decoder_args(parser)
args = parser.parse_args()

# Nothing can be tuned without a language model, so stop before loading anything.
if args.lm_path is None:
	print("error: LM must be provided for tuning")
	sys.exit(1)

model = GatedConv.load(args.model_path)

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects
# stored in the file; only point --saved-output at files you trust.
saved_output = np.load(args.saved_output, allow_pickle=True)


def init(beam_width, blank_index, lm_path):
	"""Create the module-level ``decoder`` and ``ae_decoder`` objects.

	Args:
		beam_width: Forwarded to ``BeamCTCDecoder`` as ``beam_width``.
		blank_index: Forwarded to ``BeamCTCDecoder`` as ``blank_index``.
		lm_path: Forwarded to ``BeamCTCDecoder`` as ``lm_path``.

	Both decoders are built from ``model.vocabulary``; the beam decoder's
	process count is read from ``args.lm_workers``. Nothing is returned; the
	results are published through the two globals.
	"""
	global decoder, ae_decoder
	labels = model.vocabulary
	beam_options = dict(
		lm_path=lm_path,
		beam_width=beam_width,
		num_processes=args.lm_workers,
		blank_index=blank_index,
	)
	decoder = BeamCTCDecoder(labels, **beam_options)
	ae_decoder = GreedyDecoder(model.vocabulary)


def decode_dataset(params):
	lm_alpha, lm_beta = params
	global decoder
	decoder._decoder.reset_params(lm_alpha, lm_beta)