Esempio n. 1
0
def recognize(args):
    """Decode every utterance in ``args.recog_json`` using a KenLM model.

    The model is restored with ``Transformer.load_model(args.model_path)``,
    switched to eval mode and moved to CUDA. A KenLM model is loaded from
    ``args.lm_path`` and handed to ``model.recognize`` together with the
    features of each utterance. The collected results are written to
    ``args.result_label`` as UTF-8 encoded JSON under the ``'utts'`` key.

    Args:
        args: Options object (presumably an argparse namespace -- confirm
            against the caller). This function reads ``model_path``,
            ``dict``, ``recog_json``, ``lm_path`` and ``result_label`` and
            forwards the whole object to ``model.recognize``.

    Raises:
        AssertionError: If the decoder's sos/eos ids differ from the ones
            returned by ``process_dict(args.dict)``.
    """
    model, LFR_m, LFR_n = Transformer.load_model(args.model_path)
    print(model)
    model.eval()
    model.cuda()

    # The dictionary must agree with the ids stored in the decoder.
    char_list, sos_id, eos_id = process_dict(args.dict)
    decoder = model.decoder
    assert decoder.sos_id == sos_id and decoder.eos_id == eos_id

    # Utterance descriptions live under the top-level 'utts' key.
    with open(args.recog_json, 'rb') as json_file:
        utterances = json.load(json_file)['utts']

    # Language model passed to model.recognize for every utterance.
    lm_model = kenlm.Model(args.lm_path)

    total = len(utterances)
    results = {}
    with torch.no_grad():
        for position, (utt_id, utt_info) in enumerate(utterances.items(), 1):
            print('(%d/%d) decoding %s' % (position, total, utt_id),
                  flush=True)
            feats = kaldi_io.read_mat(utt_info['input'][0]['feat'])  # TxD
            feats = build_LFR_features(feats, LFR_m, LFR_n)
            feats = torch.from_numpy(feats).float()
            feats_len = torch.tensor([feats.size(0)], dtype=torch.int)
            nbest_hyps = model.recognize(feats.cuda(), feats_len.cuda(),
                                         char_list, lm_model, args)
            results[utt_id] = add_results_to_json(utt_info, nbest_hyps,
                                                  char_list)

    with open(args.result_label, 'wb') as out_file:
        serialized = json.dumps({'utts': results}, indent=4, sort_keys=True)
        out_file.write(serialized.encode('utf_8'))
Esempio n. 2
0
def recognize(args):
    """Build a Transformer from ``args``, decode all utterances, save results.

    The encoder/decoder are constructed from the hyper-parameters on
    ``args``, the weights are loaded from ``args.model_path`` and the model
    is moved to the ``"cuda"`` device in eval mode. Every utterance found
    under the ``"utts"`` key of ``args.recog_json`` is decoded with
    ``model.recognize`` and the results are written to ``args.result_label``
    as UTF-8 encoded JSON.

    Args:
        args: Options object (presumably an argparse namespace -- confirm
            against the caller). Besides the model hyper-parameters it must
            provide ``dict``, ``model_path``, ``LFR_m``, ``LFR_n``,
            ``recog_json`` and ``result_label``; the whole object is also
            forwarded to ``model.recognize``.

    Raises:
        AssertionError: If the decoder's sos/eos ids differ from the ones
            returned by ``process_dict(args.dict)``.
    """
    # model
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)
    encoder = Encoder(
        args.d_input * args.LFR_m,
        args.n_layers_enc,
        args.n_head,
        args.d_k,
        args.d_v,
        args.d_model,
        args.d_inner,
        dropout=args.dropout,
        pe_maxlen=args.pe_maxlen,
    )
    decoder = Decoder(
        sos_id,
        eos_id,
        vocab_size,
        args.d_word_vec,
        args.n_layers_dec,
        args.n_head,
        args.d_k,
        args.d_v,
        args.d_model,
        args.d_inner,
        dropout=args.dropout,
        tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
        pe_maxlen=args.pe_maxlen,
    )
    model = Transformer(encoder, decoder)
    model.load_state_dict(flow.load(args.model_path))
    device = flow.device("cuda")
    model.eval()
    model.to(device)
    LFR_m = args.LFR_m
    LFR_n = args.LFR_n
    # The dictionary was already parsed above; reuse those ids instead of
    # reading it a second time.
    assert model.decoder.sos_id == sos_id and model.decoder.eos_id == eos_id

    # read json data
    with open(args.recog_json, "rb") as f:
        js = json.load(f)["utts"]

    # decode each utterance
    num_utts = len(js)  # loop-invariant, used only for the progress line
    new_js = {}
    with flow.no_grad():
        for idx, (name, utt) in enumerate(js.items(), 1):
            print("(%d/%d) decoding %s" % (idx, num_utts, name), flush=True)
            features = kaldi_io.read_mat(utt["input"][0]["feat"])
            features = build_LFR_features(features, LFR_m, LFR_n)
            features = flow.tensor(features).to(dtype=flow.float32)
            # Length is taken after LFR processing, from the first dimension.
            features_length = flow.tensor([features.size(0)], dtype=flow.int64)
            features = features.to(device)
            features_length = features_length.to(device)
            nbest_hyps = model.recognize(features, features_length, char_list, args)
            new_js[name] = add_results_to_json(utt, nbest_hyps, char_list)

    with open(args.result_label, "wb") as f:
        f.write(json.dumps({"utts": new_js}, indent=4, sort_keys=True).encode("utf_8"))
Esempio n. 3
0
def recognize(args):
    """Decode every utterance in ``args.recog_json`` and save readable JSON.

    The model is restored with ``Transformer.load_model(args.model_path,
    args)``, switched to eval mode and moved to CUDA. Each utterance found
    under the ``'utts'`` key is decoded with ``model.recognize``. The
    results are dumped with ``ensure_ascii=False`` and written to
    ``args.result_label`` encoded as UTF-8.

    Args:
        args: Options object (presumably an argparse namespace -- confirm
            against the caller). This function reads ``model_path``,
            ``dict``, ``recog_json`` and ``result_label`` and forwards the
            whole object to ``Transformer.load_model`` and
            ``model.recognize``.

    Raises:
        AssertionError: If the decoder's sos/eos ids differ from the ones
            returned by ``process_dict(args.dict)``.
    """
    model, LFR_m, LFR_n = Transformer.load_model(args.model_path, args)
    print(model)
    model.eval()
    model.cuda()

    # The dictionary must agree with the ids stored in the decoder.
    char_list, sos_id, eos_id = process_dict(args.dict)
    decoder = model.decoder
    assert decoder.sos_id == sos_id and decoder.eos_id == eos_id

    # Utterance descriptions live under the top-level 'utts' key.
    with open(args.recog_json, 'r', encoding='utf-8') as json_file:
        utterances = json.load(json_file)['utts']

    total = len(utterances)
    results = {}
    with torch.no_grad():
        for position, (utt_id, utt_info) in enumerate(utterances.items(), 1):
            print('(%d/%d) decoding %s' % (position, total, utt_id),
                  flush=True)
            feats = kaldi_io.read_mat(utt_info['input'][0]['feat'])  # TxD
            feats = build_LFR_features(feats, LFR_m, LFR_n)
            feats = torch.from_numpy(feats).float()
            feats_len = torch.tensor([feats.size(0)], dtype=torch.int)
            nbest_hyps = model.recognize(feats.cuda(), feats_len.cuda(),
                                         char_list, args)
            results[utt_id] = add_results_to_json(utt_info, nbest_hyps,
                                                  char_list)

    with open(args.result_label, 'wb') as out_file:
        # ensure_ascii=False keeps non-ASCII characters unescaped in the file.
        serialized = json.dumps({'utts': results},
                                indent=4,
                                sort_keys=True,
                                ensure_ascii=False)
        out_file.write(serialized.encode('utf_8'))
Esempio n. 4
0
def recognize(args):
    """Decode every utterance in ``args.recog_json`` with a Seq2Seq model.

    The character list from ``args.dict`` is stored on ``args.char_list``
    before the model is restored with ``Seq2Seq.load_model(args.model_path,
    args)``. The model is put in eval mode and moved to CUDA. When
    ``args.align_trun`` is truthy, each utterance is decoded with
    ``model.recognize_align`` using the whitespace-split ``'ctcid'`` field
    of its first output entry; otherwise ``model.recognize`` is used. The
    results are written to ``args.result_label`` as UTF-8 encoded JSON.

    Args:
        args: Options object (presumably an argparse namespace -- confirm
            against the caller). This function reads ``dict``,
            ``model_path``, ``recog_json``, ``align_trun`` and
            ``result_label``, sets ``char_list`` on it, and forwards the
            whole object to the model.

    Raises:
        AssertionError: If the decoder's sos/eos ids differ from the ones
            returned by ``process_dict(args.dict)``.
    """
    char_list, sos_id, eos_id = process_dict(args.dict)
    args.char_list = char_list
    model, LFR_m, LFR_n = Seq2Seq.load_model(args.model_path, args)
    print(model)
    model.eval()
    model.cuda()
    # The dictionary was already parsed above; reuse those ids instead of
    # reading it a second time.
    assert model.decoder.sos_id == sos_id and model.decoder.eos_id == eos_id

    # read json data
    with open(args.recog_json, 'rb') as f:
        js = json.load(f)['utts']

    # decode each utterance
    num_utts = len(js)  # loop-invariant, used only for the progress line
    new_js = {}
    with torch.no_grad():
        for idx, (name, utt) in enumerate(js.items(), 1):
            print('(%d/%d) decoding %s' % (idx, num_utts, name),
                  flush=True)
            features = kaldi_io.read_mat(utt['input'][0]['feat'])  # TxD
            features = build_LFR_features(features, LFR_m, LFR_n)
            features = torch.from_numpy(features).float()
            # Length is taken after LFR processing, from the first dimension.
            features_length = torch.tensor([features.size(0)],
                                           dtype=torch.int)
            features = features.cuda()
            features_length = features_length.cuda()
            if args.align_trun:
                # Alignment tokens come from the utterance's own JSON entry.
                align = utt['output'][0]['ctcid'].split()
                nbest_hyps = model.recognize_align(features, features_length,
                                                   char_list, align, args)
            else:
                nbest_hyps = model.recognize(features, features_length,
                                             char_list, args)
            new_js[name] = add_results_to_json(utt, nbest_hyps, char_list)

    with open(args.result_label, 'wb') as f:
        f.write(
            json.dumps({
                'utts': new_js
            }, indent=4, sort_keys=True).encode('utf_8'))