예제 #1
0
def decode_results(model, decoded_output, decoded_offsets):
    """Bundle decoded transcriptions and run metadata into one dictionary.

    Reads the module-level ``args`` namespace (``model_path``, ``lm_path``,
    ``alpha``, ``beta``, ``decoder``, ``top_paths``, ``offsets``).

    Args:
        model: Passed to ``DeepSpeech.get_meta``; the mapping it returns is
            merged into the ``acoustic_model`` metadata.
        decoded_output: Indexed as ``decoded_output[batch][path]``; each
            element is stored under ``'transcription'``.
        decoded_offsets: Indexed the same way; only read when
            ``args.offsets`` is truthy, and each element must provide
            ``.tolist()``.

    Returns:
        A dict with an ``"output"`` list (one entry per kept path, at most
        ``args.top_paths`` per batch item) and a ``"_meta"`` dict.
    """
    acoustic_meta = {"name": os.path.basename(args.model_path)}
    lm_name = os.path.basename(args.lm_path) if args.lm_path else None
    lm_enabled = args.lm_path is not None
    decoder_meta = {
        "lm": lm_enabled,
        # The LM weights are only reported when an LM path was supplied.
        "alpha": args.alpha if lm_enabled else None,
        "beta": args.beta if lm_enabled else None,
        "type": args.decoder,
    }
    acoustic_meta.update(DeepSpeech.get_meta(model))

    entries = []
    for batch_idx, paths in enumerate(decoded_output):
        limit = min(args.top_paths, len(paths))
        for path_idx in range(limit):
            entry = {'transcription': paths[path_idx]}
            if args.offsets:
                entry['offsets'] = decoded_offsets[batch_idx][path_idx].tolist()
            entries.append(entry)

    return {
        "output": entries,
        "_meta": {
            "acoustic_model": acoustic_meta,
            "language_model": {"name": lm_name},
            "decoder": decoder_meta,
        },
    }
예제 #2
0
def decode_results(model, decoded_output, decoded_offsets):
    """Collect decoded transcriptions plus run metadata in a single dict.

    Reads the module-level ``args`` namespace (``model_path``, ``lm_path``,
    ``alpha``, ``beta``, ``decoder``, ``top_paths``, ``offsets``).

    Args:
        model: Handed to ``DeepSpeech.get_meta``; the returned mapping is
            merged into the ``acoustic_model`` metadata.
        decoded_output: Indexed as ``decoded_output[batch][path]``; each
            element becomes a ``'transcription'`` value.
        decoded_offsets: Indexed the same way; only read when
            ``args.offsets`` is truthy. Elements are stored unchanged.

    Returns:
        A dict with an ``"output"`` list (at most ``args.top_paths`` entries
        per batch item) and a ``"_meta"`` dict.
    """
    acoustic_meta = {"name": os.path.basename(args.model_path)}
    language_meta = {
        "name": os.path.basename(args.lm_path) if args.lm_path else None,
    }
    uses_lm = args.lm_path is not None
    decoder_meta = {
        "lm": uses_lm,
        # The LM weights are only reported when an LM path was supplied.
        "alpha": args.alpha if uses_lm else None,
        "beta": args.beta if uses_lm else None,
        "type": args.decoder,
    }
    acoustic_meta.update(DeepSpeech.get_meta(model))

    collected = []
    for batch_idx, candidates in enumerate(decoded_output):
        keep = min(args.top_paths, len(candidates))
        for rank in range(keep):
            item = {'transcription': candidates[rank]}
            if args.offsets:
                item['offsets'] = decoded_offsets[batch_idx][rank]
            collected.append(item)

    return {
        "output": collected,
        "_meta": {
            "acoustic_model": acoustic_meta,
            "language_model": language_meta,
            "decoder": decoder_meta,
        },
    }
예제 #3
0
def decode_results(model, decoded_output, decoded_offsets):
    """Concatenate the decoded paths into one cleaned, lowercased string.

    Reads the module-level ``args`` namespace (``model_path``, ``lm_path``,
    ``alpha``, ``beta``, ``decoder``, ``top_paths``, ``offsets``) and prints
    both the batch count and the final string to stdout.

    NOTE(review): a ``results`` dictionary is still assembled here but is
    never returned; only the string is. It is kept so the attribute reads
    and the ``DeepSpeech.get_meta`` call behave as before.

    Args:
        model: Passed to ``DeepSpeech.get_meta``.
        decoded_output: Indexed as ``decoded_output[batch][path]``; the kept
            elements are concatenated in iteration order.
        decoded_offsets: Indexed the same way; only read when
            ``args.offsets`` is truthy.

    Returns:
        The concatenated text after ``removerepeat`` and ``.lower()``.
    """
    acoustic_meta = {"name": os.path.basename(args.model_path)}
    language_meta = {
        "name": os.path.basename(args.lm_path) if args.lm_path else None,
    }
    uses_lm = args.lm_path is not None
    decoder_meta = {
        "lm": uses_lm,
        "alpha": args.alpha if uses_lm else None,
        "beta": args.beta if uses_lm else None,
        "type": args.decoder,
    }
    results = {
        "output": [],
        "_meta": {
            "acoustic_model": acoustic_meta,
            "language_model": language_meta,
            "decoder": decoder_meta,
        },
    }
    acoustic_meta.update(DeepSpeech.get_meta(model))

    transcript = ''
    print("len is : ", len(decoded_output))
    for batch_idx, candidates in enumerate(decoded_output):
        keep = min(args.top_paths, len(candidates))
        for rank in range(keep):
            text = candidates[rank]
            item = {'transcription': text}
            transcript += text
            if args.offsets:
                item['offsets'] = decoded_offsets[batch_idx][rank]
            results['output'].append(item)

    transcript = removerepeat(transcript)
    transcript = transcript.lower()
    print(transcript)
    return transcript
예제 #4
0
def _path_tokens(text, align, probs, time_div, window):
    """Split one decoded string into word tokens with timing and confidence.

    Args:
        text: Decoded character sequence; words are separated by ``' '``.
        align: Per-character alignment values, indexed like ``text``.
        probs: Per-character values, indexed like ``text``; summed per word.
        time_div: Divisor applied to alignment values for ``start``/``end``.
        window: Amount added to the last character's alignment before
            dividing, to form the word's ``end``.

    Returns:
        A list of dicts with keys ``token``, ``start``, ``end``, ``conf``.
    """
    tokens = []
    word = ''
    word_prob = 0.0
    start_idx = -1
    for idx, c in enumerate(text):
        if c == ' ':
            if word:
                tokens.append({
                    "token": word,
                    "start": align[start_idx] / time_div,
                    "end": (align[idx - 1] + window) / time_div,
                    "conf": word_prob,
                })
                word = ''
                # Reset so the next word's confidence does not include the
                # characters of the words before it.
                word_prob = 0.0
                start_idx = -1
            # A space never becomes part of a token, even when it is a
            # leading or repeated space with no word in progress.
            continue
        if start_idx == -1:
            start_idx = idx
        word += c
        word_prob += probs[idx]
    if word:
        # Trailing word with no terminating space.
        tokens.append({
            "token": word,
            "start": align[start_idx] / time_div,
            "end": (align[len(text) - 1] + window) / time_div,
            "conf": word_prob,
        })
    return tokens


def word_decode(decoder, data, time_div=50, window=5, model=None):
    """Decode ``data`` and return word-level tokens with run metadata.

    Reads the module-level ``args`` namespace (``model_path``, ``lm_path``,
    ``dict_path``, ``lm_alpha``, ``lm_beta``, ``decoder``, ``label_size``,
    ``label_margin``).

    Args:
        decoder: Object whose ``decode(data)`` returns the 4-tuple
            ``(strings, aligns, conf, char_probs)``, each indexed as
            ``x[pi][i]``; its ``_top_n`` attribute is reported as
            ``num_paths``.
        data: Passed straight to ``decoder.decode``.
        time_div: Divisor turning alignment values into ``start``/``end``.
        window: Added to a word's last alignment value to form its ``end``.
        model: Passed to ``DeepSpeech.get_meta``; the returned mapping is
            merged into the ``acoustic_model`` metadata.

    Returns:
        A dict with ``one_best`` (tokens of the first path joined by
        spaces), ``num_paths``, ``top_paths`` and ``_meta``.
    """
    strings, aligns, conf, char_probs = decoder.decode(data)

    results = {
        "one_best": "",
        "num_paths": decoder._top_n,
        "top_paths": [],
        "_meta": {
            "acoustic_model": {
                "name": os.path.basename(args.model_path),
                **DeepSpeech.get_meta(model)
            },
            "language_model": {
                "name":
                os.path.basename(args.lm_path) if args.lm_path else None,
                "dict":
                os.path.basename(args.dict_path) if args.dict_path else None
            },
            "decoder": {
                "lm": args.lm_path is not None,
                "dict": args.dict_path is not None,
                "alpha": args.lm_alpha if args.lm_path is not None else None,
                "beta": args.lm_beta if args.lm_path is not None else None,
                "type": args.decoder,
                "label_size": args.label_size,
                "label_margin": args.label_margin
            }
        }
    }

    for pi, group in enumerate(strings):
        # NOTE(review): one path is recorded per outer index, so when a group
        # holds several entries only the last one is kept. This matches the
        # previous behaviour; confirm it is intended for multi-entry groups.
        path = None
        for i, text in enumerate(group):
            path = {
                "rank": pi,
                "conf": float(conf[pi][i]),
                "tokens": _path_tokens(text, aligns[pi][i], char_probs[pi][i],
                                       time_div, window),
            }
        # An empty group produces no path; skip it instead of failing on an
        # unbound name or re-appending the previous group's path.
        if path is not None:
            results['top_paths'].append(path)

    if results['top_paths']:
        results['one_best'] = " ".join(
            x['token'] for x in results['top_paths'][0]['tokens'])
    return results