Code Example #1
    def __init__(self, model_name, trans_df):

        from collections import Counter

        from espnet2.bin.asr_inference import Speech2Text
        from espnet_model_zoo.downloader import ModelDownloader
        import jiwer

        self.model_name = model_name
        d = ModelDownloader()
        self.asr_model = Speech2Text(**d.download_and_unpack(model_name))
        self.input_txt_list = []
        self.clean_txt_list = []
        self.output_txt_list = []
        self.transcriptions = []
        self.true_txt_list = []
        self.sample_rate = int(
            d.data_frame[d.data_frame["name"] == model_name]["fs"])
        self.trans_df = trans_df
        self.trans_dic = self._df_to_dict(trans_df)
        self.mix_counter = Counter()
        self.clean_counter = Counter()
        self.est_counter = Counter()
        self.transformation = jiwer.Compose([
            jiwer.ToLowerCase(),
            jiwer.RemovePunctuation(),
            jiwer.RemoveMultipleSpaces(),
            jiwer.Strip(),
            jiwer.SentencesToListOfWords(),
            jiwer.RemoveEmptyStrings(),
        ])
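The transforms above use the pre-3.0 jiwer API. As a rough sketch (assuming jiwer >= 3.0, where SentencesToListOfWords/RemoveEmptyStrings were replaced by ReduceToListOfListOfWords and the truth_* keyword arguments were renamed), an equivalent normalization and WER call might look like this:

import jiwer

# Normalization roughly equivalent to the Compose above, for jiwer >= 3.0.
transformation = jiwer.Compose([
    jiwer.ToLowerCase(),
    jiwer.RemovePunctuation(),
    jiwer.RemoveMultipleSpaces(),
    jiwer.Strip(),
    jiwer.ReduceToListOfListOfWords(),
])

# Made-up strings purely for illustration.
error_rate = jiwer.wer(
    "the quick brown fox",
    "the quick brown box",
    reference_transform=transformation,
    hypothesis_transform=transformation,
)
print(error_rate)  # 1 substitution / 4 reference words = 0.25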
Code Example #2
import jiwer


def rmPunctuation(values):
    """Remove punctuation, strip, and collapse multiple spaces in each string."""
    newValues = []
    for v in values:
        newValue = jiwer.RemovePunctuation()(v)
        newValue = jiwer.Strip()(newValue)
        newValue = jiwer.RemoveMultipleSpaces()(newValue)
        newValues.append(newValue)
    return newValues
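A tiny usage sketch for rmPunctuation (made-up inputs):

print(rmPunctuation(["hello, world!  ", "foo;  bar "]))
# -> expected ["hello world", "foo bar"]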
Code Example #3
import jiwer


def calc_wer(ground_truth, hypothesis):
    transformation = jiwer.Compose([
        jiwer.ToLowerCase(),
        jiwer.RemoveMultipleSpaces(),
        jiwer.Strip(),
        jiwer.ExpandCommonEnglishContractions(),
        jiwer.RemovePunctuation()
    ])
    wer = jiwer.wer(ground_truth,
                    hypothesis,
                    truth_transform=transformation,
                    hypothesis_transform=transformation)
    return wer
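A minimal usage sketch for calc_wer (made-up strings):

truth = "Hello, world! This is a test."
hypo = "hello world this is a test"
print(calc_wer(truth, hypo))  # identical after normalization, so this should be 0.0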
Code Example #4
import jiwer


def sentence_wer(reference: str, prediction: str):
    transformation = jiwer.Compose([
        jiwer.RemoveMultipleSpaces(),
        jiwer.RemovePunctuation(),
        jiwer.Strip(),
        jiwer.ToLowerCase(),
        jiwer.ExpandCommonEnglishContractions(),
        jiwer.RemoveWhiteSpace(replace_by_space=True),
        jiwer.SentencesToListOfWords(),
        jiwer.RemoveEmptyStrings(),
    ])

    return jiwer.wer(reference.strip(),
                     prediction.strip(),
                     truth_transform=transformation,
                     hypothesis_transform=transformation)
Code Example #5
import jiwer


def normalize_sentence(sentence):
    """Normalize sentence"""
    # Convert all characters to upper.
    sentence = sentence.upper()
    # Remove punctuation.
    sentence = jiwer.RemovePunctuation()(sentence)
    # Replace \n, \t, \r, \x0c with spaces.
    sentence = jiwer.RemoveWhiteSpace(replace_by_space=True)(sentence)
    # Collapse multiple spaces into one.
    sentence = jiwer.RemoveMultipleSpaces()(sentence)
    # Strip leading and trailing whitespace.
    sentence = jiwer.Strip()(sentence)

    return sentence
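A quick usage sketch (the input string is made up):

print(normalize_sentence("hello,\tworld!\n  this  is a test "))
# -> expected "HELLO WORLD THIS IS A TEST"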
Code Example #6
from flask import request, jsonify

import jiwer


def analyze():
    try:
        req_data = request.get_json()

        compose_rule_set = []
        if req_data.get('to_lower_case', False):
            compose_rule_set.append(jiwer.ToLowerCase())
        if req_data.get('strip_punctuation', False):
            compose_rule_set.append(jiwer.RemovePunctuation())
        if req_data.get('strip_words', False):
            compose_rule_set.append(jiwer.Strip())
        if req_data.get('strip_multi_space', False):
            compose_rule_set.append(jiwer.RemoveMultipleSpaces())
        word_excepts = req_data.get('t_words', '')
        if word_excepts != '':
            words = [a.strip() for a in word_excepts.split(",")]
            compose_rule_set.append(jiwer.RemoveSpecificWords(words))

        compose_rule_set.append(
            jiwer.RemoveWhiteSpace(
                replace_by_space=req_data.get('replace_whitespace', False)))

        transformation = jiwer.Compose(compose_rule_set)

        measures = jiwer.compute_measures(req_data.get('s_truth', ""),
                                          req_data.get('s_hypo', ""),
                                          truth_transform=transformation,
                                          hypothesis_transform=transformation)

        return jsonify({
            "wer": measures['wer'],
            "mer": measures['mer'],
            "wil": measures['wil']
        })
    except Exception:
        return jsonify("API endpoint Error")
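The Flask route for this handler is not shown in the snippet, but based on the keys it reads, a hypothetical JSON payload could look like this:

# Hypothetical request body assembled from the keys read above.
payload = {
    "s_truth": "the quick brown fox jumps over the lazy dog",  # reference text
    "s_hypo": "the quick brown fox jumped over a lazy dog",    # hypothesis text
    "to_lower_case": True,
    "strip_punctuation": True,
    "strip_words": True,
    "strip_multi_space": True,
    "t_words": "uh, um",            # comma-separated words to drop
    "replace_whitespace": True,
}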
Code Example #7
import re

import jiwer


def metric(ref_trans, asr_trans, lang):
    if lang == "en":
        transformation = jiwer.Compose([
            jiwer.Strip(),
            jiwer.ToLowerCase(),
            jiwer.RemoveWhiteSpace(replace_by_space=True),
            jiwer.RemoveMultipleSpaces(),
            jiwer.SentencesToListOfWords(word_delimiter=" "),
            jiwer.RemoveEmptyStrings(),
            jiwer.RemovePunctuation(),
        ])
        wer = jiwer.wer(
            ref_trans,
            asr_trans,
            truth_transform=transformation,
            hypothesis_transform=transformation,
        )
    elif lang == "cn":
        # Keep only CJK characters, then join them with spaces so the WER is
        # effectively computed at the character level.
        del_symbols = re.compile(r"[^\u4e00-\u9fa5]+")
        for idx in range(len(asr_trans)):
            sentence = re.sub(del_symbols, "", asr_trans[idx])
            sentence = list(sentence)
            sentence = " ".join(sentence)
            asr_trans[idx] = sentence

            sentence = re.sub(del_symbols, "", ref_trans[idx])
            sentence = list(sentence)
            sentence = " ".join(sentence)
            ref_trans[idx] = sentence
        # Sanity check: cleaned ASR transcripts are expected to be unique.
        asr_valid = set(asr_trans)
        assert len(asr_valid) == len(asr_trans)
        wer = jiwer.wer(ref_trans, asr_trans)

    else:
        raise ValueError("Args error!")
    return wer
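A hypothetical call with made-up transcripts (both arguments may be lists of sentences, which the "cn" branch mutates in place):

refs = ["hello world", "good morning everyone"]
hyps = ["hello word", "good morning everyone"]
print(metric(refs, hyps, lang="en"))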
Code Example #8
File: calculate_wer.py  Project: bethard/redshred
import argparse

import jiwer
import regex


# Custom transform that strips Unicode punctuation; the class/method wrapper
# here is an assumption around the original return statement, built on
# jiwer's AbstractTransform interface.
class RemovePunctuation(jiwer.transforms.AbstractTransform):
    def process_string(self, s):
        return regex.sub(r"\p{P}", "", s)

# remove some differences that we don't care about for comparisons
transform = jiwer.Compose([
    jiwer.ToLowerCase(),
    RemovePunctuation(),
    jiwer.SubstituteRegexes(
        {r"\b(uh|um|ah|hi|alright|all right|well|kind of)\b": ""}),
    jiwer.SubstituteWords({
        "one": "1", "two": "2", "three": "3", "four": "4", "five": "5",
        "six": "6", "seven": "7", "eight": "8", "nine": "9", "ten": "10",
        "plus": "+", "minus": "-",
        "check out": "checkout", "hard point": "hardpoint"}),
    jiwer.RemoveMultipleSpaces(),
    jiwer.Strip(),
    jiwer.SentencesToListOfWords(),
    jiwer.RemoveEmptyStrings()
])
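As a rough illustration (assuming jiwer 2.x, where a Compose can be applied directly to a string):

# Hypothetical input: the filler "uh" is dropped, number words and
# multi-word terms are substituted, then the text is split into words.
print(transform("Uh, check out hard point three"))
# -> expected roughly ["checkout", "hardpoint", "3"]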

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("json_path")
    parser.add_argument("--verbose", action="store_true")
    parser.add_argument("--cleaned", action="store_true")
    args = parser.parse_args()


    all_expected = []
    all_actual = []
    all_cleaned_actual = []