Esempio n. 1
0
def test_get_count_of_word_instances():
    """Verify that each distinct word is mapped to its number of occurrences."""
    # Imported locally so the project module is only required when this test runs.
    from utilities.utilities import Utilities

    words = ['hello', 'hi', 'hello', 'there', 'hello']
    expected_counts = {'hello': 3, 'hi': 1, 'there': 1}

    actual_counts = Utilities().get_count_of_word_instances(words)

    assert actual_counts == expected_counts
                                logger.debug(f'HYP FOR WER: {hyp}')
                                logger.debug(f'REF FOR WER: {ref}')
                                wer_obj.AddHypRef(hyp, ref)

                                wer , ref_word_count, ref_error_count, ins, deletions, subs = wer_obj.GetWER()
                                string = f'STATS: wer = {wer}%, ref words = {ref_word_count}, number of errors = {ref_error_count}'
                                print(string)
                                logger.debug(string)

                                #Remove hyp/ref from WER
                                wer_obj.AddHypRef('', '')

                                # Get words producing errors
                                inserted_words, deleted_words, substituted_words = wer_obj.GetMissedWords()
                                delete_word_counts = utilities.get_count_of_word_instances(deleted_words)
                                inserted_word_counts = utilities.get_count_of_word_instances(inserted_words)
                                substituted_word_count = utilities.get_count_of_word_instances(substituted_words)
                                logger.debug(f'INSERTED WORDS: {inserted_words}')
                                logger.debug(f'DELETED WORDS {deleted_words}')
                                logger.debug(f'SUBSTITUTED WORDS: {substituted_words}')
                                word_count_list = (delete_word_counts, inserted_word_counts,  substituted_word_count  )
                                logger.debug(f'WORD COUNT LIST: {word_count_list}')

                                io_handler.write_csv_header(configuration, nlp_model)

                                io_handler.update_csv(wer, audio, configuration, nlp_model, word_count_list )
                                io_handler.write_html_diagnostic(wer_obj, unique_root, io_handler.get_result_path())

                                #NLP options
                                if nlp_model.get_apply_stemming() or nlp_model.get_remove_stop_words() or nlp_model.get_n2w() or nlp_model.get_expand_contractions():