def test_get_count_of_word_instances():
    """Check word counting on a list that contains repeated words.

    Feeds ``Utilities.get_count_of_word_instances`` a list in which
    'hello' appears three times and 'hi' and 'there' once each, and
    asserts that the returned value equals the matching word -> count
    mapping.
    """
    # Imported inside the test, as in the original, so the project module
    # is only loaded when this test actually runs.
    from utilities.utilities import Utilities

    words = ['hello', 'hi', 'hello', 'there', 'hello']
    expected_counts = {'hello': 3, 'hi': 1, 'there': 1}

    assert Utilities().get_count_of_word_instances(words) == expected_counts
logger.debug(f'HYP FOR WER: {hyp}') logger.debug(f'REF FOR WER: {ref}') wer_obj.AddHypRef(hyp, ref) wer , ref_word_count, ref_error_count, ins, deletions, subs = wer_obj.GetWER() string = f'STATS: wer = {wer}%, ref words = {ref_word_count}, number of errors = {ref_error_count}' print(string) logger.debug(string) #Remove hyp/ref from WER wer_obj.AddHypRef('', '') # Get words producing errors inserted_words, deleted_words, substituted_words = wer_obj.GetMissedWords() delete_word_counts = utilities.get_count_of_word_instances(deleted_words) inserted_word_counts = utilities.get_count_of_word_instances(inserted_words) substituted_word_count = utilities.get_count_of_word_instances(substituted_words) logger.debug(f'INSERTED WORDS: {inserted_words}') logger.debug(f'DELETED WORDS {deleted_words}') logger.debug(f'SUBSTITUTED WORDS: {substituted_words}') word_count_list = (delete_word_counts, inserted_word_counts, substituted_word_count ) logger.debug(f'WORD COUNT LIST: {word_count_list}') io_handler.write_csv_header(configuration, nlp_model) io_handler.update_csv(wer, audio, configuration, nlp_model, word_count_list ) io_handler.write_html_diagnostic(wer_obj, unique_root, io_handler.get_result_path()) #NLP options if nlp_model.get_apply_stemming() or nlp_model.get_remove_stop_words() or nlp_model.get_n2w() or nlp_model.get_expand_contractions():