def test_create_unique_root_2():
    """Verify the unique root built from a non-default configuration.

    Sets up a ``Configuration`` (model ``'video'``, enhanced mode off,
    language ``'fr_FR'``, two alternative language codes, a speech context
    of ``'hi'`` with value ``5``) and an ``NLPModel`` (stop-word removal,
    contraction expansion and number-to-word conversion on, stemming off),
    then checks that ``Utilities.create_unique_root`` returns the exact
    expected identifier string for root ``'12345'``.
    """
    # Imports stay function-local, as in the original test.
    from utilities.utilities import Utilities
    from model.configuration import Configuration
    from model.nlp import NLPModel

    utils = Utilities()
    config = Configuration()
    nlp = NLPModel()

    # Speech-to-text configuration under test.
    config.set_model('video')
    config.set_use_enhanced(False)
    config.set_language_code('fr_FR')
    config.set_alternative_language_codes(['en-US', 'ru-RU'])
    config.set_speech_context('hi', 5)

    # Text normalisation options under test.
    nlp.set_remove_stop_words(True)
    nlp.set_apply_stemming(False)
    nlp.set_expand_contractions(True)
    nlp.set_n2w(True)

    result = utils.create_unique_root('12345', config, nlp)

    # Adjacent literals are concatenated at compile time into one string.
    expected = (
        '12345_video_fr_FR'
        '_alts_applied'
        '_speech_adaptation_applied_boost_5'
        '_stop_words_removed'
        '_contractions_expanded'
        '_numbers_converted_2_words'
    )
    assert result == expected
# Generate hyp speech_to_text = SpeechToText() if use_fake_hyp: hyp = 'this is a fake hyp' elif local_files_path: file = local_files_path + '/' + audio hyp = speech_to_text.transcribe_streaming(file, configuration) else: hyp = speech_to_text.get_hypothesis(audio, configuration) logger.debug(f'RAW HYP: {hyp}') logger.debug(f'RAW REF {ref}') unique_root = utilities.create_unique_root(root, configuration, nlp_model) io_handler.write_hyp(file_name=unique_root + '.txt', text=hyp) if not only_transcribe: # Calculate WER wer_obj = SimpleWER() if process_each_letter: logger.debug('PROCESSING EACH CHARACTER') hyp = list(hyp) hyp = ' '.join(hyp) ref = list(ref) ref = ' '.join(ref) logger.debug(f'HYP FOR WER: {hyp}') logger.debug(f'REF FOR WER: {ref}')