def test_apply_nlp_options_n2w():
    """Check apply_nlp_options when only number-to-words is enabled.

    The hypothesis holds two numeric tokens; the pinned output spells each
    one out and starts with a single leading space.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = '34 567'
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_n2w(True)

    assert nlp_options.apply_nlp_options(nlp_model, hypothesis) == (
        ' thirty-four five hundred and sixty-seven'
    )
def test_apply_nlp_options_n2w_exp():
    """Check apply_nlp_options with number-to-words and contraction expansion.

    The pinned output spells out the number and turns "don't" into "do not".
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = "666 don't"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_n2w(True)
    nlp_model.set_expand_contractions(True)

    assert nlp_options.apply_nlp_options(nlp_model, hypothesis) == (
        ' six hundred and sixty-six do not'
    )
def test_apply_nlp_options_stop_n2w():
    """Check apply_nlp_options with stop-word removal and number-to-words.

    Only the spelled-out number is expected to remain of "in the 666"; the
    exact spelling and leading space are pinned by the assertion below.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = "in the 666"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_remove_stop_words(True)
    nlp_model.set_n2w(True)

    assert nlp_options.apply_nlp_options(nlp_model, hypothesis) == (
        ' six hundred sixtysix'
    )
def test_apply_nlp_options_stem_n2w():
    """Check apply_nlp_options with stemming and number-to-words.

    The pinned output holds the stemmed words followed by the spelled-out
    digit, and ends with a trailing space.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = 'thingy dancing 4'
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_apply_stemming(True)
    nlp_model.set_n2w(True)

    assert nlp_options.apply_nlp_options(nlp_model, hypothesis) == (
        'thingi danc four '
    )
def test_stem_stop_n2w_contractions():
    """Check apply_nlp_options with n2w, stop-word removal and stemming together.

    The hypothesis mixes several contractions, stop words, a verb to stem and
    single digits. Contraction expansion is NOT enabled here, so the pinned
    output contains the contraction fragments as the pipeline leaves them.

    This test has its own name on purpose: a second function called
    ``test_stem_stop_n2w`` is defined later in this module, and sharing that
    name let the later ``def`` rebind it, so this test was never collected
    or run by pytest.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hyp = "don't in the won't didn't isn't raining 3 6 9"
    options = NLPOptions()
    model = NLPModel()
    model.set_n2w(True)
    model.set_remove_stop_words(True)
    model.set_apply_stemming(True)

    result = options.apply_nlp_options(model, hyp)

    # The pinned output ends with a trailing space.
    expected = 'nt wo nt nt nt rain three six nine '
    assert result == expected
def test_stem_stop_n2w():
    """Check apply_nlp_options with n2w, stop-word removal and stemming together.

    The hypothesis starts with a number, followed by a contraction, stop
    words and a verb to stem; the pinned output ends with a trailing space.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = "666 don't in the raining"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_n2w(True)
    nlp_model.set_remove_stop_words(True)
    nlp_model.set_apply_stemming(True)

    assert nlp_options.apply_nlp_options(nlp_model, hypothesis) == (
        'six hundr sixtysix nt rain '
    )
def test_create_unique_root_2():
    """Check the root name built by Utilities.create_unique_root.

    Configures a non-enhanced 'video' model with a language code, alternative
    language codes and a boosted speech context, plus an NLP model with every
    option except stemming switched on, then pins the resulting root string.
    """
    from utilities.utilities import Utilities
    from model.configuration import Configuration
    from model.nlp import NLPModel

    utilities = Utilities()
    config = Configuration()
    nlp = NLPModel()
    base_root = '12345'

    # Recognition settings.
    config.set_model('video')
    config.set_use_enhanced(False)
    config.set_language_code('fr_FR')
    config.set_alternative_language_codes(['en-US', 'ru-RU'])
    config.set_speech_context('hi', 5)

    # Text-normalisation settings.
    nlp.set_remove_stop_words(True)
    nlp.set_apply_stemming(False)
    nlp.set_expand_contractions(True)
    nlp.set_n2w(True)

    assert utilities.create_unique_root(base_root, config, nlp) == (
        '12345_video_fr_FR_alts_applied_speech_adaptation_applied_boost_5_stop_words_removed_contractions_expanded_numbers_converted_2_words'
    )
# Objects that carry the run's settings and helpers.
nlp_model = NLPModel()
io_handler = IOHandler()
nlp_options = NLPOptions()
configuration = Configuration()

# Automatic punctuation is switched on unconditionally; per the original
# author's note it has no bearing on WER.
configuration.set_enableAutomaticPunctuation(True)

args = parser.parse_args()

# Run-control switches and input/output locations taken from the command line.
no_zeros_for_boost = args.no_zeros_boost
process_each_letter = args.single_word
local_files_path = args.local_files_path
limit = args.limit
cloud_store_uri = args.cloud_store_uri
io_handler.set_result_path(args.local_results_path)
only_transcribe = args.transcriptions_only

# Text-normalisation options are copied straight onto the NLP model.
nlp_model.set_n2w(args.numbers_to_words)
nlp_model.set_apply_stemming(args.stem)
nlp_model.set_remove_stop_words(args.remove_stop_words)
nlp_model.set_expand_contractions(args.expand)

# Recognition parameters.
models = args.models
enhance = args.enhanced
sample_rate_hertz = args.sample_rate_hertz
language_codes = args.langs
alternative_language_codes = args.alternative_languages
phrase_file_path = args.phrase_file

# Both names are bound to the same command-line value.
enc = args.encoding
encoding = args.encoding

# Boost values arrive as strings; a 0 entry is appended unless the
# no-zeros switch was given.
boosts = list(map(int, args.boosts))
if not no_zeros_for_boost:
    boosts.append(0)