def test_apply_nlp_options_stem():
    """Check apply_nlp_options output when only stemming is enabled."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    text = 'thingy dancing'
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_apply_stemming(True)

    # The expected value ends with a trailing space.
    assert nlp_options.apply_nlp_options(nlp_model, text) == 'thingi danc '
def test_apply_nlp_options_stem_exp():
    """Check apply_nlp_options with stemming and contraction expansion on."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    text = "thingy dancing won't"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_apply_stemming(True)
    nlp_model.set_expand_contractions(True)

    # "won't" is expected to come back as "will not".
    processed = nlp_options.apply_nlp_options(nlp_model, text)
    assert processed == 'thingi danc will not '
def test_stem_stop_n2w_contractions():
    """Check numbers-to-words, stop-word removal and stemming on contractions.

    Contraction expansion is left off, so the expected output contains the
    fragments ('nt', 'wo') that remain of the contractions, followed by the
    stemmed word and the spelled-out digits.

    This test is named distinctly from the other ``test_stem_stop_n2w`` in
    this module: when two module-level functions share a name, the later
    definition replaces the earlier one and this case is never collected.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hyp = "don't in the won't didn't isn't raining 3 6 9"
    options = NLPOptions()
    model = NLPModel()
    model.set_n2w(True)
    model.set_remove_stop_words(True)
    model.set_apply_stemming(True)

    result = options.apply_nlp_options(model, hyp)

    expected = 'nt wo nt nt nt rain three six nine '
    assert result == expected
def test_stem_stop_exp():
    """Check contraction expansion, stop-word removal and stemming together."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    text = "don't in the won't didn't isn't raining"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_expand_contractions(True)
    nlp_model.set_remove_stop_words(True)
    nlp_model.set_apply_stemming(True)

    # Only the stem of "raining" is expected to remain.
    assert nlp_options.apply_nlp_options(nlp_model, text) == 'rain '
def test_stem_stop_n2w():
    """Check numbers-to-words, stop-word removal and stemming on a number."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    text = "666 don't in the raining"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_n2w(True)
    nlp_model.set_remove_stop_words(True)
    nlp_model.set_apply_stemming(True)

    processed = nlp_options.apply_nlp_options(nlp_model, text)

    # Contraction expansion is off, so "don't" is expected to leave 'nt'.
    assert processed == 'six hundr sixtysix nt rain '
def test_apply_nlp_options_stem_n2w_exp():
    """Check numbers-to-words, contraction expansion and stemming together."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    text = "666 don't in the raining"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_n2w(True)
    nlp_model.set_expand_contractions(True)
    nlp_model.set_apply_stemming(True)

    processed = nlp_options.apply_nlp_options(nlp_model, text)

    # Stop-word removal is not enabled, so "in the" is expected to survive.
    assert processed == 'six hundr and sixty-six do not in the rain '
def test_create_unique_root_2():
    """Check the root name built by create_unique_root for a mixed setup.

    The configuration selects the 'video' model without enhancement, sets a
    language code, alternative language codes and a speech context with
    boost 5; the NLP model enables everything except stemming.
    """
    from utilities.utilities import Utilities
    from model.configuration import Configuration
    from model.nlp import NLPModel

    helper = Utilities()
    config = Configuration()
    nlp = NLPModel()
    base_root = '12345'

    # Recognition configuration.
    config.set_model('video')
    config.set_use_enhanced(False)
    config.set_language_code('fr_FR')
    config.set_alternative_language_codes(['en-US', 'ru-RU'])
    config.set_speech_context('hi', 5)

    # NLP post-processing switches.
    nlp.set_remove_stop_words(True)
    nlp.set_apply_stemming(False)
    nlp.set_expand_contractions(True)
    nlp.set_n2w(True)

    unique_root = helper.create_unique_root(base_root, config, nlp)

    assert unique_root == (
        '12345_video_fr_FR_alts_applied_speech_adaptation_applied_boost_5'
        '_stop_words_removed_contractions_expanded_numbers_converted_2_words'
    )
def test_update_csv():
    """Check that update_csv writes the URI, language code and NLP flag.

    A header and one row are written to the results file under
    'test_results_path'. The file is read back and deleted, then its
    contents are checked.
    """
    from utilities.io_handler import IOHandler
    from model.configuration import Configuration
    from model.nlp import NLPModel
    import os

    config = Configuration()
    nlp = NLPModel()
    handler = IOHandler()
    file_name = handler._result_file_name

    handler.set_result_path('test_results_path')
    handler.write_csv_header()

    uri = 'gs://foo/bar/baz/test.flac'
    lang = 'fr-FR'
    nlp.set_apply_stemming(True)
    config.set_language_code(lang)
    handler.update_csv(uri, config, nlp)

    csv_path = f'{handler.get_result_path()}/{file_name}'
    with open(csv_path, 'r') as csv_file:
        written = csv_file.read()
    # Delete the file before asserting so a failed check leaves nothing behind.
    os.remove(csv_path)

    assert uri in written
    assert lang in written
    assert 'True' in written
# Collaborators used by the rest of the run.
io_handler = IOHandler()
nlp_options = NLPOptions()
configuration = Configuration()

# Automatic punctuation is switched on unconditionally; per the original
# author's note it has no bearing on WER.
configuration.set_enableAutomaticPunctuation(True)

args = parser.parse_args()

# Run-control switches and input/output locations from the command line.
no_zeros_for_boost = args.no_zeros_boost
process_each_letter = args.single_word
local_files_path = args.local_files_path
limit = args.limit
cloud_store_uri = args.cloud_store_uri
io_handler.set_result_path(args.local_results_path)
only_transcribe = args.transcriptions_only

# NLP post-processing flags are copied from the CLI onto the NLP model.
nlp_model.set_n2w(args.numbers_to_words)
nlp_model.set_apply_stemming(args.stem)
nlp_model.set_remove_stop_words(args.remove_stop_words)
nlp_model.set_expand_contractions(args.expand)

# Recognition settings.
models = args.models
enhance = args.enhanced
enc = args.encoding
sample_rate_hertz = args.sample_rate_hertz
language_codes = args.langs
phrase_file_path = args.phrase_file

# Boost values are converted to int; 0 is appended unless the
# no-zeros switch was given.
boosts = list(map(int, args.boosts))
if not no_zeros_for_boost:
    boosts.append(0)

alternative_language_codes = args.alternative_languages
encoding = args.encoding