def test_apply_nlp_options_stem():
    """With only stemming enabled, 'thingy dancing' becomes 'thingi danc '."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    text = 'thingy dancing'
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_apply_stemming(True)

    actual = nlp_options.apply_nlp_options(nlp_model, text)

    # Note the trailing space in the expected output.
    assert actual == 'thingi danc '
def test_apply_nlp_options_stem_exp():
    """Stemming plus contraction expansion: "won't" comes out as 'will not'."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    text = "thingy dancing won't"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    # Enable the two options under test; everything else stays at its default.
    nlp_model.set_apply_stemming(True)
    nlp_model.set_expand_contractions(True)

    actual = nlp_options.apply_nlp_options(nlp_model, text)

    assert actual == 'thingi danc will not '
def test_stem_stop_n2w_contractions():
    """Stemming, stop-word removal and numbers-to-words on raw contractions.

    Contractions are NOT expanded here, so the expected output contains the
    leftover 'nt' / 'wo' pieces, followed by the digits spelled out as words.

    Named distinctly from ``test_stem_stop_n2w``: when two functions in one
    module share a name, the later definition replaces the earlier one and
    pytest never collects the first test.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel
    hyp = "don't in the won't didn't isn't raining 3 6 9"
    options = NLPOptions()
    model = NLPModel()
    model.set_n2w(True)
    model.set_remove_stop_words(True)
    model.set_apply_stemming(True)
    result = options.apply_nlp_options(model, hyp)
    expected = 'nt wo nt nt nt rain three six nine '
    assert result == expected
def test_stem_stop_exp():
    """Expansion, stop-word removal and stemming together leave only 'rain '."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    text = "don't in the won't didn't isn't raining"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_expand_contractions(True)
    nlp_model.set_remove_stop_words(True)
    nlp_model.set_apply_stemming(True)

    actual = nlp_options.apply_nlp_options(nlp_model, text)

    # Only the stem of 'raining' is expected to remain.
    assert actual == 'rain '
def test_stem_stop_n2w():
    """Numbers-to-words, stop-word removal and stemming on a three-digit number.

    The input '666' is expected to come out as 'six hundr sixtysix'.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    text = "666 don't in the raining"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_n2w(True)
    nlp_model.set_remove_stop_words(True)
    nlp_model.set_apply_stemming(True)

    actual = nlp_options.apply_nlp_options(nlp_model, text)

    assert actual == 'six hundr sixtysix nt rain '
def test_apply_nlp_options_stem_n2w_exp():
    """Numbers-to-words, contraction expansion and stemming, stop words kept.

    Stop-word removal is left off, so 'and', 'in' and 'the' are expected to
    survive in the output.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    text = "666 don't in the raining"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_n2w(True)
    nlp_model.set_expand_contractions(True)
    nlp_model.set_apply_stemming(True)

    actual = nlp_options.apply_nlp_options(nlp_model, text)

    assert actual == 'six hundr and sixty-six do not in the rain '
# Example #7
# 0
def test_create_unique_root_2():
    """create_unique_root builds the expected name from the config and NLP flags.

    Uses the 'video' model with enhancement off, alternative language codes,
    a speech context with boost 5, and every NLP option on except stemming.
    """
    from utilities.utilities import Utilities
    from model.configuration import Configuration
    from model.nlp import NLPModel

    helper = Utilities()
    config = Configuration()
    nlp = NLPModel()

    # Recognition configuration.
    config.set_model('video')
    config.set_use_enhanced(False)
    config.set_language_code('fr_FR')
    config.set_alternative_language_codes(['en-US', 'ru-RU'])
    config.set_speech_context('hi', 5)

    # NLP post-processing flags: stemming is the only one switched off.
    nlp.set_remove_stop_words(True)
    nlp.set_apply_stemming(False)
    nlp.set_expand_contractions(True)
    nlp.set_n2w(True)

    actual = helper.create_unique_root('12345', config, nlp)

    assert actual == '12345_video_fr_FR_alts_applied_speech_adaptation_applied_boost_5_stop_words_removed_contractions_expanded_numbers_converted_2_words'
def test_update_csv():
    """update_csv writes the URI, language code and stemming flag to the CSV.

    Writes the CSV header and one row through IOHandler, reads the result
    file back, deletes it, and then asserts on what was read.
    """
    from utilities.io_handler import IOHandler
    from model.configuration import Configuration
    from model.nlp import NLPModel
    import os
    configuration = Configuration()
    nlp_model = NLPModel()
    io_handler = IOHandler()
    result_file_name = io_handler._result_file_name
    io_handler.set_result_path('test_results_path')
    io_handler.write_csv_header()
    expected_uri = 'gs://foo/bar/baz/test.flac'
    expected_lang = 'fr-FR'
    nlp_model.set_apply_stemming(True)
    configuration.set_language_code(expected_lang)
    io_handler.update_csv(expected_uri, configuration, nlp_model)
    full_path = f'{io_handler.get_result_path()}/{result_file_name}'

    with open(full_path, 'r') as result_file:
        contents = result_file.read()
    # Delete only after the handle is closed: removing a file that is still
    # open raises an error on Windows. Doing it before the asserts keeps the
    # original behaviour of cleaning up even when an assertion fails.
    os.remove(full_path)

    assert expected_uri in contents
    assert expected_lang in contents
    assert 'True' in contents
    io_handler = IOHandler()
    nlp_options = NLPOptions()
    configuration = Configuration()
    # Turn on automatic punctuation; it has no bearing on WER.
    configuration.set_enableAutomaticPunctuation(True)

    args = parser.parse_args()
    no_zeros_for_boost = args.no_zeros_boost
    process_each_letter = args.single_word
    local_files_path = args.local_files_path
    limit = args.limit
    cloud_store_uri = args.cloud_store_uri
    io_handler.set_result_path(args.local_results_path)
    only_transcribe = args.transcriptions_only
    nlp_model.set_n2w(args.numbers_to_words)
    nlp_model.set_apply_stemming(args.stem)
    nlp_model.set_remove_stop_words(args.remove_stop_words)
    nlp_model.set_expand_contractions(args.expand)
    models = args.models
    enhance = args.enhanced
    enc = args.encoding
    sample_rate_hertz = args.sample_rate_hertz
    language_codes = args.langs
    phrase_file_path = args.phrase_file
    boosts = [int(i) for i in args.boosts]

    if not no_zeros_for_boost:
        boosts.append(0)
    alternative_language_codes = args.alternative_languages
    encoding = args.encoding