def test_apply_nlp_options_n2w():
    """Check apply_nlp_options output for '34 567' with only n2w enabled."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    expected_text = ' thirty-four five hundred and sixty-seven'
    raw_text = '34 567'

    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_n2w(True)

    assert nlp_options.apply_nlp_options(nlp_model, raw_text) == expected_text
def test_apply_nlp_options_n2w_exp():
    """Check the output when n2w and contraction expansion are both enabled."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    expected_text = ' six hundred and sixty-six do not'
    raw_text = "666 don't"

    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    # Setter order is kept as in the original test: n2w, then contractions.
    nlp_model.set_n2w(True)
    nlp_model.set_expand_contractions(True)

    assert nlp_options.apply_nlp_options(nlp_model, raw_text) == expected_text
def test_apply_nlp_options_stop_n2w():
    """Check the output when stop-word removal and n2w are both enabled."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    expected_text = ' six hundred sixtysix'
    raw_text = "in the 666"

    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    # Setter order is kept as in the original test: stop words, then n2w.
    nlp_model.set_remove_stop_words(True)
    nlp_model.set_n2w(True)

    assert nlp_options.apply_nlp_options(nlp_model, raw_text) == expected_text
def test_apply_nlp_options_stem_n2w():
    """Check the output when stemming and n2w are both enabled."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    expected_text = 'thingi danc four '
    raw_text = 'thingy dancing 4'

    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    # Setter order is kept as in the original test: stemming, then n2w.
    nlp_model.set_apply_stemming(True)
    nlp_model.set_n2w(True)

    assert nlp_options.apply_nlp_options(nlp_model, raw_text) == expected_text
def test_stem_stop_n2w_contractions():
    """Check n2w + stop-word removal + stemming on a contraction-heavy input.

    This test was originally also named ``test_stem_stop_n2w``. A later
    definition with that same name rebinds it at module level, so this
    function was shadowed and never collected or run. It has a distinct
    name now so that both tests execute; the later ``test_stem_stop_n2w``
    is unaffected.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hyp = "don't in the won't didn't isn't raining 3 6 9"
    expected = 'nt wo nt nt nt rain three six nine '

    options = NLPOptions()
    model = NLPModel()
    model.set_n2w(True)
    model.set_remove_stop_words(True)
    model.set_apply_stemming(True)

    result = options.apply_nlp_options(model, hyp)
    assert result == expected
def test_stem_stop_n2w():
    """Check the output when n2w, stop-word removal and stemming are all enabled."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    expected_text = 'six hundr sixtysix nt rain '
    raw_text = "666 don't in the raining"

    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    # Setter order is kept as in the original test.
    nlp_model.set_n2w(True)
    nlp_model.set_remove_stop_words(True)
    nlp_model.set_apply_stemming(True)

    assert nlp_options.apply_nlp_options(nlp_model, raw_text) == expected_text
# Example #7
0
def test_create_unique_root_2():
    """Check the root name create_unique_root builds for a 'video' configuration.

    The configuration sets a language code, alternative language codes and a
    speech context with boost 5; the NLP model enables stop-word removal,
    contraction expansion and n2w, with stemming disabled.
    """
    from utilities.utilities import Utilities
    from model.configuration import Configuration
    from model.nlp import NLPModel

    expected_root = '12345_video_fr_FR_alts_applied_speech_adaptation_applied_boost_5_stop_words_removed_contractions_expanded_numbers_converted_2_words'

    helper = Utilities()
    config = Configuration()
    nlp = NLPModel()
    base_root = '12345'

    # Recognition configuration.
    config.set_model('video')
    config.set_use_enhanced(False)
    config.set_language_code('fr_FR')
    config.set_alternative_language_codes(['en-US', 'ru-RU'])
    config.set_speech_context('hi', 5)

    # NLP post-processing switches.
    nlp.set_remove_stop_words(True)
    nlp.set_apply_stemming(False)
    nlp.set_expand_contractions(True)
    nlp.set_n2w(True)

    assert helper.create_unique_root(base_root, config, nlp) == expected_root
    # NOTE(review): IOHandler, NLPOptions and parser are used below but are not
    # imported or defined in the visible lines. Presumably this is the body of
    # a separate command-line entry point whose header is missing; confirm
    # against the full file.
    nlp_model = NLPModel()
    io_handler = IOHandler()
    nlp_options = NLPOptions()
    configuration = Configuration()
    # Enable automatic punctuation; per the original author's note it has no
    # bearing on WER.
    configuration.set_enableAutomaticPunctuation(True)

    # Copy the parsed arguments into locals and onto the helper objects.
    # presumably `parser` is an argparse.ArgumentParser; verify where it is built.
    args = parser.parse_args()
    no_zeros_for_boost = args.no_zeros_boost
    process_each_letter = args.single_word
    local_files_path = args.local_files_path
    limit = args.limit
    cloud_store_uri = args.cloud_store_uri
    io_handler.set_result_path(args.local_results_path)
    only_transcribe = args.transcriptions_only
    # NLP post-processing switches come straight from the arguments.
    nlp_model.set_n2w(args.numbers_to_words)
    nlp_model.set_apply_stemming(args.stem)
    nlp_model.set_remove_stop_words(args.remove_stop_words)
    nlp_model.set_expand_contractions(args.expand)
    models = args.models
    enhance = args.enhanced
    enc = args.encoding
    sample_rate_hertz = args.sample_rate_hertz
    language_codes = args.langs
    phrase_file_path = args.phrase_file
    # Convert every supplied boost value to int.
    boosts = [int(i) for i in args.boosts]

    # Unless no_zeros_boost was set, also include a boost value of 0.
    if not no_zeros_for_boost:
        boosts.append(0)
    alternative_language_codes = args.alternative_languages
    # Reads the same argument as `enc` above (both are args.encoding).
    encoding = args.encoding