def test_remove_stop_words():
    """Check ``remove_stop_words`` against a known sentence.

    The expected string begins with a single leading space.
    """
    from utilities.nlp_options import NLPOptions

    nlp_options = NLPOptions()
    sentence = "This is the way the world ends, not with a bang but with a whimper"
    filtered = nlp_options.remove_stop_words(sentence)
    assert filtered == " This way world ends bang whimper"
def test_apply_stemming():
    """Check ``apply_stemming`` against a known sentence.

    The expected string ends with a single trailing space.
    """
    from utilities.nlp_options import NLPOptions

    nlp_options = NLPOptions()
    sentence = 'the quick brown fox jumped over the lazy dog'
    stemmed = nlp_options.apply_stemming(sentence)
    assert stemmed == 'the quick brown fox jump over the lazi dog '
def test_expand_contractions_isnt():
    """Check that ``expand_contractions`` maps "isn't" to "is not"."""
    from utilities.nlp_options import NLPOptions

    nlp_options = NLPOptions()
    expanded = nlp_options.expand_contractions("isn't")
    assert expanded == "is not"
def test_convert_numbers_to_words():
    """Check that ``convert_numbers_to_words`` maps '10' to ' ten'.

    The expected string begins with a single leading space.
    """
    from utilities.nlp_options import NLPOptions

    nlp_options = NLPOptions()
    spelled_out = nlp_options.convert_numbers_to_words('10')
    assert spelled_out == ' ten'
def test_expand_contractions_yall():
    """Check that ``expand_contractions`` maps "y'all" to "you all"."""
    from utilities.nlp_options import NLPOptions

    nlp_options = NLPOptions()
    expanded = nlp_options.expand_contractions("y'all")
    assert expanded == "you all"
def test_apply_nlp_options_stop():
    """Check ``apply_nlp_options`` when only stop-word removal is switched on."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = 'thingy in the dancing'
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_remove_stop_words(True)

    processed = nlp_options.apply_nlp_options(nlp_model, hypothesis)
    assert processed == ' thingy dancing'
def test_apply_nlp_options_stem():
    """Check ``apply_nlp_options`` when only stemming is switched on.

    The expected string ends with a single trailing space.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = 'thingy dancing'
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_apply_stemming(True)

    processed = nlp_options.apply_nlp_options(nlp_model, hypothesis)
    assert processed == 'thingi danc '
def test_apply_nlp_options_n2w():
    """Check ``apply_nlp_options`` when only numbers-to-words is switched on."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = '34 567'
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_n2w(True)

    processed = nlp_options.apply_nlp_options(nlp_model, hypothesis)
    assert processed == ' thirty-four five hundred and sixty-seven'
def test_apply_nlp_options_expand_cont():
    """Check ``apply_nlp_options`` when only contraction expansion is switched on."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = "isn't can't"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_expand_contractions(True)

    processed = nlp_options.apply_nlp_options(nlp_model, hypothesis)
    assert processed == 'is not cannot'
def test_apply_nlp_options_stem_exp():
    """Check ``apply_nlp_options`` with stemming and contraction expansion both on.

    The expected string ends with a single trailing space.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = "thingy dancing won't"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_apply_stemming(True)
    nlp_model.set_expand_contractions(True)

    processed = nlp_options.apply_nlp_options(nlp_model, hypothesis)
    assert processed == 'thingi danc will not '
def test_apply_nlp_options_n2w_exp():
    """Check ``apply_nlp_options`` with numbers-to-words and contraction expansion on."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = "666 don't"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_n2w(True)
    nlp_model.set_expand_contractions(True)

    processed = nlp_options.apply_nlp_options(nlp_model, hypothesis)
    assert processed == ' six hundred and sixty-six do not'
def test_apply_nlp_options_stop_n2w():
    """Check ``apply_nlp_options`` with stop-word removal and numbers-to-words on."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = "in the 666"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_remove_stop_words(True)
    nlp_model.set_n2w(True)

    processed = nlp_options.apply_nlp_options(nlp_model, hypothesis)
    assert processed == ' six hundred sixtysix'
def test_stem_stop_n2w_contractions():
    """Check ``apply_nlp_options`` with n2w, stop-word removal and stemming on.

    The input mixes several contractions with single digits; contraction
    expansion is NOT enabled here. The expected string ends with a single
    trailing space.

    Named distinctly from the later ``test_stem_stop_n2w`` in this module:
    with identical names the later definition rebinds the module attribute
    and this test is never collected by pytest.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = "don't in the won't didn't isn't raining 3 6 9"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_n2w(True)
    nlp_model.set_remove_stop_words(True)
    nlp_model.set_apply_stemming(True)

    processed = nlp_options.apply_nlp_options(nlp_model, hypothesis)
    assert processed == 'nt wo nt nt nt rain three six nine '
def test_stem_stop_exp():
    """Check ``apply_nlp_options`` with expansion, stop-word removal and stemming on.

    The expected string ends with a single trailing space.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = "don't in the won't didn't isn't raining"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_expand_contractions(True)
    nlp_model.set_remove_stop_words(True)
    nlp_model.set_apply_stemming(True)

    processed = nlp_options.apply_nlp_options(nlp_model, hypothesis)
    assert processed == 'rain '
def test_stem_stop_n2w():
    """Check ``apply_nlp_options`` with n2w, stop-word removal and stemming on.

    The input is a multi-digit number followed by a contraction and plain
    words. The expected string ends with a single trailing space.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hypothesis = "666 don't in the raining"
    nlp_options = NLPOptions()
    nlp_model = NLPModel()
    nlp_model.set_n2w(True)
    nlp_model.set_remove_stop_words(True)
    nlp_model.set_apply_stemming(True)

    processed = nlp_options.apply_nlp_options(nlp_model, hypothesis)
    assert processed == 'six hundr sixtysix nt rain '
def test_nlp_options_init():
    """Check that a fresh ``NLPOptions`` has a ``contractions`` dict of string values."""
    from utilities.nlp_options import NLPOptions

    contractions = NLPOptions().contractions
    assert isinstance(contractions, dict)
    for expansion in contractions.values():
        assert isinstance(expansion, str)
    parser.add_argument('-p', '--phrase_file', required=False, type=str,
                        help='Path to file containing comma separated phrases')
    parser.add_argument('-b', '--boosts', default=list(), nargs='+', required=False,
                        help=('Space separated list of boost values to evaluate for speech adaptation'))
    parser.add_argument('-ch', '--multi', required=False, type=int,
                        help='Integer indicating the number of channels if more than one')
    parser.add_argument('-q', '--random_queue', required=False, action='store_true', help='Replaces default queue.txt with randomly named queue file')
    parser.add_argument('-fake', '--fake_hyp',  required=False, action='store_true', help='Use a fake hypothesis for testing')
    parser.add_argument('-limit', '--limit', required=False, default=None,type= int,  help = 'Limit to X number of audio files')
    parser.add_argument('-nzb', '--no_zeros_boost', required=False,  action='store_true', help='skip boost of 0' )
    parser.add_argument('-single', '--single_word', required=False, action='store_true', help='process each letter rather than whole words')
    parser.add_argument('-lf','--local_files_path', required=False, type=str, help='process local files',  default=None)

    nlp_model = NLPModel()
    io_handler = IOHandler()
    nlp_options = NLPOptions()
    configuration = Configuration()
    # Turn on punctuation ..  why not.. no bearing on WER
    configuration.set_enableAutomaticPunctuation(True)

    args = parser.parse_args()
    no_zeros_for_boost = args.no_zeros_boost
    process_each_letter = args.single_word
    local_files_path = args.local_files_path
    limit = args.limit
    cloud_store_uri = args.cloud_store_uri
    io_handler.set_result_path(args.local_results_path)
    only_transcribe = args.transcriptions_only
    nlp_model.set_n2w(args.numbers_to_words)
    nlp_model.set_apply_stemming(args.stem)
    nlp_model.set_remove_stop_words(args.remove_stop_words)