def test_remove_stop_words():
    """Check remove_stop_words against a fixed sample sentence."""
    from utilities.nlp_options import NLPOptions

    nlp = NLPOptions()
    sentence = "This is the way the world ends, not with a bang but with a whimper"
    filtered = nlp.remove_stop_words(sentence)
    # The expected string begins with a single space.
    assert filtered == " This way world ends bang whimper"
def test_apply_stemming():
    """Check apply_stemming against a fixed sample sentence."""
    from utilities.nlp_options import NLPOptions

    nlp = NLPOptions()
    stemmed = nlp.apply_stemming('the quick brown fox jumped over the lazy dog')
    # The expected string ends with a single space.
    assert stemmed == 'the quick brown fox jump over the lazi dog '
def test_expand_contractions_isnt():
    """Check that expand_contractions maps "isn't" to "is not"."""
    from utilities.nlp_options import NLPOptions

    nlp = NLPOptions()
    expanded = nlp.expand_contractions("isn't")
    assert expanded == "is not"
def test_convert_numbers_to_words():
    """Check that convert_numbers_to_words maps '10' to ' ten'."""
    from utilities.nlp_options import NLPOptions

    nlp = NLPOptions()
    spelled_out = nlp.convert_numbers_to_words('10')
    # The expected string begins with a single space.
    assert spelled_out == ' ten'
def test_expand_contractions_yall():
    """Check that expand_contractions maps "y'all" to "you all"."""
    from utilities.nlp_options import NLPOptions

    nlp = NLPOptions()
    expanded = nlp.expand_contractions("y'all")
    assert expanded == "you all"
def test_apply_nlp_options_stop():
    """Check apply_nlp_options with only stop-word removal switched on."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    nlp = NLPOptions()
    settings = NLPModel()
    settings.set_remove_stop_words(True)

    processed = nlp.apply_nlp_options(settings, 'thingy in the dancing')
    # The expected string begins with a single space.
    assert processed == ' thingy dancing'
def test_apply_nlp_options_stem():
    """Check apply_nlp_options with only stemming switched on."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    nlp = NLPOptions()
    settings = NLPModel()
    settings.set_apply_stemming(True)

    processed = nlp.apply_nlp_options(settings, 'thingy dancing')
    # The expected string ends with a single space.
    assert processed == 'thingi danc '
def test_apply_nlp_options_n2w():
    """Check apply_nlp_options with only number-to-word conversion switched on."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    nlp = NLPOptions()
    settings = NLPModel()
    settings.set_n2w(True)

    processed = nlp.apply_nlp_options(settings, '34 567')
    # The expected string begins with a single space.
    assert processed == ' thirty-four five hundred and sixty-seven'
def test_apply_nlp_options_expand_cont():
    """Check apply_nlp_options with only contraction expansion switched on."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    nlp = NLPOptions()
    settings = NLPModel()
    settings.set_expand_contractions(True)

    processed = nlp.apply_nlp_options(settings, "isn't can't")
    assert processed == 'is not cannot'
def test_apply_nlp_options_stem_exp():
    """Check apply_nlp_options with stemming and contraction expansion switched on."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    nlp = NLPOptions()
    settings = NLPModel()
    settings.set_apply_stemming(True)
    settings.set_expand_contractions(True)

    processed = nlp.apply_nlp_options(settings, "thingy dancing won't")
    # The expected string ends with a single space.
    assert processed == 'thingi danc will not '
def test_apply_nlp_options_n2w_exp():
    """Check apply_nlp_options with number conversion and contraction expansion on."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    nlp = NLPOptions()
    settings = NLPModel()
    settings.set_n2w(True)
    settings.set_expand_contractions(True)

    processed = nlp.apply_nlp_options(settings, "666 don't")
    # The expected string begins with a single space.
    assert processed == ' six hundred and sixty-six do not'
def test_apply_nlp_options_stop_n2w():
    """Check apply_nlp_options with stop-word removal and number conversion on."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    nlp = NLPOptions()
    settings = NLPModel()
    settings.set_remove_stop_words(True)
    settings.set_n2w(True)

    processed = nlp.apply_nlp_options(settings, "in the 666")
    # With both options on, the expected text has no "and" and no hyphen.
    assert processed == ' six hundred sixtysix'
def test_stem_stop_n2w_contractions():
    """Check number conversion, stop-word removal and stemming on contractions.

    Named distinctly from the later ``test_stem_stop_n2w`` in this file: when
    two functions share a name, the second definition replaces the first and
    pytest only collects the second, so this test would never run.
    """
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    hyp = "don't in the won't didn't isn't raining 3 6 9"
    options = NLPOptions()
    model = NLPModel()
    model.set_n2w(True)
    model.set_remove_stop_words(True)
    model.set_apply_stemming(True)

    result = options.apply_nlp_options(model, hyp)

    # Contraction expansion is not enabled here; the expected string keeps
    # fragments such as 'nt' and 'wo' and ends with a single space.
    expected = 'nt wo nt nt nt rain three six nine '
    assert result == expected
def test_stem_stop_exp():
    """Check contraction expansion, stop-word removal and stemming together."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    nlp = NLPOptions()
    settings = NLPModel()
    settings.set_expand_contractions(True)
    settings.set_remove_stop_words(True)
    settings.set_apply_stemming(True)

    processed = nlp.apply_nlp_options(settings, "don't in the won't didn't isn't raining")
    # Only the stemmed 'rain' (plus a trailing space) is expected to remain.
    assert processed == 'rain '
def test_stem_stop_n2w():
    """Check number conversion, stop-word removal and stemming together."""
    from utilities.nlp_options import NLPOptions
    from model.nlp import NLPModel

    nlp = NLPOptions()
    settings = NLPModel()
    settings.set_n2w(True)
    settings.set_remove_stop_words(True)
    settings.set_apply_stemming(True)

    processed = nlp.apply_nlp_options(settings, "666 don't in the raining")
    # The expected string ends with a single space.
    assert processed == 'six hundr sixtysix nt rain '
def test_nlp_options_init():
    """Check that a new NLPOptions exposes `contractions` as a dict of str values."""
    from utilities.nlp_options import NLPOptions

    contractions = NLPOptions().contractions
    assert isinstance(contractions, dict)
    # Every mapped value must be a string.
    assert all(isinstance(expansion, str) for expansion in contractions.values())
# Register the remaining optional command-line flags. `parser` is created
# before this point (outside the visible part of the file).
parser.add_argument('-p', '--phrase_file', required=False, type=str, help='Path to file containing comma separated phrases')
# One or more space-separated values; defaults to an empty list when omitted.
parser.add_argument('-b', '--boosts', default=list(), nargs='+', required=False, help=('Space separated list of boost values to evaluate for speech adaptation'))
parser.add_argument('-ch', '--multi', required=False, type=int, help='Integer indicating the number of channels if more than one')
parser.add_argument('-q', '--random_queue', required=False, action='store_true', help='Replaces default queue.txt with randomly named queue file')
parser.add_argument('-fake', '--fake_hyp', required=False, action='store_true', help='Use a fake hypothesis for testing')
parser.add_argument('-limit', '--limit', required=False, default=None,type= int, help = 'Limit to X number of audio files')
parser.add_argument('-nzb', '--no_zeros_boost', required=False, action='store_true', help='skip boost of 0' )
parser.add_argument('-single', '--single_word', required=False, action='store_true', help='process each letter rather than whole words')
parser.add_argument('-lf','--local_files_path', required=False, type=str, help='process local files', default=None)

# Instantiate the model, I/O handler, NLP options and configuration objects
# that the parsed arguments are applied to below.
nlp_model = NLPModel()
io_handler = IOHandler()
nlp_options = NLPOptions()
configuration = Configuration()
# Turn on automatic punctuation; per the original author it has no bearing on WER.
configuration.set_enableAutomaticPunctuation(True)

# Parse the command line, then copy the parsed values into locals and onto
# the objects created above.
args = parser.parse_args()
no_zeros_for_boost = args.no_zeros_boost
process_each_letter = args.single_word
local_files_path = args.local_files_path
limit = args.limit
# NOTE(review): cloud_store_uri, local_results_path, transcriptions_only,
# numbers_to_words, stem and remove_stop_words are not registered in the
# visible part of the file — presumably they are added earlier; confirm.
cloud_store_uri = args.cloud_store_uri
io_handler.set_result_path(args.local_results_path)
only_transcribe = args.transcriptions_only
nlp_model.set_n2w(args.numbers_to_words)
nlp_model.set_apply_stemming(args.stem)
nlp_model.set_remove_stop_words(args.remove_stop_words)