Example #1
0
def test_true_true_code_bytes(abspath_mock, bpe_learner_mock, dataset_mock):
    """Run ``learn-bpe 1000 -p <path> --bytes --word-end`` and verify the calls.

    The dataset factory and the BPE learner are mocked; the test asserts
    that both receive the expected preprocessing and BPE configurations.
    """
    # given: the path resolves to the stub and dataset creation returns the mock
    abspath_mock.return_value = PATH_TO_DATASET_STUB
    dataset_mock.create = Mock(spec=dataset_mock, return_value=dataset_mock)
    cli_args = ['learn-bpe', '1000', '-p', PATH_TO_DATASET_STUB]
    cli_args += ['--bytes', '--word-end']

    # when
    parse_and_run(cli_args)

    # then: build the configurations the command is expected to produce
    expected_prep_config = PrepConfig({
        PrepParam.EN_ONLY: 'u',
        PrepParam.COM: '0',
        PrepParam.STR: 'E',
        PrepParam.SPLIT: 'F',
        PrepParam.TABS_NEWLINES: 's',
        PrepParam.CASE: 'u',
    })
    expected_bpe_config = BpeConfig({
        BpeParam.CASE: 'yes',
        BpeParam.WORD_END: True,
        BpeParam.BASE: 'code',
        BpeParam.UNICODE: 'bytes',
    })
    dataset_mock.create.assert_called_with(
        PATH_TO_DATASET_STUB,
        expected_prep_config,
        None,
        None,
        expected_bpe_config,
    )
    bpe_learner_mock.run.assert_called_with(
        dataset_mock, 1000, expected_bpe_config
    )
Example #2
0
def test_xx0Fxx_max_str_length():
    """Expect ``DocoptExit`` for ``nosplit`` with the option set below."""
    options = ['--no-spaces', '--no-str', '--no-com', '--full-strings']
    cli_args = ['nosplit', 'str', '-e', 'java'] + options
    with pytest.raises(DocoptExit):
        parse_and_run(cli_args)
Example #3
0
def test_learn_bpe_codes():
    """Learn BPE codes on the test corpus, then apply BPE through the API.

    Runs ``learn-bpe`` twice (100 and 150 merges) on ``PATH_TO_TEST_CORPUS``
    for ``java`` files and then calls ``api.bpe`` with the
    ``'test-corpus-130'`` codes id, writing to ``TEST_OUTPUT``.

    The test is not executed on macOS (``platform.system() == 'Darwin'``).
    """
    # Local import keeps this block self-contained regardless of the
    # module-level imports of the file it lives in.
    import pytest

    if platform.system() == 'Darwin':
        # Report the test as skipped instead of printing and silently
        # passing, so the test report reflects that nothing was checked.
        pytest.skip('Skipping the test on OSx.')

    parse_and_run(['learn-bpe', '100', '-p', PATH_TO_TEST_CORPUS, '-e', 'java'])
    parse_and_run(['learn-bpe', '150', '-p', PATH_TO_TEST_CORPUS, '-e', 'java'])

    api.bpe(path=PATH_TO_TEST_CORPUS, bpe_codes_id='test-corpus-130', extensions="java", output_path=TEST_OUTPUT)
Example #4
0
def test_xxxFsx(api_mock):
    """Check the config passed to ``api.text.preprocess`` for ``nosplit --full-strings``."""
    cli_args = ['nosplit', 'str', '-e', 'java', '--full-strings']

    parse_and_run(cli_args)

    expected_config = PrepConfig({
        PrepParam.EN_ONLY: 'u',
        PrepParam.COM: 'c',
        PrepParam.STR: '1',
        PrepParam.SPLIT: 'F',
        PrepParam.TABS_NEWLINES: 's',
        PrepParam.CASE: 'u',
    })
    api_mock.text.preprocess.assert_called_with(
        "str", expected_config, None, extension="java"
    )
Example #5
0
def test_all_short_config_options(api_mock):
    """Check the config passed to ``api.text.preprocess`` for ``basic -0lSCU``."""
    cli_args = ['basic', 'str', '-e', 'java', '-0lSCU']

    parse_and_run(cli_args)

    expected_config = PrepConfig({
        PrepParam.EN_ONLY: 'U',
        PrepParam.COM: '0',
        PrepParam.STR: '0',
        PrepParam.SPLIT: '1',
        PrepParam.TABS_NEWLINES: '0',
        PrepParam.CASE: 'l',
    })
    api_mock.text.preprocess.assert_called_with(
        "str", expected_config, None, extension="java"
    )
Example #6
0
def test_xxx1xu(api_mock):
    """Check the config passed to ``api.text.preprocess`` for ``basic --no-spaces``."""
    cli_args = ['basic', 'str', '-e', 'java', '--no-spaces']

    parse_and_run(cli_args)

    expected_config = PrepConfig({
        PrepParam.EN_ONLY: 'u',
        PrepParam.COM: 'c',
        PrepParam.STR: '1',
        PrepParam.SPLIT: '1',
        PrepParam.TABS_NEWLINES: '0',
        PrepParam.CASE: 'u',
    })
    api_mock.text.preprocess.assert_called_with(
        "str", expected_config, None, extension="java"
    )
Example #7
0
def test_path_short(api_mock):
    """Check the ``api.corpus.preprocess_corpus`` call for ``nosplit -p <path> --no-spaces``."""
    cli_args = ['nosplit', '-p', PATH_TO_DATASET_STUB, '--no-spaces']

    parse_and_run(cli_args)

    expected_config = PrepConfig({
        PrepParam.EN_ONLY: 'u',
        PrepParam.COM: 'c',
        PrepParam.STR: '1',
        PrepParam.SPLIT: '0',
        PrepParam.TABS_NEWLINES: '0',
        PrepParam.CASE: 'u',
    })
    api_mock.corpus.preprocess_corpus.assert_called_with(
        PATH_TO_DATASET_STUB,
        expected_config,
        None,
        calc_vocab=False,
        extensions=None,
        output_path=None,
    )
Example #8
0
def test_output_with_text():
    """Expect ``DocoptExit`` when ``-o <path>`` is passed together with a text argument."""
    argv = ['nosplit', 'str', '-o', PATH_TO_OUTPUT_STUB, '--no-spaces']
    # The exception info is never inspected, so it is not bound to a name.
    with pytest.raises(DocoptExit):
        parse_and_run(argv)
Example #9
0
def test_xxA8xx():
    """Expect ``DocoptExit`` for ``chars`` with ``--no-str`` and ``--max-str-length=10``."""
    cli_args = ['chars', 'str', '-e', 'java']
    cli_args += ['--no-str', '--max-str-length=10']
    with pytest.raises(DocoptExit):
        parse_and_run(cli_args)
Example #10
0
def test_xx0xxx_with_max_str_length():
    """Expect ``DocoptExit`` for ``basic`` with ``--no-str`` and ``--max-str-length=10``."""
    cli_args = ['basic', 'str', '-e', 'java']
    cli_args += ['--no-str', '--max-str-length=10']
    with pytest.raises(DocoptExit):
        parse_and_run(cli_args)
Example #11
0
def test_xxA1xx(api_mock):
    """Expect ``DocoptExit`` for ``basic`` with ``--no-str`` and ``--max-str-length=10``.

    ``api_mock`` is accepted but not used inside the test body.
    """
    cli_args = ['basic', 'str', '-e', 'java']
    cli_args += ['--no-str', '--max-str-length=10']
    with pytest.raises(DocoptExit):
        parse_and_run(cli_args)
Example #12
0
def test_xxx0x1():
    """Expect ``DocoptExit`` for ``nosplit`` with ``--no-spaces`` and ``--no-case``."""
    argv = ['nosplit', 'str', '-e', 'java', '--no-spaces', '--no-case']
    # The exception info is never inspected, so it is not bound to a name.
    with pytest.raises(DocoptExit):
        parse_and_run(argv)
Example #13
0
def main():
    """Entry point: hand the command-line arguments to ``parse_and_run``."""
    # Drop the first element of ``sys.argv`` and forward the rest.
    cli_args = sys.argv[1:]
    parse_and_run(cli_args)