def test_expand_compound_token_hypothesis(s, split_chars, split_on_len, split_on_casechange):
    """Property-based checks for ``expand_compound_token``.

    When neither split criterion is given the function must raise
    ``ValueError``; otherwise it must return a list of non-empty parts,
    none of which still contains a split character.
    """
    if not split_on_len and not split_on_casechange:
        # at least one split criterion is required
        with pytest.raises(ValueError):
            expand_compound_token(s, split_chars, split_on_len=split_on_len,
                                  split_on_casechange=split_on_casechange)
    else:
        res = expand_compound_token(s, split_chars, split_on_len=split_on_len,
                                    split_on_casechange=split_on_casechange)
        # isinstance is the idiomatic type check (and matches the sibling
        # hypothesis test in this file)
        assert isinstance(res, list)

        if len(s) == 0:
            # empty input yields no parts
            assert res == []

        # no part may be empty
        assert all(p for p in res)

        # NOTE(review): stricter minimum-length assertions were disabled;
        # kept for reference until the intended behavior is confirmed:
        # if res and split_chars and any(c in s for c in split_chars):
        #     if split_on_len:
        #         assert min(map(len, res)) >= split_on_len

        # split characters must not survive in any part
        for p in res:
            assert all(c not in p for c in split_chars)
def test_expand_compound_token_hypothesis(s, split_chars, split_on_len, split_on_casechange):
    """Property-based checks for ``expand_compound_token``: without any
    split criterion a ``ValueError`` is expected; otherwise the result is a
    non-empty list of parts free of split characters."""
    if split_on_len or split_on_casechange:
        parts = expand_compound_token(s, split_chars,
                                      split_on_len=split_on_len,
                                      split_on_casechange=split_on_casechange)
        assert isinstance(parts, list)
        assert len(parts) > 0

        has_split_char = any(c in s for c in split_chars)
        only_split_chars = all(c in split_chars for c in s)

        if not has_split_char:
            # nothing to split on -> input must come back unchanged
            assert parts == [s]

        if len(s) > 0:
            # no part may be empty
            assert all(parts)

        if not only_split_chars:
            # split characters must not survive in any part
            for part in parts:
                assert all(c not in part for c in split_chars)
    else:
        # at least one split criterion is required
        with pytest.raises(ValueError):
            expand_compound_token(s, split_chars,
                                  split_on_len=split_on_len,
                                  split_on_casechange=split_on_casechange)
def test_expand_compound_token():
    """Spot checks for ``expand_compound_token`` across option combinations."""
    # default settings
    assert expand_compound_token('US-Student') == ['US', 'Student']
    assert expand_compound_token('US-Student-X') == ['US', 'StudentX']
    assert expand_compound_token('Student-X') == ['StudentX']
    assert expand_compound_token('Do-Not-Disturb') == ['Do', 'Not', 'Disturb']
    assert expand_compound_token('E-Mobility-Strategy') == ['EMobility', 'Strategy']

    # split on case change only (no minimum length)
    casechange_cases = {
        'US-Student': ['USStudent'],
        'Do-Not-Disturb': ['Do', 'Not', 'Disturb'],
        'E-Mobility-Strategy': ['EMobility', 'Strategy'],
    }
    for tok, expected in casechange_cases.items():
        assert expand_compound_token(tok, split_on_len=None,
                                     split_on_casechange=True) == expected

    # split on both minimum length and case change
    combined_cases = {
        'US-Student': ['US', 'Student'],
        'Do-Not-Disturb': ['Do', 'Not', 'Disturb'],
        'E-Mobility-Strategy': ['EMobility', 'Strategy'],
    }
    for tok, expected in combined_cases.items():
        assert expand_compound_token(tok, split_on_len=2,
                                     split_on_casechange=True) == expected

    # minimum part length of 1 keeps single-character parts separate
    assert expand_compound_token('E-Mobility-Strategy', split_on_len=1) == \
        ['E', 'Mobility', 'Strategy']
def test_expand_compound_token():
    """Spot checks for ``expand_compound_token``, including edge cases."""
    # default settings
    assert expand_compound_token('US-Student') == ['US', 'Student']
    assert expand_compound_token('US-Student-X') == ['US', 'StudentX']
    assert expand_compound_token('Student-X') == ['StudentX']
    assert expand_compound_token('Do-Not-Disturb') == ['Do', 'Not', 'Disturb']
    assert expand_compound_token('E-Mobility-Strategy') == ['EMobility', 'Strategy']

    # split on case change only (no minimum length)
    for tok, expected in [('US-Student', ['USStudent']),
                          ('Do-Not-Disturb', ['Do', 'Not', 'Disturb']),
                          ('E-Mobility-Strategy', ['EMobility', 'Strategy'])]:
        assert expand_compound_token(tok, split_on_len=None,
                                     split_on_casechange=True) == expected

    # split on both minimum length and case change
    for tok, expected in [('US-Student', ['US', 'Student']),
                          ('Do-Not-Disturb', ['Do', 'Not', 'Disturb']),
                          ('E-Mobility-Strategy', ['EMobility', 'Strategy'])]:
        assert expand_compound_token(tok, split_on_len=2,
                                     split_on_casechange=True) == expected

    # minimum part length of 1 keeps single-character parts separate
    assert expand_compound_token('E-Mobility-Strategy', split_on_len=1) == \
        ['E', 'Mobility', 'Strategy']

    # edge case: empty string passes through
    assert expand_compound_token('') == ['']

    # custom split characters behave exactly like the default '-'
    res_custom = expand_compound_token('Te;s,t', split_chars=[';', ','],
                                       split_on_len=1, split_on_casechange=False)
    res_default = expand_compound_token('Te-s-t', split_chars=['-'],
                                        split_on_len=1, split_on_casechange=False)
    assert res_custom == res_default == ['Te', 's', 't']