Python find_ngramsの例

プログラミング言語: Python

名前空間/パッケージ名: pyconll.util

メソッド/関数: find_ngrams

hotexamples.comのコード掲載数: 9

Python find_ngrams - 9件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのpyconll.util.find_ngramsの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

0

ファイルを表示

ファイル: test_util.py プロジェクト: bittlingmayer/pyconll

def test_ngram_first_word_match():
    """
    A match on only the first word of the ngram must not count as a match.

    Searches the 'long.conll' fixture for the two-token query and expects
    the resulting iterator to yield nothing.
    """
    conll = load_from_file(fixture_location('long.conll'))
    query = 'un cabinet'.split()
    matches = find_ngrams(conll, query)

    # Asking for the first result must immediately signal exhaustion.
    with pytest.raises(StopIteration):
        next(matches)

コード例 #2

0

ファイルを表示

ファイル: test_util.py プロジェクト: bittlingmayer/pyconll

def test_ngram_standard():
    """
    Check find_ngrams in a standard situation.

    The first result for the four-token query in the 'basic.conll' fixture
    must point at sentence 'fr-ud-dev_00001' with index 2.
    """
    conll = load_from_file(fixture_location('basic.conll'))
    query = 'un film sur la'.split()

    first_hit = next(find_ngrams(conll, query))
    sentence, index = first_hit

    assert sentence.id == 'fr-ud-dev_00001'
    assert index == 2

コード例 #3

0

ファイルを表示

ファイル: test_util.py プロジェクト: bittlingmayer/pyconll

def test_ngram_none():
    """
    Test that no ngram is reported when none exists.

    The single-token query is searched in the 'long.conll' fixture and the
    resulting iterator is expected to be empty.
    """
    conll = load_from_file(fixture_location('long.conll'))
    query = 'cabinet'.split()
    matches = find_ngrams(conll, query)

    # No results at all: the very first next() has to raise.
    with pytest.raises(StopIteration):
        next(matches)

コード例 #4

0

ファイルを表示

ファイル: test_util.py プロジェクト: bittlingmayer/pyconll

def test_ngram_multiword_split():
    """
    Ngram searches must still succeed when the ngram spans a multiword token.

    Exactly one result is expected: sentence 'fr-ud-test_00002' at index 8.
    """
    conll = load_from_file(fixture_location('long.conll'))
    query = 'de " décentrement de le Sujet "'.split()

    matches = find_ngrams(conll, query)
    sentence, index = next(matches)

    assert sentence.id == 'fr-ud-test_00002'
    assert index == 8

    # After the single hit the iterator has to be exhausted.
    with pytest.raises(StopIteration):
        next(matches)

コード例 #5

0

ファイルを表示

ファイル: test_util.py プロジェクト: bittlingmayer/pyconll

def test_ngram_case_insensitive():
    """
    Test find_ngrams with case sensitivity turned off.

    Collects every result for the query 'Il' with case_sensitive=False and
    compares the sentence ids and indices against the expected values, in
    order.
    """
    conll = load_from_file(fixture_location('long.conll'))
    query = 'Il'.split()
    hits = list(find_ngrams(conll, query, case_sensitive=False))

    # Each hit is indexable: position 0 is the sentence, position 1 the index.
    found_ids = [hit[0].id for hit in hits]
    found_indices = [hit[1] for hit in hits]

    wanted_ids = [
        'fr-ud-test_00003',
        'fr-ud-test_00005',
        'fr-ud-test_00008',
    ]
    wanted_indices = [1, 16, 0]

    assert found_ids == wanted_ids
    assert found_indices == wanted_indices

コード例 #6

0

ファイルを表示

ファイル: test_util.py プロジェクト: bittlingmayer/pyconll

def test_ngram_multiple_per_sentence():
    """
    Every occurrence must be found when one sentence holds several matches.

    The query is expected to match twice in sentence 'fr-ud-test_00008', at
    indices 21 and 26.
    """
    conll = load_from_file(fixture_location('long.conll'))
    query = 'telle ou telle'.split()
    hits = list(find_ngrams(conll, query))

    # Each hit is indexable: position 0 is the sentence, position 1 the index.
    found_ids = [hit[0].id for hit in hits]
    found_indices = [hit[1] for hit in hits]

    wanted_ids = ['fr-ud-test_00008', 'fr-ud-test_00008']
    wanted_indices = [21, 26]

    assert found_ids == wanted_ids
    assert found_indices == wanted_indices

コード例 #7

0

ファイルを表示

def test_ngram_case_insensitive_n_token():
    """
    Test case-insensitive matching when the differing case is in a later token.

    The first result must be sentence 'fr-ud-test_00004' at index 8, and the
    matched tokens must carry ids '9' through '15'.
    """
    conll = load_from_file(fixture_location('long.conll'))
    query = 'l\' orgaNisaTion pour La sécurité et la'.split()

    matches = find_ngrams(conll, query, case_sensitive=False)
    sentence, index, matched_tokens = next(matches)

    found_token_ids = [tok.id for tok in matched_tokens]
    wanted_token_ids = ['9', '10', '11', '12', '13', '14', '15']

    assert sentence.id == 'fr-ud-test_00004'
    assert index == 8
    assert found_token_ids == wanted_token_ids

コード例 #8

0

ファイルを表示

def test_ngram_multiword_split():
    """
    Ngram searches must still succeed when the ngram spans a multiword token.

    Exactly one result is expected: sentence 'fr-ud-test_00002' at index 8,
    with matched token ids '9' through '15'.
    """
    conll = load_from_file(fixture_location('long.conll'))
    query = 'de " décentrement de le Sujet "'.split()

    matches = find_ngrams(conll, query)
    sentence, index, matched_tokens = next(matches)

    found_token_ids = [tok.id for tok in matched_tokens]
    wanted_token_ids = ['9', '10', '11', '12', '13', '14', '15']

    assert sentence.id == 'fr-ud-test_00002'
    assert index == 8
    assert found_token_ids == wanted_token_ids

    # After the single hit the iterator has to be exhausted.
    with pytest.raises(StopIteration):
        next(matches)

コード例 #9

0

ファイルを表示

def test_ngram_multiple_multiword_splits():
    """
    Ngram searches must work when there is more than one multiword token.

    Exactly one result is expected: sentence 'fr-ud-test_00003' at index 9,
    with matched token ids '10' through '18'.
    """
    conll = load_from_file(fixture_location('long.conll'))
    query = 'civile de le territoire non autonome de le Sahara'.split()

    matches = find_ngrams(conll, query)
    sentence, index, matched_tokens = next(matches)

    found_token_ids = [tok.id for tok in matched_tokens]
    wanted_token_ids = ['10', '11', '12', '13', '14', '15', '16', '17', '18']

    assert sentence.id == 'fr-ud-test_00003'
    assert index == 9
    assert found_token_ids == wanted_token_ids

    # After the single hit the iterator has to be exhausted.
    with pytest.raises(StopIteration):
        next(matches)