Python Alignment.from_strings 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: spacy.training

클래스/타입: Alignment

메소드/함수: from_strings

hotexamples.com에서의 예제들: 5

Python Alignment.from_strings - 5개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python spacy.training.Alignment.from_strings의 실제 사용 예제 중 평점이 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

from_strings(5)

자주 사용되는 메소드들

from_strings (5)

예제 #1

파일 보기

파일: test_training.py 프로젝트: xettrisomeman/spaCy

def test_alignment_case_insensitive():
    """Verify the alignment produced for token lists that differ in letter case.

    The two tokenizations also split the possessive differently
    ("'", "s" on one side versus "'s" on the other).
    """
    x_tokens = ["I", "listened", "to", "obama", "'", "s", "podcasts", "."]
    y_tokens = ["i", "listened", "to", "Obama", "'s", "PODCASTS", "."]
    result = Alignment.from_strings(x_tokens, y_tokens)

    # Direction x -> y: every x token maps to exactly one y token.
    forward = result.x2y
    assert list(forward.lengths) == [1, 1, 1, 1, 1, 1, 1, 1]
    assert list(forward.dataXd) == [0, 1, 2, 3, 4, 4, 5, 6]

    # Direction y -> x: the "'s" token (index 4) covers two x tokens.
    backward = result.y2x
    assert list(backward.lengths) == [1, 1, 1, 1, 2, 1, 1]
    assert list(backward.dataXd) == [0, 1, 2, 3, 4, 5, 6, 7]

예제 #2

파일 보기

파일: test_training.py 프로젝트: admariner/spaCy

def test_alignment_complex():
    """Verify the alignment when tokens are merged and split on both sides.

    One side has the single token "i listened to" and a separate ".", while
    the other splits the former into three tokens and fuses "podcasts.".
    """
    x_tokens = ["i listened to", "obama", "'", "s", "podcasts", "."]
    y_tokens = ["i", "listened", "to", "obama", "'s", "podcasts."]
    result = Alignment.from_strings(x_tokens, y_tokens)

    # Direction x -> y: the first x token spans three y tokens.
    forward = result.x2y
    assert list(forward.lengths) == [3, 1, 1, 1, 1, 1]
    assert list(forward.dataXd) == [0, 1, 2, 3, 4, 4, 5, 5]

    # Direction y -> x: the last two y tokens each span two x tokens.
    backward = result.y2x
    assert list(backward.lengths) == [1, 1, 1, 1, 2, 2]
    assert list(backward.dataXd) == [0, 0, 0, 1, 2, 3, 4, 5]

예제 #3

파일 보기

파일: assignment2.py 프로젝트: LunaBaozi/NLU_assignment2

def get_spacy_alignment(ref, hyp) -> list:
    """Compute the token alignment for each reference/hypothesis pair.

    :param ref: iterable of reference tokenizations; each item is passed as
        the first argument to ``Alignment.from_strings`` (presumably a list
        of token strings -- verify against the caller)
    :param hyp: iterable of hypothesis tokenizations, paired positionally
        with ``ref`` and passed as the second argument
    :return: a list containing one alignment object per (ref, hyp) pair

    Pairs are formed with ``zip``, so when the inputs differ in length the
    surplus items of the longer one are ignored.
    """
    return [
        Alignment.from_strings(ref_tokens, hyp_tokens)
        for ref_tokens, hyp_tokens in zip(ref, hyp)
    ]

예제 #4

파일 보기

def test_alignment_spaces(en_vocab):
    """Verify alignments for token lists containing whitespace-only tokens.

    Scenarios are run in order from a table. Each entry holds the two token
    lists and the expected ``lengths``/``data`` for both directions; an
    expectation of ``None`` means that value is not asserted, only that
    ``Alignment.from_strings`` completes without raising.

    :param en_vocab: not used in the body (presumably a pytest fixture --
        verify against the test suite's conftest)
    """
    x_words = ["i listened to", "obama", "'", "s", "podcasts", "."]
    y_words = ["i", "listened", "to", "obama", "'s", "podcasts."]

    # (label, other tokens, spaCy tokens,
    #  x2y lengths, x2y data, y2x lengths, y2x data)
    scenarios = [
        (
            "single leading whitespace",
            [" "] + x_words,
            list(y_words),
            [0, 3, 1, 1, 1, 1, 1],
            [0, 1, 2, 3, 4, 4, 5, 5],
            [1, 1, 1, 1, 2, 2],
            [1, 1, 1, 2, 3, 4, 5, 6],
        ),
        (
            "multiple leading whitespace tokens",
            [" ", " "] + x_words,
            list(y_words),
            [0, 0, 3, 1, 1, 1, 1, 1],
            [0, 1, 2, 3, 4, 4, 5, 5],
            [1, 1, 1, 1, 2, 2],
            [2, 2, 2, 3, 4, 5, 6, 7],
        ),
        (
            "both with leading whitespace, not identical",
            [" ", " "] + x_words,
            [" "] + y_words,
            [1, 0, 3, 1, 1, 1, 1, 1],
            [0, 1, 2, 3, 4, 5, 5, 6, 6],
            [1, 1, 1, 1, 1, 2, 2],
            [0, 2, 2, 2, 3, 4, 5, 6, 7],
        ),
        (
            "same leading whitespace, different tokenization",
            [" ", " "] + x_words,
            ["  "] + y_words,
            [1, 1, 3, 1, 1, 1, 1, 1],
            [0, 0, 1, 2, 3, 4, 5, 5, 6, 6],
            [2, 1, 1, 1, 1, 2, 2],
            [0, 1, 2, 2, 2, 3, 4, 5, 6, 7],
        ),
        (
            "only one with trailing whitespace",
            x_words + [" "],
            list(y_words),
            [3, 1, 1, 1, 1, 1, 0],
            [0, 1, 2, 3, 4, 4, 5, 5],
            [1, 1, 1, 1, 2, 2],
            [0, 0, 0, 1, 2, 3, 4, 5],
        ),
        (
            "different trailing whitespace",
            x_words + [" ", " "],
            y_words + [" "],
            [3, 1, 1, 1, 1, 1, 1, 0],
            [0, 1, 2, 3, 4, 4, 5, 5, 6],
            [1, 1, 1, 1, 2, 2, 1],
            [0, 0, 0, 1, 2, 3, 4, 5, 6],
        ),
        (
            "same trailing whitespace, different tokenization",
            x_words + [" ", " "],
            y_words + ["  "],
            [3, 1, 1, 1, 1, 1, 1, 1],
            [0, 1, 2, 3, 4, 4, 5, 5, 6, 6],
            [1, 1, 1, 1, 2, 2, 2],
            [0, 0, 0, 1, 2, 3, 4, 5, 6, 7],
        ),
        (
            "differing whitespace is allowed",
            ["a", " \n ", "b", "c"],
            ["a", "b", " ", "c"],
            None,
            [0, 1, 3],
            None,
            [0, 2, 3],
        ),
        # The remaining scenarios only need from_strings to succeed.
        (
            "other differences in whitespace are allowed (leading)",
            [" ", "a"],
            ["  ", "a", " "],
            None,
            None,
            None,
            None,
        ),
        (
            "other differences in whitespace are allowed (trailing)",
            ["a", " "],
            ["a", "  "],
            None,
            None,
            None,
            None,
        ),
    ]

    for _label, x_tokens, y_tokens, x2y_len, x2y_data, y2x_len, y2x_data in scenarios:
        aligned = Alignment.from_strings(x_tokens, y_tokens)
        if x2y_len is not None:
            assert list(aligned.x2y.lengths) == x2y_len
        if x2y_data is not None:
            assert list(aligned.x2y.data) == x2y_data
        if y2x_len is not None:
            assert list(aligned.y2x.lengths) == y2x_len
        if y2x_data is not None:
            assert list(aligned.y2x.data) == y2x_data

예제 #5

파일 보기

def test_alignment_different_texts():
    """Expect ValueError when the two token lists differ in their first word."""
    shared_tail = ["listened", "to", "obama", "'s", "podcasts", "."]
    x_tokens = ["she", *shared_tail]
    y_tokens = ["i", *shared_tail]
    with pytest.raises(ValueError):
        Alignment.from_strings(x_tokens, y_tokens)