def test_tuples_and_strings():
    """Round-trip conversion between tuple and string token representations."""
    as_tuples = [("a", "b"), ("c",), ("de", "fg")]
    as_strings = ["a b", "c", "de fg"]
    # Conversion to strings accepts either representation (idempotent on strings).
    assert tokenization.tuple_sequence_to_strings(as_tuples) == as_strings
    assert tokenization.tuple_sequence_to_strings(as_strings) == as_strings
    # Conversion to tuples is likewise idempotent on already-converted input.
    assert tokenization.string_sequence_to_tuples(as_strings) == as_tuples
    assert tokenization.string_sequence_to_tuples(as_tuples) == as_tuples
def test_tokenizing_pipeline(voc_mapping, with_frequencies):
    """End-to-end check of a pipeline built from a vocabulary file.

    Exercises tokenization output, vocabulary accessors in both string and
    tuple form, frequency bookkeeping, and export/reload round-tripping.
    The ``voc_mapping`` / ``with_frequencies`` arguments are supplied by
    pytest fixtures or parametrization defined elsewhere in the file.
    """
    pipeline = tokenization.tokenizing_pipeline_from_vocabulary_file(
        VOCABULARY_FILE, voc_mapping=voc_mapping
    )
    if not with_frequencies:
        pipeline.frequencies = None
    # With an empty mapping "groups" stays a distinct token; with any other
    # mapping it is folded onto "group".
    final_token = "groups" if voc_mapping == {} else "group"
    assert pipeline("the working memory group xyzzzz groups") == [
        "working memory",
        "group",
        final_token,
    ]
    # String and tuple views of each vocabulary must describe the same tokens.
    assert pipeline.get_full_vocabulary(
        as_tuples=True
    ) == tokenization.string_sequence_to_tuples(pipeline.get_full_vocabulary())
    assert pipeline.get_vocabulary(
        as_tuples=True
    ) == tokenization.string_sequence_to_tuples(pipeline.get_vocabulary())
    # The "auto" mapping merges two entries out of the full vocabulary.
    if voc_mapping == "auto":
        assert (
            len(pipeline.get_full_vocabulary())
            == len(pipeline.get_vocabulary()) + 2
        )
    else:
        assert len(pipeline.get_full_vocabulary()) == len(
            pipeline.get_vocabulary()
        )
    assert len(pipeline.get_frequencies()) == len(pipeline.get_vocabulary())
    if with_frequencies:
        assert hasattr(pipeline, "frequencies_")
        assert len(pipeline.get_frequencies()) == len(
            pipeline.get_vocabulary()
        )
    # Export the vocabulary and verify a reloaded pipeline is equivalent.
    with tempfile.TemporaryDirectory() as tmpdir:
        exported = os.path.join(tmpdir, "voc_file.csv")
        pipeline.to_vocabulary_file(exported)
        reloaded = tokenization.tokenizing_pipeline_from_vocabulary_file(
            exported, voc_mapping=voc_mapping
        )
        assert (
            reloaded.vocabulary_mapping_.voc_mapping
            == pipeline.vocabulary_mapping_.voc_mapping
        )
        assert reloaded.get_full_vocabulary() == pipeline.get_full_vocabulary()
        assert reloaded.get_vocabulary() == pipeline.get_vocabulary()