def test_token2id_store_and_load():
    os.makedirs('./tests/output', exist_ok=True)

    token2id: Token2Id = Token2Id().ingest(TEST_TOKENS_STREAM1)

    filename = './tests/output/test_vocabulary.zip'
    tf_filename = path_add_suffix(filename, "_tf", new_extension=".pbz2")

    # `store` persists both the vocabulary and its term-frequency companion file
    token2id.store(filename=filename)

    assert os.path.isfile(filename) and os.path.isfile(tf_filename)

    token2id_loaded: Token2Id = Token2Id.load(filename=filename)

    assert token2id_loaded is not None
    assert token2id_loaded.tf is not None
    assert token2id_loaded.data == {'adam': 0, 'anton': 1, 'beatrice': 2, 'felicia': 3, 'niklas': 4}
    assert dict(token2id_loaded.tf) == {0: 3, 1: 2, 2: 1, 3: 1, 4: 1}
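# A minimal sketch (not part of the original suite) of what the tf assertion above
# encodes: the loaded `tf` behaves like a Counter keyed by token id. It assumes that
# TEST_TOKENS_STREAM1 yields 'adam' three times, 'anton' twice, and the remaining
# names once each, with ids handed out in the order shown in the test.
from collections import Counter

def sketch_expected_term_frequencies():
    tokens = ['adam', 'adam', 'adam', 'anton', 'anton', 'beatrice', 'felicia', 'niklas']  # assumed stream content
    ids = {'adam': 0, 'anton': 1, 'beatrice': 2, 'felicia': 3, 'niklas': 4}
    assert dict(Counter(ids[token] for token in tokens)) == {0: 3, 1: 2, 2: 1, 3: 1, 4: 1}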
def test_interfaces_token2id_store():
    os.makedirs('./tests/output', exist_ok=True)

    filename: str = './tests/output/test_interfaces_token2id_store.zip'

    token2id = Token2Id()
    token2id.ingest(['apa', 'banan', 'soffa'])
    token2id.store(filename)

    assert pathlib.Path(filename).exists()

    token2id_loaded: Token2Id = Token2Id.load(filename)
    assert token2id.data == token2id_loaded.data
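# Usage sketch (assumption, not confirmed by the suite: `ingest` assigns dense,
# zero-based ids in first-seen order, which is consistent with the fixtures above).
def sketch_ingest_assigns_ids_in_first_seen_order():
    token2id = Token2Id()
    token2id.ingest(['apa', 'banan', 'soffa'])
    assert dict(token2id.data) == {'apa': 0, 'banan': 1, 'soffa': 2}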
def test_co_occurrences_to_co_occurrence_corpus():
    folder, tag = './tests/test_data/ABCDEFG_7DOCS_CONCEPT', "ABCDEFG_7DOCS_CONCEPT"

    co_occurrences: CoOccurrenceDataFrame = co_occurrence.load_co_occurrences(co_occurrence_filename(folder, tag))
    document_index: DocumentIndex = DocumentIndexHelper.load(document_index_filename(folder, tag)).document_index
    token2id: Token2Id = Token2Id.load(vocabulary_filename(folder, tag))

    corpus = LegacyCoOccurrenceMixIn.from_co_occurrences(
        co_occurrences=co_occurrences,
        document_index=document_index,
        token2id=token2id,
    )

    # The corpus must preserve the total co-occurrence count, with one row per
    # document and one column per distinct (w1_id, w2_id) token pair
    assert corpus.data.sum() == co_occurrences.value.sum()
    assert corpus.data.shape[0] == len(document_index)
    assert corpus.data.shape[1] == len(co_occurrences[["w1_id", "w2_id"]].drop_duplicates())
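# Sketch of the shape arithmetic the assertions above encode, using plain pandas on
# hypothetical data (not the ABCDEFG fixture): each distinct (w1_id, w2_id) pair
# becomes one column of the resulting matrix, each document one row, and the grand
# total of the matrix equals the sum of the `value` column.
import pandas as pd

def sketch_co_occurrence_corpus_shape():
    co_occurrences = pd.DataFrame(
        {
            'document_id': [0, 0, 1],
            'w1_id': [0, 0, 1],
            'w2_id': [1, 2, 2],
            'value': [2, 1, 3],
        }
    )
    n_pairs = len(co_occurrences[['w1_id', 'w2_id']].drop_duplicates())
    assert n_pairs == 3  # three distinct token pairs -> three columns expected
    assert co_occurrences.value.sum() == 6  # expected grand total of the matrix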