Esempi in Python per Token2Id.load

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: penelope.corpus

Classe/tipologia: Token2Id

Metodo/funzione: load

Esempi su hotexamples.com: 3

Token2Id.load in Python: 3 esempi trovati. Questi sono i migliori esempi reali in Python di penelope.corpus.Token2Id.load, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra / Nascondi

Token2Id(29)

load(3)

to_id_set(2)

any_to_id2token(1)

replace(1)

Esempio n. 1

Mostra file

File: token2id_test.py Progetto: humlab/penelope

def test_token2id_store_and_load():
    """Store an ingested vocabulary and check that loading restores it.

    The vocabulary is written to ``./tests/output/test_vocabulary.zip``; the
    test also expects a companion ``_tf`` file with a ``.pbz2`` extension to
    appear next to it. After loading, both the ``data`` mapping and the
    ``tf`` mapping are compared against the expected literal values.
    """
    os.makedirs('./tests/output', exist_ok=True)

    vocabulary: Token2Id = Token2Id().ingest(TEST_TOKENS_STREAM1)

    filename = './tests/output/test_vocabulary.zip'
    # Name of the companion file derived from the vocabulary file name.
    tf_filename = path_add_suffix(filename, "_tf", new_extension=".pbz2")

    vocabulary.store(filename=filename)

    # Both artefacts must exist on disk once the store call has returned.
    assert os.path.isfile(filename)
    assert os.path.isfile(tf_filename)

    restored: Token2Id = Token2Id.load(filename=filename)

    assert restored is not None
    assert restored.tf is not None

    expected_ids = {
        'adam': 0,
        'anton': 1,
        'beatrice': 2,
        'felicia': 3,
        'niklas': 4,
    }
    expected_tf = {0: 3, 1: 2, 2: 1, 3: 1, 4: 1}

    assert restored.data == expected_ids
    assert dict(restored.tf) == expected_tf

Esempio n. 2

Mostra file

def test_interfaces_token2id_store():
    """Check that a stored ``Token2Id`` loads back with identical ``data``.

    Three tokens are ingested, the vocabulary is stored under
    ``./tests/output`` and the archive is then loaded again for comparison.
    """
    os.makedirs('./tests/output', exist_ok=True)

    filename: str = './tests/output/test_interfaces_token2id_store.zip'

    source_vocabulary = Token2Id()
    source_vocabulary.ingest(['apa', 'banan', 'soffa'])
    source_vocabulary.store(filename)

    # The store call is expected to have produced the archive on disk.
    archive_path = pathlib.Path(filename)
    assert archive_path.exists()

    reloaded_vocabulary: Token2Id = Token2Id.load(filename)

    assert source_vocabulary.data == reloaded_vocabulary.data

Esempio n. 3

Mostra file

def test_co_occurrences_to_co_occurrence_corpus():
    """Build a corpus from stored co-occurrences and check its dimensions.

    The co-occurrence frame, document index and vocabulary are loaded from
    the ``ABCDEFG_7DOCS_CONCEPT`` test data folder, and the corpus is created
    with ``LegacyCoOccurrenceMixIn.from_co_occurrences``. The test asserts
    that:

    * the sum of the corpus data equals the sum of the ``value`` column,
    * the corpus has one row per entry in the document index,
    * the corpus has one column per distinct ``(w1_id, w2_id)`` pair.
    """
    folder = './tests/test_data/ABCDEFG_7DOCS_CONCEPT'
    tag = "ABCDEFG_7DOCS_CONCEPT"

    co_occurrences_path = co_occurrence_filename(folder, tag)
    co_occurrences: CoOccurrenceDataFrame = co_occurrence.load_co_occurrences(co_occurrences_path)

    index_path = document_index_filename(folder, tag)
    document_index: DocumentIndex = DocumentIndexHelper.load(index_path).document_index

    vocabulary_path = vocabulary_filename(folder, tag)
    token2id: Token2Id = Token2Id.load(vocabulary_path)

    corpus = LegacyCoOccurrenceMixIn.from_co_occurrences(
        co_occurrences=co_occurrences,
        document_index=document_index,
        token2id=token2id,
    )

    assert corpus.data.sum() == co_occurrences.value.sum()
    assert corpus.data.shape[0] == len(document_index)

    # Distinct token-id pairs found in the co-occurrence frame.
    unique_pairs = co_occurrences[["w1_id", "w2_id"]].drop_duplicates()
    assert corpus.data.shape[1] == len(unique_pairs)