def test_count_matrix_specials_indexes():
    specials = (UNK(), PAD())
    vocab = Vocab(specials=specials)
    for example in DATA:
        vocab += example.split(" ")
    vocab.finalize()

    count_vectorizer = CountVectorizer(vocab=vocab)
    count_vectorizer._init_special_indexes()

    assert len(count_vectorizer._special_indexes) == 2
    for special in specials:
        assert vocab.stoi[special] in count_vectorizer._special_indexes


def test_specials_indexes():
    specials = (UNK(), PAD())
    vocab = Vocab(specials=specials)
    for example in DATA:
        vocab += example.split(" ")
    vocab.finalize()

    tfidf = TfIdfVectorizer(vocab=vocab)
    tfidf._init_special_indexes()

    assert len(tfidf._special_indexes) == 2
    for special in specials:
        assert vocab.stoi[special] in tfidf._special_indexes


def test_build_count_matrix_custom_specials_vocab_with_specials():
    vocab = Vocab(specials=(UNK(), PAD()))
    vocab_words = ["this", "is", "the", "first", "document"]
    vocab += vocab_words
    vocab.finalize()

    tfidf = TfIdfVectorizer(vocab=vocab, specials=[PAD(), "this", "first"])
    tfidf._init_special_indexes()

    numericalized_data = get_numericalized_data(data=DATA, vocab=vocab)
    count_matrix = tfidf._build_count_matrix(
        data=numericalized_data, unpack_data=tfidf._get_tensor_values
    )
    expected = np.array([[0, 1, 1, 1], [1, 1, 1, 2], [3, 1, 1, 0], [0, 1, 1, 1]])
    assert np.all(count_matrix == expected)


def test_label_field():
    vocab = Vocab(specials=())
    labels = ["label_1", "label_2", "label_3"]
    vocab += labels
    vocab.finalize()

    label_field = LabelField("test_label_field", numericalizer=vocab)
    preprocessed = [label_field.preprocess(label) for label in labels]
    for example in preprocessed:
        # preprocess yields a tuple of (field_name, (raw, tokenized)) pairs
        _, raw_tokenized = example[0]
        _, tokenized = raw_tokenized
        assert label_field.numericalize(raw_tokenized) == vocab.stoi[tokenized]


def test_build_count_matrix_custom_specials_vocab_without_specials():
    vocab = Vocab(specials=())
    for example in DATA:
        vocab += example.split(" ")
    vocab.finalize()

    tfidf = TfIdfVectorizer(
        vocab=vocab, specials=["the", "first", "second", "one", "third", "and"]
    )
    tfidf._init_special_indexes()

    numericalized_data = get_numericalized_data(data=DATA, vocab=vocab)
    count_matrix = tfidf._build_count_matrix(
        data=numericalized_data, unpack_data=tfidf._get_tensor_values
    )
    expected = np.array([[1, 1, 1], [1, 1, 2], [1, 1, 0], [1, 1, 1]])
    assert np.all(count_matrix == expected)


def test_count_vectorizer_transform_tokens_tensor():
    vocab = Vocab(specials=())
    for example in DATA:
        vocab += example.split(" ")
    vocab.finalize()

    count_vectorizer = CountVectorizer(vocab=vocab)
    count_vectorizer.fit(dataset=None, field=None)

    numericalized_data = get_numericalized_data(data=DATA, vocab=vocab)
    bow = count_vectorizer.transform(numericalized_data).todense()
    expected = np.array(
        [
            [1, 1, 1, 1, 1, 0, 0, 0, 0],
            [1, 1, 1, 2, 0, 1, 0, 0, 0],
            [1, 1, 1, 0, 0, 0, 1, 1, 1],
            [1, 1, 1, 1, 1, 0, 0, 0, 0],
        ]
    )
    assert np.allclose(a=bow, b=expected, rtol=0, atol=1.0e-6)


def test_build_count_matrix_from_tensor_with_specials():
    vocab = Vocab(specials=(UNK(), PAD()))
    for example in DATA:
        vocab += example.split(" ")
    vocab.finalize()

    tfidf = TfIdfVectorizer(vocab=vocab)
    tfidf._init_special_indexes()

    numericalized_data = get_numericalized_data(data=DATA, vocab=vocab)
    count_matrix = tfidf._build_count_matrix(
        data=numericalized_data, unpack_data=tfidf._get_tensor_values
    )
    expected = np.array(
        [
            [1, 1, 1, 1, 1, 0, 0, 0, 0],
            [1, 1, 1, 2, 0, 1, 0, 0, 0],
            [1, 1, 1, 0, 0, 0, 1, 1, 1],
            [1, 1, 1, 1, 1, 0, 0, 0, 0],
        ]
    )
    assert np.all(count_matrix == expected)