Python Field.finalize 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: podium.field

클래스/타입: Field

메소드/함수: finalize

hotexamples.com에서의 예제들: 9

Python Field.finalize - 9개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 podium.field.Field.finalize의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Field(30)

preprocess(23)

add_posttokenize_hook(11)

finalize(9)

numericalize(8)

add_pretokenize_hook(7)

_pad_to_length(3)

get_default_value(2)

remove_posttokenize_hooks(1)

remove_pretokenize_hooks(1)

예제 #1

0

파일 보기

파일: test_field.py 프로젝트: TakeLab/podium

def test_field_finalize():
    """Check that finalizing a field finalizes its vocab exactly once.

    The mock vocab must report ``is_finalized`` as falsy before
    ``Field.finalize`` is called and truthy afterwards; calling
    ``finalize`` a second time is expected to raise.
    """
    mock_vocab = MockVocab()
    field = Field(name="F", numericalizer=mock_vocab)

    finalized_before = mock_vocab.is_finalized
    assert not finalized_before

    field.finalize()
    finalized_after = mock_vocab.is_finalized
    assert finalized_after

    # A second finalize call on the same field must fail.
    with pytest.raises(Exception):
        field.finalize()

예제 #2

0

파일 보기

파일: test_field.py 프로젝트: TakeLab/podium

def test_missing_symbol_index_custom_numericalize():
    """Check the default value of a field that uses a custom numericalizer.

    The field is created with ``int`` as numericalizer, no tokenizer and
    ``allow_missing_data=True``; once finalized, ``get_default_value`` is
    expected to return ``-1``.
    """
    field_options = {
        "name": "test_field",
        "keep_raw": True,
        "tokenizer": None,
        "numericalizer": int,
        "allow_missing_data": True,
    }
    fld = Field(**field_options)
    fld.finalize()

    default_value = fld.get_default_value()
    assert default_value == -1

예제 #3

0

파일 보기

파일: test_field.py 프로젝트: TakeLab/podium

def test_field_custom_numericalization_no_tokenization():
    """Check numericalization with an identity function and no tokenizer.

    The field is built with ``tokenizer=None`` and a numericalizer that
    returns its argument unchanged. After finalization, numericalizing each
    preprocessed sample is expected to compare equal to the integers that
    were fed in.
    """
    tfield = Field("bla", numericalizer=lambda value: value, tokenizer=None)

    samples = ((1, 2, 3), (3, 2, 1), (3, 4, 5, 6), (2, 3, 6))

    # Every sample is preprocessed before the field is finalized.
    preprocessed = []
    for sample in samples:
        _, data = tfield.preprocess(list(sample))[0]
        preprocessed.append(data)

    tfield.finalize()

    for sample, data in zip(samples, preprocessed):
        assert np.all(tfield.numericalize(data) == np.array(list(sample)))

예제 #4

0

파일 보기

파일: test_field.py 프로젝트: TakeLab/podium

def test_field_custom_numericalization_vocab_non_string():
    """Check that a vocab-backed field handles non-string (integer) tokens.

    A ``Vocab`` without special symbols is used as the numericalizer of a
    field with ``tokenizer=None``. After finalization, the field's
    numericalization of each preprocessed sample must agree with what the
    vocab itself produces for the same tokens.
    """
    vocab = Vocab(specials=())
    tfield = Field("bla", numericalizer=vocab, tokenizer=None)

    samples = ((1, 2, 3), (3, 2, 1), (3, 4, 5, 6), (2, 3, 6))

    # Every sample is preprocessed before the field is finalized.
    preprocessed = []
    for sample in samples:
        _, data = tfield.preprocess(list(sample))[0]
        preprocessed.append(data)

    tfield.finalize()

    for sample, data in zip(samples, preprocessed):
        # A fresh list is handed to the vocab for each comparison.
        assert np.all(tfield.numericalize(data) == vocab.numericalize(list(sample)))

예제 #5

0

파일 보기

파일: test_field.py 프로젝트: TakeLab/podium

def test_field_custom_numericalization_no_tokenization_2():
    """Check numericalization through a dict lookup used as numericalizer.

    The bound ``get`` method of a label-to-index dict serves as the
    numericalizer of a field with ``tokenizer=None``. After finalization,
    each preprocessed label sequence is expected to numericalize to the
    matching sequence of indices.
    """
    label_indexer = {"one": 1, "two": 2, "three": 3, "four": 4}
    tfield = Field("bla", numericalizer=label_indexer.get, tokenizer=None)

    # (input labels, expected indices) pairs.
    cases = (
        (["one", "two", "three"], [1, 2, 3]),
        (["three", "two", "one"], [3, 2, 1]),
        (["three", "four", "four", "two"], [3, 4, 4, 2]),
        (["two", "three", "one"], [2, 3, 1]),
    )

    # Every sample is preprocessed before the field is finalized.
    preprocessed = []
    for labels, _expected in cases:
        _, data = tfield.preprocess(labels)[0]
        preprocessed.append(data)

    tfield.finalize()

    for (_labels, expected), data in zip(cases, preprocessed):
        assert np.all(tfield.numericalize(data) == np.array(expected))

예제 #6

0

파일 보기

파일: test_field.py 프로젝트: TakeLab/podium

def test_missing_symbol_index_vocab():
    """Check missing-data handling for a field numericalized by a vocab.

    With ``allow_missing_data=True``, preprocessing ``None`` is expected to
    yield a single entry whose data is ``(None, None)``. After
    finalization, numericalizing that data returns ``None`` and the field's
    default value is ``-1``.
    """
    vocab = Vocab()
    field_options = {
        "name": "test_field",
        "tokenizer": "split",
        "keep_raw": False,
        "numericalizer": vocab,
        "allow_missing_data": True,
    }
    fld = Field(**field_options)

    # Preprocess a regular sentence first; its result is not inspected.
    fld.preprocess("a b c d")

    # Preprocessing a missing value must produce exactly one (name, data) entry.
    (entry,) = fld.preprocess(None)
    _, data = entry
    assert data == (None, None)

    fld.finalize()

    numericalized_missing = fld.numericalize((None, None))
    assert numericalized_missing is None
    assert fld.get_default_value() == -1

예제 #7

0

파일 보기

파일: test_field.py 프로젝트: TakeLab/podium

def test_missing_values_default_sequential():
    """Check missing and present values on a tokenizing field.

    The field uses the ``"split"`` tokenizer, ``hash`` as numericalizer and
    does not keep raw data. A missing value preprocesses to
    ``(None, None)`` and numericalizes to ``None``; a present string
    preprocesses to ``(None, [token])`` and numericalizes to the hash of
    that token.
    """
    field_options = {
        "name": "bla",
        "keep_raw": False,
        "tokenizer": "split",
        "numericalizer": hash,
        "allow_missing_data": True,
    }
    fld = Field(**field_options)

    missing_entry = fld.preprocess(None)[0]
    _, data_missing = missing_entry
    present_entry = fld.preprocess("data_string")[0]
    _, data_exists = present_entry

    assert data_missing == (None, None)
    assert data_exists == (None, ["data_string"])

    fld.finalize()

    assert fld.numericalize(data_missing) is None
    expected = np.array([hash("data_string")])
    assert np.all(fld.numericalize(data_exists) == expected)

예제 #8

0

파일 보기

파일: test_field.py 프로젝트: TakeLab/podium

def test_missing_values_custom_numericalize():
    """Check missing and present values on a non-tokenizing field.

    The field keeps raw data, has no tokenizer and uses ``int`` as
    numericalizer. A missing value preprocesses to ``(None, None)`` and
    numericalizes to ``None``; the string ``"404"`` preprocesses to
    ``("404", "404")`` and numericalizes to a value equal to ``[404]``.
    """
    field_options = {
        "name": "test_field",
        "keep_raw": True,
        "tokenizer": None,
        "numericalizer": int,
        "allow_missing_data": True,
    }
    fld = Field(**field_options)

    missing_entry = fld.preprocess(None)[0]
    _, data_missing = missing_entry
    present_entry = fld.preprocess("404")[0]
    _, data_exists = present_entry

    assert data_missing == (None, None)
    assert data_exists == ("404", "404")

    fld.finalize()

    assert fld.numericalize(data_missing) is None
    expected = np.array([404])
    assert np.all(fld.numericalize(data_exists) == expected)

예제 #9

0

파일 보기

파일: test_field.py 프로젝트: TakeLab/podium

def test_field_vocab_no_tokenization():
    """Check vocab numericalization of pre-tokenized input.

    An eager ``Vocab`` is the numericalizer of a field with
    ``tokenizer=None``. Four pre-tokenized inputs are preprocessed, the
    field is finalized, and then both the vocab (given the tokens) and the
    field (given the preprocessed data) must produce the expected indices.
    """
    vocab = Vocab(eager=True)

    # (pre-tokenized input, expected indices) pairs.
    # NOTE(review): indices start at 2, presumably because the default
    # Vocab reserves the first two slots for special symbols - confirm.
    cases = (
        (["word", "words", "uttering"], [2, 3, 4]),
        (["word", "words"], [2, 3]),
        (["word"], [2]),
        (["word", "uttering"], [2, 4]),
    )

    tokenized_field = Field("test_field", tokenizer=None, numericalizer=vocab)

    # Every input is preprocessed before the field is finalized.
    preprocessed = []
    for pretokenized_input, _expected in cases:
        _, data = tokenized_field.preprocess(pretokenized_input)[0]
        preprocessed.append(data)

    tokenized_field.finalize()

    for (_input, expected), data in zip(cases, preprocessed):
        expected_numericalization = np.array(expected)
        _, tokens = data
        assert np.all(vocab.numericalize(tokens) == expected_numericalization)
        assert np.all(tokenized_field.numericalize(data) == expected_numericalization)