def test_build_vocab():
    """Check vocab construction with explicit pad and unk tokens.

    The test first calls ``_build_vocab`` with no data and expects only the
    two special tokens, then calls it again with two sentences and expects
    each new whitespace-separated word to receive the next free index.
    'Praesent' and 'praesent' are expected to be distinct entries.
    """
    text_field = TextField(pad_token='<pad>', unk_token='<unk>')

    # No data supplied: only the special tokens are expected.
    text_field._build_vocab()
    assert text_field.vocab == {'<pad>': 0, '<unk>': 1}

    sentences = ["justo Praesent luctus", "luctus praesent"]
    text_field._build_vocab(sentences)

    # Expected tokens listed in the order of their expected indices.
    expected_tokens = ('<pad>', '<unk>', 'justo', 'Praesent',
                       'luctus', 'praesent')
    expected = {token: index for index, token in enumerate(expected_tokens)}
    assert text_field.vocab == expected
def test_build_vocab_decorators_missing_specials():
    """Check vocab construction when pad/unk are None and sos/eos are set.

    With ``pad_token`` and ``unk_token`` passed as ``None``, the test expects
    the vocabulary to start with only the sos and eos tokens, and expects
    words from the data to be indexed after them.
    """
    text_field = TextField(pad_token=None, unk_token=None,
                           sos_token='<sos>', eos_token='<eos>')

    # No data supplied: only the sos/eos tokens are expected.
    text_field._build_vocab()
    assert text_field.vocab == {'<sos>': 0, '<eos>': 1}

    sentences = ["justo Praesent luctus", "luctus praesent"]
    text_field._build_vocab(sentences)

    # Expected tokens listed in the order of their expected indices.
    expected_tokens = ('<sos>', '<eos>', 'justo', 'Praesent',
                       'luctus', 'praesent')
    expected = {token: index for index, token in enumerate(expected_tokens)}
    assert text_field.vocab == expected
# Example #3
# 0
def test_build_vocab_list():
    """Check vocab construction from a list of lists of sentences.

    A ``TextField`` created with no arguments is given nested lists of
    strings; the test expects '<pad>' and '<unk>' at indices 0 and 1,
    followed by each distinct word in order of first appearance.
    """
    text_field = TextField()
    nested_sentences = [
        ["justo Praesent luctus", "luctus praesent"],
        ["justo Praesent luctus", "luctus praesent est"],
    ]
    text_field._build_vocab(nested_sentences)

    # Expected tokens listed in the order of their expected indices.
    expected_tokens = ('<pad>', '<unk>', 'justo', 'Praesent',
                       'luctus', 'praesent', 'est')
    expected = {token: index for index, token in enumerate(expected_tokens)}
    assert text_field.vocab == expected
# Example #4
# 0
def test_build_vocab_nested_list_in_dict():
    """Check vocab construction from a list holding a dict of sentence lists.

    A ``TextField`` created with no arguments is given a single dict whose
    values are lists of strings; the test expects '<pad>' and '<unk>' at
    indices 0 and 1, followed by each distinct word found in the dict values.
    """
    text_field = TextField()
    sample = {
        'text1': ["justo Praesent luctus", "luctus praesent"],
        'text2': ["justo Praesent luctus", "luctus praesent est"],
    }
    text_field._build_vocab([sample])

    # Expected tokens listed in the order of their expected indices.
    expected_tokens = ('<pad>', '<unk>', 'justo', 'Praesent',
                       'luctus', 'praesent', 'est')
    expected = {token: index for index, token in enumerate(expected_tokens)}
    assert text_field.vocab == expected