def test_build_vocab():
    """Check that the vocabulary starts with the specials and then grows.

    A first build without data is expected to yield only the pad and unk
    tokens; a second build with sentences is expected to append each new
    token after them, in order of first appearance.
    """
    text_field = TextField(pad_token='<pad>', unk_token='<unk>')

    # Building without any data should register only the special tokens.
    text_field._build_vocab()
    specials_only = {'<pad>': 0, '<unk>': 1}
    assert text_field.vocab == specials_only

    # 'Praesent' and 'praesent' are expected as two separate entries.
    sentences = ["justo Praesent luctus", "luctus praesent"]
    text_field._build_vocab(sentences)

    ordered_tokens = ['<pad>', '<unk>', 'justo', 'Praesent', 'luctus', 'praesent']
    expected = {token: index for index, token in enumerate(ordered_tokens)}
    assert text_field.vocab == expected
def test_build_vocab_decorators_missing_specials():
    """Check vocabulary building when pad/unk are disabled and sos/eos are set.

    With ``pad_token`` and ``unk_token`` passed as ``None``, the sos and eos
    tokens are expected to take ids 0 and 1, with data tokens numbered
    after them in order of first appearance.
    """
    text_field = TextField(
        pad_token=None,
        unk_token=None,
        sos_token='<sos>',
        eos_token='<eos>',
    )

    # Without data, only the sequence delimiters should be present.
    text_field._build_vocab()
    delimiters_only = {'<sos>': 0, '<eos>': 1}
    assert text_field.vocab == delimiters_only

    sentences = ["justo Praesent luctus", "luctus praesent"]
    text_field._build_vocab(sentences)

    ordered_tokens = ['<sos>', '<eos>', 'justo', 'Praesent', 'luctus', 'praesent']
    expected = {token: index for index, token in enumerate(ordered_tokens)}
    assert text_field.vocab == expected
def test_build_vocab_list():
    """Check that a list of lists of sentences is consumed when building.

    A default ``TextField`` is expected to start with '<pad>' and '<unk>',
    then number each new token from the nested lists in order of first
    appearance.
    """
    text_field = TextField()

    # The two inner lists differ only by the trailing 'est' in the second.
    first_group = ["justo Praesent luctus", "luctus praesent"]
    second_group = ["justo Praesent luctus", "luctus praesent est"]
    text_field._build_vocab([first_group, second_group])

    ordered_tokens = [
        '<pad>',
        '<unk>',
        'justo',
        'Praesent',
        'luctus',
        'praesent',
        'est',
    ]
    expected = {token: index for index, token in enumerate(ordered_tokens)}
    assert text_field.vocab == expected
def test_build_vocab_nested_list_in_dict():
    """Check that sentence lists stored as dict values are consumed.

    The data is a list holding one dict whose values are lists of
    sentences; the expected vocabulary matches what the same sentences
    produce as plain nested lists.
    """
    text_field = TextField()

    record = {
        'text1': ["justo Praesent luctus", "luctus praesent"],
        'text2': ["justo Praesent luctus", "luctus praesent est"],
    }
    text_field._build_vocab([record])

    ordered_tokens = [
        '<pad>',
        '<unk>',
        'justo',
        'Praesent',
        'luctus',
        'praesent',
        'est',
    ]
    expected = {token: index for index, token in enumerate(ordered_tokens)}
    assert text_field.vocab == expected