def test_index_converts_field_correctly(self):
    vocab = Vocabulary()
    sentence_index = vocab.add_token_to_namespace("sentence", namespace='words')
    capital_a_index = vocab.add_token_to_namespace("A", namespace='words')
    capital_a_char_index = vocab.add_token_to_namespace("A", namespace='characters')
    s_index = vocab.add_token_to_namespace("s", namespace='characters')
    e_index = vocab.add_token_to_namespace("e", namespace='characters')
    n_index = vocab.add_token_to_namespace("n", namespace='characters')
    t_index = vocab.add_token_to_namespace("t", namespace='characters')
    c_index = vocab.add_token_to_namespace("c", namespace='characters')

    field = TextField([Token(t) for t in ["A", "sentence"]],
                      {"words": SingleIdTokenIndexer(namespace="words")})
    field.index(vocab)
    # pylint: disable=protected-access
    assert field._indexed_tokens["words"] == [capital_a_index, sentence_index]

    field1 = TextField([Token(t) for t in ["A", "sentence"]],
                       {"characters": TokenCharactersIndexer(namespace="characters")})
    field1.index(vocab)
    assert field1._indexed_tokens["characters"] == [[capital_a_char_index],
                                                    [s_index, e_index, n_index, t_index,
                                                     e_index, n_index, c_index, e_index]]

    field2 = TextField([Token(t) for t in ["A", "sentence"]],
                       token_indexers={"words": SingleIdTokenIndexer(namespace="words"),
                                       "characters": TokenCharactersIndexer(namespace="characters")})
    field2.index(vocab)
    assert field2._indexed_tokens["words"] == [capital_a_index, sentence_index]
    assert field2._indexed_tokens["characters"] == [[capital_a_char_index],
                                                    [s_index, e_index, n_index, t_index,
                                                     e_index, n_index, c_index, e_index]]
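# Hedged companion sketch (not part of the original test): instead of registering every token
# by hand, a Vocabulary is usually built from counted instances via Vocabulary.from_instances;
# indexing the TextField then fills the same _indexed_tokens structure checked above.
from allennlp.data import Instance, Token, Vocabulary
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

field = TextField([Token(t) for t in ["A", "sentence"]],
                  {"words": SingleIdTokenIndexer(namespace="words")})
instance = Instance({"text": field})
vocab = Vocabulary.from_instances([instance])
field.index(vocab)
# pylint: disable=protected-access
assert field._indexed_tokens["words"] == [vocab.get_token_index("A", namespace="words"),
                                          vocab.get_token_index("sentence", namespace="words")]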
def test_field_counts_vocab_items_correctly(self):
    field = TextField([Token(t) for t in ["This", "is", "a", "sentence", "."]],
                      token_indexers={"words": SingleIdTokenIndexer("words")})
    namespace_token_counts = defaultdict(lambda: defaultdict(int))
    field.count_vocab_items(namespace_token_counts)

    assert namespace_token_counts["words"]["This"] == 1
    assert namespace_token_counts["words"]["is"] == 1
    assert namespace_token_counts["words"]["a"] == 1
    assert namespace_token_counts["words"]["sentence"] == 1
    assert namespace_token_counts["words"]["."] == 1
    assert list(namespace_token_counts.keys()) == ["words"]

    field = TextField([Token(t) for t in ["This", "is", "a", "sentence", "."]],
                      token_indexers={"characters": TokenCharactersIndexer("characters")})
    namespace_token_counts = defaultdict(lambda: defaultdict(int))
    field.count_vocab_items(namespace_token_counts)

    assert namespace_token_counts["characters"]["T"] == 1
    assert namespace_token_counts["characters"]["h"] == 1
    assert namespace_token_counts["characters"]["i"] == 2
    assert namespace_token_counts["characters"]["s"] == 3
    assert namespace_token_counts["characters"]["a"] == 1
    assert namespace_token_counts["characters"]["e"] == 3
    assert namespace_token_counts["characters"]["n"] == 2
    assert namespace_token_counts["characters"]["t"] == 1
    assert namespace_token_counts["characters"]["c"] == 1
    assert namespace_token_counts["characters"]["."] == 1
    assert list(namespace_token_counts.keys()) == ["characters"]

    field = TextField([Token(t) for t in ["This", "is", "a", "sentence", "."]],
                      token_indexers={"words": SingleIdTokenIndexer("words"),
                                      "characters": TokenCharactersIndexer("characters")})
    namespace_token_counts = defaultdict(lambda: defaultdict(int))
    field.count_vocab_items(namespace_token_counts)

    assert namespace_token_counts["characters"]["T"] == 1
    assert namespace_token_counts["characters"]["h"] == 1
    assert namespace_token_counts["characters"]["i"] == 2
    assert namespace_token_counts["characters"]["s"] == 3
    assert namespace_token_counts["characters"]["a"] == 1
    assert namespace_token_counts["characters"]["e"] == 3
    assert namespace_token_counts["characters"]["n"] == 2
    assert namespace_token_counts["characters"]["t"] == 1
    assert namespace_token_counts["characters"]["c"] == 1
    assert namespace_token_counts["characters"]["."] == 1
    assert namespace_token_counts["words"]["This"] == 1
    assert namespace_token_counts["words"]["is"] == 1
    assert namespace_token_counts["words"]["a"] == 1
    assert namespace_token_counts["words"]["sentence"] == 1
    assert namespace_token_counts["words"]["."] == 1
    assert set(namespace_token_counts.keys()) == {"words", "characters"}
def test_invalid_vocab_extension(self):
    vocab_dir = self.TEST_DIR / 'vocab_save'
    original_vocab = Vocabulary(non_padded_namespaces=["tokens1"])
    original_vocab.add_token_to_namespace("a", namespace="tokens1")
    original_vocab.add_token_to_namespace("b", namespace="tokens1")
    original_vocab.add_token_to_namespace("p", namespace="tokens2")
    original_vocab.save_to_files(vocab_dir)

    text_field1 = TextField([Token(t) for t in ["a", "c"]],
                            {"tokens1": SingleIdTokenIndexer("tokens1")})
    text_field2 = TextField([Token(t) for t in ["p", "q", "r"]],
                            {"tokens2": SingleIdTokenIndexer("tokens2")})
    instances = Batch([Instance({"text1": text_field1, "text2": text_field2})])

    # Each of the following should raise: tokens1 is non-padded in original_vocab but padded in the extension request.
    params = Params({"directory_path": vocab_dir,
                     "extend": True,
                     "non_padded_namespaces": []})
    with pytest.raises(ConfigurationError):
        _ = Vocabulary.from_params(params, instances)
    with pytest.raises(ConfigurationError):
        extended_vocab = copy.copy(original_vocab)
        params = Params({"non_padded_namespaces": []})
        extended_vocab.extend_from_instances(params, instances)
    with pytest.raises(ConfigurationError):
        extended_vocab = copy.copy(original_vocab)
        extended_vocab._extend(non_padded_namespaces=[],
                               tokens_to_add={"tokens1": ["a"], "tokens2": ["p"]})

    # None of the following should raise: the overlapping namespaces have the same padding setting.
    params = Params({"directory_path": vocab_dir,
                     "extend": True,
                     "non_padded_namespaces": ["tokens1"]})
    Vocabulary.from_params(params, instances)
    extended_vocab = copy.copy(original_vocab)
    params = Params({"non_padded_namespaces": ["tokens1"]})
    extended_vocab.extend_from_instances(params, instances)
    extended_vocab = copy.copy(original_vocab)
    extended_vocab._extend(non_padded_namespaces=["tokens1"],
                           tokens_to_add={"tokens1": ["a"], "tokens2": ["p"]})

    # Each of the following should raise: tokens2 is non-padded in the extension request but padded in original_vocab.
    params = Params({"directory_path": vocab_dir,
                     "extend": True,
                     "non_padded_namespaces": ["tokens1", "tokens2"]})
    with pytest.raises(ConfigurationError):
        _ = Vocabulary.from_params(params, instances)
    with pytest.raises(ConfigurationError):
        extended_vocab = copy.copy(original_vocab)
        params = Params({"non_padded_namespaces": ["tokens1", "tokens2"]})
        extended_vocab.extend_from_instances(params, instances)
    with pytest.raises(ConfigurationError):
        extended_vocab = copy.copy(original_vocab)
        extended_vocab._extend(non_padded_namespaces=["tokens1", "tokens2"],
                               tokens_to_add={"tokens1": ["a"], "tokens2": ["p"]})
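# Hedged companion sketch (not part of the original test; assumes original_vocab, instances,
# copy, and Params as constructed inside the test above): after a valid extension, the new
# tokens seen in the instances become part of the extended vocabulary.
extended_vocab = copy.copy(original_vocab)
extended_vocab.extend_from_instances(Params({"non_padded_namespaces": ["tokens1"]}), instances)
assert "c" in extended_vocab.get_token_to_index_vocabulary("tokens1")
assert "q" in extended_vocab.get_token_to_index_vocabulary("tokens2")
assert "r" in extended_vocab.get_token_to_index_vocabulary("tokens2")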
def setUp(self):
    super(IteratorTest, self).setUp()
    self.token_indexers = {"tokens": SingleIdTokenIndexer()}
    self.vocab = Vocabulary()
    self.this_index = self.vocab.add_token_to_namespace('this')
    self.is_index = self.vocab.add_token_to_namespace('is')
    self.a_index = self.vocab.add_token_to_namespace('a')
    self.sentence_index = self.vocab.add_token_to_namespace('sentence')
    self.another_index = self.vocab.add_token_to_namespace('another')
    self.yet_index = self.vocab.add_token_to_namespace('yet')
    self.very_index = self.vocab.add_token_to_namespace('very')
    self.long_index = self.vocab.add_token_to_namespace('long')

    instances = [
            self.create_instance(["this", "is", "a", "sentence"]),
            self.create_instance(["this", "is", "another", "sentence"]),
            self.create_instance(["yet", "another", "sentence"]),
            self.create_instance(["this", "is", "a", "very", "very", "very", "very", "long", "sentence"]),
            self.create_instance(["sentence"]),
    ]

    class LazyIterable:
        def __iter__(self):
            return (instance for instance in instances)

    self.instances = instances
    self.lazy_instances = LazyIterable()
def setUp(self):
    super().setUp()
    token_indexer = {"tokens": SingleIdTokenIndexer()}
    field1 = TextField([Token(t) for t in ["this", "is", "a", "sentence", "."]],
                       token_indexer)
    field2 = TextField([Token(t) for t in ["this", "is", "a", "different", "sentence", "."]],
                       token_indexer)
    field3 = TextField([Token(t) for t in ["here", "is", "a", "sentence", "."]],
                       token_indexer)
    field4 = TextField([Token(t) for t in ["this", "is", "short"]],
                       token_indexer)
    self.instances = [Instance({"text1": field1, "text2": field2}),
                      Instance({"text1": field3, "text2": field4})]
def __init__(self,
             max_span_width: int,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False) -> None:
    super().__init__(lazy)
    self._max_span_width = max_span_width
    self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
def setUp(self):
    token_indexer = SingleIdTokenIndexer("tokens")
    text_field = TextField([Token(t) for t in ["a", "a", "a", "a", "b", "b", "c", "c", "c"]],
                           {"tokens": token_indexer})
    self.instance = Instance({"text": text_field})
    self.dataset = Batch([self.instance])
    super(TestVocabulary, self).setUp()
def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False,
             tokenizer: Tokenizer = None) -> None:
    super().__init__(lazy)
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    self._tokenizer = tokenizer or WordTokenizer(SpacyWordSplitter(pos_tags=True))
def __init__(self,
             lazy: bool = False,
             tables_directory: str = None,
             dpd_output_directory: str = None,
             max_dpd_logical_forms: int = 10,
             sort_dpd_logical_forms: bool = True,
             max_dpd_tries: int = 20,
             keep_if_no_dpd: bool = False,
             tokenizer: Tokenizer = None,
             question_token_indexers: Dict[str, TokenIndexer] = None,
             table_token_indexers: Dict[str, TokenIndexer] = None,
             use_table_for_vocab: bool = False,
             linking_feature_extractors: List[str] = None,
             include_table_metadata: bool = False,
             max_table_tokens: int = None,
             output_agendas: bool = False) -> None:
    super().__init__(lazy=lazy)
    self._tables_directory = tables_directory
    self._dpd_output_directory = dpd_output_directory
    self._max_dpd_logical_forms = max_dpd_logical_forms
    self._sort_dpd_logical_forms = sort_dpd_logical_forms
    self._max_dpd_tries = max_dpd_tries
    self._keep_if_no_dpd = keep_if_no_dpd
    self._tokenizer = tokenizer or WordTokenizer(SpacyWordSplitter(pos_tags=True))
    self._question_token_indexers = question_token_indexers or {"tokens": SingleIdTokenIndexer()}
    self._table_token_indexers = table_token_indexers or self._question_token_indexers
    self._use_table_for_vocab = use_table_for_vocab
    self._linking_feature_extractors = linking_feature_extractors
    self._include_table_metadata = include_table_metadata
    self._basic_types = set(str(type_) for type_ in wt_types.BASIC_TYPES)
    self._max_table_tokens = max_table_tokens
    self._output_agendas = output_agendas
def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             tag_label: str = "ner",
             feature_labels: Sequence[str] = (),
             lazy: bool = False,
             coding_scheme: str = "IOB1",
             label_namespace: str = "labels",
             ignore_ner_tags: bool = False) -> None:
    super().__init__(lazy)
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    if tag_label is not None and tag_label not in _VALID_LABELS:
        raise ConfigurationError("unknown tag label type: {}".format(tag_label))
    for label in feature_labels:
        if label not in _VALID_LABELS:
            raise ConfigurationError("unknown feature label type: {}".format(label))
    if coding_scheme not in ("IOB1", "BIOUL"):
        raise ConfigurationError("unknown coding_scheme: {}".format(coding_scheme))

    self.tag_label = tag_label
    self.feature_labels = set(feature_labels)
    self.coding_scheme = coding_scheme
    self.label_namespace = label_namespace
    self.ignore_ner_tags = ignore_ner_tags
def test_as_tensor_handles_words(self):
    field = TextField([Token(t) for t in ["This", "is", "a", "sentence", "."]],
                      token_indexers={"words": SingleIdTokenIndexer("words")})
    field.index(self.vocab)
    padding_lengths = field.get_padding_lengths()
    tensor_dict = field.as_tensor(padding_lengths)
    numpy.testing.assert_array_almost_equal(tensor_dict["words"].detach().cpu().numpy(),
                                            numpy.array([1, 1, 1, 2, 1]))
def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False,
             label_namespace_prefix: str = "") -> None:
    super().__init__(lazy=lazy)
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    self._label_namespace_prefix = label_namespace_prefix
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None) -> None:
    super().__init__(lazy)
    self._tokenizer = tokenizer or WordTokenizer(JustSpacesWordSplitter())
    self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
def __init__(self,
             word_tag_delimiter: str = DEFAULT_WORD_TAG_DELIMITER,
             token_delimiter: str = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False) -> None:
    super().__init__(lazy)
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    self._word_tag_delimiter = word_tag_delimiter
    self._token_delimiter = token_delimiter
def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             use_language_specific_pos: bool = False,
             lazy: bool = False) -> None:
    super().__init__(lazy)
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    self.use_language_specific_pos = use_language_specific_pos
def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             domain_identifier: str = None,
             lazy: bool = False) -> None:
    super().__init__(lazy)
    self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
    self._domain_identifier = domain_identifier
def __init__(self,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False) -> None:
    super().__init__(lazy)
    self._tokenizer = tokenizer or WordTokenizer()
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             sentence_token_indexers: Dict[str, TokenIndexer] = None,
             nonterminal_indexers: Dict[str, TokenIndexer] = None,
             terminal_indexers: Dict[str, TokenIndexer] = None,
             output_agendas: bool = True) -> None:
    super().__init__(lazy)
    self._tokenizer = tokenizer or WordTokenizer()
    self._sentence_token_indexers = sentence_token_indexers or {"tokens": SingleIdTokenIndexer()}
    self._nonterminal_indexers = nonterminal_indexers or {"tokens": SingleIdTokenIndexer("rule_labels")}
    self._terminal_indexers = terminal_indexers or {"tokens": SingleIdTokenIndexer("rule_labels")}
    self._output_agendas = output_agendas
def test_padding_lengths_are_computed_correctly(self):
    field = TextField([Token(t) for t in ["This", "is", "a", "sentence", "."]],
                      token_indexers={"words": SingleIdTokenIndexer("words")})
    field.index(self.vocab)
    padding_lengths = field.get_padding_lengths()
    assert padding_lengths == {"num_tokens": 5}

    field = TextField([Token(t) for t in ["This", "is", "a", "sentence", "."]],
                      token_indexers={"characters": TokenCharactersIndexer("characters")})
    field.index(self.vocab)
    padding_lengths = field.get_padding_lengths()
    assert padding_lengths == {"num_tokens": 5, "num_token_characters": 8}

    field = TextField([Token(t) for t in ["This", "is", "a", "sentence", "."]],
                      token_indexers={"characters": TokenCharactersIndexer("characters"),
                                      "words": SingleIdTokenIndexer("words")})
    field.index(self.vocab)
    padding_lengths = field.get_padding_lengths()
    assert padding_lengths == {"num_tokens": 5, "num_token_characters": 8}
def setUp(self): self.vocab = Vocabulary() self.vocab.add_token_to_namespace("this") self.vocab.add_token_to_namespace("is") self.vocab.add_token_to_namespace("a") self.vocab.add_token_to_namespace("sentence") self.vocab.add_token_to_namespace(".") self.token_indexer = {"tokens": SingleIdTokenIndexer()} self.instances = self.get_instances() super(TestDataset, self).setUp()
def test_count_vocab_items_respects_casing(self):
    indexer = SingleIdTokenIndexer("words")
    counter = defaultdict(lambda: defaultdict(int))
    indexer.count_vocab_items(Token("Hello"), counter)
    indexer.count_vocab_items(Token("hello"), counter)
    assert counter["words"] == {"hello": 1, "Hello": 1}

    indexer = SingleIdTokenIndexer("words", lowercase_tokens=True)
    counter = defaultdict(lambda: defaultdict(int))
    indexer.count_vocab_items(Token("Hello"), counter)
    indexer.count_vocab_items(Token("hello"), counter)
    assert counter["words"] == {"hello": 2}
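# Hedged companion sketch (not part of the original test; assumes the Vocabulary, Token, and
# TextField imports used by the TextField tests above): lowercase_tokens also applies at
# indexing time, so "Hello" and "hello" map to the same id once a field is indexed.
vocab = Vocabulary()
hello_index = vocab.add_token_to_namespace("hello", namespace="words")
field = TextField([Token("Hello"), Token("hello")],
                  {"words": SingleIdTokenIndexer(namespace="words", lowercase_tokens=True)})
field.index(vocab)
# pylint: disable=protected-access
assert field._indexed_tokens["words"] == [hello_index, hello_index]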
def __init__(self,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False,
             num_context_answers: int = 0) -> None:
    super().__init__(lazy)
    self._tokenizer = tokenizer or WordTokenizer()
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    self._num_context_answers = num_context_answers
def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             use_pos_tags: bool = True,
             lazy: bool = False,
             label_namespace_prefix: str = "",
             pos_label_namespace: str = "pos") -> None:
    super().__init__(lazy=lazy)
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    self._use_pos_tags = use_pos_tags
    self._label_namespace_prefix = label_namespace_prefix
    self._pos_label_namespace = pos_label_namespace
def setUp(self): self.vocab = Vocabulary() self.vocab.add_token_to_namespace("this", "words") self.vocab.add_token_to_namespace("is", "words") self.vocab.add_token_to_namespace("a", "words") self.vocab.add_token_to_namespace("sentence", 'words') self.vocab.add_token_to_namespace("s", 'characters') self.vocab.add_token_to_namespace("e", 'characters') self.vocab.add_token_to_namespace("n", 'characters') self.vocab.add_token_to_namespace("t", 'characters') self.vocab.add_token_to_namespace("c", 'characters') for label in ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k']: self.vocab.add_token_to_namespace(label, 'labels') self.word_indexer = {"words": SingleIdTokenIndexer("words")} self.words_and_characters_indexers = { "words": SingleIdTokenIndexer("words"), "characters": TokenCharactersIndexer("characters") } self.field1 = TextField( [Token(t) for t in ["this", "is", "a", "sentence"]], self.word_indexer) self.field2 = TextField( [Token(t) for t in ["this", "is", "a", "different", "sentence"]], self.word_indexer) self.field3 = TextField( [Token(t) for t in ["this", "is", "another", "sentence"]], self.word_indexer) self.empty_text_field = self.field1.empty_field() self.index_field = IndexField(1, self.field1) self.empty_index_field = self.index_field.empty_field() self.sequence_label_field = SequenceLabelField([1, 1, 0, 1], self.field1) self.empty_sequence_label_field = self.sequence_label_field.empty_field( ) super(TestListField, self).setUp()
def __init__(self,
             tokens_per_instance: int = None,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False) -> None:
    super().__init__(lazy)
    self._tokenizer = tokenizer or WordTokenizer()
    self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
    self._tokens_per_instance = tokens_per_instance

    # No matter how you want to represent the input, we'll always represent the output as a
    # single token id.  This code lets you learn a language model that concatenates word
    # embeddings with character-level encoders, in order to predict the word token that comes
    # next.
    self._output_indexer: Dict[str, TokenIndexer] = None
    for name, indexer in self._token_indexers.items():
        if isinstance(indexer, SingleIdTokenIndexer):
            self._output_indexer = {name: indexer}
            break
    else:
        self._output_indexer = {"tokens": SingleIdTokenIndexer()}
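# Hedged illustration of the for/else fallback above (the class name and import below are
# assumptions; only the __init__ is shown in this file): when no SingleIdTokenIndexer appears
# among the input indexers, the output side falls back to a fresh {"tokens": SingleIdTokenIndexer()}.
from allennlp.data.dataset_readers import LanguageModelingReader  # assumed owner of the __init__ above

reader = LanguageModelingReader(token_indexers={"characters": TokenCharactersIndexer()})
# pylint: disable=protected-access
assert "tokens" in reader._output_indexer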
def __init__(self,
             source_tokenizer: Tokenizer = None,
             target_tokenizer: Tokenizer = None,
             source_token_indexers: Dict[str, TokenIndexer] = None,
             target_token_indexers: Dict[str, TokenIndexer] = None,
             source_add_start_token: bool = True,
             lazy: bool = False) -> None:
    super().__init__(lazy)
    self._source_tokenizer = source_tokenizer or WordTokenizer()
    self._target_tokenizer = target_tokenizer or self._source_tokenizer
    self._source_token_indexers = source_token_indexers or {"tokens": SingleIdTokenIndexer()}
    self._target_token_indexers = target_token_indexers or self._source_token_indexers
    self._source_add_start_token = source_add_start_token
def test_max_vocab_size_partial_dict(self):
    indexers = {"tokens": SingleIdTokenIndexer(),
                "token_characters": TokenCharactersIndexer()}
    instance = Instance({
            'text': TextField([Token(w) for w in 'Abc def ghi jkl mno pqr stu vwx yz'.split(' ')],
                              indexers)
    })
    dataset = Batch([instance])
    params = Params({"max_vocab_size": {"tokens": 1}})

    vocab = Vocabulary.from_params(params=params, instances=dataset)
    assert len(vocab.get_index_to_token_vocabulary("tokens").values()) == 3  # 1 kept token + padding + OOV
    assert len(vocab.get_index_to_token_vocabulary("token_characters").values()) == 28  # 26 chars + padding + OOV
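# Hedged companion sketch (not part of the original test; assumes the same imports as the test
# above): when max_vocab_size is a single integer rather than a partial dict, the cap applies
# to every namespace.
indexers = {"tokens": SingleIdTokenIndexer(), "token_characters": TokenCharactersIndexer()}
instance = Instance({
        'text': TextField([Token(w) for w in 'Abc def ghi jkl mno pqr stu vwx yz'.split(' ')],
                          indexers)
})
dataset = Batch([instance])
vocab = Vocabulary.from_params(params=Params({"max_vocab_size": 1}), instances=dataset)
assert len(vocab.get_index_to_token_vocabulary("tokens").values()) == 3  # 1 kept token + padding + OOV
assert len(vocab.get_index_to_token_vocabulary("token_characters").values()) == 3  # 1 kept char + padding + OOV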
def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             use_subtrees: bool = False,
             granularity: str = "5-class",
             lazy: bool = False) -> None:
    super().__init__(lazy=lazy)
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    self._use_subtrees = use_subtrees
    allowed_granularities = ["5-class", "3-class", "2-class"]
    if granularity not in allowed_granularities:
        raise ConfigurationError("granularity is {}, but expected one of: {}".format(
                granularity, allowed_granularities))
    self._granularity = granularity
def test_as_tensor_handles_words_and_characters_with_longer_lengths(self):
    field = TextField([Token(t) for t in ["a", "sentence", "."]],
                      token_indexers={"words": SingleIdTokenIndexer("words"),
                                      "characters": TokenCharactersIndexer("characters")})
    field.index(self.vocab)
    padding_lengths = field.get_padding_lengths()
    padding_lengths["num_tokens"] = 5
    padding_lengths["num_token_characters"] = 10
    tensor_dict = field.as_tensor(padding_lengths)

    numpy.testing.assert_array_almost_equal(tensor_dict["words"].detach().cpu().numpy(),
                                            numpy.array([1, 2, 1, 0, 0]))
    numpy.testing.assert_array_almost_equal(tensor_dict["characters"].detach().cpu().numpy(),
                                            numpy.array([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                                                         [3, 4, 5, 6, 4, 5, 7, 4, 0, 0],
                                                         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                                                         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                                                         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]))
def setUp(self):
    self.tokenizer = WordTokenizer(SpacyWordSplitter(pos_tags=True))
    self.utterance = self.tokenizer.tokenize("where is mersin?")
    self.token_indexers = {"tokens": SingleIdTokenIndexer("tokens")}

    json = {
            'question': self.utterance,
            'columns': ['Name in English', 'Location in English'],
            'cells': [['Paradeniz', 'Mersin'],
                      ['Lake Gala', 'Edirne']]
    }
    self.graph = TableQuestionKnowledgeGraph.read_from_json(json)

    self.vocab = Vocabulary()
    self.name_index = self.vocab.add_token_to_namespace("name", namespace='tokens')
    self.in_index = self.vocab.add_token_to_namespace("in", namespace='tokens')
    self.english_index = self.vocab.add_token_to_namespace("english", namespace='tokens')
    self.location_index = self.vocab.add_token_to_namespace("location", namespace='tokens')
    self.paradeniz_index = self.vocab.add_token_to_namespace("paradeniz", namespace='tokens')
    self.mersin_index = self.vocab.add_token_to_namespace("mersin", namespace='tokens')
    self.lake_index = self.vocab.add_token_to_namespace("lake", namespace='tokens')
    self.gala_index = self.vocab.add_token_to_namespace("gala", namespace='tokens')
    self.negative_one_index = self.vocab.add_token_to_namespace("-1", namespace='tokens')
    self.zero_index = self.vocab.add_token_to_namespace("0", namespace='tokens')
    self.one_index = self.vocab.add_token_to_namespace("1", namespace='tokens')

    self.oov_index = self.vocab.get_token_index('random OOV string', namespace='tokens')
    self.edirne_index = self.oov_index
    self.field = KnowledgeGraphField(self.graph, self.utterance, self.token_indexers, self.tokenizer)

    super(KnowledgeGraphFieldTest, self).setUp()