def _load_processed_data(self):
    """Load previously processed data artifacts from the data root directory.

    Returns
    -------
    train_examples : List[Tuple]
        Processed train examples. Each tuple consists of question_id,
        record_index, context_tokens_indices, question_tokens_indices,
        context_chars_indices, question_char_indices,
        start_token_index_of_the_answer, end_token_index_of_the_answer,
        context, context_tokens_spans
    dev_examples : List[Tuple]
        Processed dev examples. Each tuple consists of question_id,
        record_index, context_tokens_indices, question_tokens_indices,
        context_chars_indices, question_char_indices,
        start_token_index_of_the_answer, end_token_index_of_the_answer,
        context, context_tokens_spans
    word_vocab : Vocab
        Word-level vocabulary
    char_vocab : Vocab
        Char-level vocabulary
    """
    def _load_json(file_name):
        # All four artifacts live under the same data root. Use explicit
        # UTF-8 so loading does not depend on the platform locale default
        # (matches the encoding used elsewhere in this file).
        path = os.path.join(self._data_root_path, file_name)
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    train_examples = _load_json(self._processed_train_data_file_name)
    dev_examples = _load_json(self._processed_dev_data_file_name)
    # NOTE(review): here from_json receives an already-parsed JSON object,
    # while vocab() below passes the raw JSON text — confirm Vocab.from_json
    # accepts both forms.
    word_vocab = Vocab.from_json(_load_json(self._word_vocab_file_name))
    char_vocab = Vocab.from_json(_load_json(self._char_vocab_file_name))
    return train_examples, dev_examples, word_vocab, char_vocab
def vocab(self):
    """Read ``vocab.json`` from ``self._path`` and deserialize it.

    Returns
    -------
    Vocab
        Vocabulary built via ``Vocab.from_json`` from the file's raw
        JSON text.
    """
    vocab_path = os.path.join(self._path, 'vocab.json')
    with io.open(vocab_path, 'r', encoding='utf-8') as vocab_file:
        serialized = vocab_file.read()
    # NOTE(review): from_json is handed the raw JSON string here, whereas
    # _load_processed_data passes a parsed object — confirm both are valid.
    return Vocab.from_json(serialized)