def setup_from_data(self, data: Iterable[Tuple[QASetting, List[Answer]]]):
    """Populate the shared resources (vocabs, embeddings, sizes) from ``data``.

    Steps, in order:

    1. If the word vocab is not frozen yet, fill it from the questions in
       ``data`` (lowercasing controlled by the ``'lowercase'`` config entry,
       default ``True``) and freeze it.
    2. If ``vocab.emb`` is set, build a ``[len(vocab), vocab.emb_length]``
       matrix; rows for symbols without an entry in ``vocab.emb`` stay zero.
    3. Unless a frozen answer vocab already exists, create one from the
       questions and answers in ``data`` and freeze it.
    4. Store the answer vocab size in ``config['answer_size']`` and the
       result of ``preprocessing.char_vocab_from_vocab`` as ``char_vocab``.

    Args:
        data: (question setting, list of answers) pairs. May be any
            iterable, including a one-shot iterator or generator.
    """
    # ``data`` is traversed several times below, and two of those traversals
    # are live at the same time inside ``create_answer_vocab``. A one-shot
    # iterator would be exhausted (or interleaved) and silently yield an
    # empty/corrupt answer vocab, so materialise it. Re-iterable containers
    # are left untouched to avoid a needless copy.
    if iter(data) is data:
        data = list(data)

    shared = self.shared_resources
    vocab = shared.vocab

    if not vocab.frozen:
        preprocessing.fill_vocab(
            (question for question, _ in data),
            vocab,
            lowercase=shared.config.get('lowercase', True))
        vocab.freeze()

    if vocab.emb is not None:
        embeddings = np.zeros([len(vocab), vocab.emb_length])
        for symbol, index in vocab.sym2id.items():
            vector = vocab.emb.get(symbol)
            if vector is not None:
                embeddings[index] = vector
        shared.embeddings = embeddings

    # A missing attribute and an explicit ``None`` both mean "not built yet".
    answer_vocab = getattr(shared, 'answer_vocab', None)
    if answer_vocab is None or not answer_vocab.frozen:
        shared.answer_vocab = util.create_answer_vocab(
            qa_settings=(question for question, _ in data),
            answers=(answer for _, answers in data for answer in answers))
        shared.answer_vocab.freeze()

    # Set unconditionally so the config also reflects a pre-existing frozen
    # answer vocab.
    shared.config['answer_size'] = len(shared.answer_vocab)
    shared.char_vocab = preprocessing.char_vocab_from_vocab(shared.vocab)
def setup_from_data(self, data: Iterable[Tuple[QASetting, List[Answer]]]):
    """Finalise the word vocab (if needed) and derive the character vocab.

    When the shared word vocab is not frozen, it is filled from the
    questions in ``data`` and then frozen. The ``'lowercase'`` config entry
    (default ``False``) is forwarded to ``preprocessing.fill_vocab``.
    Afterwards ``preprocessing.char_vocab_from_vocab`` is applied to the
    word vocab and the result is stored as ``shared_resources.char_vocab``.

    Args:
        data: (question setting, list of answers) pairs; only the question
            settings are read here.
    """
    resources = self.shared_resources
    word_vocab = resources.vocab

    if not word_vocab.frozen:
        questions = (question for question, _answers in data)
        lowercase = resources.config.get("lowercase", False)
        preprocessing.fill_vocab(questions, word_vocab, lowercase)
        word_vocab.freeze()

    resources.char_vocab = preprocessing.char_vocab_from_vocab(resources.vocab)
def setup_from_data(self, data: Iterable[Tuple[QASetting, List[Answer]]]):
    """Populate word, answer and character vocabs in the shared resources.

    Steps, in order:

    1. If the word vocab is not frozen yet, fill it from the questions in
       ``data`` (lowercasing controlled by the ``'lowercase'`` config entry,
       default ``True``) and freeze it.
    2. Unless a frozen answer vocab already exists, create one from the
       questions and answers in ``data`` and freeze it.
    3. Store the result of ``preprocessing.char_vocab_from_vocab`` as
       ``char_vocab``.

    Args:
        data: (question setting, list of answers) pairs. May be any
            iterable, including a one-shot iterator or generator.
    """
    # ``data`` is traversed several times below, and two of those traversals
    # are live at the same time inside ``create_answer_vocab``. A one-shot
    # iterator would be exhausted (or interleaved) and silently yield an
    # empty/corrupt answer vocab, so materialise it. Re-iterable containers
    # are left untouched to avoid a needless copy.
    if iter(data) is data:
        data = list(data)

    shared = self.shared_resources
    vocab = shared.vocab

    if not vocab.frozen:
        preprocessing.fill_vocab(
            (question for question, _ in data),
            vocab,
            lowercase=shared.config.get('lowercase', True))
        vocab.freeze()

    # A missing attribute and an explicit ``None`` both mean "not built yet".
    answer_vocab = getattr(shared, 'answer_vocab', None)
    if answer_vocab is None or not answer_vocab.frozen:
        shared.answer_vocab = util.create_answer_vocab(
            qa_settings=(question for question, _ in data),
            answers=(answer for _, answers in data for answer in answers))
        shared.answer_vocab.freeze()

    shared.char_vocab = preprocessing.char_vocab_from_vocab(shared.vocab)
def setup_from_data(self, data: Iterable[Tuple[QASetting, List[Answer]]]):
    """Derive the character vocab from the shared word vocab.

    Applies ``preprocessing.char_vocab_from_vocab`` to
    ``shared_resources.vocab`` and stores the result as
    ``shared_resources.char_vocab``.

    Args:
        data: accepted for interface compatibility; not read here.
    """
    resources = self.shared_resources
    word_vocab = resources.vocab
    resources.char_vocab = preprocessing.char_vocab_from_vocab(word_vocab)
def setup_from_data(self, data: Iterable[Tuple[QASetting, List[Answer]]]):
    """Derive the character vocab from the shared word vocab.

    Applies ``char_vocab_from_vocab`` to ``shared_vocab_config.vocab`` and
    stores the result as ``shared_vocab_config.char_vocab``.

    Args:
        data: accepted for interface compatibility; not read here.
    """
    vocab_config = self.shared_vocab_config
    word_vocab = vocab_config.vocab
    vocab_config.char_vocab = char_vocab_from_vocab(word_vocab)