Ejemplo n.º 1
0
    def setup_from_data(self, data: Iterable[Tuple[QASetting, List[Answer]]]):
        """Build the vocabularies held in ``self.shared_resources`` from ``data``.

        Steps, in order:

        1. If the word vocabulary is not frozen, fill it from the first
           element of every ``data`` pair, freeze it, and -- when the
           vocabulary carries embeddings (``vocab.emb``) -- build a
           ``len(vocab) x vocab.emb_length`` matrix whose rows are copied
           from ``vocab.emb``; symbols without an embedding keep a zero row.
        2. If no frozen answer vocabulary exists yet, create one from
           ``data`` and freeze it.
        3. Store the answer vocabulary size in ``config['answer_size']``.
        4. Derive the character vocabulary from the word vocabulary.

        Args:
            data: pairs of (question setting, answers). May be a one-shot
                iterator; it is buffered into a list only if it actually
                has to be read.
        """
        resources = self.shared_resources
        vocab = resources.vocab
        if not vocab.frozen:
            # `data` may be read again further down. An object that is its
            # own iterator (e.g. a generator) can only be consumed once, so
            # buffer it; re-iterable containers are left untouched.
            if iter(data) is data:
                data = list(data)
            preprocessing.fill_vocab(
                (q for q, _ in data),
                vocab,
                lowercase=resources.config.get('lowercase', True))
            vocab.freeze()
            if vocab.emb is not None:
                embeddings = np.zeros([len(vocab), vocab.emb_length])
                resources.embeddings = embeddings
                for w, i in vocab.sym2id.items():
                    e = vocab.emb.get(w)
                    if e is not None:
                        embeddings[i] = e

        if (not hasattr(resources, 'answer_vocab')
                or not resources.answer_vocab.frozen):
            # Two generators below walk `data` independently; a one-shot
            # iterator would be split between them, so buffer it first.
            if iter(data) is data:
                data = list(data)
            resources.answer_vocab = util.create_answer_vocab(
                qa_settings=(q for q, _ in data),
                answers=(a for _, ass in data for a in ass))
            resources.answer_vocab.freeze()
        resources.config['answer_size'] = len(resources.answer_vocab)
        resources.char_vocab = preprocessing.char_vocab_from_vocab(
            resources.vocab)
Ejemplo n.º 2
0
 def setup_from_data(self, data: Iterable[Tuple[QASetting, List[Answer]]]):
     """Prepare the word and character vocabularies.

     If the word vocabulary is not frozen yet, it is filled from the first
     element of every ``data`` pair (lowercasing controlled by the
     ``"lowercase"`` config entry, default ``False``) and then frozen.
     The character vocabulary is always (re)derived from the word
     vocabulary afterwards.
     """
     resources = self.shared_resources
     if not resources.vocab.frozen:
         questions = (setting for setting, _ in data)
         word_vocab = resources.vocab
         lowercase = resources.config.get("lowercase", False)
         preprocessing.fill_vocab(questions, word_vocab, lowercase)
         resources.vocab.freeze()
     # Character-level vocabulary is derived from the (now frozen) word vocab.
     resources.char_vocab = preprocessing.char_vocab_from_vocab(resources.vocab)
Ejemplo n.º 3
0
 def setup_from_data(self, data: Iterable[Tuple[QASetting, List[Answer]]]):
     """Build the word, answer and character vocabularies from ``data``.

     The word vocabulary is filled from the first element of every
     ``data`` pair and frozen, unless it is already frozen. An answer
     vocabulary is created and frozen unless a frozen one already exists.
     Finally the character vocabulary is derived from the word vocabulary.

     Args:
         data: pairs of (question setting, answers). May be a one-shot
             iterator; it is buffered into a list only if it actually
             has to be read.
     """
     resources = self.shared_resources
     vocab = resources.vocab
     if not vocab.frozen:
         # `data` may be read again further down. An object that is its own
         # iterator (e.g. a generator) can only be consumed once, so buffer
         # it; re-iterable containers are left untouched.
         if iter(data) is data:
             data = list(data)
         preprocessing.fill_vocab(
             (q for q, _ in data), vocab, lowercase=resources.config.get('lowercase', True))
         vocab.freeze()
     if not hasattr(resources, 'answer_vocab') or not resources.answer_vocab.frozen:
         # Two generators below walk `data` independently; a one-shot
         # iterator would be split between them, so buffer it first.
         if iter(data) is data:
             data = list(data)
         resources.answer_vocab = util.create_answer_vocab(
             qa_settings=(q for q, _ in data), answers=(a for _, ass in data for a in ass))
         resources.answer_vocab.freeze()
     resources.char_vocab = preprocessing.char_vocab_from_vocab(resources.vocab)
Ejemplo n.º 4
0
 def setup_from_data(self, data: Iterable[Tuple[QASetting, List[Answer]]]):
     """Derive the character vocabulary from the shared word vocabulary.

     ``data`` is not read by this implementation; the word vocabulary in
     ``self.shared_resources`` is used as-is.
     """
     resources = self.shared_resources
     resources.char_vocab = preprocessing.char_vocab_from_vocab(resources.vocab)
Ejemplo n.º 5
0
 def setup_from_data(self, data: Iterable[Tuple[QASetting, List[Answer]]]):
     """Derive the character vocabulary from the shared word vocabulary.

     ``data`` is not read by this implementation; the word vocabulary in
     ``self.shared_vocab_config`` is used as-is.
     """
     vocab_config = self.shared_vocab_config
     vocab_config.char_vocab = char_vocab_from_vocab(vocab_config.vocab)