def __call__(self, raw_article_sents, raw_clusters):
    """Extract sentence indices for one article, conditioned on entity clusters.

    raw_clusters is expected to be a 3-tuple of parallel lists:
    (cluster word lists, cluster word positions, cluster sentence
    positions) -- TODO confirm against the caller.

    Returns whatever self._net.extract yields (sentence indices).
    """
    self._net.eval()
    # Guard BEFORE indexing: the original checked `raw_clusters == []`
    # only after reading raw_clusters[0..2], so an empty input crashed
    # with IndexError and the check (and its debug print) never ran.
    if not raw_clusters:
        raise ValueError('raw_clusters must be a non-empty (words, wpos, spos) triple')
    n_art = len(raw_article_sents)
    articles = conver2id(UNK, self._word2id, raw_article_sents)
    clusters = (conver2id(UNK, self._word2id, raw_clusters[0]),
                raw_clusters[1],
                raw_clusters[2])
    article = pad_batch_tensorize(articles, PAD, cuda=False,
                                  max_num=5).to(self._device)
    clusters = (pad_batch_tensorize(clusters[0], PAD, cuda=False, max_num=4).to(self._device),
                pad_batch_tensorize(clusters[1], PAD, cuda=False, max_num=4).to(self._device),
                pad_batch_tensorize(clusters[2], PAD, cuda=False, max_num=4).to(self._device))
    indices = self._net.extract([article], clusters, k=min(n_art, self._max_ext))
    return indices
def __call__(self, raw_article_sents, raw_query):
    """Extract sentence indices for an article, guided by a query.

    Both the article sentences and the query are id-encoded with the
    shared vocabulary, padded, and moved to the model device.
    """
    self._net.eval()
    num_sents = len(raw_article_sents)
    sent_ids = conver2id(UNK, self._word2id, raw_article_sents)
    query_ids = conver2id(UNK, self._word2id, raw_query)
    article = pad_batch_tensorize(sent_ids, PAD, cuda=False).to(self._device)
    query = pad_batch_tensorize(query_ids, PAD, cuda=False).to(self._device)
    return self._net.extract([article],
                             k=min(num_sents, self._max_ext),
                             queries=[query])
def __call__(self, raw_article_sents):
    """Return the indices of the sentences extracted from one article."""
    self._net.eval()
    sent_count = len(raw_article_sents)
    encoded = conver2id(UNK, self._word2id, raw_article_sents)
    article = pad_batch_tensorize(encoded, PAD, cuda=False).to(self._device)
    return self._net.extract([article], k=min(sent_count, self._max_ext))
def _prepro(self, raw_article_sents):
    """Build decoder arguments for a copy-mechanism abstractor.

    Extends the base vocabulary with every out-of-vocabulary word in
    the article so the copy mechanism can point at them, then returns
    the packed decoder arguments and the extended id->word map.
    """
    ext_word2id = dict(self._word2id)
    ext_id2word = dict(self._id2word)
    # Register each OOV word at the end of the extended vocabulary.
    for raw_words in raw_article_sents:
        for w in raw_words:
            if w not in ext_word2id:  # idiomatic `not in` (was `not w in`)
                ext_word2id[w] = len(ext_word2id)
                ext_id2word[len(ext_id2word)] = w
    articles = conver2id(UNK, self._word2id, raw_article_sents)
    art_lens = [len(art) for art in articles]
    article = pad_batch_tensorize(articles, PAD, cuda=False).to(self._device)
    # Same sentences re-encoded against the extended vocab: OOV words
    # keep their new ids instead of collapsing to UNK.
    extend_arts = conver2id(UNK, ext_word2id, raw_article_sents)
    extend_art = pad_batch_tensorize(extend_arts, PAD, cuda=False).to(self._device)
    extend_vsize = len(ext_word2id)
    dec_args = (article, art_lens, extend_art, extend_vsize,
                START, END, UNK, self._max_len)
    return dec_args, ext_id2word
def __call__(self, raw_article_sents, sent_labels):
    """Run extraction on one article given per-sentence labels.

    sent_labels is forwarded untouched (wrapped in a batch of one) to
    the network's extract method.
    """
    self._net.eval()
    n_art = len(raw_article_sents)  # kept for parity with sibling methods
    token_ids = conver2id(UNK, self._word2id, raw_article_sents)
    article = pad_batch_tensorize(token_ids, PAD, cuda=False).to(self._device)
    return self._net.extract([article], [sent_labels])
def _prepro(self, raw_article_sents):
    """Build decoder arguments for a copy-mechanism abstractor.

    Mirrors the other `_prepro` in this file: extends the vocabulary
    with the article's out-of-vocabulary words for the copy mechanism,
    then returns the decoder argument tuple and the extended id->word
    map.
    """
    ext_word2id = dict(self._word2id)
    ext_id2word = dict(self._id2word)
    # Register each OOV word at the end of the extended vocabulary.
    for raw_words in raw_article_sents:
        for w in raw_words:
            if w not in ext_word2id:  # idiomatic `not in` (was `not w in`)
                ext_word2id[w] = len(ext_word2id)
                ext_id2word[len(ext_id2word)] = w
    articles = conver2id(UNK, self._word2id, raw_article_sents)
    art_lens = [len(art) for art in articles]
    article = pad_batch_tensorize(articles, PAD, cuda=False).to(self._device)
    # Re-encode against the extended vocab so OOV words keep their ids.
    extend_arts = conver2id(UNK, ext_word2id, raw_article_sents)
    extend_art = pad_batch_tensorize(extend_arts, PAD, cuda=False).to(self._device)
    extend_vsize = len(ext_word2id)
    dec_args = (article, art_lens, extend_art, extend_vsize,
                START, END, UNK, self._max_len)
    return dec_args, ext_id2word
def __call__(self, raw_article_sents, raw_abs_sents=None):
    """Tensorize an article batch for the configured extractor net.

    For 'ml_trans_rnn_extractor' the transformer batching helper also
    needs the abstract sentences, so raw_abs_sents must be provided in
    that case -- TODO confirm against callers.

    Raises:
        ValueError: if self.net_type is not one of the handled values
            (the original fell through and crashed with
            UnboundLocalError on `article`).
    """
    if self.net_type == 'ml_rnn_extractor':
        articles = conver2id(UNK, self._word2id, raw_article_sents)
        article = pad_batch_tensorize(articles, PAD, cuda=False).to(self._device)
    elif self.net_type == 'ml_trans_rnn_extractor':
        article = myextract.get_batch_trans(
            [([" ".join(r) for r in raw_article_sents],
              [" ".join(r) for r in raw_abs_sents])])
    else:
        raise ValueError('unknown net_type: {}'.format(self.net_type))
    return article
def __call__(self, raw_article_sents):
    """Extract sentence indices for a single article."""
    self._net.eval()
    sent_total = len(raw_article_sents)
    id_lists = conver2id(UNK, self._word2id, raw_article_sents)
    article = pad_batch_tensorize(id_lists, PAD, cuda=False,
                                  max_num=5).to(self._device)
    top_k = min(sent_total, self._max_ext)
    # NOTE(review): force_ext is forwarded only when it is falsy; when it
    # is truthy the net's own default is used instead -- presumably that
    # default is already True. Verify against extract()'s signature.
    if self.force_ext:
        return self._net.extract([article], k=top_k)
    return self._net.extract([article], k=top_k, force_ext=self.force_ext)
def _prepro(self, raw_article_sents):
    """Pack decoder arguments for a plain (no-copy) abstractor.

    Unlike the copy-mechanism variants, the vocabulary is not extended:
    the id maps are copied as-is and OOV words stay UNK.
    """
    ext_word2id = dict(self._word2id)
    ext_id2word = dict(self._id2word)
    token_ids = conver2id(UNK, self._word2id, raw_article_sents)
    art_lens = [len(ids) for ids in token_ids]
    article = pad_batch_tensorize(token_ids, PAD, cuda=False).to(self._device)
    dec_args = (article, art_lens, START, END, UNK, self._max_len)
    return dec_args, ext_id2word
def __call__(self, raw_article_sents):
    """Extract sentence indices, encoding with either the W2V vocab or
    the stored tokenizer (presumably a subword tokenizer -- its
    convert_tokens_to_ids API is relied on)."""
    self._net.eval()
    sent_num = len(raw_article_sents)
    if self._emb_type == 'W2V':
        encoded = conver2id(UNK, self._word2id, raw_article_sents)
    else:
        encoded = [self._tokenizer.convert_tokens_to_ids(sentence)
                   for sentence in raw_article_sents]
    article = pad_batch_tensorize(encoded, PAD, cuda=False).to(self._device)
    return self._net.extract([article], k=min(sent_num, self._max_ext))
def __call__(self, raw_article_sents):
    """Extract sentence indices, dispatching on the net type.

    NOTE(review): the transformer branch returns (indices, batch) while
    the default branch returns indices alone -- callers apparently
    depend on the two different return shapes, so both are preserved.
    """
    self._net.eval()
    if self._net_type == 'ml_trans_rnn_extractor':
        # The transformer path receives nested input; sentence count
        # comes from the first element.
        sent_cnt = len(raw_article_sents[0])
        batch = myextract.get_batch_trans([raw_article_sents])
        indices = self._net.extract(batch, k=min(sent_cnt, self._max_ext))
        return indices, batch
    sent_cnt = len(raw_article_sents)
    token_ids = conver2id(UNK, self._word2id, raw_article_sents)
    article = pad_batch_tensorize(token_ids, PAD, cuda=False).to(self._device)
    return self._net.extract([article], k=min(sent_cnt, self._max_ext))
def __call__(self, raw_article_sents):
    """Convert an article's sentences to a padded id tensor on the device."""
    id_seqs = conver2id(UNK, self._word2id, raw_article_sents)
    return pad_batch_tensorize(id_seqs, PAD, cuda=False).to(self._device)
def __call__(self, raw_article_sents):
    """Id-encode, pad, and move an article's sentences to the device."""
    sent_ids = conver2id(UNK, self._word2id, raw_article_sents)
    padded = pad_batch_tensorize(sent_ids, PAD, cuda=False)
    return padded.to(self._device)