def _forward(self, in_toks, lens=None, seg_id=0):
    if lens is None:
        # if no lengths are provided, assume every sequence is full length (not ideal, but a workable default)
        lens = torch.full_like(in_toks[:, 0], in_toks.shape[1])
    maxlen = self.bert.config.max_position_embeddings
    MAX_TOK_LEN = maxlen - 2  # -2 for [CLS] and [SEP]

    # split sequences longer than BERT's limit into sub-batches stacked along the batch dim
    toks, _ = util.subbatch(in_toks, MAX_TOK_LEN)
    mask = util.lens2mask(lens, in_toks.shape[1])
    mask, _ = util.subbatch(mask, MAX_TOK_LEN)

    # wrap each sub-batch as [CLS] ... [SEP] and extend the mask accordingly
    toks = torch.cat([torch.full_like(toks[:, :1], self.CLS), toks], dim=1)
    toks = torch.cat([toks, torch.full_like(toks[:, :1], self.SEP)], dim=1)
    ONES = torch.ones_like(mask[:, :1])
    mask = torch.cat([ONES, mask, ONES], dim=1)
    segment_ids = torch.full_like(toks, seg_id)

    # Change -1 padding to 0-padding (will be masked)
    toks = torch.where(toks == -1, torch.zeros_like(toks), toks)

    result = self.bert(toks, segment_ids, mask)

    if not self.vocab.config['last_layer']:
        # keep every layer: per-layer [CLS] vectors and re-joined token embeddings
        cls_result = [r[:, 0] for r in result]
        result = [r[:, 1:-1, :] for r in result]
        result = [util.un_subbatch(r, in_toks, MAX_TOK_LEN) for r in result]
    else:
        # keep only the last layer; average the [CLS] vectors across a sequence's sub-batches
        BATCH = in_toks.shape[0]
        result = result[-1]
        cls_output = result[:, 0]
        cls_result = []
        for i in range(cls_output.shape[0] // BATCH):
            cls_result.append(cls_output[i * BATCH:(i + 1) * BATCH])
        cls_result = torch.stack(cls_result, dim=2).mean(dim=2)
        result = result[:, 1:-1, :]
        result = util.un_subbatch(result, in_toks, MAX_TOK_LEN)

    return result, cls_result
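
# --- Illustrative sketch (assumption, not the repo's util implementation) ---
# _forward relies on util.subbatch / util.un_subbatch / util.lens2mask to fold
# over-length sequences into extra rows along the batch dimension, run BERT on
# each chunk independently, and then stitch the chunk outputs back together.
# The helpers below are a minimal, self-contained sketch of that behaviour,
# included only to make the shape bookkeeping above easier to follow; the real
# util functions may differ in details such as padding and short-input handling.
import math

import torch
import torch.nn.functional as F


def _sketch_lens2mask(lens, size):
    # lens: [B] true lengths -> [B, size] mask with 1 at valid positions
    return (torch.arange(size, device=lens.device)[None, :] < lens[:, None]).long()


def _sketch_subbatch(toks, maxlen):
    # toks: [B, L] -> ([B * count, maxlen], count); pads L up to a multiple of
    # maxlen with -1 and stacks chunk i at rows [i*B:(i+1)*B]
    B, L = toks.shape
    count = math.ceil(L / maxlen)
    toks = F.pad(toks, (0, count * maxlen - L), value=-1)
    return toks.reshape(B, count, maxlen).permute(1, 0, 2).reshape(B * count, maxlen), count


def _sketch_un_subbatch(embed, toks, maxlen):
    # embed: [B * count, maxlen, D] -> [B, L, D]; inverse of _sketch_subbatch,
    # trimmed back to the original length of toks
    B, L = toks.shape
    count = math.ceil(L / maxlen)
    D = embed.shape[-1]
    embed = embed.reshape(count, B, maxlen, D).permute(1, 0, 2, 3).reshape(B, count * maxlen, D)
    return embed[:, :L]
# ---------------------------------------------------------------------------
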
def enc_query_doc(self, **inputs):
    query_tok, query_len = inputs['query_tok'], inputs['query_len']
    doc_tok, doc_len = inputs['doc_tok'], inputs['doc_len']
    BATCH, QLEN = query_tok.shape
    maxlen = self.bert.config.max_position_embeddings
    MAX_DOC_TOK_LEN = maxlen - QLEN - 3  # -3 for [CLS] and 2x [SEP]

    # split long documents into sub-batches; repeat the query once per sub-batch
    doc_toks, sbcount = util.subbatch(doc_tok, MAX_DOC_TOK_LEN)
    doc_mask = util.lens2mask(doc_len, doc_tok.shape[1])
    doc_mask, _ = util.subbatch(doc_mask, MAX_DOC_TOK_LEN)
    query_toks = torch.cat([query_tok] * sbcount, dim=0)
    query_mask = util.lens2mask(query_len, query_toks.shape[1])
    query_mask = torch.cat([query_mask] * sbcount, dim=0)

    # assemble [CLS] query [SEP] doc [SEP], with matching mask and segment ids
    CLSS = torch.full_like(query_toks[:, :1], self.CLS)
    SEPS = torch.full_like(query_toks[:, :1], self.SEP)
    ONES = torch.ones_like(query_mask[:, :1])
    NILS = torch.zeros_like(query_mask[:, :1])
    toks = torch.cat([CLSS, query_toks, SEPS, doc_toks, SEPS], dim=1)
    mask = torch.cat([ONES, query_mask, ONES, doc_mask, ONES], dim=1)
    segment_ids = torch.cat([NILS] * (2 + QLEN) + [ONES] * (1 + doc_toks.shape[1]), dim=1)

    # Change -1 padding to 0-padding (will be masked)
    toks = torch.where(toks == -1, torch.zeros_like(toks), toks)

    result = self.bert(toks, segment_ids, mask)

    # extract relevant subsequences for query and doc, re-joining doc sub-batches
    query_results = [r[:BATCH, 1:QLEN + 1] for r in result]
    doc_results = [r[:, QLEN + 2:-1] for r in result]
    doc_results = [util.un_subbatch(r, doc_tok, MAX_DOC_TOK_LEN) for r in doc_results]

    # average each layer's [CLS] vectors over a document's sub-batches
    cls_results = []
    for layer in range(len(result)):
        cls_output = result[layer][:, 0]
        cls_result = []
        for i in range(cls_output.shape[0] // BATCH):
            cls_result.append(cls_output[i * BATCH:(i + 1) * BATCH])
        cls_result = torch.stack(cls_result, dim=2).mean(dim=2)
        cls_results.append(cls_result)

    if self.vocab.config['last_layer']:
        # only return the final layer's representations
        query_results = query_results[-1]
        doc_results = doc_results[-1]
        cls_results = cls_results[-1]

    return {'query': query_results, 'doc': doc_results, 'cls': cls_results}
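
# --- Standalone sketch of the sub-batch [CLS] averaging used above ----------
# (illustration with dummy tensors and assumed sizes; not tied to this class)
# Both _forward and enc_query_doc receive BERT's output laid out as blocks of
# BATCH rows per sub-batch, so rows [i*BATCH:(i+1)*BATCH] hold chunk i of every
# input. The snippet below regroups those blocks and averages each input's
# [CLS] vectors over its chunks, which is the reduction performed by the
# cls_result loops above.
if __name__ == '__main__':
    import torch

    BATCH, SBCOUNT, DIM = 2, 3, 4
    cls_output = torch.randn(BATCH * SBCOUNT, DIM)  # stand-in for result[layer][:, 0]
    chunks = [cls_output[i * BATCH:(i + 1) * BATCH] for i in range(SBCOUNT)]
    cls_result = torch.stack(chunks, dim=2).mean(dim=2)  # [BATCH, DIM]
    assert cls_result.shape == (BATCH, DIM)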