def not_done(self, i):
    # lowest length-normalized score among finished beams; rows with no
    # finished beam yet are pushed down to big_neg so they can't trigger a stop
    y = self.score * torch.cast(self.flags, torch.floatx())
    y = torch.reduce_min(y, axis=1)
    fs = torch.reduce_any(self.flags, axis=1)
    old = y + (1.0 - torch.cast(fs, torch.floatx())) * utils.big_neg
    n = torch.int_shape(self.tgt)[-1]
    # best score any alive beam could still reach at full target length
    new = self.logp[:, 0] / self.penalty(n)
    done = torch.reduce_all(torch.greater(old, new))
    return torch.logical_and(torch.less(i, n), torch.logical_not(done))
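# Hedged sketch (not part of the class above): the stopping test compares the
# worst finished score against the best score any alive beam could still get.
# Plain NumPy stands in for the tensor shim; all names here are illustrative.
import numpy as np

def would_stop(finished_scores, finished_flags, alive_logp, penalty_n):
    worst = (finished_scores * finished_flags.astype(float)).min(axis=1)
    worst = worst + (1.0 - finished_flags.any(axis=1).astype(float)) * -1e9
    bound = alive_logp[:, 0] / penalty_n  # scores are log-probs, so <= 0
    return bool(np.all(worst > bound))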
def forward(self, inputs):
    cfg = self.cfg
    x, tgt = inputs
    if cfg.brackets:
        # bracketed (adaptive-softmax-style) loss: token ids are partitioned
        # into ranges; the head scores the first range plus one gate entry
        # per tail bracket, and each tail bracket has its own projection
        y = torch.zeros_like(tgt, dtype=torch.floatx())
        bs = cfg.brackets + [cfg.num_toks]
        b = 0
        for i, e in enumerate(bs):
            msk = (tgt >= (b or 1)) & (tgt < e)  # id 0 (padding) is skipped
            mt = torch.boolean_mask(tgt, msk) - b
            gi = torch.stack([torch.range(torch.shape(mt)[0]), mt], axis=1)
            if i == 0:
                logp = torch.log_softmax(self.logits(x, i))
                mp = torch.boolean_mask(logp, msk)
                u = torch.gather_nd(mp, gi)
            else:
                # gate log-prob of this bracket plus within-bracket log-prob
                mp = torch.boolean_mask(logp, msk)
                u = mp[:, bs[i - 1]]
                mc = torch.boolean_mask(x, msk)[None]
                mp = torch.log_softmax(self.logits(mc, i))
                mp = torch.squeeze(mp, 0)
                u += torch.gather_nd(mp, gi)
            y = torch.tensor_scatter_nd_add(y, torch.where(msk), -u)
            b = e
    else:
        y = self.logits(x)
    # f = torch.SparseCategoricalAccuracy
    # self.add_metric(f(name='acc')(tgt, y))
    f = torch.sparse_softmax_cross_entropy_with_logits
    loss = f(labels=tgt, logits=y)
    # self.add_loss(lambda: torch.reduce_mean(loss))
    return y
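# Hedged sketch of the bracketed branch above: for a token in tail bracket i,
# log p(token) = log p(gate_i | head) + log p(token | bracket_i). Sizes and
# names below are made up; log_softmax is a local helper, not the shim's.
import numpy as np

def log_softmax(z, axis=-1):
    z = z - z.max(axis=axis, keepdims=True)
    return z - np.log(np.exp(z).sum(axis=axis, keepdims=True))

brackets, num_toks = [8], 58                # head covers ids 1..7, tail ids 8..57
head = log_softmax(np.random.randn(4, 9))   # 8 head columns + 1 gate column
tail = log_softmax(np.random.randn(4, 50))  # one column per tail token
tok = 12
logp = head[:, brackets[0]] + tail[:, tok - brackets[0]]  # gate + within-bracket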
def to_scores(self, qk, mask, v):
    b = 0
    if mask is not None:
        # additive attention bias: big_neg wherever the mask is off
        b = torch.logical_not(mask)
        b = torch.cast(b, torch.floatx()) * qu.big_neg()
        if self.proxim_b is not None:
            b += self.proxim_b
        b = b[:, None, :, None]
    y = torch.softmax(qk * self.scale + b)
    cfg = self.cfg
    y = self.drop(y, cfg.drop_attn or cfg.drop)
    y = torch.einsum("bnij,bnjv->bniv", y, v)
    return y
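# Hedged sketch: to_scores is scaled dot-product attention with an additive
# mask bias, softmax(qk * scale + bias) @ v. A NumPy version over
# [batch, heads, q_len, k_len] scores, using the more common key-axis
# broadcast for the bias and omitting dropout:
import numpy as np

def attention(qk, v, mask=None, scale=1.0):
    bias = 0.0
    if mask is not None:
        bias = (~mask).astype(float) * -1e9   # big negative where masked
        bias = bias[:, None, None, :]         # broadcast over heads/queries
    z = qk * scale + bias
    w = np.exp(z - z.max(axis=-1, keepdims=True))
    w = w / w.sum(axis=-1, keepdims=True)     # softmax over the key axis
    return np.einsum("bnij,bnjv->bniv", w, v)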
def penalty(self, n):
    n = torch.cast(n, torch.floatx())
    y = torch.pow((5.0 + n) / 6.0, self.cfg.beam_alpha)
    return y
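# The formula above is the GNMT length penalty lp(n) = ((5 + n) / 6) ** alpha;
# dividing log-probs by lp(n) keeps longer hypotheses competitive. A quick
# standalone check (the alpha value is just an example):
def length_penalty(n, alpha=0.6):
    return ((5.0 + n) / 6.0) ** alpha

assert abs(length_penalty(1) - 1.0) < 1e-9   # length 1 is unpenalized
assert abs(length_penalty(7, 0.6) - 2.0 ** 0.6) < 1e-9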
def top_out(self, x, lp, i):
    cfg = self.cfg
    # length-normalized scores; beams not ending in END drop to big_neg
    score = lp / self.penalty(i + 1)
    flag = torch.equal(x[:, :, -1], cfg.END)
    score += (1.0 - torch.cast(flag, torch.floatx())) * utils.big_neg
    return self.top_beams([x, score, flag], score)
def top_tgt(self, x, lp):
    cfg = self.cfg
    # inverse of top_out: beams that already emitted END drop to big_neg
    fs = torch.equal(x[:, :, -1], cfg.END)
    lp += torch.cast(fs, torch.floatx()) * utils.big_neg
    return self.top_beams([x, lp], lp)
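# Hedged sketch covering top_out/top_tgt together: both rank beams by score
# and keep the best k, differing only in which pool (finished vs. alive) gets
# pushed to the bottom. NumPy stand-in; top_beams is assumed to be a top-k
# gather along the beam axis, so argsort takes its place here.
import numpy as np

def top_k_beams(seqs, score, k):
    idx = np.argsort(-score, axis=1)[:, :k]     # best k beams per batch row
    rows = np.arange(seqs.shape[0])[:, None]
    return seqs[rows, idx], score[rows, idx]

def split_beams(seqs, logp, step, end_id, k, alpha=0.6):
    done = seqs[:, :, -1] == end_id
    # finished pool: length-normalized, unfinished beams pushed down
    fin = logp / ((6.0 + step) / 6.0) ** alpha + (1.0 - done) * -1e9
    # alive pool: finished beams pushed down instead
    live = logp + done * -1e9
    return top_k_beams(seqs, fin, k), top_k_beams(seqs, live, k)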