def not_done(self, i):
    """Beam-search loop condition: True while decoding should continue.

    Args:
        i: current decode step (scalar int tensor).

    Returns:
        Scalar bool tensor; False once every batch entry's worst finished
        score already beats the best score an alive beam could still reach,
        or once the length limit is hit.
    """
    # Worst finished score per batch entry (flagged beams only; unflagged
    # beams contribute 0 before the reduce_min).
    y = self.score * tf.cast(self.flag, tf.floatx())
    y = tf.reduce_min(y, axis=1)
    # NOTE(review): `self.flag` above vs `self.flags` below — confirm these
    # are intentionally two distinct attributes and not a typo.
    fs = tf.reduce_any(self.flags, axis=1)
    # Batch entries with no finished beam get a huge negative score so they
    # never trigger early termination.
    # NOTE(review): `utils.big_neg` is used bare here but called as
    # `utils.big_neg()` in to_scores — verify which form is correct.
    old = y + (1. - tf.cast(fs, tf.floatx())) * utils.big_neg
    # n: maximum target length (last dim of the target buffer).
    n = tf.int_shape(self.tgt)[-1]
    # Optimistic score of the best alive beam, normalized as if it ran to
    # full length n.
    new = self.logp[:, 0] / self.penalty(n)
    done = tf.reduce_all(tf.greater(old, new))
    return tf.logical_and(tf.less(i, n), tf.logical_not(done))
def call(self, inputs, mask=None):
    """Add token-type embeddings to the hidden states.

    Args:
        inputs: pair (x, typ) — hidden states and integer token-type ids.
        mask: optional mask; masked positions have their type id zeroed
            (i.e. they fall back to type 0) before embedding.

    Returns:
        x plus the type embedding, projected via `self.typ_w`.
    """
    hidden, types = inputs
    kept = types
    if mask is not None:
        # Zero the type id wherever the mask is off.
        kept *= tf.cast(mask, types.dtype)
    one_hot = tf.one_hot(kept, self.cfg.tok_types)
    embedded = tf.einsum('bie,eh->bih', one_hot, self.typ_w)
    return hidden + embedded
def to_scores(self, qk, mask, v):
    """Turn raw attention logits into attended values.

    Args:
        qk: attention logits — presumably (batch, heads, i, j); TODO confirm.
        mask: optional boolean mask of valid positions.
        v: value tensor matched to the 'bnjv' einsum operand.

    Returns:
        Attention-weighted values, shape per 'bnij,bnjv->bniv'.

    NOTE(review): the source had lost its indentation; the nesting of the
    proximity-bias addition and the `[:, None, :, None]` reshape under the
    mask branch was reconstructed and should be verified against the
    original — the reshape would fail on the scalar `b = 0` default, so it
    can only run when `mask` is given.
    """
    b = 0
    if mask is not None:
        # Invalid positions get a huge negative additive bias so the
        # softmax assigns them ~0 weight.
        b = tf.logical_not(mask)
        b = tf.cast(b, tf.floatx()) * utils.big_neg()
        if self.proxim_b is not None:
            b += self.proxim_b
        # Broadcast the bias across the head and value axes.
        b = b[:, None, :, None]
    y = tf.softmax(qk * self.scale + b)
    cfg = self.cfg
    # Attention dropout, falling back to the hidden-dropout rate.
    y = self.drop(y, cfg.drop_attn or cfg.drop_hidden)
    y = tf.einsum('bnij,bnjv->bniv', y, v)
    return y
def call(self, inputs, mask=None):
    """Add the positional bias `self.pos_b` to the inputs.

    Args:
        inputs: input tensor.
        mask: optional mask; where it is off the bias is zeroed out.

    Returns:
        inputs plus the (optionally masked) positional bias.
    """
    bias = self.pos_b
    if mask is None:
        return inputs + bias
    # Suppress the bias at masked-out positions.
    bias *= tf.cast(mask, self.pos_b.dtype)
    return inputs + bias
def penalty(self, n):
    """Length-normalization penalty: ((5 + n) / 6) ** beam_alpha.

    Args:
        n: sequence length (int or scalar tensor).

    Returns:
        Scalar float tensor used to divide log-probabilities so longer
        hypotheses are not unfairly penalized.
    """
    length = tf.cast(n, tf.floatx())
    base = (5. + length) / 6.
    return tf.pow(base, self.cfg.beam_alpha)
def top_out(self, x, lp, i):
    """Rank candidate outputs, keeping finished hypotheses on top.

    Args:
        x: candidate sequences; last token is checked against END.
        lp: log-probabilities of the candidates.
        i: current step index (length so far is i + 1).

    Returns:
        self.top_beams applied to [x, score, finished-flag], ranked by the
        length-normalized score.
    """
    finished = tf.equal(x[:, :, -1], self.cfg.END)
    score = lp / self.penalty(i + 1)
    # Unfinished beams get a huge negative offset so they sort last.
    score = score + (1. - tf.cast(finished, tf.floatx())) * utils.big_neg
    return self.top_beams([x, score, finished], score)
def top_tgt(self, x, lp):
    """Rank alive (unfinished) hypotheses for further extension.

    Args:
        x: candidate sequences; last token is checked against END.
        lp: log-probabilities of the candidates.

    Returns:
        self.top_beams applied to [x, penalized-lp], ranked so that beams
        which already emitted END drop out of the alive set.
    """
    ended = tf.equal(x[:, :, -1], self.cfg.END)
    # Finished beams get a huge negative offset so they sort last.
    penalized = lp + tf.cast(ended, tf.floatx()) * utils.big_neg
    return self.top_beams([x, penalized], penalized)