def _reshape_mask(mask, head_num):
    """Repeat a ``(batch, seq_len)`` mask once per attention head.

    Returns a ``(batch * head_num, seq_len)`` mask so it lines up with
    per-head attention scores, or ``None`` when no mask was given.
    """
    if mask is None:
        return None
    seq_len = K.shape(mask)[1]
    # Insert a head axis, repeat along it, then fold heads into the batch.
    per_head = K.tile(K.expand_dims(mask, axis=1), K.stack([1, head_num, 1]))
    return K.reshape(per_head, (-1, seq_len))
def call(self, inputs, **kwargs):
    """Apply the positional embedding.

    In ``MODE_EXPAND`` the inputs are treated as indices: they are cast to
    ``int32`` if needed, clipped to ``[-input_dim, input_dim]``, shifted to
    be non-negative, and used to gather rows of the embedding table.

    Otherwise the first ``seq_len`` rows of the embedding table are tiled
    across the batch and either added to the inputs (``MODE_ADD``) or
    concatenated along the last axis (other modes).
    """
    if self.mode == self.MODE_EXPAND:
        if K.dtype(inputs) != 'int32':
            inputs = K.cast(inputs, 'int32')
        return K.gather(
            self.embeddings,
            K.minimum(K.maximum(inputs, -self.input_dim), self.input_dim)
            + self.input_dim,
        )
    input_shape = K.shape(inputs)
    # Only batch and sequence sizes are needed here; the embedding width is
    # always ``self.output_dim`` (the original code computed an unused local
    # ``output_dim`` in a redundant if/else, removed).
    batch_size, seq_len = input_shape[0], input_shape[1]
    pos_embeddings = K.tile(
        K.expand_dims(self.embeddings[:seq_len, :self.output_dim], axis=0),
        K.stack([batch_size, 1, 1]),
    )
    if self.mode == self.MODE_ADD:
        return inputs + pos_embeddings
    return K.concatenate([inputs, pos_embeddings], axis=-1)
def compute_mask(self, inputs, mask=None):
    """Combine the token-presence mask of the second input with the
    first incoming mask.

    A position survives only where ``inputs[1]`` is non-zero AND
    ``mask[0]`` is true.

    NOTE(review): assumes ``inputs`` has at least two elements and
    ``mask`` is a list whose first entry is a boolean tensor (not
    ``None``) — confirm against the layer's callers.
    """
    nonzero_tokens = K.not_equal(inputs[1], 0)
    combined = K.all(K.stack([nonzero_tokens, mask[0]], axis=0), axis=0)
    if self.return_masked:
        return [combined, None]
    return combined