import keras
from keras import backend as K


def call(self, x, mask=None):
    # First linear projection into the hidden (feed-forward) dimension.
    h = K.dot(x, self.W1)
    if self.use_bias:
        h = K.bias_add(h, self.b1)
    if self.activation is not None:
        h = self.activation(h)
    # Second linear projection back to the model dimension.
    y = K.dot(h, self.W2)
    if self.use_bias:
        y = K.bias_add(y, self.b2)
    return y
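# A minimal sketch (not the original `build`) of how the weights used above could be
# created. `self.units` (the hidden width) and the initializer names are assumptions,
# not taken from the source.
def build(self, input_shape):
    feature_dim = int(input_shape[-1])
    self.W1 = self.add_weight(name='W1', shape=(feature_dim, self.units),
                              initializer='glorot_normal')
    self.W2 = self.add_weight(name='W2', shape=(self.units, feature_dim),
                              initializer='glorot_normal')
    if self.use_bias:
        self.b1 = self.add_weight(name='b1', shape=(self.units,), initializer='zeros')
        self.b2 = self.add_weight(name='b2', shape=(feature_dim,), initializer='zeros')
    super().build(input_shape)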
def call(self, inputs, mask=None):
    # Accept either a single tensor (self-attention) or a [query, key, value] list.
    if isinstance(inputs, list):
        q, k, v = inputs
    else:
        q = k = v = inputs
    if isinstance(mask, list):
        q_mask, k_mask, v_mask = mask
    else:
        q_mask = k_mask = v_mask = mask
    # Linear projections of queries, keys and values.
    q = K.dot(q, self.Wq)
    k = K.dot(k, self.Wk)
    v = K.dot(v, self.Wv)
    if self.use_bias:
        q += self.bq
        k += self.bk
        v += self.bv
    if self.activation is not None:
        q = self.activation(q)
        k = self.activation(k)
        v = self.activation(v)
    # Fold the heads into the batch dimension and run scaled dot-product attention.
    y = ScaledDotProductAttention(
        history_only=self.history_only,
        name='%s-Attention' % self.name,
    )(
        inputs=[
            self._reshape_to_batches(q, self.head_num),
            self._reshape_to_batches(k, self.head_num),
            self._reshape_to_batches(v, self.head_num),
        ],
        mask=[
            self._reshape_mask(q_mask, self.head_num),
            self._reshape_mask(k_mask, self.head_num),
            self._reshape_mask(v_mask, self.head_num),
        ],
    )
    # Merge the heads back and apply the output projection.
    y = self._reshape_from_batches(y, self.head_num)
    y = K.dot(y, self.Wo)
    if self.use_bias:
        y += self.bo
    if self.activation is not None:
        y = self.activation(y)
    # Restore a static shape; sequence length 512 and feature size 768 are hard-coded here.
    y = K.reshape(y, (-1, 512, 768))
    return y
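# Sketch of the reshape helpers assumed by the `call` above (the actual helpers may
# differ): they split the feature dimension into `head_num` slices and fold the heads
# into the batch axis, so ScaledDotProductAttention sees each head as an independent
# batch entry, then undo the split afterwards.
@staticmethod
def _reshape_to_batches(x, head_num):
    shape = K.shape(x)
    batch_size, seq_len, feature_dim = shape[0], shape[1], shape[2]
    head_dim = feature_dim // head_num
    x = K.reshape(x, (batch_size, seq_len, head_num, head_dim))
    x = K.permute_dimensions(x, [0, 2, 1, 3])
    return K.reshape(x, (batch_size * head_num, seq_len, head_dim))


@staticmethod
def _reshape_from_batches(x, head_num):
    shape = K.shape(x)
    batch_size, seq_len, head_dim = shape[0], shape[1], shape[2]
    x = K.reshape(x, (batch_size // head_num, head_num, seq_len, head_dim))
    x = K.permute_dimensions(x, [0, 2, 1, 3])
    return K.reshape(x, (batch_size // head_num, seq_len, head_dim * head_num))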
def call(self, inputs, mask=None, **kwargs):
    # `inputs` is the hidden sequence, `embeddings` is the token embedding matrix,
    # so the output projection shares its weights with the input embedding.
    inputs, embeddings = inputs
    outputs = K.bias_add(K.dot(inputs, K.transpose(embeddings)), self.bias)
    return keras.activations.softmax(outputs)
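# Usage sketch (variable names are placeholders, not from the source): the layer above
# is called with a list of two tensors, the hidden states and the shared embedding
# matrix, and returns a softmax distribution over the vocabulary for every position.
#   sequence_output:  (batch_size, seq_len, hidden_dim)
#   embedding_matrix: (vocab_size, hidden_dim)
#   probs = output_layer([sequence_output, embedding_matrix])  # (batch_size, seq_len, vocab_size)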