def _encoder(self):
    """Encode token embeddings with the configured encoder and a residual link.

    Selects the encoder by ``self.args.encoder_type`` ('rnn', 'cnn', or
    'ffn'), runs it over ``self.token_emb``, and stores the result of a
    residual link (encoder output + raw embeddings, with dropout) in
    ``self.token_encoder``.

    Raises:
        ValueError: if ``self.args.encoder_type`` is not one of the three
            supported values.
    """
    # Disable dropout outside training. Hoisted once so every consumer
    # uses the same float value (original mixed `1` and `1.0`).
    keep_prob = self.args.dropout_keep_prob if self.is_train else 1.0
    with tf.variable_scope('encoder'):
        if self.args.encoder_type == 'rnn':
            # Bi-directional GRU: each direction gets half the embedding
            # width so the concatenated output width matches n_emb.
            y, _ = cu_rnn('bi-gru', self.token_emb,
                          int(self.args.n_emb / 2),
                          self.n_batch, self.args.n_layer)
        elif self.args.encoder_type == 'cnn':
            y = cnn(self.token_emb, self.mask, self.args.n_emb, 3)
        elif self.args.encoder_type == 'ffn':
            # Feed-forward with a 2x expansion then projection back to n_emb.
            y = ffn(self.token_emb, int(self.args.n_emb * 2),
                    self.args.n_emb, keep_prob)
        else:
            # Fail fast on a bad config instead of a confusing NameError
            # on `y` at the residual_link call below.
            raise ValueError(
                'unknown encoder_type: {}'.format(self.args.encoder_type))
        self.token_encoder = residual_link(self.token_emb, y, keep_prob)
def _self_attention(self):
    """Refine ``self.token_encoder`` with stacked multi-head self-attention.

    Builds ``self.args.n_block`` blocks, each applying masked dot-product
    multi-head attention (query = key = the current encoding) followed by
    a residual link. The padding mask is converted once to an additive
    attention bias. Also sets ``self.n_hidden`` to the embedding width.
    """
    # Disable dropout outside training — consistent with _encoder. The
    # original passed dropout_keep_prob unconditionally, leaving dropout
    # active at inference time.
    keep_prob = self.args.dropout_keep_prob if self.is_train else 1.0
    with tf.variable_scope('self_attention'):
        # Additive bias that masks out padded positions in the attention
        # logits.
        attn_bias = attention_bias(self.mask, 'masking')
        self.n_hidden = self.args.n_emb
        for i in range(self.args.n_block):
            # Separate scope per block so each gets its own variables.
            with tf.variable_scope('block_{}'.format(i)):
                y = multihead_attention(
                    self.token_encoder, None, attn_bias,
                    self.args.n_emb, self.args.n_emb, self.n_hidden,
                    self.args.n_head, keep_prob,
                    attention_function='dot_product')
                self.token_encoder = residual_link(
                    self.token_encoder, y, keep_prob)