def _encoder(self):
    """Encode token embeddings with the configured encoder type.

    Reads ``self.token_emb`` and writes ``self.token_encoder``: the
    encoder output merged with its input through a residual link.

    Raises:
        ValueError: if ``self.args.encoder_type`` is not one of
            'rnn', 'cnn', or 'ffn' (previously this fell through and
            crashed with an opaque NameError on ``y``).
    """
    # Dropout is disabled (keep prob 1.0) outside of training; compute
    # once instead of repeating the conditional (which previously mixed
    # `1` and `1.0` fallbacks).
    keep_prob = self.args.dropout_keep_prob if self.is_train else 1.0
    with tf.variable_scope('encoder'):
        if self.args.encoder_type == 'rnn':
            # Bi-GRU with half of n_emb per direction, so the
            # concatenated forward/backward output is n_emb wide.
            y, _ = cu_rnn('bi-gru', self.token_emb, int(self.args.n_emb / 2),
                          self.n_batch, self.args.n_layer)
        elif self.args.encoder_type == 'cnn':
            y = cnn(self.token_emb, self.mask, self.args.n_emb, 3)
        elif self.args.encoder_type == 'ffn':
            # Inner layer is 2x n_emb wide, projected back to n_emb.
            y = ffn(self.token_emb, int(self.args.n_emb * 2), self.args.n_emb,
                    keep_prob)
        else:
            # Fail fast on a misconfigured encoder type.
            raise ValueError(
                'unknown encoder_type: {}'.format(self.args.encoder_type))
        self.token_encoder = residual_link(self.token_emb, y, keep_prob)
# Example #2
# 0
 def _self_attention(self):
     """Refine token encodings with ``n_block`` stacked self-attention blocks.

     Each block applies masked multi-head dot-product attention over
     ``self.token_encoder`` followed by a residual link, writing the
     result back to ``self.token_encoder``.
     """
     # Disable dropout (keep prob 1.0) outside of training — matches the
     # convention already used in _encoder; previously dropout stayed
     # active at inference time.
     keep_prob = self.args.dropout_keep_prob if self.is_train else 1.0
     with tf.variable_scope('self_attention'):
         # Bias that masks out padding positions in the attention scores.
         attn_bias = attention_bias(self.mask, 'masking')
         self.n_hidden = self.args.n_emb
         for i in range(self.args.n_block):
             with tf.variable_scope('block_{}'.format(i)):
                 # memory=None => pure self-attention (queries attend
                 # over the same sequence as keys/values).
                 y = multihead_attention(self.token_encoder,
                                         None,
                                         attn_bias,
                                         self.args.n_emb,
                                         self.args.n_emb,
                                         self.n_hidden,
                                         self.args.n_head,
                                         keep_prob,
                                         attention_function='dot_product')
                 self.token_encoder = residual_link(
                     self.token_encoder, y, keep_prob)