Example #1
    def __init__(self, hparams, is_train):
        super(DecoderStack, self).__init__()
        self.my_layers = []

        self.hparams = hparams
        self_attention_layer = SelfAttention(hparams['num_units'],
                                             hparams['num_heads'],
                                             hparams['dropout_rate'], is_train)
        enc_dec_attention_layer = MultiheadAttention(hparams['num_units'],
                                                     hparams['num_heads'],
                                                     hparams['dropout_rate'],
                                                     is_train)
        ffn_layer = FeedForwardNetwork(hparams['num_units'],
                                       hparams['num_filter_units'],
                                       hparams['dropout_rate'], is_train)
        self.self_attention_wrapper = LayerWrapper(self_attention_layer,
                                                   hparams['num_units'],
                                                   hparams['dropout_rate'],
                                                   is_train)
        self.enc_dec_attention_wrapper = LayerWrapper(enc_dec_attention_layer,
                                                      hparams['num_units'],
                                                      hparams['dropout_rate'],
                                                      is_train)
        self.ffn_wrapper = LayerWrapper(ffn_layer, hparams['num_units'],
                                        hparams['dropout_rate'], is_train)
        self.output_norm = LayerNormalization(hparams['num_units'])
        self.pondering_layer = tf.keras.layers.Dense(
            1,
            activation=tf.nn.sigmoid,
            use_bias=True,
            bias_initializer=tf.constant_initializer(1.0))
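
A minimal usage sketch for this constructor (an illustration, not code from the repository): the dictionary keys mirror the lookups in __init__ above, and the values are placeholders. Note that the call() shown in Example #2 additionally reads ACT fields (act_epsilon, act_max_step, max_length) by attribute, so the real configuration object presumably supports attribute access as well.

# Hypothetical configuration; keys follow the dict lookups in __init__ above,
# values are illustrative placeholders only.
hparams = {
    'num_units': 512,
    'num_heads': 8,
    'num_filter_units': 2048,
    'dropout_rate': 0.1,
}
decoder_stack = DecoderStack(hparams, is_train=True)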
Example #2
    def call(self, decoder_inputs, encoder_outputs,
             decoder_self_attention_bias, attention_bias):
        batch_size, length, hidden_size = tf.unstack(tf.shape(decoder_inputs))
        act = ACT(batch_size, length, hidden_size)
        halt_threshold = 1.0 - self.hparams.act_epsilon

        state = decoder_inputs
        previous_state = tf.zeros_like(state, name='previous_state')
        for step in range(self.hparams.act_max_step):
            # decide whether any position still needs computation
            if not act.should_continue(halt_threshold):
                break

            # position and timestep encoding
            state += model_utils.get_position_encoding(self.hparams.max_length,
                                                       hidden_size)
            state += model_utils.get_timestep_encoding(
                step, self.hparams.act_max_step, hidden_size)

            # compute the per-position pondering (halting) probability
            pondering = self.pondering_layer(state)
            pondering = tf.squeeze(pondering, axis=-1)

            # perform one ACT step to obtain the blend weights
            update_weights = act(pondering, halt_threshold)

            state = self.self_attention_wrapper(state,
                                                decoder_self_attention_bias)
            state = self.enc_dec_attention_wrapper(state, encoder_outputs,
                                                   attention_bias)
            state = self.ffn_wrapper(state)

            # blend the transformed state with the previous state
            new_state = (state * update_weights) + (previous_state *
                                                    (1 - update_weights))
            previous_state = new_state

        return self.output_norm(new_state), act.n_updates, act.remainders
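
The ACT helper class used in this call() is not shown on this page. The sketch below is a self-contained reconstruction of the standard Universal-Transformer-style ACT bookkeeping such a helper typically performs; the function name and exact formulation are assumptions, not the repository's implementation. It shows how per-position pondering values become the update_weights that blend state and previous_state, and how n_updates and remainders accumulate.

import tensorflow as tf

def act_step(pondering, halting_prob, remainders, n_updates, halt_threshold):
    """One ACT bookkeeping step over per-position pondering values.

    All arguments except halt_threshold are float tensors of shape
    [batch_size, length]; halt_threshold is the scalar 1.0 - act_epsilon.
    """
    # Positions whose accumulated halting probability is still below 1 keep running.
    still_running = tf.cast(tf.less(halting_prob, 1.0), tf.float32)
    # Positions that would cross the threshold this step halt now.
    new_halted = tf.cast(
        tf.greater(halting_prob + pondering * still_running, halt_threshold),
        tf.float32) * still_running
    still_running = tf.cast(
        tf.less_equal(halting_prob + pondering * still_running, halt_threshold),
        tf.float32) * still_running
    halting_prob += pondering * still_running
    # Halting positions spend their remaining probability mass this step.
    remainders += new_halted * (1.0 - halting_prob)
    halting_prob += new_halted * remainders
    n_updates += still_running + new_halted
    # Blend weight applied to the freshly transformed state in the loop above.
    update_weights = tf.expand_dims(
        pondering * still_running + new_halted * remainders, axis=-1)
    return update_weights, halting_prob, remainders, n_updates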
Example #3
    def __init__(self, hparams, is_train):
        super(DecoderStack, self).__init__()
        self.my_layers = []

        for i in range(hparams['num_layers']):
            self_attention_layer = SelfAttention(hparams['num_units'],
                                                 hparams['num_heads'],
                                                 hparams['dropout_rate'],
                                                 is_train)
            enc_dec_attention_layer = MultiheadAttention(
                hparams['num_units'], hparams['num_heads'],
                hparams['dropout_rate'], is_train)
            ffn_layer = FeedForwardNetwork(hparams['num_units'],
                                           hparams['num_filter_units'],
                                           hparams['dropout_rate'], is_train)
            self.my_layers.append([
                LayerWrapper(self_attention_layer, hparams['num_units'],
                             hparams['dropout_rate'], is_train),
                LayerWrapper(enc_dec_attention_layer, hparams['num_units'],
                             hparams['dropout_rate'], is_train),
                LayerWrapper(ffn_layer, hparams['num_units'],
                             hparams['dropout_rate'], is_train),
            ])

        self.output_norm = LayerNormalization(hparams['num_units'])
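
This variant only builds the per-layer wrappers and leaves self.my_layers to be consumed elsewhere. The following is a hedged sketch of how a matching call() is usually written, reusing the wrapper call signatures visible in Example #2; it is an illustration, not the repository's code.

    def call(self, decoder_inputs, encoder_outputs,
             decoder_self_attention_bias, attention_bias):
        outputs = decoder_inputs
        for self_attention, enc_dec_attention, ffn in self.my_layers:
            # Each LayerWrapper is assumed to handle normalization, dropout and
            # the residual connection internally.
            outputs = self_attention(outputs, decoder_self_attention_bias)
            outputs = enc_dec_attention(outputs, encoder_outputs, attention_bias)
            outputs = ffn(outputs)
        return self.output_norm(outputs)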
Example #4
    def __init__(self, hparams, is_train):
        super(EncoderStack, self).__init__()
        self.hparams = hparams

        self_attention_layer = SelfAttention(hparams.num_units,
                                             hparams.num_heads,
                                             hparams.dropout_rate, is_train)
        ffn_layer = FeedForwardNetwork(hparams.num_units,
                                       hparams.num_filter_units,
                                       hparams.dropout_rate, is_train)
        self.self_attention_wrapper = LayerWrapper(self_attention_layer,
                                                   hparams.num_units,
                                                   hparams.dropout_rate,
                                                   is_train)
        self.ffn_wrapper = LayerWrapper(ffn_layer, hparams.num_units,
                                        hparams.dropout_rate, is_train)
        self.output_norm = LayerNormalization(hparams.num_units)
        self.pondering_layer = tf.keras.layers.Dense(
            1,
            activation=tf.nn.sigmoid,
            use_bias=True,
            bias_initializer=tf.constant_initializer(1.0))
        self.num_head_layer = tf.keras.layers.Dense(
            1,
            activation=tf.nn.sigmoid,
            use_bias=True,
            bias_initializer=tf.constant_initializer(1.0))
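
A minimal instantiation sketch for this encoder variant (illustration only): because the constructor reads hparams by attribute, a simple namespace can serve as a stand-in configuration; the field values below are placeholders.

from types import SimpleNamespace

# Placeholder configuration; attribute names mirror the accesses in __init__
# above, values are illustrative only.
hparams = SimpleNamespace(
    num_units=512,
    num_heads=8,
    num_filter_units=2048,
    dropout_rate=0.1,
)
encoder_stack = EncoderStack(hparams, is_train=True)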