Example 1
    def attention(
        self, query: tf.Tensor, decoder_prev_state: tf.Tensor,
        decoder_input: tf.Tensor, loop_state: AttentionLoopState
    ) -> Tuple[tf.Tensor, AttentionLoopState]:

        with tf.variable_scope(self.att_scope_name):
            # Project the decoder query into the attention space and add a
            # time axis so it broadcasts against the encoder projections.
            projected_state = tf.layers.dense(query, self.attention_state_size)
            projected_state = tf.expand_dims(projected_state, 1)

            assert_shape(projected_state, [-1, 1, self.attention_state_size])

            logits = []

            # One additive (Bahdanau-style) energy term per attended encoder.
            for proj, bias in zip(self.encoder_projections_for_logits,
                                  self.encoder_attn_biases):

                logits.append(
                    tf.reduce_sum(
                        self.attn_v * tf.tanh(projected_state + proj), [2]) +
                    bias)

            if self._use_sentinels:
                # Optional sentinel vector computed from the decoder state,
                # giving the model a fallback item to attend to besides the
                # encoder states.
                sentinel_value = _sentinel(query, decoder_prev_state,
                                           decoder_input)
                projected_sentinel, sentinel_logit = self._vector_logit(
                    projected_state, sentinel_value, scope="sentinel")
                logits.append(sentinel_logit)

            attentions = self._renorm_softmax(tf.concat(logits, 1))

            self.attentions_in_time.append(attentions)

            if self._use_sentinels:
                tiled_encoder_projections = self._tile_encoders_for_beamsearch(
                    projected_sentinel)

                projections_concat = tf.concat(
                    tiled_encoder_projections + [projected_sentinel], 1)

            else:
                projections_concat = tf.concat(
                    self.encoder_projections_for_ctx, 1)

            # Context vector: attention-weighted sum over the concatenated
            # projections.
            contexts = tf.reduce_sum(
                tf.expand_dims(attentions, 2) * projections_concat, [1])

            # Append this step's context and weights to the running loop state.
            next_contexts = tf.concat(
                [loop_state.contexts,
                 tf.expand_dims(contexts, 0)], 0)
            next_weights = tf.concat(
                [loop_state.weights,
                 tf.expand_dims(attentions, 0)], 0)

            next_loop_state = AttentionLoopState(contexts=next_contexts,
                                                 weights=next_weights)

            return contexts, next_loop_state
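
A note on the data structure the examples share: every attention() call appends the current context vector and attention weights to the loop state, so the full attention history is available after decoding. The sketch below is an assumption about that container, not code from the source; the field names match the examples, while the shapes and the initialiser are illustrative only.

    # Minimal sketch (assumption) of the loop-state container used by the
    # examples; shapes and the initialiser are illustrative.
    from typing import NamedTuple

    import tensorflow as tf

    class AttentionLoopState(NamedTuple):
        contexts: tf.Tensor  # (time, batch, context_size), one new row per step
        weights: tf.Tensor   # (time, batch, attended_items), one new row per step

    def initial_loop_state(batch_size: int, context_size: int,
                           attended_items: int) -> AttentionLoopState:
        # Hypothetical helper: empty time dimension before the first step.
        return AttentionLoopState(
            contexts=tf.zeros([0, batch_size, context_size]),
            weights=tf.zeros([0, batch_size, attended_items]))
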
Example 2
    def attention(
        self, query: tf.Tensor, decoder_prev_state: tf.Tensor,
        decoder_input: tf.Tensor, loop_state: AttentionLoopState
    ) -> Tuple[tf.Tensor, AttentionLoopState]:
        # Trivial attention: the context is just the (reshaped) attention
        # states and every item receives a constant weight of one.
        context = tf.reshape(self.attention_states,
                             [-1, self.context_vector_size])
        weights = tf.ones(shape=[self.batch_size, 1])

        next_contexts = tf.concat(
            [loop_state.contexts,
             tf.expand_dims(context, 0)], 0)
        next_weights = tf.concat(
            [loop_state.weights,
             tf.expand_dims(weights, 0)], 0)
        next_loop_state = AttentionLoopState(contexts=next_contexts,
                                             weights=next_weights)

        return context, next_loop_state
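
How these methods would typically be driven (a hedged sketch; attention_obj, the decoder states and initial_loop_state are placeholders, not identifiers from the source): the loop state returned by one step is fed back in on the next step, so contexts and weights accumulate along the leading time axis.

    # Illustrative call pattern only; all names except the attention()
    # signature are placeholders.
    loop_state = initial_loop_state(batch_size=1, context_size=512,
                                    attended_items=max_source_length)
    for step in range(max_decoder_steps):
        context, loop_state = attention_obj.attention(
            query=decoder_state, decoder_prev_state=prev_decoder_state,
            decoder_input=input_embedding, loop_state=loop_state)
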
Example 3
    def attention(self,
                  query: tf.Tensor,
                  decoder_prev_state: tf.Tensor,
                  decoder_input: tf.Tensor,
                  loop_state: AttentionLoopState) -> Tuple[
                      tf.Tensor, AttentionLoopState]:
        self.query_state_size = query.get_shape()[-1].value

        # Project the decoder query and reshape it so that it broadcasts
        # against the 4-D reshaped attention states used below.
        y = tf.matmul(query, self.query_projection_matrix)
        y = y + self.projection_bias_vector
        y = tf.reshape(y, [-1, 1, 1, self.state_size])

        # Attention energies; the weights accumulated in previous steps are
        # passed in as well (e.g. for coverage-style attention).
        energies = self.get_energies(y, loop_state.weights)

        if self.attention_mask is None:
            weights = tf.nn.softmax(energies)
        else:
            # Zero out masked (padded) positions and renormalise so that the
            # remaining weights sum to one.
            weights_all = tf.nn.softmax(energies) * self.attention_mask
            norm = tf.reduce_sum(weights_all, 1, keep_dims=True) + 1e-8
            weights = weights_all / norm

            # condition = tf.equal(self.attention_mask, 1)
            # masked_logits = tf.where(
            #     tf.tile(condition, [tf.shape(energies)[0], 1]),
            #     energies, -np.inf * tf.ones_like(energies))
            # weights = tf.nn.softmax(masked_logits)

        # Now calculate the attention-weighted vector d.
        context = tf.reduce_sum(
            tf.expand_dims(tf.expand_dims(weights, -1), -1)
            * self._att_states_reshaped, [1, 2])
        context = tf.reshape(context, [-1, self.context_vector_size])

        next_contexts = tf.concat(
            [loop_state.contexts, tf.expand_dims(context, 0)], 0)
        next_weights = tf.concat(
            [loop_state.weights, tf.expand_dims(weights, 0)], 0)
        next_loop_state = AttentionLoopState(
            contexts=next_contexts,
            weights=next_weights)

        return context, next_loop_state
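
Example 3 handles padding by renormalising the softmax after multiplying by the mask, instead of masking the logits directly (the commented-out alternative above). A small standalone sketch of that step, assuming a 0/1 attention mask over source positions:

    # Mask-and-renormalise: padded positions get zero weight, the remaining
    # weights are rescaled to sum to one; epsilon guards against rows where
    # everything is masked.
    import tensorflow as tf

    def masked_softmax(energies: tf.Tensor, mask: tf.Tensor,
                       epsilon: float = 1e-8) -> tf.Tensor:
        weights_all = tf.nn.softmax(energies) * mask
        norm = tf.reduce_sum(weights_all, 1, keep_dims=True) + epsilon
        return weights_all / norm
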
Example 4
    def attention(
        self, query: tf.Tensor, decoder_prev_state: tf.Tensor,
        decoder_input: tf.Tensor, loop_state: HierarchicalLoopState
    ) -> Tuple[tf.Tensor, HierarchicalLoopState]:

        with tf.variable_scope(self.att_scope_name):
            projected_state = tf.layers.dense(query, self.attention_state_size)
            projected_state = tf.expand_dims(projected_state, 1)

            assert_shape(projected_state, [-1, 1, self.attention_state_size])

            # Run every child attention and collect its context vector
            # together with its updated loop state.
            attn_ctx_vectors, child_loop_states = zip(*[
                a.attention(query, decoder_prev_state, decoder_input, ls)
                for a, ls in zip(self.attentions, loop_state.child_loop_states)
            ])

            # Score each child context vector against the projected decoder
            # state, yielding one attention logit per child.
            proj_ctxs, attn_logits = [
                list(t) for t in zip(*[
                    self._vector_logit(projected_state,
                                       ctx_vec,
                                       scope=att.name)  # type: ignore
                    for ctx_vec, att in zip(attn_ctx_vectors, self.attentions)
                ])
            ]

            if self._use_sentinels:
                sentinel_value = _sentinel(query, decoder_prev_state,
                                           decoder_input)
                proj_sentinel, sentinel_logit = self._vector_logit(
                    projected_state, sentinel_value, scope="sentinel")
                proj_ctxs.append(proj_sentinel)
                attn_logits.append(sentinel_logit)

            # Distribution over the child attentions (plus the sentinel, if
            # used).
            attention_distr = tf.nn.softmax(tf.concat(attn_logits, 1))
            self.attentions_in_time.append(attention_distr)

            # Either reuse the projections computed for the logits, or project
            # each child context separately for the output combination.
            if self._share_projections:
                output_cxts = proj_ctxs
            else:
                output_cxts = [
                    tf.expand_dims(
                        tf.layers.dense(ctx_vec,
                                        self.attention_state_size,
                                        name="proj_attn_{}".format(att.name)),
                        1)  # type: ignore
                    for ctx_vec, att in zip(attn_ctx_vectors, self.attentions)
                ]
                if self._use_sentinels:
                    output_cxts.append(
                        tf.expand_dims(
                            tf.layers.dense(sentinel_value,
                                            self.attention_state_size,
                                            name="proj_sentinel"), 1))

            # Final context: attention-weighted sum over the child contexts.
            projections_concat = tf.concat(output_cxts, 1)
            context = tf.reduce_sum(
                tf.expand_dims(attention_distr, 2) * projections_concat, [1])

            prev_loop_state = loop_state.loop_state

            next_contexts = tf.concat(
                [prev_loop_state.contexts,
                 tf.expand_dims(context, 0)], axis=0)
            next_weights = tf.concat(
                [prev_loop_state.weights,
                 tf.expand_dims(attention_distr, 0)],
                axis=0)

            next_loop_state = AttentionLoopState(contexts=next_contexts,
                                                 weights=next_weights)

            next_hier_loop_state = HierarchicalLoopState(
                child_loop_states=list(child_loop_states),
                loop_state=next_loop_state)

            return context, next_hier_loop_state
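
Example 4 keeps one loop state per child attention plus one for the top-level combination. The container below is a sketch of the assumed HierarchicalLoopState, mirroring the two fields accessed in the code (child_loop_states and loop_state); the AttentionLoopState definition repeats the earlier sketch.

    # Minimal sketch (assumption) of the hierarchical loop-state container.
    from typing import List, NamedTuple

    import tensorflow as tf

    class AttentionLoopState(NamedTuple):
        contexts: tf.Tensor
        weights: tf.Tensor

    class HierarchicalLoopState(NamedTuple):
        child_loop_states: List[AttentionLoopState]  # one per sub-attention
        loop_state: AttentionLoopState               # top-level combination state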