Example No. 1
    def get_encoder_projections(self, scope: str) -> List[tf.Tensor]:
        encoder_projections = []
        with tf.variable_scope(scope):
            for i, encoder_tensor in enumerate(self._encoders_tensors):
                encoder_state_size = encoder_tensor.get_shape()[2].value
                encoder_tensor_shape = tf.shape(encoder_tensor)

                proj_matrix = get_variable(
                    "proj_matrix_{}".format(i),
                    [encoder_state_size, self.attention_state_size],
                    initializer=tf.random_normal_initializer(stddev=0.001))

                proj_bias = get_variable(
                    "proj_bias_{}".format(i),
                    shape=[self.attention_state_size],
                    initializer=tf.zeros_initializer())

                encoder_tensor_2d = tf.reshape(
                    encoder_tensor, [-1, encoder_state_size])

                projected_2d = tf.matmul(
                    encoder_tensor_2d, proj_matrix) + proj_bias
                assert_shape(projected_2d, [-1, self.attention_state_size])

                projection = tf.reshape(
                    projected_2d, [encoder_tensor_shape[0],
                                   encoder_tensor_shape[1],
                                   self.attention_state_size])

                encoder_projections.append(projection)
            return encoder_projections
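
All of these examples call an assert_shape helper whose implementation is not shown on this page. A minimal sketch of such a check, assuming that -1 or None entries in the expected pattern match any dimension size (an assumption, not necessarily the project's actual implementation):

def assert_shape(tensor, expected):
    # Compare the tensor's static shape against an expected pattern.
    # Entries of -1 or None in the pattern act as wildcards (assumed).
    actual = tensor.get_shape().as_list()
    if len(actual) != len(expected):
        raise ValueError("Rank mismatch: expected {} dims, got shape {}"
                         .format(len(expected), actual))
    for exp, act in zip(expected, actual):
        if exp in (-1, None) or act is None:
            continue  # wildcard or statically unknown dimension
        if exp != act:
            raise ValueError("Shape mismatch: expected {}, got {}"
                             .format(expected, actual))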
Example No. 2
    def encoded(self) -> tf.Tensor:
        """Output vector of the CNN.

        If fully connected layers are specified, they are applied on top of
        the last convolutional map. Dropout is applied between all layers and
        the default activation function is ReLU. Only projection layers are
        used; no softmax is applied.

        If no fully connected layers are specified, the average-pooled last
        convolutional map is used as the output vector.
        """
        # pylint: disable=no-member
        last_height, last_width, last_n_channels = [
            s.value for s in self.states.get_shape()[1:]
        ]
        # pylint: enable=no-member

        if self.fully_connected is None:
            # we average over the image size -> shape is [batch, number of
            # channels from the last convolution]
            encoded = tf.reduce_mean(self.states, [1, 2])
            assert_shape(encoded, [None, self.convolutions[-1][1]])
            return encoded

        states_flat = tf.reshape(
            self.states, [-1, last_width * last_height * last_n_channels])
        return multilayer_projection(states_flat,
                                     self.fully_connected,
                                     activation=tf.nn.relu,
                                     dropout_keep_prob=self.dropout_keep_prob,
                                     train_mode=self.train_mode)
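
The two branches above differ only in how the spatial dimensions are collapsed. A small NumPy illustration of the shape arithmetic, with made-up sizes:

import numpy as np

# Hypothetical convolutional map: batch=2, height=4, width=5, channels=8.
states = np.zeros((2, 4, 5, 8))

# No fully connected layers: average over height and width,
# leaving [batch, channels] -- here (2, 8).
encoded_mean = states.mean(axis=(1, 2))

# With fully connected layers: flatten to [batch, height * width * channels]
# -- here (2, 160) -- before the multilayer projection.
states_flat = states.reshape(-1, 4 * 5 * 8)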
Example No. 3
    def get_encoder_projections(self, scope):
        encoder_projections = []
        with tf.variable_scope(scope):
            for i, encoder_tensor in enumerate(self._encoders_tensors):
                encoder_state_size = encoder_tensor.get_shape()[2].value
                encoder_tensor_shape = tf.shape(encoder_tensor)

                proj_matrix = get_variable(
                    "proj_matrix_{}".format(i),
                    [encoder_state_size, self.attention_state_size],
                    initializer=tf.random_normal_initializer(stddev=0.001))

                proj_bias = get_variable("proj_bias_{}".format(i),
                                         shape=[self.attention_state_size],
                                         initializer=tf.zeros_initializer())

                encoder_tensor_2d = tf.reshape(encoder_tensor,
                                               [-1, encoder_state_size])

                projected_2d = tf.matmul(encoder_tensor_2d,
                                         proj_matrix) + proj_bias
                assert_shape(projected_2d, [-1, self.attention_state_size])

                projection = tf.reshape(projected_2d, [
                    encoder_tensor_shape[0], encoder_tensor_shape[1],
                    self.attention_state_size
                ])

                encoder_projections.append(projection)
            return encoder_projections
Example No. 4
    def __call__(self, inputs, state, scope=None):
        output, new_state = self._cell(inputs, state)

        # self._mask is of shape [batch_size, state_size]
        # new_state is of shape [batch_size, state_size] (hopefully)
        new_state_dropped = new_state * self._scale * self._mask
        assert_shape(new_state_dropped, [None, self._cell.state_size])
        return output, new_state_dropped
Example No. 5
    def _encoders_masks(self) -> List[tf.Tensor]:
        masks = [get_attention_mask(e) for e in self._encoders]
        for e_m in masks:
            assert_shape(e_m, [-1, -1])

        if self._use_sentinels:
            masks.append(tf.ones([tf.shape(masks[0])[0], 1]))
        return masks
Example No. 6
    def __call__(self, inputs, state, scope=None):
        output, new_state = self._cell(inputs, state)

        # self._mask is of shape [batch_size, state_size]
        # new_state is of shape [batch_size, state_size] (hopefully)
        new_state_dropped = new_state * self._scale * self._mask
        assert_shape(new_state_dropped, [None, self._cell.state_size])
        return output, new_state_dropped
Example No. 7
    def _encoders_masks(self) -> List[tf.Tensor]:
        masks = [get_attention_mask(e) for e in self._encoders]
        for e_m in masks:
            assert_shape(e_m, [-1, -1])

        if self._use_sentinels:
            masks.append(tf.ones([tf.shape(masks[0])[0], 1]))
        return masks
Example No. 8
    def __init__(self,
                 name: str,
                 encoders: List[Attendable],
                 attention_state_size: int,
                 share_attn_projections: bool = False,
                 use_sentinels: bool = False,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        MultiAttention.__init__(self,
                                name=name,
                                attention_state_size=attention_state_size,
                                share_attn_projections=share_attn_projections,
                                use_sentinels=use_sentinels,
                                reuse=reuse,
                                save_checkpoint=save_checkpoint,
                                load_checkpoint=load_checkpoint,
                                initializers=initializers)
        self._encoders = encoders

        # pylint: disable=protected-access
        self._encoders_tensors = [
            get_attention_states(e) for e in self._encoders
        ]
        self._encoders_masks = [get_attention_mask(e) for e in self._encoders]
        # pylint: enable=protected-access

        for e_m in self._encoders_masks:
            assert_shape(e_m, [-1, -1])

        for e_t in self._encoders_tensors:
            assert_shape(e_t, [-1, -1, -1])

        with self.use_scope():
            self.encoder_projections_for_logits = \
                self.get_encoder_projections("logits_projections")

            self.encoder_attn_biases = [
                get_variable(name="attn_bias_{}".format(i),
                             shape=[],
                             initializer=tf.zeros_initializer())
                for i in range(len(self._encoders_tensors))
            ]

            if self._share_projections:
                self.encoder_projections_for_ctx = \
                    self.encoder_projections_for_logits
            else:
                self.encoder_projections_for_ctx = \
                    self.get_encoder_projections("context_projections")

            if self._use_sentinels:
                self._encoders_masks.append(
                    tf.ones([tf.shape(self._encoders_masks[0])[0], 1]))

            self.masks_concat = tf.concat(self._encoders_masks, 1)
Example No. 9
    def attention(
        self, query: tf.Tensor, decoder_prev_state: tf.Tensor,
        decoder_input: tf.Tensor, loop_state: AttentionLoopState
    ) -> Tuple[tf.Tensor, AttentionLoopState]:

        with tf.variable_scope(self.att_scope_name):
            projected_state = tf.layers.dense(query, self.attention_state_size)
            projected_state = tf.expand_dims(projected_state, 1)

            assert_shape(projected_state, [-1, 1, self.attention_state_size])

            logits = []

            for proj, bias in zip(self.encoder_projections_for_logits,
                                  self.encoder_attn_biases):

                logits.append(
                    tf.reduce_sum(
                        self.attn_v * tf.tanh(projected_state + proj), [2]) +
                    bias)

            if self._use_sentinels:
                sentinel_value = _sentinel(query, decoder_prev_state,
                                           decoder_input)
                projected_sentinel, sentinel_logit = self._vector_logit(
                    projected_state, sentinel_value, scope="sentinel")
                logits.append(sentinel_logit)

            attentions = self._renorm_softmax(tf.concat(logits, 1))

            self.attentions_in_time.append(attentions)

            if self._use_sentinels:
                tiled_encoder_projections = self._tile_encoders_for_beamsearch(
                    projected_sentinel)

                projections_concat = tf.concat(
                    tiled_encoder_projections + [projected_sentinel], 1)

            else:
                projections_concat = tf.concat(
                    self.encoder_projections_for_ctx, 1)

            contexts = tf.reduce_sum(
                tf.expand_dims(attentions, 2) * projections_concat, [1])

            next_contexts = tf.concat(
                [loop_state.contexts,
                 tf.expand_dims(contexts, 0)], 0)
            next_weights = tf.concat(
                [loop_state.weights,
                 tf.expand_dims(attentions, 0)], 0)

            next_loop_state = AttentionLoopState(contexts=next_contexts,
                                                 weights=next_weights)

            return contexts, next_loop_state
Example No. 10
    def attention(self,
                  query: tf.Tensor,
                  decoder_prev_state: tf.Tensor,
                  decoder_input: tf.Tensor,
                  loop_state: AttentionLoopState) -> Tuple[
                      tf.Tensor, AttentionLoopState]:

        with tf.variable_scope(self.att_scope_name):
            projected_state = tf.layers.dense(query, self.attention_state_size)
            projected_state = tf.expand_dims(projected_state, 1)

            assert_shape(projected_state, [-1, 1, self.attention_state_size])

            logits = []

            for proj, bias in zip(self.encoder_projections_for_logits,
                                  self.encoder_attn_biases):

                logits.append(tf.reduce_sum(
                    self.attn_v * tf.tanh(projected_state + proj), [2]) + bias)

            if self._use_sentinels:
                sentinel_value = _sentinel(query,
                                           decoder_prev_state,
                                           decoder_input)
                projected_sentinel, sentinel_logit = self._vector_logit(
                    projected_state, sentinel_value, scope="sentinel")
                logits.append(sentinel_logit)

            attentions = self._renorm_softmax(tf.concat(logits, 1))

            self.attentions_in_time.append(attentions)

            if self._use_sentinels:
                tiled_encoder_projections = self._tile_encoders_for_beamsearch(
                    projected_sentinel)

                projections_concat = tf.concat(
                    tiled_encoder_projections + [projected_sentinel], 1)

            else:
                projections_concat = tf.concat(
                    self.encoder_projections_for_ctx, 1)

            contexts = tf.reduce_sum(
                tf.expand_dims(attentions, 2) * projections_concat, [1])

            next_contexts = tf.concat(
                [loop_state.contexts, tf.expand_dims(contexts, 0)], 0)
            next_weights = tf.concat(
                [loop_state.weights, tf.expand_dims(attentions, 0)], 0)

            next_loop_state = AttentionLoopState(
                contexts=next_contexts,
                weights=next_weights)

            return contexts, next_loop_state
Example No. 11
def _sentinel(state, prev_state, input_):
    """Sentinel value given the decoder state."""
    with tf.variable_scope("sentinel"):

        decoder_state_size = state.get_shape()[-1].value
        st_with_inp = tf.concat([prev_state, input_], 1)

        gate = tf.nn.sigmoid(tf.layers.dense(st_with_inp, decoder_state_size))
        sentinel_value = gate * state

        assert_shape(sentinel_value, [-1, decoder_state_size])

        return sentinel_value
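
The sentinel is an element-wise gate over the decoder state. A NumPy sketch of the shape flow, with made-up sizes (the zero weight matrix stands in for the dense layer; this is an illustration only):

import numpy as np

batch, state_size, input_size = 2, 6, 4
state = np.zeros((batch, state_size))
prev_state = np.zeros((batch, state_size))
input_ = np.zeros((batch, input_size))

st_with_inp = np.concatenate([prev_state, input_], axis=1)   # (2, 10)
# Stand-in for tf.layers.dense followed by a sigmoid.
weights = np.zeros((state_size + input_size, state_size))
gate = 1.0 / (1.0 + np.exp(-st_with_inp.dot(weights)))       # (2, 6)
sentinel_value = gate * state                                 # (2, 6)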
Example No. 12
    def outputs_bidi(self):
        """Outputs of the bidirectional layer"""

        # outputs and outputs_rev, both lists in time of shape batch x rnn_size
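        # NOTE: tf.concat(concat_dim, values) below uses the pre-TF-1.0
        # argument order; newer TensorFlow expects tf.concat(values, axis).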
        outputs_bidi = [
            tf.concat(1, [o1, o2])
            for o1, o2 in zip(self._outputs, self._outputs_rev)
        ]
        # concatenations have shape batch x (2 * rnn_size)
        for out in outputs_bidi:
            assert_shape(out, [None, self._output_size])

        return outputs_bidi
Example No. 13
def _sentinel(state, prev_state, input_):
    """Sentinel value given the decoder state."""
    with tf.variable_scope("sentinel"):

        decoder_state_size = state.get_shape()[-1].value
        st_with_inp = tf.concat([prev_state, input_], 1)

        gate = tf.nn.sigmoid(tf.layers.dense(st_with_inp, decoder_state_size))
        sentinel_value = gate * state

        assert_shape(sentinel_value, [-1, decoder_state_size])

        return sentinel_value
Example No. 14
def _convolution(last_layer: tf.Tensor, last_n_channels: int, filter_size: int,
                 n_filters: int) -> tf.Tensor:
    """Applies convolution on a filter bank."""
    conv_w = tf.get_variable(
        "wieghts",
        shape=[filter_size, filter_size, last_n_channels, n_filters],
        initializer=tf.truncated_normal_initializer(stddev=.1))
    conv_b = tf.get_variable("biases",
                             shape=[n_filters],
                             initializer=tf.constant_initializer(.1))
    conv_activation = tf.nn.conv2d(last_layer, conv_w, [1, 1, 1, 1],
                                   "SAME") + conv_b
    assert_shape(conv_activation, [
        None,
        last_layer.get_shape()[1].value,
        last_layer.get_shape()[2].value, n_filters
    ])
    return tf.nn.relu(conv_activation)
Example No. 15
    def attention(self, decoder_state, decoder_prev_state, decoder_input):
        with tf.variable_scope(self.scope):
            projected_state = linear(decoder_state, self.attention_state_size)
            projected_state = tf.expand_dims(projected_state, 1)

            assert_shape(projected_state, [-1, 1, self.attention_state_size])
            attn_ctx_vectors = [
                a.attention(decoder_state, decoder_prev_state, decoder_input)
                for a in self._attn_objs]

            proj_ctxs, attn_logits = [list(t) for t in zip(*[
                self._vector_logit(projected_state, ctx_vec, scope=enc.name)
                for ctx_vec, enc in zip(attn_ctx_vectors, self._encoders)])]

            if self._use_sentinels:
                sentinel_value = _sentinel(decoder_state,
                                           decoder_prev_state,
                                           decoder_input)
                proj_sentinel, sentinel_logit = self._vector_logit(
                    projected_state, sentinel_value, scope="sentinel")
                proj_ctxs.append(proj_sentinel)
                attn_logits.append(sentinel_logit)

            attention_distr = tf.nn.softmax(tf.concat(attn_logits, 1))
            self.attentions_in_time.append(attention_distr)

            if self._share_projections:
                output_cxts = proj_ctxs
            else:
                output_cxts = [
                    tf.expand_dims(
                        linear(ctx_vec, self.attention_state_size,
                               scope="proj_attn_{}".format(enc.name)), 1)
                    for ctx_vec, enc in zip(attn_ctx_vectors, self._encoders)]
                if self._use_sentinels:
                    output_cxts.append(tf.expand_dims(
                        linear(sentinel_value, self.attention_state_size,
                               scope="proj_sentinel"), 1))

            projections_concat = tf.concat(output_cxts, 1)
            context = tf.reduce_sum(
                tf.expand_dims(attention_distr, 2) * projections_concat, [1])

            return context
Example No. 16
    def attention(self, decoder_state, decoder_prev_state, decoder_input):
        with tf.variable_scope(self.scope):
            projected_state = linear(decoder_state, self.attention_state_size)
            projected_state = tf.expand_dims(projected_state, 1)

            assert_shape(projected_state, [-1, 1, self.attention_state_size])

            logits = []

            for proj, bias in zip(self.encoder_projections_for_logits,
                                  self.encoder_attn_biases):

                logits.append(tf.reduce_sum(
                    self.attn_v * tf.tanh(projected_state + proj), [2]) + bias)

            if self._use_sentinels:
                sentinel_value = _sentinel(decoder_state,
                                           decoder_prev_state,
                                           decoder_input)
                projected_sentinel, sentinel_logit = self._vector_logit(
                    projected_state, sentinel_value, scope="sentinel")
                logits.append(sentinel_logit)

            attentions = self._renorm_softmax(tf.concat(logits, 1))

            self.attentions_in_time.append(attentions)

            if self._use_sentinels:
                tiled_encoder_projections = self._tile_encoders_for_beamsearch(
                    projected_sentinel)

                projections_concat = tf.concat(
                    tiled_encoder_projections + [projected_sentinel], 1)

            else:
                projections_concat = tf.concat(
                    self.encoder_projections_for_ctx, 1)

            contexts = tf.reduce_sum(
                tf.expand_dims(attentions, 2) * projections_concat, [1])

            return contexts
Example No. 17
    def _vector_logit(self, projected_decoder_state: tf.Tensor,
                      vector_value: tf.Tensor,
                      scope: str) -> Tuple[tf.Tensor, tf.Tensor]:
        """Get logit for a single vector, e.g., sentinel vector."""
        assert_shape(projected_decoder_state, [-1, 1, -1])
        assert_shape(vector_value, [-1, -1])

        with tf.variable_scope("{}_logit".format(scope)):
            vector_bias = get_variable("vector_bias", [],
                                       initializer=tf.zeros_initializer())

            proj_vector_for_logit = tf.expand_dims(
                tf.layers.dense(vector_value,
                                self.attention_state_size,
                                name="vector_projection"), 1)

            if self._share_projections:
                proj_vector_for_ctx = proj_vector_for_logit
            else:
                proj_vector_for_ctx = tf.expand_dims(
                    tf.layers.dense(vector_value,
                                    self.attention_state_size,
                                    name="vector_ctx_proj"), 1)

            vector_logit = tf.reduce_sum(
                self.attn_v *
                tf.tanh(projected_decoder_state + proj_vector_for_logit),
                [2]) + vector_bias
            assert_shape(vector_logit, [-1, 1])
            return proj_vector_for_ctx, vector_logit
Example No. 18
    def _vector_logit(self,
                      projected_decoder_state: tf.Tensor,
                      vector_value: tf.Tensor,
                      scope: str) -> Tuple[tf.Tensor, tf.Tensor]:
        """Get logit for a single vector, e.g., sentinel vector."""
        assert_shape(projected_decoder_state, [-1, 1, -1])
        assert_shape(vector_value, [-1, -1])

        with tf.variable_scope("{}_logit".format(scope)):
            vector_bias = get_variable(
                "vector_bias", [],
                initializer=tf.zeros_initializer())

            proj_vector_for_logit = tf.expand_dims(
                tf.layers.dense(vector_value, self.attention_state_size,
                                name="vector_projection"), 1)

            if self._share_projections:
                proj_vector_for_ctx = proj_vector_for_logit
            else:
                proj_vector_for_ctx = tf.expand_dims(
                    tf.layers.dense(vector_value, self.attention_state_size,
                                    name="vector_ctx_proj"), 1)

            vector_logit = tf.reduce_sum(
                self.attn_v
                * tf.tanh(projected_decoder_state + proj_vector_for_logit),
                [2]) + vector_bias
            assert_shape(vector_logit, [-1, 1])
            return proj_vector_for_ctx, vector_logit
Example No. 19
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # pylint: disable=protected-access
        self._encoders_tensors = [e._attention_tensor for e in self._encoders]
        self._encoders_masks = [e._attention_mask for e in self._encoders]
        # pylint: enable=protected-access

        for e_m in self._encoders_masks:
            assert_shape(e_m, [-1, -1])

        for e_t in self._encoders_tensors:
            assert_shape(e_t, [-1, -1, -1])

        with tf.variable_scope(self.scope):
            self.encoder_projections_for_logits = \
                self.get_encoder_projections("logits_projections")

            self.encoder_attn_biases = [
                tf.get_variable(name="attn_bias_{}".format(i),
                                shape=[],
                                initializer=tf.constant_initializer(0.))
                for i in range(len(self._encoders_tensors))]

            if self._share_projections:
                self.encoder_projections_for_ctx = \
                    self.encoder_projections_for_logits
            else:
                self.encoder_projections_for_ctx = \
                    self.get_encoder_projections("context_projections")

            if self._use_sentinels:
                self._encoders_masks.append(
                    tf.ones([tf.shape(self._encoders_masks[0])[0], 1]))

            self.masks_concat = tf.concat(self._encoders_masks, 1)
Example No. 20
    def attention(
        self, query: tf.Tensor, decoder_prev_state: tf.Tensor,
        decoder_input: tf.Tensor, loop_state: HierarchicalLoopState
    ) -> Tuple[tf.Tensor, HierarchicalLoopState]:

        with tf.variable_scope(self.att_scope_name):
            projected_state = tf.layers.dense(query, self.attention_state_size)
            projected_state = tf.expand_dims(projected_state, 1)

            assert_shape(projected_state, [-1, 1, self.attention_state_size])
            attn_ctx_vectors, child_loop_states = zip(*[
                a.attention(query, decoder_prev_state, decoder_input, ls)
                for a, ls in zip(self.attentions, loop_state.child_loop_states)
            ])

            proj_ctxs, attn_logits = [
                list(t) for t in zip(*[
                    self._vector_logit(projected_state,
                                       ctx_vec,
                                       scope=att.name)  # type: ignore
                    for ctx_vec, att in zip(attn_ctx_vectors, self.attentions)
                ])
            ]

            if self._use_sentinels:
                sentinel_value = _sentinel(query, decoder_prev_state,
                                           decoder_input)
                proj_sentinel, sentinel_logit = self._vector_logit(
                    projected_state, sentinel_value, scope="sentinel")
                proj_ctxs.append(proj_sentinel)
                attn_logits.append(sentinel_logit)

            attention_distr = tf.nn.softmax(tf.concat(attn_logits, 1))
            self.attentions_in_time.append(attention_distr)

            if self._share_projections:
                output_cxts = proj_ctxs
            else:
                output_cxts = [
                    tf.expand_dims(
                        tf.layers.dense(ctx_vec,
                                        self.attention_state_size,
                                        name="proj_attn_{}".format(att.name)),
                        1)  # type: ignore
                    for ctx_vec, att in zip(attn_ctx_vectors, self.attentions)
                ]
                if self._use_sentinels:
                    output_cxts.append(
                        tf.expand_dims(
                            tf.layers.dense(sentinel_value,
                                            self.attention_state_size,
                                            name="proj_sentinel"), 1))

            projections_concat = tf.concat(output_cxts, 1)
            context = tf.reduce_sum(
                tf.expand_dims(attention_distr, 2) * projections_concat, [1])

            prev_loop_state = loop_state.loop_state

            next_contexts = tf.concat(
                [prev_loop_state.contexts,
                 tf.expand_dims(context, 0)], axis=0)
            next_weights = tf.concat(
                [prev_loop_state.weights,
                 tf.expand_dims(attention_distr, 0)],
                axis=0)

            next_loop_state = AttentionLoopState(contexts=next_contexts,
                                                 weights=next_weights)

            next_hier_loop_state = HierarchicalLoopState(
                child_loop_states=list(child_loop_states),
                loop_state=next_loop_state)

            return context, next_hier_loop_state
Example No. 21
    def __init__(self, cell, mask, scale) -> None:
        self._cell = cell
        self._mask = mask
        assert_shape(mask, [None, cell.state_size])
        self._scale = scale
Example No. 22
    def __init__(self,
                 name: str,
                 data_id: str,
                 convolutions: List[Tuple[int, int, Optional[int]]],
                 image_height: int,
                 image_width: int,
                 pixel_dim: int,
                 fully_connected: Optional[List[int]] = None,
                 batch_normalization: bool = True,
                 local_response_normalization: bool = True,
                 dropout_keep_prob: float = 0.5,
                 attention_type: Type = Attention,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        """Initialize a convolutional network for image processing.

        Args:
            convolutions: Configuration of convolutional layers. It is a list
                of triplets of integers where the values are: size of the
                convolutional window, number of convolutional filters, and size
                of max-pooling window. If the max-pooling size is set to None,
                no pooling is performed.
            data_id: Identifier of the data series in the dataset.
            image_height: Height of the input image in pixels.
            image_width: Width of the image.
            pixel_dim: Number of color channels in the input images.
            batch_normalization: Flag indicating whether to use batch
                normalization between the convolutional layers.
            local_response_normalization: Flag indicating whether to use local
                response normalization between the convolutional layers.
            dropout_keep_prob: Probability of keeping neurons active in
                dropout. Dropout is applied between all convolutional layers
                and the fully connected layers.
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self, attention_type)

        self.data_id = data_id
        self.dropout_keep_prob = dropout_keep_prob

        with self.use_scope():
            self.dropout_placeholder = tf.placeholder(tf.float32,
                                                      name="dropout")
            self.train_mode = tf.placeholder(tf.bool,
                                             shape=[],
                                             name="mode_placeholder")
            self.input_op = tf.placeholder(tf.float32,
                                           shape=(None, image_height,
                                                  image_width, pixel_dim),
                                           name="input_images")

            self.padding_masks = tf.placeholder(tf.float32,
                                                shape=(None, image_height,
                                                       image_width, 1),
                                                name="padding_masks")

            last_layer = self.input_op
            last_padding_masks = self.padding_masks

            self.image_processing_layers = []  # type: List[tf.Tensor]

            with tf.variable_scope("convolutions"):
                for i, (filter_size, n_filters,
                        pool_size) in enumerate(convolutions):
                    with tf.variable_scope("cnn_layer_{}".format(i)):
                        last_layer = conv2d(last_layer, n_filters, filter_size)
                        self.image_processing_layers.append(last_layer)

                        if pool_size:
                            last_layer = max_pool2d(last_layer, pool_size)
                            self.image_processing_layers.append(last_layer)
                            last_padding_masks = max_pool2d(
                                last_padding_masks, pool_size)

                        if local_response_normalization:
                            last_layer = tf.nn.local_response_normalization(
                                last_layer)

                        if batch_normalization:
                            last_layer = batch_norm(
                                last_layer, is_training=self.train_mode)

                        last_layer = dropout(last_layer, dropout_keep_prob,
                                             self.train_mode)

                # last_layer shape is batch X height X width X channels
                last_layer = last_layer * last_padding_masks

            # pylint: disable=no-member
            last_height, last_width, last_n_channels = [
                s.value for s in last_layer.get_shape()[1:]
            ]
            # pylint: enable=no-member

            if fully_connected is None:
                # we average over the image size -> shape is [batch, number of
                # channels from the last convolution]
                self.encoded = tf.reduce_mean(last_layer, [1, 2])
                assert_shape(self.encoded, [None, convolutions[-1][1]])
            else:
                last_layer_flat = tf.reshape(
                    last_layer,
                    [-1, last_width * last_height * last_n_channels])
                self.encoded = multilayer_projection(
                    last_layer_flat,
                    fully_connected,
                    activation=tf.nn.relu,
                    dropout_plc=self.dropout_placeholder)

            self.__attention_tensor = tf.reshape(
                last_layer, [-1, last_width * last_height, last_n_channels])

            self.__attention_mask = tf.reshape(last_padding_masks,
                                               [-1, last_width * last_height])
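
For reference, the convolutions argument documented above is a list of (filter size, number of filters, max-pooling size) triplets. A hypothetical configuration, not taken from any particular experiment:

# Two 3x3 convolutional layers with 32 and 64 filters; the first is
# followed by 2x2 max-pooling, the second by no pooling.
convolutions = [(3, 32, 2), (3, 64, None)]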
Example No. 23
    def _create_encoder_graph(self):
        self.dropout_placeholder = tf.placeholder(tf.float32, name="dropout")
        self.is_training = tf.placeholder(tf.bool, name="is_training")

        self.padding_weights = [
            tf.placeholder(tf.float32, shape=[None], name="input_{}".format(i))
            for i in range(self.max_input_len + 2)
        ]

        sentence_lengths = tf.to_int64(sum(self.padding_weights))

        self.factor_inputs = {}
        factors = []

        for data_id, vocabulary, embedding_size in zip(self.data_ids,
                                                       self.vocabularies,
                                                       self.embedding_sizes):
            # Create data placeholders. The tensors' length is max_input_len+2
            # because we add explicit start and end symbols.
            prefix = ""
            if len(self.data_ids) > 1:
                prefix = "{}_".format(data_id)

            names = [
                "{}input_{}".format(prefix, i)
                for i in range(self.max_input_len + 2)
            ]

            inputs = [
                tf.placeholder(tf.int32, shape=[None], name=n) for n in names
            ]

            # Create embeddings for this factor and embed the placeholders
            # NOTE the initialization
            embeddings = tf.get_variable(
                "word_embeddings",
                shape=[len(vocabulary), embedding_size],
                initializer=tf.random_normal_initializer(stddev=0.01))

            embedded_inputs = [
                tf.nn.embedding_lookup(embeddings, i) for i in inputs
            ]

            dropped_embedded_inputs = [
                tf.nn.dropout(i, self.dropout_placeholder)
                for i in embedded_inputs
            ]

            # Resulting shape is batch x embedding_size
            assert_shape(dropped_embedded_inputs, [None, embedding_size])
            factors.append(dropped_embedded_inputs)

            # Add inputs and weights to self to be able to feed them
            self.factor_inputs[data_id] = inputs

        # Concatenate all embedded factors into one tensor
        # Resulting shape is batch x sum(embedding_size)

        # factors is a 2D list of embeddings of dims [factor-type, time-step]
        # by doing zip(*factors), we get a list of (factor-type) embedding
        # tuples indexed by the time step
        concatenated_factors = [
            tf.concat(1, related_factors) for related_factors in zip(*factors)
        ]
        assert_shape(concatenated_factors[0],
                     [None, sum(self.embedding_sizes)])
        forward_gru, backward_gru = self._get_birnn_cells()

        bidi_layer = BidirectionalRNNLayer(forward_gru, backward_gru,
                                           concatenated_factors,
                                           sentence_lengths)

        self.outputs_bidi = bidi_layer.outputs_bidi
        self.encoded = bidi_layer.encoded

        self.__attention_tensor = tf.concat(
            1, [tf.expand_dims(o, 1) for o in self.outputs_bidi])
        self.__attention_tensor = tf.nn.dropout(self.__attention_tensor,
                                                self.dropout_placeholder)
        self.__attention_mask = tf.concat(
            1, [tf.expand_dims(w, 1) for w in self.padding_weights])
Example No. 24
    def _encoders_tensors(self) -> List[tf.Tensor]:
        tensors = [get_attention_states(e) for e in self._encoders]
        for e_t in tensors:
            assert_shape(e_t, [-1, -1, -1])
        return tensors
Example No. 25
    def __init__(self, cell, mask, scale):
        self._cell = cell
        self._mask = mask
        assert_shape(mask, [None, cell.state_size])
        self._scale = scale
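
Taken together, the __init__ fragment above and the __call__ fragment shown in Examples No. 4 and 6 describe a wrapper that applies a fixed dropout mask to the recurrent state. A minimal reconstruction; the class name is an assumption (in practice such a wrapper would subclass the framework's RNNCell), and only the __init__ and __call__ bodies come from the examples on this page:

class StateDropoutWrapper:
    """Sketch of a recurrent cell wrapper that drops out the state."""

    def __init__(self, cell, mask, scale) -> None:
        self._cell = cell
        self._mask = mask
        assert_shape(mask, [None, cell.state_size])
        self._scale = scale

    def __call__(self, inputs, state, scope=None):
        output, new_state = self._cell(inputs, state)
        # The mask has shape [batch_size, state_size] and zeroes out
        # dropped units; the scale compensates for the dropped mass.
        new_state_dropped = new_state * self._scale * self._mask
        assert_shape(new_state_dropped, [None, self._cell.state_size])
        return output, new_state_dropped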
Example No. 26
    def attention(self,
                  query: tf.Tensor,
                  decoder_prev_state: tf.Tensor,
                  decoder_input: tf.Tensor,
                  loop_state: HierarchicalLoopState) -> Tuple[
                      tf.Tensor, HierarchicalLoopState]:

        with tf.variable_scope(self.att_scope_name):
            projected_state = tf.layers.dense(query, self.attention_state_size)
            projected_state = tf.expand_dims(projected_state, 1)

            assert_shape(projected_state, [-1, 1, self.attention_state_size])
            attn_ctx_vectors, child_loop_states = zip(*[
                a.attention(query, decoder_prev_state, decoder_input, ls)
                for a, ls in zip(self.attentions,
                                 loop_state.child_loop_states)])

            proj_ctxs, attn_logits = [list(t) for t in zip(*[
                self._vector_logit(projected_state,
                                   ctx_vec, scope=att.name)  # type: ignore
                for ctx_vec, att in zip(attn_ctx_vectors, self.attentions)])]

            if self._use_sentinels:
                sentinel_value = _sentinel(query,
                                           decoder_prev_state,
                                           decoder_input)
                proj_sentinel, sentinel_logit = self._vector_logit(
                    projected_state, sentinel_value, scope="sentinel")
                proj_ctxs.append(proj_sentinel)
                attn_logits.append(sentinel_logit)

            attention_distr = tf.nn.softmax(tf.concat(attn_logits, 1))
            self.attentions_in_time.append(attention_distr)

            if self._share_projections:
                output_cxts = proj_ctxs
            else:
                output_cxts = [
                    tf.expand_dims(
                        tf.layers.dense(ctx_vec, self.attention_state_size,
                                        name="proj_attn_{}".format(
                                            att.name)), 1)  # type: ignore
                    for ctx_vec, att in zip(attn_ctx_vectors, self.attentions)]
                if self._use_sentinels:
                    output_cxts.append(tf.expand_dims(
                        tf.layers.dense(
                            sentinel_value, self.attention_state_size,
                            name="proj_sentinel"), 1))

            projections_concat = tf.concat(output_cxts, 1)
            context = tf.reduce_sum(
                tf.expand_dims(attention_distr, 2) * projections_concat, [1])

            prev_loop_state = loop_state.loop_state

            next_contexts = tf.concat(
                [prev_loop_state.contexts, tf.expand_dims(context, 0)], axis=0)
            next_weights = tf.concat(
                [prev_loop_state.weights, tf.expand_dims(attention_distr, 0)],
                axis=0)

            next_loop_state = AttentionLoopState(
                contexts=next_contexts,
                weights=next_weights)

            next_hier_loop_state = HierarchicalLoopState(
                child_loop_states=list(child_loop_states),
                loop_state=next_loop_state)

            return context, next_hier_loop_state
Example No. 27
    def __init__(self,
                 name: str,
                 data_id: str,
                 convolutions: List[Tuple[int, int, Optional[int]]],
                 image_height: int,
                 image_width: int,
                 pixel_dim: int,
                 batch_normalization: bool = True,
                 local_response_normalization: bool = True,
                 dropout_keep_prob: float = 0.5,
                 attention_type: Type = Attention,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        """Initialize a convolutional network for image processing.

        Args:
            convolutions (list): Configuration of convolutional layers. It is
                a list of triplets of integers where the values are: size of
                the convolutional window, number of convolutional filters, and
                size of the max-pooling window. If the max-pooling size is set
                to None, no pooling is performed.
            data_id: Identifier of the data series in the dataset.
            image_height: Height of the input image in pixels.
            image_width: Width of the input images in pixels (padded).
            pixel_dim: Number of color channels in the input images.
            batch_normalization: Flag indicating whether to use batch
                normalization between the convolutional layers.
            local_response_normalization: Flag indicating whether to use local
                response normalization between the convolutional layers.
            dropout_keep_prob: Probability of keeping neurons active in
                dropout.

        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self, attention_type)

        self.convolutions = convolutions
        self.data_id = data_id
        self.image_height = image_height
        self.image_width = image_width
        self.pixel_dim = pixel_dim
        self.dropout_keep_prob = dropout_keep_prob

        with tf.variable_scope(name):
            self.dropout_placeholder = tf.placeholder(tf.float32,
                                                      name="dropout")
            self.is_training = tf.placeholder(tf.bool, name="is_training")
            self.input_op = tf.placeholder(tf.float32,
                                           shape=(None, image_height,
                                                  image_width, pixel_dim),
                                           name="input_images")

            self.padding_masks = tf.placeholder(tf.float32,
                                                shape=(None, image_height,
                                                       image_width, 1),
                                                name="padding_masks")

            last_layer = self.input_op
            last_padding_masks = self.padding_masks
            last_n_channels = pixel_dim

            self.is_training = tf.placeholder(tf.bool, name="is_training")
            self.image_processing_layers = []  # type: List[tf.Tensor]

            with tf.variable_scope("convolutions"):
                for i, (filter_size, n_filters,
                        pool_size) in enumerate(convolutions):
                    with tf.variable_scope("cnn_layer_{}".format(i)):
                        last_layer = _convolution(last_layer, last_n_channels,
                                                  filter_size, n_filters)
                        last_n_channels = n_filters
                        self.image_processing_layers.append(last_layer)

                        if pool_size:
                            # TODO do the pooling properly
                            last_layer = tf.nn.max_pool(
                                last_layer, [1, 2, 2, 1], [1, 2, 2, 1], "SAME")
                            last_padding_masks = tf.nn.max_pool(
                                last_padding_masks, [1, 2, 2, 1], [1, 2, 2, 1],
                                "SAME")
                            self.image_processing_layers.append(last_layer)
                            assert image_height % 2 == 0
                            image_height //= 2
                            assert image_width % 2 == 0
                            image_width //= 2

                        if local_response_normalization:
                            last_layer = tf.nn.local_response_normalization(
                                last_layer)

                        if batch_normalization:
                            last_layer = _batch_norm(last_layer, n_filters,
                                                     self.is_training)

                        last_layer = tf.nn.dropout(
                            last_layer, keep_prob=self.dropout_placeholder)

                # last_layer shape is batch X height X width X channels
                last_layer = last_layer * last_padding_masks

            # we average over the image size -> shape is [batch, number of
            # channels from the last convolution]
            self.encoded = tf.reduce_mean(last_layer, [1, 2])
            # TODO assert shape
            assert_shape(self.encoded, [None, self.convolutions[-1][1]])

            self.__attention_tensor = tf.reshape(
                last_layer, [-1, image_width, last_n_channels * image_height])

            self.__attention_mask = tf.squeeze(
                tf.reduce_prod(last_padding_masks, [1]), [2])
Example No. 28
    def _encoders_tensors(self) -> List[tf.Tensor]:
        tensors = [get_attention_states(e) for e in self._encoders]
        for e_t in tensors:
            assert_shape(e_t, [-1, -1, -1])
        return tensors