def highway_layer(self) -> tf.Tensor:
     """Highway net projection following the CNN.

     Flattens ``self.cnn_encoded`` to two dimensions, feeds it through
     ``self.highway_depth`` successive ``highway`` calls (each in its own
     ``highway_layer_<i>`` scope) and reshapes the result back to three
     dimensions.

     The batch dimension is read from ``self.cnn_encoded`` at run time, so
     the method relies only on ``self.cnn_encoded`` and
     ``self.highway_depth`` and not on a separately maintained batch-size
     attribute.

     Returns:
         The projected tensor reshaped to ``[batch, -1, cnn_out_size]``,
         where ``cnn_out_size`` is the static size of the last axis of
         ``self.cnn_encoded``.
     """
     # pylint: disable=no-member
     cnn_out_size = self.cnn_encoded.get_shape().as_list()[-1]
     # Dynamic batch size of the very tensor being reshaped; this keeps the
     # final reshape consistent with the flattened input by construction.
     batch_size = tf.shape(self.cnn_encoded)[0]

     # Merge all leading axes so every position is projected independently.
     projected = tf.reshape(self.cnn_encoded, [-1, cnn_out_size])
     for i in range(self.highway_depth):
         projected = highway(projected, scope=("highway_layer_%s" % i))

     # Restore the batch axis; the middle axis is inferred.
     return tf.reshape(projected, [batch_size, -1, cnn_out_size])
    def __init__(self,
                 name: str,
                 vocabulary: Vocabulary,
                 data_id: str,
                 embedding_size: int,
                 segment_size: int,
                 highway_depth: int,
                 rnn_size: int,
                 filters: List[Tuple[int, int]],
                 max_input_len: Optional[int] = None,
                 dropout_keep_prob: float = 1.0,
                 attention_type: Optional[Any] = None,
                 attention_fertility: int = 3,
                 use_noisy_activations: bool = False,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        """Create a new instance of the sentence encoder.

        The constructor builds the whole encoder graph: input embedding,
        one convolution + ReLU + segment-wise max-pooling branch per filter
        specification, a stack of highway layers, and a bidirectional RNN
        over the pooled segments.

        Arguments:
            name: An unique identifier for this encoder
            vocabulary: Input vocabulary
            data_id: Identifier of the data series fed to this encoder
            embedding_size: The size of the embedding vector assigned
                to each word
            segment_size: The size of the segments over which we apply
                max-pooling. Must be positive.
            highway_depth: Depth of the highway layer.
            rnn_size: The size of the encoder's hidden state. Note
                that the actual encoder output state size will be
                twice as long because it is the result of
                concatenation of forward and backward hidden states.
            filters: Specification of CNN filters. It is a list of tuples
                specifying the filter size and number of channels.
                Must not be empty.

        Keyword arguments:
            max_input_len: Maximum length of an encoded sequence
                (default None); must be positive when given
            dropout_keep_prob: The dropout keep probability
                (default 1.0)
            attention_type: The class that is used for creating
                attention mechanism (default None)
            attention_fertility: Fertility parameter used with
                CoverageAttention (default 3).
            use_noisy_activations: Stored on the instance; it is not read
                by this constructor itself (default False).
            save_checkpoint: Forwarded to ``ModelPart.__init__``
                (default None).
            load_checkpoint: Forwarded to ``ModelPart.__init__``
                (default None).

        Raises:
            ValueError: If ``max_input_len`` or ``segment_size`` is not
                positive, or if ``filters`` is empty.
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self,
                           attention_type,
                           attention_fertility=attention_fertility)

        assert check_argument_types()

        # Reject unusable hyper-parameters before any graph is built, so the
        # caller gets a clear message instead of an opaque TensorFlow error.
        if max_input_len is not None and max_input_len <= 0:
            raise ValueError("Input length must be a positive integer.")
        if segment_size <= 0:
            # segment_size is both the pooling window/stride and the divisor
            # used to derive the RNN sequence lengths below.
            raise ValueError("Segment size must be a positive integer.")
        if not filters:
            # Without any filter there would be nothing to concatenate.
            raise ValueError("At least one CNN filter must be specified.")

        self.vocabulary = vocabulary
        self.data_id = data_id

        self.max_input_len = max_input_len
        self.embedding_size = embedding_size
        self.segment_size = segment_size
        self.highway_depth = highway_depth
        self.rnn_size = rnn_size
        self.filters = filters
        self.dropout_keep_p = dropout_keep_prob
        self.use_noisy_activations = use_noisy_activations

        log("Initializing sentence encoder, name: '{}'".format(self.name))

        with self.use_scope():
            self._create_input_placeholders()
            with tf.variable_scope('input_projection'):
                self._create_embedding_matrix()
                embedded_inputs = self._embed(self.inputs)  # type: tf.Tensor
                self.embedded_inputs = embedded_inputs

            # CNN Network: one convolution + pooling branch per filter spec.
            pooled_outputs = []
            for filter_size, num_filters in self.filters:
                with tf.variable_scope("conv-maxpool-%s" % filter_size):
                    # conv1d kernel layout: [width, in_channels, out_channels]
                    filter_shape = [filter_size, embedding_size, num_filters]
                    w_filter = tf.get_variable(
                        "conv_W",
                        filter_shape,
                        initializer=tf.random_uniform_initializer(-0.5, 0.5))
                    b_filter = tf.get_variable(
                        "conv_bias", [num_filters],
                        initializer=tf.constant_initializer(0.0))
                    conv = tf.nn.conv1d(embedded_inputs,
                                        w_filter,
                                        stride=1,
                                        padding="SAME",
                                        name="conv")

                    # Apply nonlinearity
                    conv_relu = tf.nn.relu(tf.nn.bias_add(conv, b_filter))

                    # Max-pooling over the output segments. max_pool needs a
                    # 4-D input, hence the temporary trailing axis; the
                    # window and stride act on axis 1 only.
                    expanded_conv_relu = tf.expand_dims(conv_relu, -1)
                    pooled = tf.nn.max_pool(
                        expanded_conv_relu,
                        ksize=[1, self.segment_size, 1, 1],
                        strides=[1, self.segment_size, 1, 1],
                        padding="SAME",
                        name="maxpool")
                    pooled_outputs.append(pooled)

            # Combine all the pooled features along the channel axis and
            # drop the temporary trailing axis added for max_pool.
            self.cnn_encoded = tf.concat(pooled_outputs, axis=2)
            self.cnn_encoded = tf.squeeze(self.cnn_encoded, [3])

            # Highway Network: applied on a 2-D view, then reshaped back.
            batch_size = tf.shape(self.cnn_encoded)[0]
            # pylint: disable=no-member
            cnn_out_size = self.cnn_encoded.get_shape().as_list()[-1]
            highway_layer = tf.reshape(self.cnn_encoded, [-1, cnn_out_size])
            for i in range(self.highway_depth):
                highway_layer = highway(highway_layer,
                                        scope=("highway_layer_%s" % i))
            highway_layer = tf.reshape(highway_layer,
                                       [batch_size, -1, cnn_out_size])

            # BiRNN Network
            fw_cell, bw_cell = self.rnn_cells()  # type: RNNCellTuple
            # SAME-padded pooling with stride segment_size leaves
            # ceil(length / segment_size) steps, so the RNN sequence
            # lengths are scaled down the same way.
            seq_lens = tf.ceil(
                tf.divide(self.sentence_lengths, self.segment_size))
            seq_lens = tf.cast(seq_lens, tf.int32)
            outputs_bidi_tup, encoded_tup = tf.nn.bidirectional_dynamic_rnn(
                fw_cell,
                bw_cell,
                highway_layer,
                sequence_length=seq_lens,
                dtype=tf.float32)

            # Forward and backward outputs concatenated on the feature axis.
            self.hidden_states = tf.concat(outputs_bidi_tup, 2)

            with tf.variable_scope('attention_tensor'):
                # Double leading underscore: the attribute name is mangled
                # with the enclosing class name.
                self.__attention_tensor = self._dropout(self.hidden_states)

            # Final forward and backward states concatenated.
            self.encoded = tf.concat(encoded_tup, 1)

        log("Sentence encoder initialized")