Example #1
    def __init__(self,
                 name: str,
                 input_sequence: EmbeddedSequence,
                 conv_features: int,
                 encoder_layers: int,
                 kernel_width: int = 5,
                 dropout_keep_prob: float = 1.0,
                 attention_type: type = None,
                 attention_state_size: int = None,
                 attention_fertility: int = 3,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None) -> None:

        assert check_argument_types()

        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self,
                           attention_type,
                           attention_state_size=attention_state_size,
                           attention_fertility=attention_fertility)

        self.input_sequence = input_sequence
        self.encoder_layers = encoder_layers
        self.conv_features = conv_features
        self.kernel_width = kernel_width
        self.dropout_keep_prob = dropout_keep_prob

        if conv_features <= 0:
            raise ValueError("Number of features must be a positive integer.")
        if encoder_layers <= 0:
            raise ValueError(
                "Number of encoder layers must be a positive integer.")

        log("Initializing convolutional seq2seq encoder, name {}".format(
            self.name))
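A minimal instantiation sketch for the constructor above. The class name (`ConvS2SEncoder`) and the pre-built `embedded_sequence` are assumptions; only the keyword arguments come from the signature.

    # Hedged sketch: "ConvS2SEncoder" is a placeholder for the class that
    # defines the __init__ above; "embedded_sequence" is an EmbeddedSequence
    # built elsewhere.
    encoder = ConvS2SEncoder(
        name="convs2s_encoder",
        input_sequence=embedded_sequence,
        conv_features=512,        # must be a positive integer
        encoder_layers=6,         # must be a positive integer
        kernel_width=5,
        dropout_keep_prob=0.9)    # attention_type left as None: no attention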
Example #2
    def __init__(self,
                 name: str,
                 encoders: List[Any],
                 attention_type: Type,
                 attention_state_size: int,
                 use_sentinels: bool = False,
                 share_attn_projections: bool = False) -> None:
        """Initialize the encoder wrapper.

        Args:
            name: Name of the encoder / its scope.
            encoders: List of encoders to be wrapped.
            attention_type: Type of the attention combination.
            attention_state_size: Dimension of the state projection of
                attention energy computation.
            use_sentinels: Flag whether the sentinel mechanism should be added
                to the attention combination.
            share_attn_projections: Flag whether the hidden state projection
                should be shared for both the energy computation and the
                context vector computation.
        """

        ModelPart.__init__(self, name, None, None)
        Attentive.__init__(self, attention_type)
        self.encoders = encoders
        self._attention_type = attention_type
        self._attention_state_size = attention_state_size
        self._use_sentinels = use_sentinels
        self._share_attn_projections = share_attn_projections

        self.encoded = tf.concat([e.encoded for e in encoders], 1)
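A sketch of wrapping two previously built encoders. The class name follows the docstring wording; the attention-combination class and the wrapped encoders are placeholders.

    # Hedged sketch: "EncoderWrapper" follows the docstring wording;
    # "FlatCombination" stands for whatever attention-combination class
    # your setup provides, and encoder_a/encoder_b are existing encoders.
    wrapper = EncoderWrapper(
        name="encoder_wrapper",
        encoders=[encoder_a, encoder_b],
        attention_type=FlatCombination,
        attention_state_size=256,
        use_sentinels=True,
        share_attn_projections=False)
    # wrapper.encoded concatenates the wrapped encoders' states along axis 1.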
Example #3
    def __init__(self,
                 name: str,
                 input_sequence: Sequence,
                 rnn_size: int,
                 dropout_keep_prob: float = 1.0,
                 rnn_cell: str = "GRU",
                 attention_type: type = None,
                 attention_state_size: int = None,
                 attention_fertility: int = 3,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None) -> None:
        """Create a new instance of a recurrent encoder."""
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self,
                           attention_type,
                           attention_state_size=attention_state_size,
                           attention_fertility=attention_fertility)
        check_argument_types()

        self.input_sequence = input_sequence
        self.rnn_size = rnn_size
        self.dropout_keep_prob = dropout_keep_prob
        self.rnn_cell_str = rnn_cell

        if self.rnn_size <= 0:
            raise ValueError("RNN size must be a positive integer.")

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")

        if self.rnn_cell_str not in RNN_CELL_TYPES:
            raise ValueError("RNN cell must be either 'GRU' or 'LSTM'.")
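A usage sketch for this recurrent encoder over a pre-built `Sequence`; both the class name and the input object are placeholders.

    # Hedged sketch: "RecurrentEncoder" is a placeholder for the class that
    # defines the __init__ above; "input_sequence" is a Sequence instance.
    encoder = RecurrentEncoder(
        name="recurrent_encoder",
        input_sequence=input_sequence,
        rnn_size=512,             # must be a positive integer
        dropout_keep_prob=0.8,    # must lie in (0, 1]
        rnn_cell="LSTM")          # only "GRU" or "LSTM" are accepted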
Example #4
    def __init__(self,
                 name: str,
                 max_input_len: int,
                 vocabularies: List[Vocabulary],
                 data_ids: List[str],
                 embedding_sizes: List[int],
                 rnn_size: int,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None,
                 **kwargs) -> None:
        """Construct a new instance of the factored encoder.

        Args:
            max_input_len: Maximum input length (longer sequences are trimmed)
            vocabularies: List of vocabularies, one for each data series
            data_ids: List of data series IDs
            embedding_sizes: List of embedding sizes for each data series
            name: The name for this encoder. [sentence_encoder]
            rnn_size: The size of the hidden state

        Keyword arguments:
            use_noisy_activations: Boolean flag whether to use noisy activation
                                   functions in RNN cells.
                                   (see neuralmonkey.nn.noisy_gru_cell) [False]
            attention_type: The attention to use. [None]
            attention_fertility: Fertility for CoverageAttention (if used). [3]
            dropout_keep_prob: 1 - Dropout probability [1]
        """
        attention_type = kwargs.get("attention_type", None)
        Attentive.__init__(self, attention_type)
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

        assert check_argument_types()

        self.vocabularies = vocabularies
        self.data_ids = data_ids
        self.embedding_sizes = embedding_sizes

        self.max_input_len = max_input_len
        self.rnn_size = rnn_size

        self.dropout_keep_prob = kwargs.get("dropout_keep_prob", 1)

        self.use_noisy_activations = kwargs.get("use_noisy_activations", False)

        log("Building encoder graph, name: '{}'.".format(self.name))
        with tf.variable_scope(self.name):
            self._create_encoder_graph()
            log("Encoder graph constructed.")
Example #5
    def __init__(self,
                 name: str,
                 max_input_len: int,
                 vocabularies: List[Vocabulary],
                 data_ids: List[str],
                 embedding_sizes: List[int],
                 rnn_size: int,
                 dropout_keep_prob: float = 1.0,
                 attention_type: Optional[Any] = None,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        """Construct a new instance of the factored encoder.

        Args:
            max_input_len: Maximum input length (longer sequences are trimmed)
            vocabularies: List of vocabularies, one for each data series
            data_ids: List of data series IDs
            embedding_sizes: List of embedding sizes for each data series
            name: The name for this encoder. [sentence_encoder]
            rnn_size: The size of the hidden state

        Keyword arguments:
            attention_type: The attention to use. [None]
            attention_fertility: Fertility for CoverageAttention (if used). [3]
            dropout_keep_prob: 1 - Dropout probability [1]
        """
        Attentive.__init__(self, attention_type)
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

        assert check_argument_types()

        self.vocabularies = vocabularies
        self.data_ids = data_ids
        self.embedding_sizes = embedding_sizes

        self.max_input_len = max_input_len
        self.rnn_size = rnn_size

        self.dropout_keep_prob = dropout_keep_prob

        log("Building encoder graph, name: '{}'.".format(self.name))
        with self.use_scope():
            self._create_encoder_graph()
            log("Encoder graph constructed.")
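A sketch of constructing the factored encoder. The class name is an assumption, and the three lists must stay aligned, one entry per factor.

    # Hedged sketch: "FactoredEncoder" is a placeholder class name; the
    # vocabularies are populated Vocabulary instances built elsewhere.
    encoder = FactoredEncoder(
        name="factored_encoder",
        max_input_len=50,                      # longer sequences are trimmed
        vocabularies=[word_vocab, pos_vocab],
        data_ids=["source", "source_pos"],
        embedding_sizes=[300, 30],
        rnn_size=256,
        dropout_keep_prob=0.8)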
Example #6
    def __init__(self,
                 name: str,
                 input_shape: List[int],
                 output_shape: int,
                 data_id: str,
                 dropout_keep_prob: float = 1.0,
                 attention_type=None,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        assert len(input_shape) == 3
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self, attention_type)

        self.input_shape = input_shape
        self.output_shape = output_shape
        self.data_id = data_id
        self.dropout_keep_prob = dropout_keep_prob
        self.attention_type = attention_type

        with tf.variable_scope(self.name):
            self.dropout_placeholder = tf.placeholder(tf.float32)
            features_shape = [None] + input_shape  # type: ignore
            self.image_features = tf.placeholder(tf.float32,
                                                 shape=features_shape,
                                                 name="image_input")

            self.flat = tf.reduce_mean(self.image_features,
                                       axis=[1, 2],
                                       name="average_image")
            project_w = tf.get_variable(
                name="img_init_proj_W",
                shape=[input_shape[2], output_shape],
                initializer=tf.random_normal_initializer())
            project_b = tf.get_variable(name="img_init_b",
                                        shape=[output_shape],
                                        initializer=tf.zeros_initializer())

            self.encoded = tf.tanh(tf.matmul(self.flat, project_w) + project_b)

            self.__attention_tensor = tf.reshape(
                self.image_features,
                [-1, input_shape[0] * input_shape[1], input_shape[2]],
                name="flatten_image")
Example #7
    def __init__(self,
                 name: str,
                 input_shape: List[int],
                 output_shape: int,
                 data_id: str,
                 attention_type: Callable = None,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self, attention_type)
        check_argument_types()

        assert len(input_shape) == 3
        if output_shape <= 0:
            raise ValueError("Output vector dimension must be positive.")

        self.data_id = data_id

        with self.use_scope():
            features_shape = [None] + input_shape  # type: ignore
            self.image_features = tf.placeholder(tf.float32,
                                                 shape=features_shape,
                                                 name="image_input")

            self.flat = tf.reduce_mean(self.image_features,
                                       axis=[1, 2],
                                       name="average_image")
            project_w = tf.get_variable(
                name="img_init_proj_W",
                shape=[input_shape[2], output_shape],
                initializer=tf.random_normal_initializer())
            project_b = tf.get_variable(
                name="img_init_b", shape=[output_shape],
                initializer=tf.zeros_initializer())

            self.encoded = tf.tanh(tf.matmul(self.flat, project_w) + project_b)

            self.__attention_tensor = tf.reshape(
                self.image_features,
                [-1, input_shape[0] * input_shape[1],
                 input_shape[2]],
                name="flatten_image")
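A sketch for the image-vector encoder above, which averages a 3D feature map and projects it to a fixed-size vector. The class name is an assumption.

    # Hedged sketch: "VectorImageEncoder" is a placeholder for the class
    # that defines the __init__ above.
    encoder = VectorImageEncoder(
        name="image_encoder",
        input_shape=[14, 14, 2048],   # height, width, channels of the map
        output_shape=512,             # must be positive
        data_id="images")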
Example #8
    def __init__(self,
                 name: str,
                 data_id: str,
                 convolutions: List[Tuple[int, int, Optional[int]]],
                 image_height: int,
                 image_width: int,
                 pixel_dim: int,
                 fully_connected: Optional[List[int]] = None,
                 dropout_keep_prob: float = 0.5,
                 attention_type: Type = Attention,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        """Initialize a convolutional network for image processing.

        Args:
            convolutions: Configuration of convolutional layers. It is a list
                of triplets of integers where the values are: size of the
                convolutional window, number of convolutional filters, and size
                of max-pooling window. If the max-pooling size is set to None,
                no pooling is performed.
            data_id: Identifier of the data series in the dataset.
            image_height: Height of the input image in pixels.
            image_width: Width of the image.
            pixel_dim: Number of color channels in the input images.
            dropout_keep_prob: Probability of keeping neurons active in
                dropout. Dropout is done between all convolutional layers and
                fully connected layer.
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self, attention_type)

        self.data_id = data_id
        self.dropout_keep_prob = dropout_keep_prob

        self.image_height = image_height
        self.image_width = image_width
        self.pixel_dim = pixel_dim
        self.convolutions = convolutions
        self.fully_connected = fully_connected
Example #9
    def __init__(self,
                 name: str,
                 data_id: str,
                 network_type: str,
                 output_layer: str,
                 attention_type: Type = Attention,
                 fine_tune: bool = False,
                 load_checkpoint: Optional[str] = None,
                 save_checkpoint: Optional[str] = None) -> None:
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self, attention_type)

        self.data_id = data_id
        self._network_type = network_type
        self.input_plc = tf.placeholder(tf.float32,
                                        [None, self.HEIGHT, self.WIDTH, 3])

        if network_type not in SUPPORTED_NETWORKS:
            raise ValueError(
                "Network '{}' is not among the supported ones ({})".format(
                    network_type, ", ".join(SUPPORTED_NETWORKS.keys())))

        scope, net_function = SUPPORTED_NETWORKS[network_type]
        with tf_slim.arg_scope(scope()):
            _, end_points = net_function(self.input_plc)

        with tf.variable_scope(self.name):
            net_output = end_points[output_layer]
            if not fine_tune:
                net_output = tf.stop_gradient(net_output)
            # pylint: disable=no-member
            shape = [s.value for s in net_output.get_shape()[1:]]
            # pylint: enable=no-member
            self.__attention_tensor = tf.reshape(
                net_output, [-1, shape[0] * shape[1], shape[2]])

            self.encoded = tf.reduce_mean(net_output, [1, 2])
Example #10
    def __init__(self,
                 name: str,
                 vocabulary: Vocabulary,
                 data_id: str,
                 embedding_size: int,
                 segment_size: int,
                 highway_depth: int,
                 rnn_size: int,
                 filters: List[Tuple[int, int]],
                 max_input_len: Optional[int] = None,
                 dropout_keep_prob: float = 1.0,
                 attention_type: Optional[Any] = None,
                 attention_fertility: int = 3,
                 use_noisy_activations: bool = False,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        """Create a new instance of the sentence encoder.

        Arguments:
            vocabulary: Input vocabulary
            data_id: Identifier of the data series fed to this encoder
            name: A unique identifier for this encoder
            max_input_len: Maximum length of an encoded sequence
            embedding_size: The size of the embedding vector assigned
                to each word
            segment_size: The size of the segments over which we apply
                max-pooling.
            highway_depth: Depth of the highway layer.
            rnn_size: The size of the encoder's hidden state. Note
                that the actual encoder output state size will be
                twice as long because it is the result of
                concatenation of forward and backward hidden states.
            filters: Specification of CNN filters. It is a list of tuples
                specifying the filter size and number of channels.

        Keyword arguments:
            dropout_keep_prob: The dropout keep probability
                (default 1.0)
            attention_type: The class that is used for creating
                attention mechanism (default None)
            attention_fertility: Fertility parameter used with
                CoverageAttention (default 3).
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self,
                           attention_type,
                           attention_fertility=attention_fertility)
        check_argument_types()

        self.vocabulary = vocabulary
        self.data_id = data_id

        self.max_input_len = max_input_len
        self.embedding_size = embedding_size
        self.segment_size = segment_size
        self.highway_depth = highway_depth
        self.rnn_size = rnn_size
        self.filters = filters
        self.dropout_keep_prob = dropout_keep_prob
        self.use_noisy_activations = use_noisy_activations

        if dropout_keep_prob <= 0. or dropout_keep_prob > 1.:
            raise ValueError(("Dropout keep probability must be "
                              "in (0; 1], was {}").format(dropout_keep_prob))

        if max_input_len is not None and max_input_len <= 0:
            raise ValueError("Input length must be a positive integer.")

        if embedding_size <= 0:
            raise ValueError("Embedding size must be a positive integer.")

        if rnn_size <= 0:
            raise ValueError("RNN size must be a positive integer.")

        if highway_depth <= 0:
            raise ValueError("Highway depth must be a positive integer.")

        if segment_size <= 0:
            raise ValueError("Segment size must be a positive integer.")

        if not filters:
            raise ValueError("You must specify convolutional filters.")

        for filter_size, num_filters in self.filters:
            if filter_size <= 0:
                raise ValueError("Filter size must be a positive integer.")
            if num_filters <= 0:
                raise ValueError("Number of filters must be a positive int.")
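A sketch for the segment-pooled CNN + highway + biRNN encoder. The class name is an assumption; the filter list mirrors the validation above.

    # Hedged sketch: "SentenceCNNEncoder" is a placeholder class name;
    # "vocab" is a populated Vocabulary. Filters are (size, channels) pairs.
    encoder = SentenceCNNEncoder(
        name="cnn_rnn_encoder",
        vocabulary=vocab,
        data_id="source",
        embedding_size=300,
        segment_size=5,               # width of the max-pooled segments
        highway_depth=2,
        rnn_size=256,                 # output state is 2 * rnn_size wide
        filters=[(3, 100), (5, 100)],
        dropout_keep_prob=0.9)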
Example #11
    def __init__(self,
                 name: str,
                 data_id: str,
                 convolutions: List[Tuple[int, int, Optional[int]]],
                 image_height: int,
                 image_width: int,
                 pixel_dim: int,
                 fully_connected: Optional[List[int]] = None,
                 batch_normalization: bool = True,
                 local_response_normalization: bool = True,
                 dropout_keep_prob: float = 0.5,
                 attention_type: Type = Attention,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        """Initialize a convolutional network for image processing.

        Args:
            convolutions: Configuration of convolutional layers. It is a list
                of triplets of integers where the values are: size of the
                convolutional window, number of convolutional filters, and size
                of max-pooling window. If the max-pooling size is set to None,
                no pooling is performed.
            data_id: Identifier of the data series in the dataset.
            image_height: Height of the input image in pixels.
            image_width: Width of the image.
            pixel_dim: Number of color channels in the input images.
            batch_normalization: Flag whether the batch normalization
                should be used between the convolutional layers.
            local_response_normalization: Flag whether to use local
                response normalization between the convolutional layers.
            dropout_keep_prob: Probability of keeping neurons active in
                dropout. Dropout is done between all convolutional layers and
                fully connected layer.
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self, attention_type)

        self.data_id = data_id
        self.dropout_keep_prob = dropout_keep_prob

        with self.use_scope():
            self.dropout_placeholder = tf.placeholder(tf.float32,
                                                      name="dropout")
            self.train_mode = tf.placeholder(tf.bool,
                                             shape=[],
                                             name="mode_placeholder")
            self.input_op = tf.placeholder(tf.float32,
                                           shape=(None, image_height,
                                                  image_width, pixel_dim),
                                           name="input_images")

            self.padding_masks = tf.placeholder(tf.float32,
                                                shape=(None, image_height,
                                                       image_width, 1),
                                                name="padding_masks")

            last_layer = self.input_op
            last_padding_masks = self.padding_masks

            self.image_processing_layers = []  # type: List[tf.Tensor]

            with tf.variable_scope("convolutions"):
                for i, (filter_size, n_filters,
                        pool_size) in enumerate(convolutions):
                    with tf.variable_scope("cnn_layer_{}".format(i)):
                        last_layer = conv2d(last_layer, n_filters, filter_size)
                        self.image_processing_layers.append(last_layer)

                        if pool_size:
                            last_layer = max_pool2d(last_layer, pool_size)
                            self.image_processing_layers.append(last_layer)
                            last_padding_masks = max_pool2d(
                                last_padding_masks, pool_size)

                        if local_response_normalization:
                            last_layer = tf.nn.local_response_normalization(
                                last_layer)

                        if batch_normalization:
                            last_layer = batch_norm(
                                last_layer, is_training=self.train_mode)

                        last_layer = dropout(last_layer, dropout_keep_prob,
                                             self.train_mode)

                # last_layer shape is batch X height X width X channels
                last_layer = last_layer * last_padding_masks

            # pylint: disable=no-member
            last_height, last_width, last_n_channels = [
                s.value for s in last_layer.get_shape()[1:]
            ]
            # pylint: enable=no-member

            if fully_connected is None:
                # we average out by the image size -> shape is number
                # channels from the last convolution
                self.encoded = tf.reduce_mean(last_layer, [1, 2])
                assert_shape(self.encoded, [None, convolutions[-1][1]])
            else:
                last_layer_flat = tf.reshape(
                    last_layer,
                    [-1, last_width * last_height * last_n_channels])
                self.encoded = multilayer_projection(
                    last_layer_flat,
                    fully_connected,
                    activation=tf.nn.relu,
                    dropout_plc=self.dropout_placeholder)

            self.__attention_tensor = tf.reshape(
                last_layer, [-1, last_width * last_height, last_n_channels])

            self.__attention_mask = tf.reshape(last_padding_masks,
                                               [-1, last_width * last_height])
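A sketch for this convolutional image encoder. The class name is an assumption; each convolution triplet follows the docstring.

    # Hedged sketch: "CNNEncoder" is a placeholder class name. Each triplet
    # is (filter_size, n_filters, pool_size); pool_size=None skips pooling.
    encoder = CNNEncoder(
        name="cnn_encoder",
        data_id="images",
        convolutions=[(3, 32, 2), (3, 64, 2), (3, 128, None)],
        image_height=224,
        image_width=224,
        pixel_dim=3,
        fully_connected=[1024],   # None would average the last feature map
        dropout_keep_prob=0.5)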
Example #12
    def __init__(self,
                 name: str,
                 vocabulary: Vocabulary,
                 data_id: str,
                 embedding_size: int,
                 rnn_size: int,
                 attention_state_size: int = None,
                 max_input_len: int = None,
                 dropout_keep_prob: float = 1.0,
                 rnn_cell: str = "GRU",
                 attention_type: type = None,
                 attention_fertility: int = 3,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None) -> None:
        """Create a new instance of the sentence encoder.

        Arguments:
            vocabulary: Input vocabulary
            data_id: Identifier of the data series fed to this encoder
            name: A unique identifier for this encoder
            max_input_len: Maximum length of an encoded sequence
            embedding_size: The size of the embedding vector assigned
                to each word
            rnn_size: The size of the encoder's hidden state. Note
                that the actual encoder output state size will be
                twice as long because it is the result of
                concatenation of forward and backward hidden states.

        Keyword arguments:
            dropout_keep_prob: The dropout keep probability
                (default 1.0)
            attention_type: The class that is used for creating
                attention mechanism (default None)
            attention_state_size: The size of the attention inner state. If
                None, use the size of the encoder hidden state. (default None)
            attention_fertility: Fertility parameter used with
                CoverageAttention (default 3).
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self,
                           attention_type,
                           attention_state_size=attention_state_size,
                           attention_fertility=attention_fertility)

        check_argument_types()

        self.vocabulary = vocabulary
        self.vocabulary_size = len(self.vocabulary)
        self.data_id = data_id
        self.embedding_size = embedding_size
        self.rnn_size = rnn_size

        self.max_input_len = max_input_len
        self.dropout_keep_prob = dropout_keep_prob
        self.rnn_cell_str = rnn_cell

        if self.max_input_len is not None and self.max_input_len <= 0:
            raise ValueError("Input length must be a positive integer.")

        if self.embedding_size <= 0:
            raise ValueError("Embedding size must be a positive integer.")

        if self.rnn_size <= 0:
            raise ValueError("RNN size must be a positive integer.")

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")

        if self.rnn_cell_str not in RNN_CELL_TYPES:
            raise ValueError("RNN cell must be either 'GRU' or 'LSTM'.")
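A sketch for the bidirectional sentence encoder above. The class name is an assumption; note that the output state is twice `rnn_size`.

    # Hedged sketch: "SentenceEncoder" is a placeholder class name; "vocab"
    # is a populated Vocabulary instance.
    encoder = SentenceEncoder(
        name="sentence_encoder",
        vocabulary=vocab,
        data_id="source",
        embedding_size=300,      # must be positive
        rnn_size=512,            # encoder output is 2 * rnn_size wide
        max_input_len=50,        # None disables trimming
        dropout_keep_prob=0.8,   # must lie in (0, 1]
        rnn_cell="GRU")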
Example #13
    def __init__(self,
                 name: str,
                 data_id: str,
                 rnn_size: int,
                 input_dimension: int,
                 max_input_len: Optional[int] = None,
                 dropout_keep_prob: float = 1.0,
                 attention_type: Optional[Any] = None,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        """Create a new instance of the encoder.

        Arguments:
            data_id: Identifier of the data series fed to this encoder
            name: A unique identifier for this encoder
            rnn_size: The size of the encoder's hidden state. Note
                that the actual encoder output state size will be
                twice as long because it is the result of
                concatenation of forward and backward hidden states.

        Keyword arguments:
            dropout_keep_prob: The dropout keep probability
                (default 1.0)
            attention_type: The class that is used for creating
                attention mechanism (default None)
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self, attention_type)

        assert check_argument_types()

        self.data_id = data_id

        self.rnn_size = rnn_size
        self.max_input_len = max_input_len
        self.input_dimension = input_dimension
        self.dropout_keep_p = dropout_keep_prob

        log("Initializing RNN encoder, name: '{}'".format(self.name))

        with tf.variable_scope(self.name):
            self._create_input_placeholders()

            self._input_mask = tf.sequence_mask(self._input_lengths,
                                                dtype=tf.float32)

            fw_cell, bw_cell = self.rnn_cells()  # type: RNNCellTuple
            outputs_bidi_tup, encoded_tup = tf.nn.bidirectional_dynamic_rnn(
                fw_cell,
                bw_cell,
                self.inputs,
                self._input_lengths,
                dtype=tf.float32)

            self.hidden_states = tf.concat(outputs_bidi_tup, 2)

            with tf.variable_scope('attention_tensor'):
                self.__attention_tensor = dropout(self.hidden_states,
                                                  self.dropout_keep_p,
                                                  self.train_mode)

            self.encoded = tf.concat(encoded_tup, 1)

        log("RNN encoder initialized")
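A sketch for the raw-vector RNN encoder, which reads pre-computed feature vectors instead of token IDs. The class name is an assumption.

    # Hedged sketch: "RawRNNEncoder" is a placeholder class name.
    encoder = RawRNNEncoder(
        name="feature_encoder",
        data_id="features",
        rnn_size=256,            # encoder output is 2 * rnn_size wide
        input_dimension=40,      # dimensionality of each input vector
        max_input_len=1000,
        dropout_keep_prob=0.9)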
Example #14
    def __init__(self,
                 name: str,
                 vocabularies: List[Vocabulary],
                 data_ids: List[str],
                 embedding_sizes: List[int],
                 rnn_size: int,
                 attention_state_size: int = None,
                 max_input_len: int = None,
                 dropout_keep_prob: float = 1.0,
                 rnn_cell: str = "GRU",
                 attention_type: type = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None) -> None:
        """Construct a new instance of the factored encoder.

        Args:
            vocabularies: List of vocabularies, one for each data series
            data_ids: List of data series IDs
            embedding_sizes: List of embedding sizes for each data series
            name: The name for this encoder.
            rnn_size: The size of the hidden state

        Keyword arguments:
            attention_state_size: The size of the attention hidden state
            max_input_len: Maximum input length (longer sequences are trimmed)
            attention_type: The attention to use.
            dropout_keep_prob: Dropout keep probability
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self, attention_type,
                           attention_state_size=attention_state_size)

        check_argument_types()

        self.vocabularies = vocabularies
        self.vocabulary_sizes = [len(voc) for voc in self.vocabularies]
        self.data_ids = data_ids
        self.embedding_sizes = embedding_sizes
        self.rnn_size = rnn_size

        self.max_input_len = max_input_len
        self.dropout_keep_prob = dropout_keep_prob
        self.rnn_cell_str = rnn_cell

        if not (len(self.data_ids)
                == len(self.vocabularies)
                == len(self.embedding_sizes)):
            raise ValueError("data_ids, vocabularies, and embedding_sizes "
                             "lists need to have the same length")

        if max_input_len is not None and max_input_len <= 0:
            raise ValueError("Input length must be a positive integer.")

        if any([esize <= 0 for esize in embedding_sizes]):
            raise ValueError("Embedding size must be a positive integer.")

        if rnn_size <= 0:
            raise ValueError("RNN size must be a positive integer.")

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")

        if self.rnn_cell_str not in RNN_CELL_TYPES:
            raise ValueError("RNN cell must be either 'GRU' or 'LSTM'.")
Example #15
    def __init__(self,
                 name: str,
                 data_id: str,
                 network_type: str,
                 attention_layer: Optional[str] = None,
                 attention_state_size: Optional[int] = None,
                 attention_type: Type = Attention,
                 fine_tune: bool = False,
                 encoded_layer: Optional[str] = None,
                 load_checkpoint: Optional[str] = None,
                 save_checkpoint: Optional[str] = None) -> None:
        """Initialize pre-trained ImageNet network.

        Args:
            name: Name of the model part (the ImageNet network, will be in its
                scope, independently on `name`).
            data_id: Id of series with images (list of 3D numpy arrays)
            network_type: Identifier of ImageNet network from TFSlim.
            attention_layer: String identifier of the convolutional map
                (model's endpoint) that will be used for attention. Check
                TFSlim documentation for end point specifications.
            attention_state_size: Dimensionality of state projection in
                attention computation.
            attention_type: Type of attention.
            fine_tune: Flag whether the network should be further trained with
                the rest of the model.
            encoded_layer: String id of the network layer that will be used as
                input of a decoder. `None` means averaging the convolutional
                maps.
            load_checkpoint: Checkpoint file from which the pre-trained network
                is loaded.
            save_checkpoint: Checkpoint file where the encoder is saved after
                the training. (Makes sense only if `fine_tune` is set to
                `True`).
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self, attention_type,
                           attention_state_size=attention_state_size)

        if attention_layer is None and attention_type is not None:
            raise ValueError("Attention type is set, although "
                             "attention layer is not specified.")

        if save_checkpoint is not None and not fine_tune:
            warn("The ImageNet network is not fine-tuned and still it is set "
                 "to save after the training is finished.")

        self.data_id = data_id
        self.network_type = network_type
        self.attention_layer = attention_layer
        self.encoded_layer = encoded_layer
        self.fine_tune = fine_tune

        if self.network_type not in SUPPORTED_NETWORKS:
            raise ValueError(
                "Network '{}' is not among the supported ones ({})".format(
                    self.network_type, ", ".join(SUPPORTED_NETWORKS.keys())))

        scope, net_function = SUPPORTED_NETWORKS[self.network_type]
        with tf_slim.arg_scope(scope()):
            _, self.end_points = net_function(self.input_image)

        if (self.attention_layer is not None and
                self.attention_layer not in self.end_points):
            raise ValueError(
                "Network '{}' does not contain endpoint '{}'".format(
                    self.network_type, self.attention_layer))

        if attention_layer is not None:
            net_output = self.end_points[self.attention_layer]
            if len(net_output.get_shape()) != 4:
                raise ValueError(
                    ("Endpoint '{}' for network '{}' cannot be used as "
                     "a convolutional map; its dimensionality is: {}."
                    ).format(self.attention_layer, self.network_type,
                             ", ".join([str(d.value) for d in
                                        net_output.get_shape()])))

        if (self.encoded_layer is not None
                and self.encoded_layer not in self.end_points):
            raise ValueError(
                "Network '{}' does not contain endpoint '{}'.".format(
                    self.network_type, self.encoded_layer))
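A sketch for the pre-trained ImageNet encoder. The class name, network identifier, and endpoint string are assumptions that must match what the TFSlim model zoo actually exposes.

    # Hedged sketch: "ImageNetEncoder" is a placeholder class name; the
    # network_type must be a key of SUPPORTED_NETWORKS and the attention
    # layer a 4D endpoint of that network.
    encoder = ImageNetEncoder(
        name="imagenet_encoder",
        data_id="images",
        network_type="vgg_16",
        attention_layer="vgg_16/conv5/conv5_3",
        attention_state_size=256,
        fine_tune=False,                 # keeps the network frozen
        load_checkpoint="vgg_16.ckpt")   # pre-trained weights file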
Example #16
    def __init__(self,
                 name: str,
                 vocabulary: Vocabulary,
                 data_id: str,
                 embedding_size: int,
                 rnn_size: int,
                 max_input_len: Optional[int]=None,
                 dropout_keep_prob: float=1.0,
                 attention_type: Optional[Any]=None,
                 attention_fertility: int=3,
                 use_noisy_activations: bool=False,
                 parent_encoder: Optional["SentenceEncoder"]=None,
                 save_checkpoint: Optional[str]=None,
                 load_checkpoint: Optional[str]=None) -> None:
        """Create a new instance of the sentence encoder.

        Arguments:
            vocabulary: Input vocabulary
            data_id: Identifier of the data series fed to this encoder
            name: A unique identifier for this encoder
            max_input_len: Maximum length of an encoded sequence
            embedding_size: The size of the embedding vector assigned
                to each word
            rnn_size: The size of the encoder's hidden state. Note
                that the actual encoder output state size will be
                twice as long because it is the result of
                concatenation of forward and backward hidden states.

        Keyword arguments:
            dropout_keep_prob: The dropout keep probability
                (default 1.0)
            attention_type: The class that is used for creating
                attention mechanism (default None)
            attention_fertility: Fertility parameter used with
                CoverageAttention (default 3).
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(
            self, attention_type, attention_fertility=attention_fertility)

        assert check_argument_types()

        self.vocabulary = vocabulary
        self.data_id = data_id

        self.max_input_len = max_input_len
        self.embedding_size = embedding_size
        self.rnn_size = rnn_size
        self.dropout_keep_p = dropout_keep_prob
        self.use_noisy_activations = use_noisy_activations
        self.parent_encoder = parent_encoder

        if max_input_len is not None and max_input_len <= 0:
            raise ValueError("Input length must be a positive integer.")

        log("Initializing sentence encoder, name: '{}'"
            .format(self.name))

        with self.use_scope():
            self._create_input_placeholders()
            with tf.variable_scope('input_projection'):
                self._create_embedding_matrix()
                embedded_inputs = self._embed(self.inputs)  # type: tf.Tensor
                self.embedded_inputs = embedded_inputs

            fw_cell, bw_cell = self.rnn_cells()  # type: RNNCellTuple
            outputs_bidi_tup, encoded_tup = tf.nn.bidirectional_dynamic_rnn(
                fw_cell, bw_cell, embedded_inputs,
                sequence_length=self.sentence_lengths,
                dtype=tf.float32)

            self.hidden_states = tf.concat(outputs_bidi_tup, 2)

            with tf.variable_scope('attention_tensor'):
                self.__attention_tensor = self._dropout(
                    self.hidden_states)

            self.encoded = tf.concat(encoded_tup, 1)

        log("Sentence encoder initialized")
Example #17
    def __init__(self,
                 name: str,
                 data_id: str,
                 network_type: str,
                 attention_layer: Optional[str],
                 attention_state_size: int,
                 attention_type: Type=Attention,
                 fine_tune: bool=False,
                 encoded_layer: Optional[str]=None,
                 load_checkpoint: Optional[str]=None,
                 save_checkpoint: Optional[str]=None) -> None:
        """Initialize pre-trained ImageNet network.

        Args:
            name: Name of the model part (the ImageNet network, will be in its
                scope, independently on `name`).
            data_id: Id of series with images (list of 3D numpy arrays)
            network_type: Identifier of ImageNet network from TFSlim.
            attention_layer: String identifier of the convolutional map
                (model's endpoint) that will be used for attention. Check
                TFSlim documentation for end point specifications.
            attention_state_size: Dimensionality of state projection in
                attention computation.
            attention_type: Type of attention.
            fine_tune: Flag whether the network should be further trained with
                the rest of the model.
            encoded_layer: String id of the network layer that will be used as
                input of a decoder. `None` means averaging the convolutional
                maps.
            load_checkpoint: Checkpoint file from which the pre-trained network
                is loaded.
            save_checkpoint: Checkpoint file where the encoder is saved after
                the training. (Makes sense only if `fine_tune` is set to
                `True`).
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self, attention_type,
                           attention_state_size=attention_state_size)

        if attention_layer is None and attention_type is not None:
            raise ValueError("Attention type is set, although "
                             "attention layer is not specified.")

        if save_checkpoint is not None and not fine_tune:
            warn("The ImageNet network is not fine-tuned and still it is set "
                 "to save after the training is finished.")

        self.data_id = data_id
        self._network_type = network_type
        self.input_plc = tf.placeholder(
            tf.float32, [None, self.HEIGHT, self.WIDTH, 3])

        if network_type not in SUPPORTED_NETWORKS:
            raise ValueError(
                "Network '{}' is not among the supported ones ({})".format(
                    network_type, ", ".join(SUPPORTED_NETWORKS.keys())))

        scope, net_function = SUPPORTED_NETWORKS[network_type]
        with tf_slim.arg_scope(scope()):
            _, end_points = net_function(self.input_plc)

        with tf.variable_scope(self.name):
            if attention_layer is not None:

                if attention_layer not in end_points:
                    raise ValueError(
                        "Network '{}' does not contain endpoint '{}'".format(
                            network_type, attention_layer))

                net_output = end_points[attention_layer]

                if len(net_output.get_shape()) != 4:
                    raise ValueError((
                        "Endpoint '{}' for network '{}' cannot be used as "
                        "a convolutional map; its dimensionality is: {}."
                        ).format(attention_layer, network_type,
                                 ", ".join([str(d.value) for d in
                                            net_output.get_shape()])))

                if not fine_tune:
                    net_output = tf.stop_gradient(net_output)
                # pylint: disable=no-member
                shape = [s.value for s in net_output.get_shape()[1:]]
                # pylint: enable=no-member
                self.__attention_tensor = tf.reshape(
                    net_output, [-1, shape[0] * shape[1], shape[2]])

            if encoded_layer is not None:
                if encoded_layer not in end_points:
                    raise ValueError(
                        "Network '{}' does not contain endpoint '{}'.".format(
                            network_type, encoded_layer))

                self.encoded = tf.squeeze(end_points[encoded_layer], [1, 2])
                if not fine_tune:
                    self.encoded = tf.stop_gradient(self.encoded)
            else:
                self.encoded = tf.reduce_mean(net_output, [1, 2])
Example #18
    def __init__(self,
                 name: str,
                 data_id: str,
                 input_size: int,
                 rnn_layers: List[RNNSpecTuple],
                 max_input_len: Optional[int] = None,
                 dropout_keep_prob: float = 1.0,
                 attention_type: Optional[Any] = None,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        """Create a new instance of the encoder.

        Arguments:
            data_id: Identifier of the data series fed to this encoder
            name: A unique identifier for this encoder
            rnn_layers: A list of tuples specifying the size and, optionally,
                the direction ('forward', 'backward' or 'both') and cell type
                ('GRU' or 'LSTM') of each RNN layer.

        Keyword arguments:
            dropout_keep_prob: The dropout keep probability
                (default 1.0)
            attention_type: The class that is used for creating
                attention mechanism (default None)
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self, attention_type)
        check_argument_types()

        self.data_id = data_id

        self._rnn_layers = [_make_rnn_spec(*r) for r in rnn_layers]
        self.max_input_len = max_input_len
        self.input_size = input_size
        self.dropout_keep_prob = dropout_keep_prob

        log("Initializing RNN encoder, name: '{}'".format(self.name))

        with self.use_scope():
            self._create_input_placeholders()

            self.states_mask = tf.sequence_mask(self._input_lengths,
                                                dtype=tf.float32)

            states = self.inputs
            states_reversed = False

            def reverse_states():
                nonlocal states, states_reversed
                states = tf.reverse_sequence(states,
                                             self._input_lengths,
                                             batch_dim=0,
                                             seq_dim=1)
                states_reversed = not states_reversed

            for i, layer in enumerate(self._rnn_layers):
                with tf.variable_scope('rnn_{}_{}'.format(i, layer.direction)):
                    cell = _make_rnn_cell(layer)
                    if layer.direction == 'both':
                        outputs_tup, encoded_tup = (
                            tf.nn.bidirectional_dynamic_rnn(
                                cell(),
                                cell(),
                                states,
                                self._input_lengths,
                                dtype=tf.float32))

                        if states_reversed:
                            # treat forward as backward and vice versa
                            outputs_tup = tuple(reversed(outputs_tup))
                            encoded_tup = tuple(reversed(encoded_tup))
                            states_reversed = False

                        states = tf.concat(outputs_tup, 2)
                        encoded = tf.concat(encoded_tup, 1)
                    elif layer.direction in ['forward', 'backward']:
                        should_be_reversed = (layer.direction == 'backward')
                        if states_reversed != should_be_reversed:
                            reverse_states()

                        states, encoded = tf.nn.dynamic_rnn(
                            cell(),
                            states,
                            sequence_length=self._input_lengths,
                            dtype=tf.float32)
                    else:
                        raise ValueError("Unknown RNN direction {}".format(
                            layer.direction))

                if i < len(self._rnn_layers) - 1:
                    states = dropout(states, self.dropout_keep_prob,
                                     self.train_mode)

            if states_reversed:
                reverse_states()

            self.hidden_states = states
            self.encoded = encoded

            with tf.variable_scope('attention_tensor'):
                self.__attention_tensor = dropout(self.hidden_states,
                                                  self.dropout_keep_prob,
                                                  self.train_mode)

        log("RNN encoder initialized")
Example #19
    def __init__(self,
                 name: str,
                 data_id: str,
                 convolutions: List[Tuple[int, int, Optional[int]]],
                 image_height: int,
                 image_width: int,
                 pixel_dim: int,
                 batch_normalization: bool = True,
                 local_response_normalization: bool = True,
                 dropout_keep_prob: float = 0.5,
                 attention_type: Type = Attention,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        """Initialize a convolutional network for image processing.

        Args:
            convolutions (list): Configuration of convolutional layers. It is
                a list of triplets of integers where the values are: size of the
                convolutional window, number of convolutional filters, and size
                of max-pooling window. If the max-pooling size is set to None,
                no pooling is performed.
            data_id: Identifier of the data series in the dataset.
            image_height: Height of the input image in pixels.
            image_width: Width of the images (padded)
            pixel_dim: Number of color channels in the input images.
            batch_normalization: Flag whether the batch normalization
                should be used between the convolutional layers.
            local_response_normalization: Flag whether to use local
                response normalization between the convolutional layers.
            dropout_keep_prob: Probability of keeping neurons active in
                dropout.

        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self, attention_type)

        self.convolutions = convolutions
        self.data_id = data_id
        self.image_height = image_height
        self.image_width = image_width
        self.pixel_dim = pixel_dim
        self.dropout_keep_prob = dropout_keep_prob

        with tf.variable_scope(name):
            self.dropout_placeholder = tf.placeholder(tf.float32,
                                                      name="dropout")
            self.is_training = tf.placeholder(tf.bool, name="is_training")
            self.input_op = tf.placeholder(tf.float32,
                                           shape=(None, image_height,
                                                  image_width, pixel_dim),
                                           name="input_images")

            self.padding_masks = tf.placeholder(tf.float32,
                                                shape=(None, image_height,
                                                       image_width, 1),
                                                name="padding_masks")

            last_layer = self.input_op
            last_padding_masks = self.padding_masks
            last_n_channels = pixel_dim

            self.image_processing_layers = []  # type: List[tf.Tensor]

            with tf.variable_scope("convolutions"):
                for i, (filter_size, n_filters,
                        pool_size) in enumerate(convolutions):
                    with tf.variable_scope("cnn_layer_{}".format(i)):
                        last_layer = _convolution(last_layer, last_n_channels,
                                                  filter_size, n_filters)
                        last_n_channels = n_filters
                        self.image_processing_layers.append(last_layer)

                        if pool_size:
                            # TODO do the pooling properly
                            last_layer = tf.nn.max_pool(
                                last_layer, [1, 2, 2, 1], [1, 2, 2, 1], "SAME")
                            last_padding_masks = tf.nn.max_pool(
                                last_padding_masks, [1, 2, 2, 1], [1, 2, 2, 1],
                                "SAME")
                            self.image_processing_layers.append(last_layer)
                            assert image_height % 2 == 0
                            image_height //= 2
                            assert image_width % 2 == 0
                            image_width //= 2

                        if local_response_normalization:
                            last_layer = tf.nn.local_response_normalization(
                                last_layer)

                        if batch_normalization:
                            last_layer = _batch_norm(last_layer, n_filters,
                                                     self.is_training)

                        last_layer = tf.nn.dropout(
                            last_layer, keep_prob=self.dropout_placeholder)

                # last_layer shape is batch X height X width X channels
                last_layer = last_layer * last_padding_masks

            # we average out by the image size -> shape is number
            # channels from the last convolution
            self.encoded = tf.reduce_mean(last_layer, [1, 2])
            assert_shape(self.encoded, [None, self.convolutions[-1][1]])

            self.__attention_tensor = tf.reshape(
                last_layer, [-1, image_width, last_n_channels * image_height])

            self.__attention_mask = tf.squeeze(
                tf.reduce_prod(last_padding_masks, [1]), [2])
Example #20
    def __init__(self,
                 name: str,
                 vocabulary: Vocabulary,
                 data_id: str,
                 embedding_size: int,
                 segment_size: int,
                 highway_depth: int,
                 rnn_size: int,
                 filters: List[Tuple[int, int]],
                 max_input_len: Optional[int] = None,
                 dropout_keep_prob: float = 1.0,
                 attention_type: Optional[Any] = None,
                 attention_fertility: int = 3,
                 use_noisy_activations: bool = False,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        """Create a new instance of the sentence encoder.

        Arguments:
            vocabulary: Input vocabulary
            data_id: Identifier of the data series fed to this encoder
            name: A unique identifier for this encoder
            max_input_len: Maximum length of an encoded sequence
            embedding_size: The size of the embedding vector assigned
                to each word
            segment_size: The size of the segments over which we apply
                max-pooling.
            highway_depth: Depth of the highway layer.
            rnn_size: The size of the encoder's hidden state. Note
                that the actual encoder output state size will be
                twice as long because it is the result of
                concatenation of forward and backward hidden states.
            filters: Specification of CNN filters. It is a list of tuples
                specifying the filter size and number of channels.

        Keyword arguments:
            dropout_keep_prob: The dropout keep probability
                (default 1.0)
            attention_type: The class that is used for creating
                attention mechanism (default None)
            attention_fertility: Fertility parameter used with
                CoverageAttention (default 3).
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(self,
                           attention_type,
                           attention_fertility=attention_fertility)

        assert check_argument_types()

        self.vocabulary = vocabulary
        self.data_id = data_id

        self.max_input_len = max_input_len
        self.embedding_size = embedding_size
        self.segment_size = segment_size
        self.highway_depth = highway_depth
        self.rnn_size = rnn_size
        self.filters = filters
        self.dropout_keep_p = dropout_keep_prob
        self.use_noisy_activations = use_noisy_activations

        if max_input_len is not None and max_input_len <= 0:
            raise ValueError("Input length must be a positive integer.")

        log("Initializing sentence encoder, name: '{}'".format(self.name))

        with self.use_scope():
            self._create_input_placeholders()
            with tf.variable_scope('input_projection'):
                self._create_embedding_matrix()
                embedded_inputs = self._embed(self.inputs)  # type: tf.Tensor
                self.embedded_inputs = embedded_inputs

            # CNN Network
            pooled_outputs = []
            for filter_size, num_filters in self.filters:
                with tf.variable_scope("conv-maxpool-%s" % filter_size):
                    filter_shape = [filter_size, embedding_size, num_filters]
                    w_filter = tf.get_variable(
                        "conv_W",
                        filter_shape,
                        initializer=tf.random_uniform_initializer(-0.5, 0.5))
                    b_filter = tf.get_variable(
                        "conv_bias", [num_filters],
                        initializer=tf.constant_initializer(0.0))
                    conv = tf.nn.conv1d(embedded_inputs,
                                        w_filter,
                                        stride=1,
                                        padding="SAME",
                                        name="conv")

                    # Apply nonlinearity
                    conv_relu = tf.nn.relu(tf.nn.bias_add(conv, b_filter))

                    # Max-pooling over the output segments
                    expanded_conv_relu = tf.expand_dims(conv_relu, -1)
                    pooled = tf.nn.max_pool(
                        expanded_conv_relu,
                        ksize=[1, self.segment_size, 1, 1],
                        strides=[1, self.segment_size, 1, 1],
                        padding="SAME",
                        name="maxpool")
                    pooled_outputs.append(pooled)

            # Combine all the pooled features
            self.cnn_encoded = tf.concat(pooled_outputs, axis=2)
            self.cnn_encoded = tf.squeeze(self.cnn_encoded, [3])

            # Highway Network
            batch_size = tf.shape(self.cnn_encoded)[0]
            # pylint: disable=no-member
            cnn_out_size = self.cnn_encoded.get_shape().as_list()[-1]
            highway_layer = tf.reshape(self.cnn_encoded, [-1, cnn_out_size])
            for i in range(self.highway_depth):
                highway_layer = highway(highway_layer,
                                        scope=("highway_layer_%s" % i))
            highway_layer = tf.reshape(highway_layer,
                                       [batch_size, -1, cnn_out_size])

            # BiRNN Network
            fw_cell, bw_cell = self.rnn_cells()  # type: RNNCellTuple
            seq_lens = tf.ceil(
                tf.divide(self.sentence_lengths, self.segment_size))
            seq_lens = tf.cast(seq_lens, tf.int32)
            outputs_bidi_tup, encoded_tup = tf.nn.bidirectional_dynamic_rnn(
                fw_cell,
                bw_cell,
                highway_layer,
                sequence_length=seq_lens,
                dtype=tf.float32)

            self.hidden_states = tf.concat(outputs_bidi_tup, 2)

            with tf.variable_scope('attention_tensor'):
                self.__attention_tensor = self._dropout(self.hidden_states)

            self.encoded = tf.concat(encoded_tup, 1)

        log("Sentence encoder initialized")