Example No. 1
    def __init__(self,
                 name: str,
                 cnn: CNNEncoder) -> None:
        check_argument_types()
        ModelPart.__init__(
            self, name, save_checkpoint=None, load_checkpoint=None)
        self._cnn = cnn
Example No. 2
    def __init__(self,
                 name: str,
                 vocabulary: Vocabulary,
                 data_id: str,
                 max_output_len: int,
                 dropout_keep_prob: float = 1.0,
                 embedding_size: int = None,
                 embeddings_source: EmbeddedSequence = None,
                 tie_embeddings: bool = False,
                 label_smoothing: float = None,
                 supress_unk: bool = False,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Initialize parameters common for all autoregressive decoders.

        Arguments:
            name: Name of the decoder. Should be unique across all Neural
                Monkey objects.
            vocabulary: Target vocabulary.
            data_id: Target data series.
            max_output_len: Maximum length of an output sequence.
            reuse: Reuse the variables from the model part.
            dropout_keep_prob: Probability of keeping a value during dropout.
            embedding_size: Size of embedding vectors for target words.
            embeddings_source: Embedded sequence to take embeddings from.
            tie_embeddings: Use decoder.embedding_matrix also in place
                of the output decoding matrix.
            label_smoothing: Label smoothing parameter.
            supress_unk: If true, decoder will not produce symbols for unknown
                tokens.
        """
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.vocabulary = vocabulary
        self.data_id = data_id
        self.max_output_len = max_output_len
        self.dropout_keep_prob = dropout_keep_prob
        self._embedding_size = embedding_size
        self.embeddings_source = embeddings_source
        self.label_smoothing = label_smoothing
        self.tie_embeddings = tie_embeddings
        self.supress_unk = supress_unk

        self.encoder_states = lambda: []  # type: Callable[[], List[tf.Tensor]]
        self.encoder_masks = lambda: []  # type: Callable[[], List[tf.Tensor]]

        # Check the values of the parameters (max_output_len, ...)
        if self.max_output_len <= 0:
            raise ValueError(
                "Maximum sequence length must be a positive integer.")

        if self._embedding_size is not None and self._embedding_size <= 0:
            raise ValueError("Embedding size must be a positive integer.")

        if self.dropout_keep_prob < 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep probability must be a real number "
                             "in the interval [0,1].")
Example No. 3
    def __init__(self,
                 encoder: SentenceEncoder,
                 decoder: Decoder,
                 data_id: str,
                 name: str) -> None:
        ModelPart.__init__(self, name, None, None)

        self.encoder = encoder
        self.decoder = decoder
        self.data_id = data_id

        self.ref_alignment = tf.placeholder(
            tf.float32,
            [None, self.decoder.max_output_len, self.encoder.max_input_len],
            name="ref_alignment")

        # shape will be [max_output_len, batch_size, max_input_len]
        self.alignment_target = tf.transpose(self.ref_alignment,
                                             perm=[1, 0, 2])

        _, self.train_loss = self._make_decoder(runtime_mode=False)
        self.decoded, self.runtime_loss = self._make_decoder(runtime_mode=True)

        tf.summary.scalar("alignment_train_xent", self.train_loss,
                          collections=["summary_train"])
Example No. 4
    def __init__(self,
                 name: str,
                 cnn: CNNEncoder) -> None:
        check_argument_types()
        ModelPart.__init__(
            self, name, save_checkpoint=None, load_checkpoint=None)
        self._cnn = cnn
Example No. 5
    def __init__(self,
                 name: str,
                 encoder: TemporalStateful,
                 vocabulary: Vocabulary,
                 data_id: str,
                 max_length: int = None,
                 merge_repeated_targets: bool = False,
                 merge_repeated_outputs: bool = True,
                 beam_width: int = 1,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.encoder = encoder
        self.vocabulary = vocabulary
        self.data_id = data_id
        self.max_length = max_length

        self.merge_repeated_targets = merge_repeated_targets
        self.merge_repeated_outputs = merge_repeated_outputs
        self.beam_width = beam_width
Example No. 6
    def __init__(self,
                 name: str,
                 parent_decoder: AutoregressiveDecoder,
                 beam_size: int,
                 max_steps: int,
                 length_normalization: float) -> None:
        """Construct the beam search decoder graph.

        Arguments:
            name: The name for the model part.
            parent_decoder: An autoregressive decoder from which to sample.
            beam_size: The number of hypotheses in the beam.
            max_steps: The maximum number of time steps to perform.
            length_normalization: The alpha parameter from Eq. 14 in the paper.
        """
        check_argument_types()
        ModelPart.__init__(self, name)

        self.parent_decoder = parent_decoder
        self.beam_size = beam_size
        self.length_normalization = length_normalization
        self.max_steps_int = max_steps

        # Create a placeholder for the maximum number of steps. This is
        # necessary during ensembling, when the decoder is called repeatedly
        # with the max_steps attribute set to one.
        self.max_steps = tf.placeholder_with_default(self.max_steps_int, [])

        self._initial_loop_state = None  # type: Optional[BeamSearchLoopState]
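For reference, the length_normalization argument above corresponds to the alpha in the length penalty of Eq. 14, presumably from Wu et al. (2016). A minimal sketch of that penalty, assuming the constant 5 used in the paper (this helper is illustrative and not part of the class):

def length_penalty(length: int, alpha: float) -> float:
    # lp(Y) = (5 + |Y|)^alpha / (5 + 1)^alpha; hypothesis log-probabilities
    # are divided by this value, so a larger alpha favours longer outputs.
    return ((5.0 + length) ** alpha) / ((5.0 + 1.0) ** alpha)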
Example No. 7
    def __init__(self,
                 name: str,
                 vocabulary: Vocabulary,
                 data_id: str,
                 max_output_len: int,
                 dropout_keep_prob: float = 1.0,
                 embedding_size: int = None,
                 embeddings_source: EmbeddedSequence = None,
                 tie_embeddings: bool = False,
                 label_smoothing: float = None,
                 supress_unk: bool = False,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Initialize parameters common for all autoregressive decoders.

        Arguments:
            name: Name of the decoder. Should be unique across all Neural
                Monkey objects.
            vocabulary: Target vocabulary.
            data_id: Target data series.
            max_output_len: Maximum length of an output sequence.
            reuse: Reuse the variables from the model part.
            dropout_keep_prob: Probability of keeping a value during dropout.
            embedding_size: Size of embedding vectors for target words.
            embeddings_source: Embedded sequence to take embeddings from.
            tie_embeddings: Use decoder.embedding_matrix also in place
                of the output decoding matrix.
            label_smoothing: Label smoothing parameter.
            supress_unk: If true, decoder will not produce symbols for unknown
                tokens.
        """
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.vocabulary = vocabulary
        self.data_id = data_id
        self.max_output_len = max_output_len
        self.dropout_keep_prob = dropout_keep_prob
        self._embedding_size = embedding_size
        self.embeddings_source = embeddings_source
        self.label_smoothing = label_smoothing
        self.tie_embeddings = tie_embeddings
        self.supress_unk = supress_unk

        self.encoder_states = lambda: []  # type: Callable[[], List[tf.Tensor]]
        self.encoder_masks = lambda: []  # type: Callable[[], List[tf.Tensor]]

        # Check the values of the parameters (max_output_len, ...)
        if self.max_output_len <= 0:
            raise ValueError(
                "Maximum sequence length must be a positive integer.")

        if self._embedding_size is not None and self._embedding_size <= 0:
            raise ValueError("Embedding size must be a positive integer.")

        if self.dropout_keep_prob < 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep probability must be a real number "
                             "in the interval [0,1].")
Example No. 8
    def __init__(self,
                 name: str,
                 input_shape: List[int],
                 output_shape: int,
                 data_id: str,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        check_argument_types()
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

        assert len(input_shape) == 3
        if output_shape <= 0:
            raise ValueError("Output vector dimension must be postive.")

        self.data_id = data_id

        with self.use_scope():
            features_shape = [None] + input_shape  # type: ignore
            self.image_features = tf.placeholder(tf.float32,
                                                 shape=features_shape,
                                                 name="image_input")

            self.flat = tf.reduce_mean(self.image_features,
                                       axis=[1, 2],
                                       name="average_image")

            self.project_w = tf.get_variable(
                name="img_init_proj_W",
                shape=[input_shape[2], output_shape],
                initializer=tf.glorot_normal_initializer())
            self.project_b = tf.get_variable(
                name="img_init_b",
                shape=[output_shape],
                initializer=tf.zeros_initializer())
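The projection variables created above are presumably applied later to the spatially averaged image features. A standalone sketch of that step, under the assumption that the class combines them with a plain affine projection (the function name is made up for illustration):

import tensorflow as tf

def project_image_features(flat: tf.Tensor,
                           project_w: tf.Tensor,
                           project_b: tf.Tensor) -> tf.Tensor:
    # flat: [batch, channels], project_w: [channels, output_shape];
    # returns one fixed-size vector per image.
    return tf.matmul(flat, project_w) + project_b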
Example No. 9
    def __init__(self,
                 name: str,
                 dimension: int,
                 data_id: str,
                 output_shape: int = None,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Instantiate StatefulFiller.

        Arguments:
            name: Name of the model part.
            dimension: Dimensionality of the input.
            data_id: Series containing the numpy objects.
            output_shape: Dimension of optional state projection.
        """
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.data_id = data_id
        self.dimension = dimension
        self.output_shape = output_shape

        if self.dimension <= 0:
            raise ValueError("Input vector dimension must be positive.")
        if self.output_shape is not None and self.output_shape <= 0:
            raise ValueError("Output vector dimension must be positive.")

        with self.use_scope():
            self.vector = tf.placeholder(tf.float32, [None, self.dimension],
                                         "input_vector")
Example No. 10
    def __init__(self,
                 name: str,
                 input_sequence: Attendable,
                 hidden_size: int,
                 num_heads: int,
                 output_size: int = None,
                 state_proj_size: int = None,
                 dropout_keep_prob: float = 1.0,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Initialize an instance of the encoder."""
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.input_sequence = input_sequence
        self.hidden_size = hidden_size
        self.num_heads = num_heads
        self.output_size = output_size
        self.state_proj_size = state_proj_size
        self.dropout_keep_prob = dropout_keep_prob

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")
Example No. 11
    def __init__(self,
                 name: str,
                 encoders: List[Stateful],
                 data_id: str,
                 layers: List[int] = None,
                 activation_fn: Callable[[tf.Tensor], tf.Tensor] = tf.nn.relu,
                 dropout_keep_prob: float = 1.0,
                 dimension: int = 1,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                           initializers)
        assert check_argument_types()

        self.encoders = encoders
        self.data_id = data_id
        self.max_output_len = 1
        self.dimension = dimension

        self._layers = layers
        self._activation_fn = activation_fn
        self._dropout_keep_prob = dropout_keep_prob

        tf.summary.scalar("val_optimization_cost",
                          self.cost,
                          collections=["summary_val"])
        tf.summary.scalar("train_optimization_cost",
                          self.cost,
                          collections=["summary_train"])
Example No. 12
    def __init__(self,
                 name: str,
                 encoders: List[TemporalStateful],
                 vocabulary: Vocabulary,
                 data_id: str,
                 max_output_len: int = None,
                 hidden_dim: int = None,
                 activation: Callable = tf.nn.relu,
                 dropout_keep_prob: float = 1.0,
                 add_start_symbol: bool = False,
                 add_end_symbol: bool = False,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.encoders = encoders
        self.vocabulary = vocabulary
        self.data_id = data_id
        self.max_output_len = max_output_len
        self.hidden_dim = hidden_dim
        self.activation = activation
        self.dropout_keep_prob = dropout_keep_prob
        self.add_start_symbol = add_start_symbol
        self.add_end_symbol = add_end_symbol
Example No. 13
    def __init__(self,
                 name: str,
                 input_sequence: Attendable,
                 hidden_size: int,
                 num_heads: int,
                 output_size: int = None,
                 state_proj_size: int = None,
                 dropout_keep_prob: float = 1.0,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Initialize an instance of the encoder."""
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.input_sequence = input_sequence
        self.hidden_size = hidden_size
        self.num_heads = num_heads
        self.output_size = output_size
        self.state_proj_size = state_proj_size
        self.dropout_keep_prob = dropout_keep_prob

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")
Example No. 14
    def __init__(self,
                 encoder: RecurrentEncoder,
                 decoder: Decoder,
                 data_id: str,
                 name: str,
                 reuse: ModelPart = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        ModelPart.__init__(self, name, reuse, None, None, initializers)

        self.encoder = encoder
        self.decoder = decoder
        self.data_id = data_id

        if not isinstance(self.encoder.input_sequence, Sequence):
            raise TypeError("Expected Sequence type in encoder.input_sequence")

        self.enc_input = cast(Sequence, self.encoder.input_sequence)

        # TODO this is here to call the lazy properties which create
        # the list of attention distributions
        # pylint: disable=pointless-statement
        self.decoder.runtime_logits
        self.decoder.train_logits
        # pylint: enable=pointless-statement

        _, self.train_loss = self._make_decoder(runtime_mode=False)
        self.decoded, self.runtime_loss = self._make_decoder(runtime_mode=True)

        tf.summary.scalar("alignment_train_xent",
                          self.train_loss,
                          collections=["summary_train"])
Example No. 15
    def __init__(self,
                 name: str,
                 input_sequence: EmbeddedSequence,
                 conv_features: int,
                 encoder_layers: int,
                 kernel_width: int = 5,
                 dropout_keep_prob: float = 1.0,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                           initializers)

        self.input_sequence = input_sequence
        self.encoder_layers = encoder_layers
        self.conv_features = conv_features
        self.kernel_width = kernel_width
        self.dropout_keep_prob = dropout_keep_prob

        if conv_features <= 0:
            raise ValueError("Number of features must be a positive integer.")
        if encoder_layers <= 0:
            raise ValueError(
                "Number of encoder layers must be a positive integer.")

        if self.input_sequence.max_length is None:
            raise ValueError("Input sequence must have a maximum length for "
                             "positional embeddings with this encoder")
        self.max_input_length = self.input_sequence.max_length

        log("Initializing convolutional seq2seq encoder, name {}".format(
            self.name))
Example No. 16
    def __init__(self,
                 name: str,
                 dimension: int,
                 data_id: str,
                 output_shape: int = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None) -> None:
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        check_argument_types()

        if dimension <= 0:
            raise ValueError("Input vector dimension must be postive.")
        if output_shape is not None and output_shape <= 0:
            raise ValueError("Output vector dimension must be postive.")

        self.vector = tf.placeholder(tf.float32, shape=[None, dimension])
        self.data_id = data_id

        with self.use_scope():
            if output_shape is not None and dimension != output_shape:
                project_w = tf.get_variable(shape=[dimension, output_shape],
                                            name="img_init_proj_W")
                project_b = tf.get_variable(name="img_init_b",
                                            shape=[output_shape],
                                            initializer=tf.zeros_initializer())

                self._encoded = tf.matmul(self.vector, project_w) + project_b
            else:
                self._encoded = self.vector
Example No. 17
    def __init__(
            self,
            name: str,
            parent: TemporalStateful,
            factor: int,
            projection_size: int = None,
            projection_activation: Activation = None) -> None:
        """Initialize SentenceSplitter.

        Args:
            parent: TemporalStateful whose states will be split.
            factor: Factor by which the states will be split; the resulting
                sequence will be longer by this factor.
            projection_size: If not None, specifies dimensionality of a
                projection before state splitting.
            projection_activation: Non-linearity function for the optional
                projection.
        """
        check_argument_types()

        ModelPart.__init__(
            self, name=name, save_checkpoint=None, load_checkpoint=None,
            initializers=None)
        self.parent = parent
        self.factor = factor
        self.projection_size = projection_size
        self.activation = projection_activation

        if projection_size is not None and projection_size % factor != 0:
            raise ValueError((
                "Dimension of projection ({}) must be "
                "dividable by the given factor ({}).").format(
                    projection_size, factor))
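To illustrate the splitting described in the docstring (a sketch only, not the class's actual implementation), a [batch, time, dim] state tensor can be reshaped so that the temporal axis grows by factor while the state dimension shrinks by the same factor:

import tensorflow as tf

def split_states(states: tf.Tensor, factor: int) -> tf.Tensor:
    # Sketch: [batch, time, dim] -> [batch, time * factor, dim // factor].
    # Assumes the last dimension is statically known and divisible by factor.
    dim = states.get_shape().as_list()[-1]
    batch, time = tf.shape(states)[0], tf.shape(states)[1]
    return tf.reshape(states, [batch, time * factor, dim // factor])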
Example No. 18
    def __init__(self,
                 name: str,
                 dimension: int,
                 data_id: str,
                 output_shape: int = None,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Instantiate StatefulFiller.

        Arguments:
            name: Name of the model part.
            dimension: Dimensionality of the input.
            data_id: Series containing the numpy objects.
            output_shape: Dimension of optional state projection.
        """
        check_argument_types()
        ModelPart.__init__(
            self, name, reuse, save_checkpoint, load_checkpoint, initializers)

        self.data_id = data_id
        self.dimension = dimension
        self.output_shape = output_shape

        if self.dimension <= 0:
            raise ValueError("Input vector dimension must be positive.")
        if self.output_shape is not None and self.output_shape <= 0:
            raise ValueError("Output vector dimension must be positive.")
Example No. 19
    def __init__(self,
                 name: str,
                 encoder: TemporalStateful,
                 vocabulary: Vocabulary,
                 data_id: str,
                 max_length: int = None,
                 merge_repeated_targets: bool = False,
                 merge_repeated_outputs: bool = True,
                 beam_width: int = 1,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.encoder = encoder
        self.vocabulary = vocabulary
        self.data_id = data_id
        self.max_length = max_length

        self.merge_repeated_targets = merge_repeated_targets
        self.merge_repeated_outputs = merge_repeated_outputs
        self.beam_width = beam_width
Example No. 20
    def __init__(self,
                 name: str,
                 input_shape: List[int],
                 data_id: str,
                 projection_dim: int = None,
                 ff_hidden_dim: int = None,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Instantiate SpatialFiller.

        Args:
            name: Name of the model part.
            input_shape: Dimensionality of the input.
            data_id: Name of the data series with numpy objects.
            projection_dim: Optional, dimension of the states projection.
        """
        check_argument_types()
        ModelPart.__init__(
            self, name, reuse, save_checkpoint, load_checkpoint, initializers)

        self.data_id = data_id
        self.input_shape = input_shape
        self.projection_dim = projection_dim
        self.ff_hidden_dim = ff_hidden_dim

        if self.ff_hidden_dim is not None and self.projection_dim is None:
            raise ValueError(
                "projection_dim must be provided when using ff_hidden_dim")

        if len(self.input_shape) != 3:
            raise ValueError("The input shape should have 3 dimensions.")
Example No. 21
    def __init__(self,
                 name: str,
                 encoder: Union[RecurrentEncoder, SentenceEncoder],
                 vocabulary: Vocabulary,
                 data_id: str,
                 dropout_keep_prob: float = 1.0,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.encoder = encoder
        self.vocabulary = vocabulary
        self.data_id = data_id
        self.dropout_keep_prob = dropout_keep_prob

        self.rnn_size = int(self.encoder.temporal_states.get_shape()[-1])

        with self.use_scope():
            self.train_targets = tf.placeholder(tf.int32, [None, None],
                                                "labeler_targets")
            self.train_weights = tf.placeholder(tf.float32, [None, None],
                                                "labeler_padding_weights")
Example No. 22
    def __init__(self,
                 name: str,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None) -> None:
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

        self.query_state_size = None  # type: tf.Tensor
        self._histories = {}  # type: Dict[str, tf.Tensor]
Example No. 23
    def __init__(self,
                 name: str,
                 input_sequence: TemporalStateful,
                 rnn_layers: List[RNNSpecTuple],
                 add_residual: bool = False,
                 add_layer_norm: bool = False,
                 include_final_layer_norm: bool = True,
                 dropout_keep_prob: float = 1.0,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Create a new instance of a recurrent encoder.

        Arguments:
            name: ModelPart name.
            input_sequence: The input sequence for the encoder.
            rnn_layers: A list of layer specifications. Each layer is given
                by the dimension of its hidden state vector, the kind of
                memory cell to use ("GRU", "NematusGRU", or "LSTM"), and the
                direction in which to process the input sequence ("forward",
                "backward", or "bidirectional"). Note that choosing
                "bidirectional" doubles the resulting vector dimension as
                well as the number of encoder parameters.
            add_residual: Add residual connections to the RNN layer output.
            add_layer_norm: Add layer normalization after each RNN layer.
            include_final_layer_norm: Normalize also output of the network.
            dropout_keep_prob: 1 - dropout probability.
            save_checkpoint: ModelPart save checkpoint file.
            load_checkpoint: ModelPart load checkpoint file.
        """
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)
        TemporalStatefulWithOutput.__init__(self)

        self.input_sequence = input_sequence
        self.dropout_keep_prob = dropout_keep_prob
        self.rnn_specs = [_make_rnn_spec(*r) for r in rnn_layers]
        self.add_residual = add_residual
        self.add_layer_norm = add_layer_norm
        self.include_final_layer_norm = include_final_layer_norm

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")

        layer_sizes = [
            2 * layer.size if layer.direction == "bidirectional"
            else layer.size for layer in self.rnn_specs]
        if add_residual and len(set(layer_sizes)) > 1:
            raise ValueError(
                "When using residual connectiong, all layers must have "
                "the same size, but are {}.".format(layer_sizes))

        self._variable_scope.set_initializer(
            tf.random_normal_initializer(stddev=0.001))
Example No. 24
    def __init__(self,
                 name: str,
                 input_sequence: TemporalStateful,
                 rnn_layers: List[RNNSpecTuple],
                 add_residual: bool = False,
                 add_layer_norm: bool = False,
                 include_final_layer_norm: bool = True,
                 dropout_keep_prob: float = 1.0,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Create a new instance of a recurrent encoder.

        Arguments:
            name: ModelPart name.
            input_sequence: The input sequence for the encoder.
            rnn_layers: A list of layer specifications. Each layer is given
                by the dimension of its hidden state vector, the kind of
                memory cell to use ("GRU", "NematusGRU", or "LSTM"), and the
                direction in which to process the input sequence ("forward",
                "backward", or "bidirectional"). Note that choosing
                "bidirectional" doubles the resulting vector dimension as
                well as the number of encoder parameters.
            add_residual: Add residual connections to the RNN layer output.
            add_layer_norm: Add layer normalization after each RNN layer.
            include_final_layer_norm: Normalize also output of the network.
            dropout_keep_prob: 1 - dropout probability.
            save_checkpoint: ModelPart save checkpoint file.
            load_checkpoint: ModelPart load checkpoint file.
        """
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)
        TemporalStatefulWithOutput.__init__(self)

        self.input_sequence = input_sequence
        self.dropout_keep_prob = dropout_keep_prob
        self.rnn_specs = [_make_rnn_spec(*r) for r in rnn_layers]
        self.add_residual = add_residual
        self.add_layer_norm = add_layer_norm
        self.include_final_layer_norm = include_final_layer_norm

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")

        layer_sizes = [
            2 * layer.size if layer.direction == "bidirectional"
            else layer.size for layer in self.rnn_specs]
        if add_residual and len(set(layer_sizes)) > 1:
            raise ValueError(
                "When using residual connectiong, all layers must have "
                "the same size, but are {}.".format(layer_sizes))

        self._variable_scope.set_initializer(
            tf.random_normal_initializer(stddev=0.001))
Example No. 25
    def __init__(self,
                 name: str,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None) -> None:
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

        self.query_state_size = None  # type: tf.Tensor
        self._histories = {}  # type: Dict[str, tf.Tensor]

        self.train_mode = tf.placeholder(tf.bool, [], "train_mode")
Example No. 26
    def __init__(self,
                 name: str,
                 input_sequence: TemporalStateful,
                 ff_hidden_size: int,
                 depth: int,
                 n_heads: int,
                 dropout_keep_prob: float = 1.0,
                 attention_dropout_keep_prob: float = 1.0,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None) -> None:
        """Create an encoder of the Transformer model.

        Described in Vaswani et al. (2017), arxiv.org/abs/1706.03762

        Arguments:
            input_sequence: Embedded input sequence.
            name: Name of the decoder. Should be unique across all Neural
                Monkey objects.
            dropout_keep_prob: Probability of keeping a value during dropout.

        Keyword arguments:
            ff_hidden_size: Size of the feedforward sublayers.
            n_heads: Number of the self-attention heads.
            depth: Number of sublayers.
            attention_dropout_keep_prob: Probability of keeping a value
                during dropout on the attention output.
        """
        check_argument_types()
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

        self.input_sequence = input_sequence
        self.model_dimension = self.input_sequence.dimension
        self.ff_hidden_size = ff_hidden_size
        self.depth = depth
        self.n_heads = n_heads
        self.dropout_keep_prob = dropout_keep_prob
        self.attention_dropout_keep_prob = attention_dropout_keep_prob

        if self.depth <= 0:
            raise ValueError("Depth must be a positive integer.")

        if self.ff_hidden_size <= 0:
            raise ValueError("Feed forward hidden size must be a "
                             "positive integer.")

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")

        if (self.attention_dropout_keep_prob <= 0.0
                or self.attention_dropout_keep_prob > 1.0):
            raise ValueError("Dropout keep prob for attn must be in (0,1].")

        self.train_mode = tf.placeholder(tf.bool, [], "train_mode")
        log("Output op: {}".format(self.output))
Example No. 27
    def __init__(self,
                 name: str,
                 parent_decoder: AutoregressiveDecoder,
                 beam_size: int,
                 max_steps: int,
                 length_normalization: float) -> None:
        """Construct the beam search decoder graph.

        Arguments:
            name: The name for the model part.
            parent_decoder: An autoregressive decoder from which to sample.
            beam_size: The number of hypotheses in the beam.
            max_steps: The maximum number of time steps to perform.
            length_normalization: The alpha parameter from Eq. 14 in the paper.
        """
        check_argument_types()
        ModelPart.__init__(self, name)

        self.parent_decoder = parent_decoder
        self.beam_size = beam_size
        self.length_normalization = length_normalization
        self.max_steps_int = max_steps

        # Create a placeholder for the maximum number of steps. This is
        # necessary during ensembling, when the decoder is called repeatedly
        # with the max_steps attribute set to one.
        self.max_steps = tf.placeholder_with_default(max_steps, [])

        # This is an ugly hack for handling the whole graph when expanding to
        # the beam. We need to access all the inner states of the network in
        # the graph, replace them with beam-size-times copied originals, create
        # the beam search graph, and then replace the inner states back.
        has_encoder = (hasattr(self.parent_decoder, "encoder_states")
                       and hasattr(self.parent_decoder, "encoder_mask"))

        if has_encoder:
            enc_states = self.parent_decoder.encoder_states
            enc_mask = self.parent_decoder.encoder_mask

        if has_encoder and enc_states is not None and enc_mask is not None:
            setattr(self.parent_decoder, "encoder_states",
                    self.expand_to_beam(enc_states))
            setattr(self.parent_decoder, "encoder_mask",
                    self.expand_to_beam(enc_mask))

        # Create the beam search symbolic graph.
        with self.use_scope():
            self.initial_loop_state = self.get_initial_loop_state()
            self.outputs = self.decoding_loop()

        # Reassign the original encoder states and mask back
        if has_encoder:
            setattr(self.parent_decoder, "encoder_states", enc_states)
            setattr(self.parent_decoder, "encoder_mask", enc_mask)
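The expand_to_beam method used above is not shown in this example; in essence it has to copy each batch item beam_size times so that every hypothesis in the beam sees its own copy of the encoder tensors. A minimal sketch of such an expansion (the actual method in the class may differ):

import tensorflow as tf

def expand_to_beam(tensor: tf.Tensor, beam_size: int) -> tf.Tensor:
    # Sketch: tile along the batch axis, e.g.
    # [batch, time, dim] -> [batch * beam_size, time, dim].
    # Assumes the rank of the tensor is statically known.
    tiled = tf.tile(tf.expand_dims(tensor, 1),
                    [1, beam_size] + [1] * (tensor.shape.ndims - 1))
    return tf.reshape(tiled, tf.concat([[-1], tf.shape(tensor)[1:]], axis=0))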
Example No. 28
    def __init__(self,
                 name: str,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Create a new ``BaseAttention`` object."""
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                           initializers)

        self.query_state_size = None  # type: tf.Tensor
        self._histories = {}  # type: Dict[str, tf.Tensor]
Example No. 29
    def __init__(self,
                 name: str,
                 encoders: List[Stateful],
                 vocabulary: Vocabulary,
                 data_id: str,
                 layers: List[int],
                 activation_fn: Callable[[tf.Tensor], tf.Tensor] = tf.nn.relu,
                 dropout_keep_prob: float = 0.5,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Construct a new instance of the sequence classifier.

        Args:
            name: Name of the decoder. Should be unique across all Neural
                Monkey objects
            encoders: Input encoders of the decoder
            vocabulary: Target vocabulary
            data_id: Target data series
            layers: List defining the structure of the network. Ini example:
                    layers=[100,20,5] ;creates a classifier with hidden layers
                                       of sizes 100, 20 and 5, plus one output
                                       layer whose size depends on the size of
                                       the vocabulary
            activation_fn: activation function used on the output of each
                           hidden layer.
            dropout_keep_prob: Probability of keeping a value during dropout
        """
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.encoders = encoders
        self.vocabulary = vocabulary
        self.data_id = data_id
        self.layers = layers
        self.activation_fn = activation_fn
        self.dropout_keep_prob = dropout_keep_prob
        self.max_output_len = 1

        with self.use_scope():
            self.gt_inputs = [tf.placeholder(tf.int32, [None], "targets")]

            mlp_input = tf.concat([enc.output for enc in self.encoders], 1)
            self._mlp = MultilayerPerceptron(mlp_input,
                                             self.layers,
                                             self.dropout_keep_prob,
                                             len(self.vocabulary),
                                             activation_fn=self.activation_fn,
                                             train_mode=self.train_mode)

        tf.summary.scalar("train_optimization_cost",
                          self.cost,
                          collections=["summary_train"])
Example No. 30
    def __init__(self,
                 name: str,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Create a new ``BaseAttention`` object."""
        ModelPart.__init__(
            self, name, reuse, save_checkpoint, load_checkpoint, initializers)

        self.query_state_size = None  # type: tf.Tensor
        self._histories = {}  # type: Dict[str, tf.Tensor]
Example No. 31
    def __init__(self,
                 name: str,
                 input_sequence: TemporalStateful,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Initialize an instance of the pooling layer."""
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)
        self.input_sequence = input_sequence
Example No. 32
    def __init__(self,
                 name: str,
                 encoders: List[Any],
                 vocabulary: Vocabulary,
                 data_id: str,
                 layers: Optional[List[int]] = None,
                 activation: Callable[[tf.Tensor], tf.Tensor] = tf.tanh,
                 dropout_keep_prob: float = 0.5,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

        self.encoders = encoders
        self.vocabulary = vocabulary
        self.data_id = data_id
        self.layers = layers
        self.activation = activation
        self.dropout_keep_prob = dropout_keep_prob
        self.max_output_len = 1

        with tf.variable_scope(name):
            self.learning_step = tf.get_variable(
                "learning_step", [],
                trainable=False,
                initializer=tf.constant_initializer(0))

            self.dropout_placeholder = \
                tf.placeholder(tf.float32, name="dropout_plc")
            self.gt_inputs = [
                tf.placeholder(tf.int32, shape=[None], name="targets")
            ]
            mlp_input = tf.concat(1, [enc.encoded for enc in encoders])
            mlp = MultilayerPerceptron(mlp_input,
                                       layers, self.dropout_placeholder,
                                       len(vocabulary))

            self.loss_with_gt_ins = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    mlp.logits, self.gt_inputs[0]))
            self.loss_with_decoded_ins = self.loss_with_gt_ins
            self.cost = self.loss_with_gt_ins

            self.decoded_seq = [mlp.classification]
            self.decoded_logits = [mlp.logits]
            self.runtime_logprobs = [tf.nn.log_softmax(mlp.logits)]

            tf.scalar_summary('val_optimization_cost',
                              self.cost,
                              collections=["summary_val"])
            tf.scalar_summary('train_optimization_cost',
                              self.cost,
                              collections=["summary_train"])
Example No. 33
    def __init__(self,
                 encoder: RecurrentEncoder,
                 decoder: Decoder,
                 data_id: str,
                 name: str,
                 reuse: ModelPart = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        ModelPart.__init__(self, name, reuse, None, None, initializers)

        self.encoder = encoder
        self.decoder = decoder
        self.data_id = data_id
Example No. 34
    def __init__(self,
                 name: str,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                           initializers)

        self.query_state_size = None  # type: tf.Tensor
        self._histories = {}  # type: Dict[str, tf.Tensor]

        with self.use_scope():
            self.train_mode = tf.placeholder(tf.bool, [], "train_mode")
Example No. 35
    def __init__(self,
                 name: str,
                 data_id: str,
                 convolutions: List[Union[ConvSpec, ResNetSpec, MaxPoolSpec]],
                 image_height: int,
                 image_width: int,
                 pixel_dim: int,
                 fully_connected: List[int] = None,
                 batch_normalize: bool = False,
                 dropout_keep_prob: float = 0.5,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Initialize a convolutional network for image processing.

        The convolutional network can consist of plain convolutions,
        max-pooling layers and residual blocks. In the configuration, they are
        specified using the following tuples.

            * convolution: ("C", kernel_size, stride, padding, out_channel);
            * max / average pooling: ("M"/"A", kernel_size, stride, padding);
            * residual block: ("R", kernel_size, out_channels).

        Padding must be either "valid" or "same".

        Args:
            convolutions: Configuration of convolutional layers.
            data_id: Identifier of the data series in the dataset.
            image_height: Height of the input image in pixels.
            image_width: Width of the image.
            pixel_dim: Number of color channels in the input images.
            dropout_keep_prob: Probability of keeping neurons active in
                dropout. Dropout is applied between all convolutional layers
                and the fully connected layers.
        """
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.data_id = data_id
        self.dropout_keep_prob = dropout_keep_prob

        self.image_height = image_height
        self.image_width = image_width
        self.pixel_dim = pixel_dim
        self.convolutions = convolutions
        self.fully_connected = fully_connected
        self.batch_normalize = batch_normalize
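To make the tuple format from the docstring concrete, a hypothetical convolutions specification following that scheme might look like the following (the concrete sizes and channel counts are made up for illustration):

# Sketch of a configuration using the documented tuple formats:
convolutions = [
    ("C", 3, 1, "same", 64),   # 3x3 convolution, stride 1, 64 output channels
    ("M", 2, 2, "same"),       # 2x2 max-pooling with stride 2
    ("R", 3, 64),              # residual block with 3x3 kernels, 64 channels
]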
Example No. 36
    def __init__(self,
                 name: str,
                 max_input_len: int,
                 vocabularies: List[Vocabulary],
                 data_ids: List[str],
                 embedding_sizes: List[int],
                 rnn_size: int,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None,
                 **kwargs) -> None:
        """Construct a new instance of the factored encoder.

        Args:
            max_input_len: Maximum input length (longer sequences are trimmed)
            vocabularies: List of vocabularies, one for each data series
            data_ids: List of data series IDs
            embedding_sizes: List of embedding sizes for each data series
            name: The name for this encoder. [sentence_encoder]
            rnn_size: The size of the hidden state

        Keyword arguments:
            use_noisy_activations: Boolean flag whether to use noisy activation
                                   functions in RNN cells.
                                   (see neuralmonkey.nn.noisy_gru_cell) [False]
            attention_type: The attention to use. [None]
            attention_fertility: Fertility for CoverageAttention (if used). [3]
            dropout_keep_prob: 1 - Dropout probability [1]
        """
        attention_type = kwargs.get("attention_type", None)
        Attentive.__init__(self, attention_type)
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

        assert check_argument_types()

        self.vocabularies = vocabularies
        self.data_ids = data_ids
        self.embedding_sizes = embedding_sizes

        self.max_input_len = max_input_len
        self.rnn_size = rnn_size

        self.dropout_keep_prob = kwargs.get("dropout_keep_prob", 1)

        self.use_noisy_activations = kwargs.get("use_noisy_activations", False)

        log("Building encoder graph, name: '{}'.".format(self.name))
        with tf.variable_scope(self.name):
            self._create_encoder_graph()
            log("Encoder graph constructed.")
Example No. 37
    def __init__(self,
                 name: str,
                 input_shape: List[int],
                 data_id: str,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                           initializers)

        assert len(input_shape) == 3

        self.data_id = data_id
        self.input_shape = input_shape
Example No. 38
    def __init__(self,
                 name: str,
                 data_id: str,
                 network_type: str,
                 slim_models_path: str,
                 load_checkpoint: str = None,
                 spatial_layer: str = None,
                 encoded_layer: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Initialize pre-trained ImageNet network.

        Args:
            name: Name of the model part (the ImageNet network itself will
                live in its own scope, independent of `name`).
            data_id: Id of series with images (list of 3D numpy arrays)
            network_type: Identifier of ImageNet network from TFSlim.
            spatial_layer: String identifier of the convolutional map
                (model's endpoint). Check
                TFSlim documentation for end point specifications.
            encoded_layer: String id of the network layer that will be used as
                input of a decoder. `None` means averaging the convolutional
                maps.
            slim_models_path: Path to the Slim models in the tensorflow/models
                repository.
            load_checkpoint: Checkpoint file from which the pre-trained network
                is loaded.
        """
        check_argument_types()

        ModelPart.__init__(self,
                           name,
                           load_checkpoint=load_checkpoint,
                           initializers=initializers,
                           save_checkpoint=None)
        sys.path.insert(0, slim_models_path)

        self.data_id = data_id
        self.network_type = network_type
        self.spatial_layer = spatial_layer
        self.encoded_layer = encoded_layer

        if self.network_type not in SUPPORTED_NETWORKS:
            raise ValueError(
                "Network '{}' is not among the supported ones ({})".format(
                    self.network_type, ", ".join(SUPPORTED_NETWORKS.keys())))

        self.net_specification = SUPPORTED_NETWORKS[self.network_type]()
        self.height, self.width = self.net_specification.image_size
Example No. 39
    def __init__(self,
                 name: str,
                 encoder: Union[RecurrentEncoder, SentenceEncoder],
                 vocabulary: Vocabulary,
                 data_id: str,
                 dropout_keep_prob: float = 1.0,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

        self.encoder = encoder
        self.vocabulary = vocabulary
        self.data_id = data_id
        self.dropout_keep_prob = dropout_keep_prob

        self.rnn_size = int(self.encoder.temporal_states.get_shape()[-1])
Example No. 40
    def __init__(self,
                 name: str,
                 data_id: str,
                 convolutions: List[Union[ConvSpec, ResNetSpec, MaxPoolSpec]],
                 image_height: int, image_width: int, pixel_dim: int,
                 fully_connected: List[int] = None,
                 batch_normalize: bool = False,
                 dropout_keep_prob: float = 0.5,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Initialize a convolutional network for image processing.

        The convolutional network can consist of plain convolutions,
        max-pooling layers and residual blocks. In the configuration, they are
        specified using the following tuples.

            * convolution: ("C", kernel_size, stride, padding, out_channel);
            * max / average pooling: ("M"/"A", kernel_size, stride, padding);
            * residual block: ("R", kernel_size, out_channels).

        Padding must be either "valid" or "same".

        Args:
            convolutions: Configuration of convolutional layers.
            data_id: Identifier of the data series in the dataset.
            image_height: Height of the input image in pixels.
            image_width: Width of the image.
            pixel_dim: Number of color channels in the input images.
            dropout_keep_prob: Probability of keeping neurons active in
                dropout. Dropout is applied between all convolutional layers
                and the fully connected layers.
        """
        check_argument_types()
        ModelPart.__init__(
            self, name, reuse, save_checkpoint, load_checkpoint, initializers)

        self.data_id = data_id
        self.dropout_keep_prob = dropout_keep_prob

        self.image_height = image_height
        self.image_width = image_width
        self.pixel_dim = pixel_dim
        self.convolutions = convolutions
        self.fully_connected = fully_connected
        self.batch_normalize = batch_normalize
Example No. 41
    def __init__(self,
                 name: str,
                 encoder: RecurrentEncoder,
                 vocabulary: Vocabulary,
                 data_id: str,
                 dropout_keep_prob: float = 1.0,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

        self.encoder = encoder
        self.vocabulary = vocabulary
        self.data_id = data_id
        self.dropout_keep_prob = dropout_keep_prob

        self.rnn_size = self.encoder.rnn_size * 2
        self.max_output_len = self.encoder.input_sequence.max_length
Example No. 42
    def __init__(self,
                 name: str,
                 parent_decoder: Decoder,
                 max_steps: int,
                 beam_size: int,
                 length_normalization: float,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None) -> None:
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        assert check_argument_types()

        self.parent_decoder = parent_decoder
        self._beam_size = beam_size
        self._max_steps = max_steps
        self._length_normalization = length_normalization

        self.outputs = self._decoding_loop()
Example No. 43
    def __init__(self,
                 name: str,
                 data_id: str,
                 input_size: int,
                 max_input_len: int = None,
                 dropout_keep_prob: float = 1.0,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        ModelPart.__init__(
            self, name, reuse, save_checkpoint, load_checkpoint, initializers)

        self.data_id = data_id
        self.input_size = input_size
        self.max_input_len = max_input_len
        self.dropout_keep_prob = dropout_keep_prob
Example No. 44
    def __init__(self,
                 name: str,
                 max_length: int = None,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Construct a new `Sequence` object.

        Arguments:
            name: The name for the `ModelPart` object.
            max_length: Maximum length of sequences in the object (not
                checked).
            save_checkpoint: The save_checkpoint parameter for `ModelPart`.
            load_checkpoint: The load_checkpoint parameter for `ModelPart`.
        """
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.max_length = max_length
        if self.max_length is not None and self.max_length <= 0:
            raise ValueError("Max sequence length must be a positive integer.")
Example No. 45
    def __init__(self,
                 name: str,
                 vocabulary: Vocabulary,
                 data_id: str,
                 embedding_size: int,
                 filters: List[Tuple[int, int]],
                 max_input_len: int = None,
                 dropout_keep_prob: float = 1.0,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Create a new instance of the CNN sequence encoder.

        Based on: Yoon Kim: Convolutional Neural Networks for Sentence
        Classification (http://emnlp2014.org/papers/pdf/EMNLP2014181.pdf)

        Arguments:
            vocabulary: Input vocabulary
            data_id: Identifier of the data series fed to this encoder
            name: A unique identifier for this encoder
            max_input_len: Maximum length of an encoded sequence
            embedding_size: The size of the embedding vector assigned
                to each word
            filters: Specification of CNN filters. It is a list of tuples
                specifying the filter size and number of channels.
            dropout_keep_prob: The dropout keep probability
                (default 1.0)
        """
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.vocabulary = vocabulary
        self.data_id = data_id
        self.max_input_len = max_input_len
        self.embedding_size = embedding_size
        self.dropout_keep_prob = dropout_keep_prob
        self.filters = filters
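An illustrative `filters` value for this encoder, following the (filter_size, number_of_channels) convention from the docstring; the concrete numbers are invented and only mirror the common setup from Kim (2014).

# Illustrative filter specification: (filter_size, number_of_channels).
# The concrete values are an example, not taken from any real configuration.
filters = [
    (3, 100),  # 100 feature maps over windows of 3 tokens
    (4, 100),  # 100 feature maps over windows of 4 tokens
    (5, 100),  # 100 feature maps over windows of 5 tokens
]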
Example No. 46
    def __init__(self,
                 name: str,
                 encoders: List[Stateful],
                 vocabulary: Vocabulary,
                 data_id: str,
                 layers: List[int],
                 activation_fn: Callable[[tf.Tensor], tf.Tensor] = tf.nn.relu,
                 dropout_keep_prob: float = 0.5,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Construct a new instance of the sequence classifier.

        Args:
            name: Name of the decoder. Should be unique across all Neural
                Monkey objects.
            encoders: Input encoders of the decoder.
            vocabulary: Target vocabulary.
            data_id: Target data series.
            layers: List defining the structure of the network. INI example:
                layers=[100,20,5] creates a classifier with hidden layers of
                sizes 100, 20 and 5, followed by one output layer whose size
                depends on the size of the vocabulary.
            activation_fn: Activation function applied to the output of each
                hidden layer.
            dropout_keep_prob: Probability of keeping a value during dropout.
        """
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.encoders = encoders
        self.vocabulary = vocabulary
        self.data_id = data_id
        self.layers = layers
        self.activation_fn = activation_fn
        self.dropout_keep_prob = dropout_keep_prob
        self.max_output_len = 1
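To make the INI example from the docstring concrete, here is a tiny self-contained sketch of how the `layers` list and the vocabulary size determine the layer widths; the vocabulary size below is made up for illustration.

# Self-contained sketch: each entry of `layers` is the width of one hidden
# layer, and the output layer is sized by the vocabulary (value made up here).
hidden_layers = [100, 20, 5]
vocabulary_size = 4000
layer_sizes = hidden_layers + [vocabulary_size]
print(layer_sizes)  # [100, 20, 5, 4000]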
Example No. 47
    def __init__(self,
                 name: str,
                 encoders: List[Stateful],
                 data_id: str,
                 layers: List[int] = None,
                 activation_fn: Callable[[tf.Tensor], tf.Tensor] = tf.nn.relu,
                 dropout_keep_prob: float = 1.0,
                 dimension: int = 1,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.encoders = encoders
        self.data_id = data_id
        self.max_output_len = 1
        self.dimension = dimension

        self._layers = layers
        self._activation_fn = activation_fn
        self._dropout_keep_prob = dropout_keep_prob
Example No. 48
    def __init__(self,
                 name: str,
                 input_sequence: TemporalStateful,
                 ff_hidden_size: int,
                 depth: int,
                 n_heads: int,
                 dropout_keep_prob: float = 1.0,
                 attention_dropout_keep_prob: float = 1.0,
                 target_space_id: int = None,
                 use_att_transform_bias: bool = False,
                 use_positional_encoding: bool = True,
                 input_for_cross_attention: Attendable = None,
                 n_cross_att_heads: int = None,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Create an encoder of the Transformer model.

        Described in Vaswani et al. (2017), arxiv.org/abs/1706.03762

        Arguments:
            input_sequence: Embedded input sequence.
            name: Name of the encoder. Should be unique across all Neural
                Monkey objects.
            reuse: Reuse the model variables.
            dropout_keep_prob: Probability of keeping a value during dropout.
            target_space_id: Specifies the modality of the target space.
            use_att_transform_bias: Add bias when transforming qkv vectors
                for attention.
            use_positional_encoding: If True, position encoding signal is added
                to the input.

        Keyword arguments:
            ff_hidden_size: Size of the feedforward sublayers.
            n_heads: Number of the self-attention heads.
            depth: Number of sublayers.
            attention_dropout_keep_prob: Probability of keeping a value
                during dropout on the attention output.
            input_for_cross_attention: An attendable model part that is
                attended to using cross-attention on every layer of the
                encoder, analogously to how the encoder is attended in the
                decoder.
            n_cross_att_heads: Number of heads used in the cross-attention.

        """
        check_argument_types()
        ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                           initializers)

        self.input_sequence = input_sequence
        self.ff_hidden_size = ff_hidden_size
        self.depth = depth
        self.n_heads = n_heads
        self.dropout_keep_prob = dropout_keep_prob
        self.attention_dropout_keep_prob = attention_dropout_keep_prob
        self.target_space_id = target_space_id
        self.use_att_transform_bias = use_att_transform_bias
        self.use_positional_encoding = use_positional_encoding
        self.input_for_cross_attention = input_for_cross_attention
        self.n_cross_att_heads = n_cross_att_heads

        if self.depth <= 0:
            raise ValueError("Depth must be a positive integer.")

        if self.ff_hidden_size <= 0:
            raise ValueError("Feed forward hidden size must be a "
                             "positive integer.")

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")

        if (self.attention_dropout_keep_prob <= 0.0
                or self.attention_dropout_keep_prob > 1.0):
            raise ValueError("Dropout keep prob for attn must be in (0,1].")

        if self.target_space_id is not None and (self.target_space_id >= 32
                                                 or self.target_space_id < 0):
            raise ValueError(
                "If provided, the target space ID should be between 0 and 31. "
                "Was: {}".format(self.target_space_id))

        if (input_for_cross_attention is None) != (n_cross_att_heads is None):
            raise ValueError(
                "Either both input_for_cross_attention and n_cross_att_heads "
                "must be provided or none of them.")

        self._variable_scope.set_initializer(tf.variance_scaling_initializer(
            mode="fan_avg", distribution="uniform"))