Example #1
    def output_projection_spec(self) -> Tuple[OutputProjection, int]:
        if self._output_projection_spec is None:
            log("No output projection specified - using tanh projection")
            return (nonlinear_output(self.rnn_size, tf.tanh)[0], self.rnn_size)

        if isinstance(self._output_projection_spec, tuple):
            return self._output_projection_spec

        return cast(OutputProjection,
                    self._output_projection_spec), self.rnn_size
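
This accessor collapses the three accepted forms of _output_projection_spec (None, a (projection, size) tuple, or a bare projection callable) into a single (projection, size) pair. Below is a minimal, self-contained sketch of the same normalization; resolve_projection_spec and the type aliases are illustrative stand-ins, not Neural Monkey API:

    from typing import Callable, Optional, Tuple, Union, cast

    # Simplified stand-ins for the Neural Monkey types (assumptions).
    OutputProjection = Callable[..., object]
    OutputProjectionSpec = Union[OutputProjection,
                                 Tuple[OutputProjection, int]]

    def resolve_projection_spec(
            spec: Optional[OutputProjectionSpec],
            default: OutputProjection,
            rnn_size: int) -> Tuple[OutputProjection, int]:
        if spec is None:
            # No spec: fall back to the default projection (the tanh
            # projection above), sized to the decoder state.
            return default, rnn_size
        if isinstance(spec, tuple):
            # An explicit (projection, output_size) pair passes through.
            return spec
        # A bare callable keeps the decoder state size as output size.
        return cast(OutputProjection, spec), rnn_size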
Example #2
    def __init__(self,
                 encoders: List[Stateful],
                 vocabulary: Vocabulary,
                 data_id: str,
                 name: str,
                 max_output_len: int,
                 dropout_keep_prob: float = 1.0,
                 embedding_size: int = None,
                 embeddings_source: EmbeddedSequence = None,
                 tie_embeddings: bool = False,
                 label_smoothing: float = None,
                 rnn_size: int = None,
                 output_projection: OutputProjectionSpec = None,
                 encoder_projection: EncoderProjection = None,
                 attentions: List[BaseAttention] = None,
                 attention_on_input: bool = False,
                 rnn_cell: str = "GRU",
                 conditional_gru: bool = False,
                 supress_unk: bool = False,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Create a refactored version of monster decoder.

        Arguments:
            encoders: Input encoders of the decoder.
            vocabulary: Target vocabulary.
            data_id: Target data series.
            name: Name of the decoder. Should be unique across all Neural
                Monkey objects.
            max_output_len: Maximum length of an output sequence.
            dropout_keep_prob: Probability of keeping a value during dropout.
            embedding_size: Size of embedding vectors for target words.
            embeddings_source: Embedded sequence to take embeddings from.
            tie_embeddings: Use decoder.embedding_matrix also in place
                of the output decoding matrix.
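            label_smoothing: Label smoothing parameter.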
            rnn_size: Size of the decoder hidden state, if None set
                according to encoders.
            output_projection: How to generate distribution over vocabulary
                from decoder_outputs.
            encoder_projection: How to construct initial state from encoders.
            attentions: The attention objects to use. Optional.
            rnn_cell: RNN Cell used by the decoder (GRU or LSTM).
            conditional_gru: Flag whether to use the Conditional GRU
                architecture.
            attention_on_input: Flag whether attention from previous decoding
                step should be combined with the input in the next step.
            supress_unk: If true, decoder will not produce symbols for unknown
                tokens.
            reuse: Reuse the model variables from the given model part.
        """
        check_argument_types()
        AutoregressiveDecoder.__init__(self,
                                       name=name,
                                       vocabulary=vocabulary,
                                       data_id=data_id,
                                       max_output_len=max_output_len,
                                       dropout_keep_prob=dropout_keep_prob,
                                       embedding_size=embedding_size,
                                       embeddings_source=embeddings_source,
                                       tie_embeddings=tie_embeddings,
                                       label_smoothing=label_smoothing,
                                       supress_unk=supress_unk,
                                       reuse=reuse,
                                       save_checkpoint=save_checkpoint,
                                       load_checkpoint=load_checkpoint,
                                       initializers=initializers)

        self.encoders = encoders
        self.output_projection_spec = output_projection
        self._conditional_gru = conditional_gru
        self._attention_on_input = attention_on_input
        self._rnn_cell_str = rnn_cell

        self.attentions = []  # type: List[BaseAttention]
        if attentions is not None:
            self.attentions = attentions

        if rnn_size is not None:
            self.rnn_size = rnn_size

        if encoder_projection is not None:
            self.encoder_projection = encoder_projection
        elif not self.encoders:
            log("No direct encoder input. Using empty initial state")
            self.encoder_projection = empty_initial_state
        elif rnn_size is None:
            log("No rnn_size or encoder_projection: Using concatenation of"
                " encoded states")
            self.encoder_projection = concat_encoder_projection
            self.rnn_size = sum(e.output.get_shape()[1].value
                                for e in encoders)
        else:
            log("Using linear projection of encoders as the initial state")
            self.encoder_projection = linear_encoder_projection(
                self.dropout_keep_prob)

        assert self.rnn_size is not None

        if self._rnn_cell_str not in RNN_CELL_TYPES:
            raise ValueError("RNN cell must be a either 'GRU', 'LSTM', or "
                             "'NematusGRU'. Not {}".format(self._rnn_cell_str))

        if self.output_projection_spec is None:
            log("No output projection specified - using tanh projection")
            self.output_projection = nonlinear_output(self.rnn_size,
                                                      tf.tanh)[0]
            self.output_projection_size = self.rnn_size
        elif isinstance(self.output_projection_spec, tuple):
            self.output_projection_spec = cast(Tuple[OutputProjection, int],
                                               self.output_projection_spec)
            (self.output_projection,
             self.output_projection_size) = self.output_projection_spec
        else:
            self.output_projection = cast(OutputProjection,
                                          self.output_projection_spec)
            self.output_projection_size = self.rnn_size

        if self._attention_on_input:
            self.input_projection = self.input_plus_attention
        else:
            self.input_projection = self.embed_input_symbol

        with self.use_scope():
            with tf.variable_scope("attention_decoder") as self.step_scope:
                pass

        self._variable_scope.set_initializer(
            tf.random_normal_initializer(stddev=0.001))

        # TODO when it is possible, remove the printing of the cost var
        log("Decoder initalized. Cost var: {}".format(str(self.cost)))
        log("Runtime logits tensor: {}".format(str(self.runtime_logits)))
Example #3
    def __init__(self,
                 encoders: List[Stateful],
                 vocabulary: Vocabulary,
                 data_id: str,
                 name: str,
                 max_output_len: int,
                 dropout_keep_prob: float = 1.0,
                 rnn_size: int = None,
                 embedding_size: int = None,
                 output_projection: OutputProjectionSpec = None,
                 encoder_projection: EncoderProjection = None,
                 attentions: List[BaseAttention] = None,
                 embeddings_source: EmbeddedSequence = None,
                 attention_on_input: bool = True,
                 rnn_cell: str = "GRU",
                 conditional_gru: bool = False,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None) -> None:
        """Create a refactored version of monster decoder.

        Arguments:
            encoders: Input encoders of the decoder
            vocabulary: Target vocabulary
            data_id: Target data series
            name: Name of the decoder. Should be unique across all Neural
                Monkey objects
            max_output_len: Maximum length of an output sequence
            dropout_keep_prob: Probability of keeping a value during dropout

        Keyword arguments:
            rnn_size: Size of the decoder hidden state, if None set
                according to encoders.
            embedding_size: Size of embedding vectors for target words
            output_projection: How to generate distribution over vocabulary
                from decoder rnn_outputs
            encoder_projection: How to construct initial state from encoders
            attentions: The attention objects to use. Optional.
            embeddings_source: Embedded sequence to take embeddings from
            rnn_cell: RNN Cell used by the decoder (GRU or LSTM)
            conditional_gru: Flag whether to use the Conditional GRU
                architecture
            attention_on_input: Flag whether attention from previous decoding
                step should be combined with the input in the next step.
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        check_argument_types()

        log("Initializing decoder, name: '{}'".format(name))

        self.encoders = encoders
        self.vocabulary = vocabulary
        self.data_id = data_id
        self.max_output_len = max_output_len
        self.dropout_keep_prob = dropout_keep_prob
        self.embedding_size = embedding_size
        self.rnn_size = rnn_size
        self.output_projection_spec = output_projection
        self.encoder_projection = encoder_projection
        self.attentions = attentions
        self.embeddings_source = embeddings_source
        self._conditional_gru = conditional_gru
        self._attention_on_input = attention_on_input
        self._rnn_cell_str = rnn_cell

        if self.attentions is None:
            self.attentions = []

        if self.embedding_size is None and self.embeddings_source is None:
            raise ValueError("You must specify either embedding size or the "
                             "embedded sequence from which to reuse the "
                             "embeddings (e.g. set either 'embedding_size' or "
                             " 'embeddings_source' parameter)")

        if self.embeddings_source is not None:
            if self.embedding_size is not None:
                warn("Overriding the embedding_size parameter with the"
                     " size of the reused embeddings from the encoder.")

            self.embedding_size = (
                self.embeddings_source.embedding_matrix.get_shape()[1].value)

        if self.encoder_projection is None:
            if not self.encoders:
                log("No encoder - language model only.")
                self.encoder_projection = empty_initial_state
            elif rnn_size is None:
                log("No rnn_size or encoder_projection: Using concatenation of"
                    " encoded states")
                self.encoder_projection = concat_encoder_projection
                self.rnn_size = sum(e.output.get_shape()[1].value
                                    for e in encoders)
            else:
                log("Using linear projection of encoders as the initial state")
                self.encoder_projection = linear_encoder_projection(
                    self.dropout_keep_prob)

        assert self.rnn_size is not None

        if self._rnn_cell_str not in RNN_CELL_TYPES:
            raise ValueError("RNN cell must be a either 'GRU', 'LSTM', or "
                             "'NematusGRU'. Not {}".format(self._rnn_cell_str))

        if self.output_projection_spec is None:
            log("No output projection specified - using tanh projection")
            self.output_projection = nonlinear_output(
                self.rnn_size, tf.tanh)[0]
            self.output_projection_size = self.rnn_size
        elif isinstance(self.output_projection_spec, tuple):
            (self.output_projection,
             self.output_projection_size) = tuple(self.output_projection_spec)
        else:
            self.output_projection = self.output_projection_spec
            self.output_projection_size = self.rnn_size

        if self._attention_on_input:
            self.input_projection = self.input_plus_attention
        else:
            self.input_projection = self.embed_input_symbol

        with self.use_scope():
            with tf.variable_scope("attention_decoder") as self.step_scope:
                pass

        # TODO when it is possible, remove the printing of the cost var
        log("Decoder initalized. Cost var: {}".format(str(self.cost)))
        log("Runtime logits tensor: {}".format(str(self.runtime_logits)))