Example #1
    def __init__(self, config):
        self.inputs = model_inputs.ModelInputs(config)

        # Dropout functions for words.
        # These probabilistically zero-out all embedding values for individual
        # words.
        dropout_source, dropout_target = None, None
        if config.rnn_use_dropout and config.rnn_dropout_source > 0.0:

            def dropout_source(x):
                return tf.layers.dropout(x,
                                         noise_shape=(tf.shape(x)[0],
                                                      tf.shape(x)[1], 1),
                                         rate=config.rnn_dropout_source,
                                         training=self.inputs.training)

        if config.rnn_use_dropout and config.rnn_dropout_target > 0.0:

            def dropout_target(y):
                return tf.layers.dropout(y,
                                         noise_shape=(tf.shape(y)[0],
                                                      tf.shape(y)[1], 1),
                                         rate=config.rnn_dropout_target,
                                         training=self.inputs.training)

        # Dropout functions for use within FF, GRU, and attention layers.
        # We use Gal and Ghahramani (2016)-style dropout, so these functions
        # will be used to create 2D dropout masks that are reused at every
        # timestep.
        dropout_embedding, dropout_hidden = None, None
        if config.rnn_use_dropout and config.rnn_dropout_embedding > 0.0:

            def dropout_embedding(e):
                return tf.layers.dropout(e,
                                         noise_shape=tf.shape(e),
                                         rate=config.rnn_dropout_embedding,
                                         training=self.inputs.training)

        if config.rnn_use_dropout and config.rnn_dropout_hidden > 0.0:

            def dropout_hidden(h):
                return tf.layers.dropout(h,
                                         noise_shape=tf.shape(h),
                                         rate=config.rnn_dropout_hidden,
                                         training=self.inputs.training)

        batch_size = tf.shape(self.inputs.x)[-1]  # dynamic value

        with tf.variable_scope("encoder"):
            self.encoder = Encoder(config, batch_size, dropout_source,
                                   dropout_embedding, dropout_hidden)
            ctx, embs = self.encoder.get_context(self.inputs.x,
                                                 self.inputs.x_mask)

        with tf.variable_scope("decoder"):
            # Optionally share the embedding matrix between the encoder and
            # the decoder.
            if config.tie_encoder_decoder_embeddings:
                tied_embeddings = self.encoder.emb_layer
            else:
                tied_embeddings = None
            self.decoder = Decoder(config, ctx, embs, self.inputs.x_mask,
                                   dropout_target, dropout_embedding,
                                   dropout_hidden, tied_embeddings)
            self.logits = self.decoder.score(self.inputs.y)

        with tf.variable_scope("loss"):
            self.loss_layer = layers.Masked_cross_entropy_loss(
                self.inputs.y,
                self.inputs.y_mask,
                config.label_smoothing,
                training=self.inputs.training)
            self._loss_per_sentence = self.loss_layer.forward(self.logits)
            self._loss = tf.reduce_mean(self._loss_per_sentence,
                                        keepdims=False)

        self.sampling_utils = SamplingUtils(config)
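
The noise_shape argument is what distinguishes the two kinds of dropout above: dropout_source/dropout_target pass noise_shape=(tf.shape(x)[0], tf.shape(x)[1], 1), so one keep/drop decision is broadcast across a word's whole embedding vector, while dropout_embedding/dropout_hidden pass noise_shape=tf.shape(...) to drop individual values (the surrounding layers then reuse those masks at every timestep, in the Gal and Ghahramani style the comment mentions). The sketch below is illustrative only; it is standalone TF 1.x code with made-up shapes and names, not part of the model above.

import numpy as np
import tensorflow as tf  # TF 1.x, as in the examples above

# Hypothetical standalone demo of the noise_shape broadcasting trick.
emb = tf.placeholder(tf.float32, shape=[None, None, 4])  # words x words x emb_dim

# One keep/drop decision per word, broadcast over the embedding dimension.
word_level = tf.layers.dropout(
    emb, rate=0.5,
    noise_shape=(tf.shape(emb)[0], tf.shape(emb)[1], 1),
    training=True)

# One keep/drop decision per value, as used for dropout_embedding/hidden.
value_level = tf.layers.dropout(
    emb, rate=0.5, noise_shape=tf.shape(emb), training=True)

with tf.Session() as sess:
    x = np.ones((3, 1, 4), dtype=np.float32)
    w, v = sess.run([word_level, value_level], feed_dict={emb: x})
    print(w)  # each word vector is all 0.0 or all 2.0 (inverted dropout scaling)
    print(v)  # individual values are zeroed independently
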
Example #2
    def __init__(self, config):
        self.inputs = ModelInputs(config)

        # Dropout functions for words.
        # These probabilistically zero-out all embedding values for individual
        # words.
        dropout_source, dropout_target = None, None
        if config.use_dropout and config.dropout_source > 0.0:
            def dropout_source(x):
                return tf.layers.dropout(
                    x, noise_shape=(tf.shape(x)[0], tf.shape(x)[1], 1),
                    rate=config.dropout_source, training=self.inputs.training)
        if config.use_dropout and config.dropout_target > 0.0:
            def dropout_target(y):
                return tf.layers.dropout(
                    y, noise_shape=(tf.shape(y)[0], tf.shape(y)[1], 1),
                    rate=config.dropout_target, training=self.inputs.training)

        # Dropout functions for use within FF, GRU, and attention layers.
        # We use Gal and Ghahramani (2016)-style dropout, so these functions
        # will be used to create 2D dropout masks that are reused at every
        # timestep.
        dropout_embedding, dropout_hidden = None, None
        if config.use_dropout and config.dropout_embedding > 0.0:
            def dropout_embedding(e):
                return tf.layers.dropout(e, noise_shape=tf.shape(e),
                                         rate=config.dropout_embedding,
                                         training=self.inputs.training)
        if config.use_dropout and config.dropout_hidden > 0.0:
            def dropout_hidden(h):
                return tf.layers.dropout(h, noise_shape=tf.shape(h),
                                         rate=config.dropout_hidden,
                                         training=self.inputs.training)

        batch_size = tf.shape(self.inputs.x)[-1]  # dynamic value

        with tf.variable_scope("encoder"):
            self.encoder = Encoder(config, batch_size, dropout_source,
                                   dropout_embedding, dropout_hidden)
            ctx = self.encoder.get_context(self.inputs.x, self.inputs.x_mask)

        with tf.variable_scope("decoder"):
            if config.tie_encoder_decoder_embeddings:
                tied_embeddings = self.encoder.emb_layer
            else:
                tied_embeddings = None
            self.decoder = Decoder(config, ctx, self.inputs.x_mask,
                                   dropout_target, dropout_embedding,
                                   dropout_hidden, tied_embeddings)
            self.logits = self.decoder.score(self.inputs.y)

        with tf.variable_scope("loss"):
            self.loss_layer = layers.Masked_cross_entropy_loss(
                self.inputs.y, self.inputs.y_mask, config.label_smoothing,
                training=self.inputs.training)
            self.loss_per_sentence = self.loss_layer.forward(self.logits)
            self.objective = tf.reduce_mean(self.loss_per_sentence,
                                            keepdims=False)
            # Optional L2 weight decay over all trainable variables.
            self.l2_loss = tf.constant(0.0, dtype=tf.float32)
            if config.decay_c > 0.0:
                self.l2_loss = tf.add_n(
                    [tf.nn.l2_loss(v) for v in tf.trainable_variables()]
                ) * tf.constant(config.decay_c, dtype=tf.float32)
                self.objective += self.l2_loss

            # Optional MAP-L2 regularization: penalize the distance between
            # each trainable variable and a frozen copy ("prior") of its
            # initial value.
            self.map_l2_loss = tf.constant(0.0, dtype=tf.float32)
            if config.map_decay_c > 0.0:
                map_l2_acc = []
                for v in tf.trainable_variables():
                    prior_name = 'prior/' + v.name.split(':')[0]
                    prior_v = tf.get_variable(
                        prior_name, initializer=v.initialized_value(),
                        trainable=False, collections=['prior_variables'],
                        dtype=v.initialized_value().dtype)
                    map_l2_acc.append(tf.nn.l2_loss(v - prior_v))
                self.map_l2_loss = (tf.add_n(map_l2_acc) *
                                    tf.constant(config.map_decay_c,
                                                dtype=tf.float32))
                self.objective += self.map_l2_loss

        # Placeholders for decoding outputs; populated later by the sampling
        # and beam-search graphs.
        self.sampled_ys = None
        self.beam_size, self.beam_ys, self.parents, self.cost = (None, None,
                                                                 None, None)
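
Example #2 adds two optional regularizers to the objective: plain L2 weight decay (decay_c) and a MAP-L2 term (map_decay_c) that penalizes the distance between each trainable variable and a frozen "prior" copy of its initial value, so a fine-tuned model is pulled back towards the model it started from. Below is a minimal standalone sketch of the MAP-L2 term, assuming made-up variable names and a made-up decay value; it is not the model code itself.

import tensorflow as tf  # TF 1.x

map_decay_c = 0.01  # stands in for config.map_decay_c (illustrative value)

# A trainable weight and a frozen "prior" copy of its initial value.
v = tf.get_variable("demo/W", initializer=tf.ones([2, 2]))
prior_v = tf.get_variable("prior/demo/W",
                          initializer=v.initialized_value(),
                          trainable=False, collections=['prior_variables'])

# tf.nn.l2_loss(t) is sum(t ** 2) / 2, so the penalty is 0 until v drifts
# away from prior_v.
map_l2 = tf.nn.l2_loss(v - prior_v) * tf.constant(map_decay_c, dtype=tf.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Variables created with a custom collections list are not added to
    # GLOBAL_VARIABLES, so initialize the prior copies explicitly.
    sess.run(tf.variables_initializer(tf.get_collection('prior_variables')))
    print(sess.run(map_l2))        # 0.0 before any update
    sess.run(v.assign(v + 1.0))
    print(sess.run(map_l2))        # 0.01 * (4 * 1.0**2) / 2 = 0.02
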