Example #1
import tensorflow_probability as tfp
from tensorflow.keras import layers, models, regularizers

tfpl = tfp.layers


def rnn_model(params, training_dr_lstm=True, training_dr_ll=True):
    """RNN model for text."""
    input_shape = (params['fix_len'],)
    seq_input = layers.Input(shape=input_shape)
    # vocab+1 because of padding
    seq_emb = layers.Embedding(params['vocab_size'] + 1,
                               params['emb_size'],
                               input_length=params['fix_len'])(seq_input)
    lstm_out = layers.LSTM(params['hidden_lstm_size'],
                           dropout=params['dropout_rate_lstm'])(
                               seq_emb, training=training_dr_lstm)
    out = layers.Dropout(rate=params['dropout_rate'],
                         seed=params['random_seed'])(lstm_out,
                                                     training=training_dr_ll)
    if params['variational']:
        # Scale the KL loss by the number of training examples:
        # a larger training set relies less on the prior.
        def scaled_kl_fn(p, q, _):
            return tfp.distributions.kl_divergence(q, p) / params['n_train']

        logits = tfpl.DenseReparameterization(
            params['n_class_in'],
            activation=None,
            kernel_divergence_fn=scaled_kl_fn,
            bias_posterior_fn=tfpl.util.default_mean_field_normal_fn(),
            name='last_layer')(out)
    else:
        logits = layers.Dense(
            params['n_class_in'],
            activation=None,
            kernel_regularizer=regularizers.l2(params['reg_weight']),
            bias_regularizer=regularizers.l2(params['reg_weight']),
            name='last_layer')(out)
    probs = layers.Softmax(axis=1)(logits)
    return models.Model(seq_input, probs, name='rnn')
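
A minimal usage sketch for rnn_model; every hyperparameter value below is an illustrative assumption, not taken from the original source.

params = {
    'fix_len': 32, 'vocab_size': 10000, 'emb_size': 64,
    'hidden_lstm_size': 128, 'dropout_rate_lstm': 0.1,
    'dropout_rate': 0.3, 'random_seed': 42, 'variational': False,
    'n_class_in': 2, 'reg_weight': 1e-4, 'n_train': 50000,
}
model = rnn_model(params, training_dr_lstm=True, training_dr_ll=True)
# The model already outputs probabilities, so the loss must not use from_logits.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.summary()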
Example #2
def get_model(cfg, encoder_inputs, encoder_outputs):
    """Seq2seq training model: a GRU decoder conditioned on the encoder's final state."""
    decoder_inputs = layers.Input(shape=(None, ),
                                  name='Decoder-Input')  # for teacher forcing

    dec_emb = layers.Embedding(cfg.num_input_tokens,
                               cfg.latent_dim,
                               name='Decoder-Embedding',
                               mask_zero=False)(decoder_inputs)

    dec_bn = layers.BatchNormalization(name='Decoder-Batchnorm-1')(dec_emb)

    decoder_gru = layers.GRU(cfg.latent_dim,
                             return_state=True,
                             return_sequences=True,
                             name='Decoder-GRU')

    decoder_gru_output, _ = decoder_gru(dec_bn, initial_state=encoder_outputs)

    x = layers.BatchNormalization(
        name='Decoder-Batchnorm-2')(decoder_gru_output)
    decoder_dense = layers.Dense(cfg.num_output_tokens,
                                 activation='softmax',
                                 name='Final-Output-Dense')

    decoder_outputs = decoder_dense(x)

    model = models.Model([encoder_inputs, decoder_inputs], decoder_outputs)

    return model
Example #3
    def _build_userencoder(self, titleencoder, type="ini"):
        """The main function to create the user encoder of LSTUR.

        Args:
            titleencoder (object): the news encoder of LSTUR.
            type (str): "ini" initializes the GRU state with the long-term
                user embedding; "con" concatenates it with the GRU output.

        Returns:
            object: the user encoder of LSTUR.
        """
        hparams = self.hparams
        his_input_title = keras.Input(
            shape=(hparams.his_size, hparams.title_size), dtype="int32"
        )
        user_indexes = keras.Input(shape=(1,), dtype="int32")

        user_embedding_layer = layers.Embedding(
            len(self.train_iterator.uid2index),
            hparams.gru_unit,
            trainable=True,
            embeddings_initializer="zeros",
        )

        long_u_emb = layers.Reshape((hparams.gru_unit,))(
            user_embedding_layer(user_indexes)
        )
        click_title_presents = layers.TimeDistributed(titleencoder)(his_input_title)

        if type == "ini":
            user_present = layers.GRU(
                hparams.gru_unit,
                kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
                recurrent_initializer=keras.initializers.glorot_uniform(seed=self.seed),
                bias_initializer=keras.initializers.Zeros(),
            )(
                layers.Masking(mask_value=0.0)(click_title_presents),
                initial_state=[long_u_emb],
            )
        elif type == "con":
            short_uemb = layers.GRU(
                hparams.gru_unit,
                kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
                recurrent_initializer=keras.initializers.glorot_uniform(seed=self.seed),
                bias_initializer=keras.initializers.Zeros(),
            )(layers.Masking(mask_value=0.0)(click_title_presents))

            user_present = layers.Concatenate()([short_uemb, long_u_emb])
            user_present = layers.Dense(
                hparams.gru_unit,
                bias_initializer=keras.initializers.Zeros(),
                kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
            )(user_present)

        model = keras.Model(
            [his_input_title, user_indexes], user_present, name="user_encoder"
        )
        return model
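
A self-contained sketch of the "ini" branch above, where the long-term user embedding seeds the GRU's initial hidden state; all dimensions and toy names are illustrative assumptions.

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

his_size, units, n_users = 5, 32, 100
encoded_titles = keras.Input(shape=(his_size, units))  # stand-in for TimeDistributed(titleencoder)
user_idx = keras.Input(shape=(1,), dtype="int32")
long_u = layers.Reshape((units,))(layers.Embedding(n_users, units)(user_idx))
user_vec = layers.GRU(units)(encoded_titles, initial_state=[long_u])
toy_encoder = keras.Model([encoded_titles, user_idx], user_vec)
print(toy_encoder([np.zeros((2, his_size, units)),
                   np.zeros((2, 1), dtype="int32")]).shape)  # (2, 32)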
Example #4
    def _build_lstur(self):
        """The main function to create LSTUR's logic. The core of LSTUR
        is a user encoder and a news encoder.

        Returns:
            object: a model used for training.
            object: a model used for evaluation and inference.
        """
        hparams = self.hparams

        his_input_title = keras.Input(
            shape=(hparams.his_size, hparams.title_size), dtype="int32"
        )
        pred_input_title = keras.Input(
            shape=(hparams.npratio + 1, hparams.title_size), dtype="int32"
        )
        pred_input_title_one = keras.Input(
            shape=(
                1,
                hparams.title_size,
            ),
            dtype="int32",
        )
        pred_title_reshape = layers.Reshape((hparams.title_size,))(pred_input_title_one)
        user_indexes = keras.Input(shape=(1,), dtype="int32")

        embedding_layer = layers.Embedding(
            self.word2vec_embedding.shape[0],
            hparams.word_emb_dim,
            weights=[self.word2vec_embedding],
            trainable=True,
        )

        titleencoder = self._build_newsencoder(embedding_layer)
        self.userencoder = self._build_userencoder(titleencoder, type=hparams.type)
        self.newsencoder = titleencoder

        user_present = self.userencoder([his_input_title, user_indexes])
        news_present = layers.TimeDistributed(self.newsencoder)(pred_input_title)
        news_present_one = self.newsencoder(pred_title_reshape)

        preds = layers.Dot(axes=-1)([news_present, user_present])
        preds = layers.Activation(activation="softmax")(preds)

        pred_one = layers.Dot(axes=-1)([news_present_one, user_present])
        pred_one = layers.Activation(activation="sigmoid")(pred_one)

        model = keras.Model([user_indexes, his_input_title, pred_input_title], preds)
        scorer = keras.Model(
            [user_indexes, his_input_title, pred_input_title_one], pred_one
        )

        return model, scorer
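
A toy sketch of the candidate-scoring pattern above: each encoded candidate is dotted with the user vector to give one score per candidate, softmax-normalized for training; dimensions and names are illustrative assumptions.

from tensorflow import keras
from tensorflow.keras import layers

n_candidates, dim = 5, 32
news_vecs = keras.Input(shape=(n_candidates, dim))  # stand-in for encoded candidates
user_vec = keras.Input(shape=(dim,))
scores = layers.Dot(axes=-1)([news_vecs, user_vec])  # (batch, n_candidates)
probs = layers.Activation("softmax")(scores)
toy_ranker = keras.Model([news_vecs, user_vec], probs)
toy_ranker.summary()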
Example #5
def get_encoder_model(cfg):
    """GRU encoder; returns the model, its input tensor, and its final hidden state."""
    encoder_inputs = layers.Input(shape=(cfg.len_input_seq, ),
                                  name='Encoder-Input')

    x = layers.Embedding(cfg.num_input_tokens,
                         cfg.latent_dim,
                         name='Encoder-Embedding',
                         mask_zero=False)(encoder_inputs)

    x = layers.BatchNormalization(name='Encoder-Batchnorm-1')(x)

    _, state_h = layers.GRU(cfg.latent_dim,
                            return_state=True,
                            name='Encoder-Last-GRU')(x)

    encoder_model = models.Model(inputs=encoder_inputs,
                                 outputs=state_h,
                                 name='Encoder-Model')

    encoder_outputs = encoder_model(encoder_inputs)

    return encoder_model, encoder_inputs, encoder_outputs
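
A minimal sketch wiring get_encoder_model together with the get_model decoder from Example #2, assuming the layers/models imports shown in Example #1; the cfg values are illustrative assumptions.

from types import SimpleNamespace

cfg = SimpleNamespace(len_input_seq=40, num_input_tokens=8000,
                      num_output_tokens=8000, latent_dim=300)
_, encoder_inputs, encoder_outputs = get_encoder_model(cfg)
seq2seq = get_model(cfg, encoder_inputs, encoder_outputs)
# Teacher forcing: the decoder input is the target sequence shifted right one step.
seq2seq.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
seq2seq.summary()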
Example #6
    def __init__(self,
                 word_embedding,
                 data,
                 use_cudnn_lstm=False,
                 plot_model_architecture=True):
        self.hidden_units = 300
        self.embed_model = word_embedding
        self.input_dim = word_embedding.embed_dim
        self.vocab_size = data.vocab_size
        self.left = data.premise
        self.right = data.hypothesis
        self.max_len = data.max_len
        self.dense_units = 32
        self.name = '{}_glove{}_lstm{}_dense{}'.format(str(int(time.time())),
                                                       self.input_dim,
                                                       self.hidden_units,
                                                       self.dense_units)

        # Copy pretrained vectors into the embedding matrix
        # (assumes data.vocab yields (word, index) pairs).
        embedding_matrix = np.zeros((self.vocab_size, self.input_dim))
        for word, i in data.vocab:
            embedding_vector = self.embed_model.get_vector(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector

        embed = layers.Embedding(
            input_dim=self.vocab_size,
            output_dim=self.input_dim,
            embeddings_initializer=Constant(embedding_matrix),
            input_length=self.max_len,
            mask_zero=True,
            trainable=False)
        #embed.trainable=False

        if use_cudnn_lstm:
            lstm = layers.CuDNNLSTM(self.hidden_units,
                                    input_shape=(None, self.input_dim),
                                    unit_forget_bias=True,
                                    kernel_initializer='he_normal',
                                    kernel_regularizer='l2',
                                    name='lstm_layer')
        else:
            lstm = layers.LSTM(self.hidden_units,
                               input_shape=(None, self.input_dim),
                               unit_forget_bias=True,
                               activation='relu',
                               kernel_initializer='he_normal',
                               kernel_regularizer='l2',
                               name='lstm_layer')
        left_input = Input(shape=(self.max_len,), name='input_1')
        right_input = Input(shape=(self.max_len,), name='input_2')

        embed_left = embed(left_input)
        embed_right = embed(right_input)

        print('embed:', embed_right.shape)

        left_output = lstm(embed_left)
        right_output = lstm(embed_right)
        print('lstm:', right_output.shape)
        # Element-wise similarity: 1 - |left - right| (higher = more similar).
        l1_norm = lambda x: 1 - K.abs(x[0] - x[1])
        merged = layers.Lambda(function=l1_norm,
                               output_shape=lambda x: x[0],
                               name='L1_distance')([left_output, right_output])
        #merged = layers.concatenate([left_output, right_output])
        #lstm_2 = layers.LSTM(hidden_units, unit_forget_bias=True,
        #                      activation = 'relu', kernel_regularizer='l2', name='lstm_layer2' )(merged)
        print('merged:', merged.shape)
        dense_1 = layers.Dense(self.dense_units, activation='relu')(merged)
        print('dense1:', dense_1.shape)
        output = layers.Dense(3, activation='softmax',
                              name='output_layer')(dense_1)
        print('output:', output.shape)
        self.model = Model(inputs=[left_input, right_input], outputs=output)

        self.compile()
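
A standalone sketch of the element-wise L1 similarity merge used above (1 - |a - b|, higher means more similar); shapes and names are illustrative assumptions.

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K

left = keras.Input(shape=(300,))
right = keras.Input(shape=(300,))
similarity = layers.Lambda(lambda x: 1 - K.abs(x[0] - x[1]))([left, right])
probs = layers.Dense(3, activation='softmax')(similarity)
toy_siamese = keras.Model([left, right], probs)
toy_siamese.summary()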