def rnn_model(params, training_dr_lstm=True, training_dr_ll=True):
    """RNN model for text."""
    input_shape = (params['fix_len'],)  # Keras expects a shape tuple, not a bare int
    seq_input = layers.Input(shape=input_shape)
    # vocab+1 because of padding
    seq_emb = layers.Embedding(params['vocab_size'] + 1,
                               params['emb_size'],
                               input_length=params['fix_len'])(seq_input)
    lstm_out = layers.LSTM(params['hidden_lstm_size'],
                           dropout=params['dropout_rate_lstm'])(
                               seq_emb, training=training_dr_lstm)
    out = layers.Dropout(rate=params['dropout_rate'],
                         seed=params['random_seed'])(
                             lstm_out, training=training_dr_ll)
    if params['variational']:
        # Scale the KL loss by the number of training examples:
        # a larger training set depends less on the prior.
        def scaled_kl_fn(p, q, _):
            return tfp.distributions.kl_divergence(q, p) / params['n_train']

        logits = tfpl.DenseReparameterization(
            params['n_class_in'],
            activation=None,
            kernel_divergence_fn=scaled_kl_fn,
            bias_posterior_fn=tfpl.util.default_mean_field_normal_fn(),
            name='last_layer')(out)
    else:
        logits = layers.Dense(
            params['n_class_in'],
            activation=None,
            kernel_regularizer=regularizers.l2(params['reg_weight']),
            bias_regularizer=regularizers.l2(params['reg_weight']),
            name='last_layer')(out)
    probs = layers.Softmax(axis=1)(logits)
    return models.Model(seq_input, probs, name='rnn')
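# --- Usage sketch (not part of the original source) ----------------------------
# rnn_model above relies on module-level imports that are not shown in this
# excerpt; the aliases below are an assumption that matches the names it uses.
import tensorflow_probability as tfp
from tensorflow.keras import layers, models, regularizers

tfpl = tfp.layers

# Hypothetical params dict: the key names come from rnn_model, the values are
# illustrative only.
example_params = {
    'fix_len': 100,            # fixed sequence length
    'vocab_size': 20000,
    'emb_size': 128,
    'hidden_lstm_size': 64,
    'dropout_rate_lstm': 0.1,
    'dropout_rate': 0.3,
    'random_seed': 42,
    'variational': False,      # True switches to the DenseReparameterization head
    'n_train': 25000,          # scales the KL term when variational=True
    'n_class_in': 2,
    'reg_weight': 1e-4,
}
example_model = rnn_model(example_params)
example_model.compile(optimizer='adam', loss='categorical_crossentropy')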
def get_model(cfg, encoder_inputs, encoder_outputs):
    # Decoder input for teacher forcing.
    decoder_inputs = layers.Input(shape=(None,), name='Decoder-Input')
    dec_emb = layers.Embedding(cfg.num_input_tokens,
                               cfg.latent_dim,
                               name='Decoder-Embedding',
                               mask_zero=False)(decoder_inputs)
    dec_bn = layers.BatchNormalization(name='Decoder-Batchnorm-1')(dec_emb)
    decoder_gru = layers.GRU(cfg.latent_dim,
                             return_state=True,
                             return_sequences=True,
                             name='Decoder-GRU')
    decoder_gru_output, _ = decoder_gru(dec_bn, initial_state=encoder_outputs)
    x = layers.BatchNormalization(name='Decoder-Batchnorm-2')(decoder_gru_output)
    decoder_dense = layers.Dense(cfg.num_output_tokens,
                                 activation='softmax',
                                 name='Final-Output-Dense')
    decoder_outputs = decoder_dense(x)
    model = models.Model([encoder_inputs, decoder_inputs], decoder_outputs)
    return model
def _build_userencoder(self, titleencoder, type="ini"):
    """The main function to create the user encoder of LSTUR.

    Args:
        titleencoder (object): the news encoder of LSTUR.
        type (str): "ini" initializes the GRU state with the long-term user
            embedding; "con" concatenates the long- and short-term
            representations instead.

    Returns:
        object: the user encoder of LSTUR.
    """
    hparams = self.hparams
    his_input_title = keras.Input(
        shape=(hparams.his_size, hparams.title_size), dtype="int32"
    )
    user_indexes = keras.Input(shape=(1,), dtype="int32")

    user_embedding_layer = layers.Embedding(
        len(self.train_iterator.uid2index),
        hparams.gru_unit,
        trainable=True,
        embeddings_initializer="zeros",
    )

    long_u_emb = layers.Reshape((hparams.gru_unit,))(
        user_embedding_layer(user_indexes)
    )
    click_title_presents = layers.TimeDistributed(titleencoder)(his_input_title)

    if type == "ini":
        user_present = layers.GRU(
            hparams.gru_unit,
            kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
            recurrent_initializer=keras.initializers.glorot_uniform(seed=self.seed),
            bias_initializer=keras.initializers.Zeros(),
        )(
            layers.Masking(mask_value=0.0)(click_title_presents),
            initial_state=[long_u_emb],
        )
    elif type == "con":
        short_uemb = layers.GRU(
            hparams.gru_unit,
            kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
            recurrent_initializer=keras.initializers.glorot_uniform(seed=self.seed),
            bias_initializer=keras.initializers.Zeros(),
        )(layers.Masking(mask_value=0.0)(click_title_presents))
        user_present = layers.Concatenate()([short_uemb, long_u_emb])
        user_present = layers.Dense(
            hparams.gru_unit,
            bias_initializer=keras.initializers.Zeros(),
            kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
        )(user_present)

    model = keras.Model(
        [his_input_title, user_indexes], user_present, name="user_encoder"
    )
    return model
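# --- Shape sketch (not part of the original source) ----------------------------
# A minimal, self-contained illustration of the "ini" variant above: the
# long-term user embedding seeds the GRU's initial state over the sequence of
# encoded clicked titles. All sizes below are hypothetical.
from tensorflow import keras
from tensorflow.keras import layers

_gru_unit, _his_size, _title_vec_dim = 32, 5, 32
_clicked_vecs = keras.Input(shape=(_his_size, _title_vec_dim))  # encoded click history
_long_u_emb = keras.Input(shape=(_gru_unit,))                   # long-term user vector
_user_vec = layers.GRU(_gru_unit)(_clicked_vecs, initial_state=[_long_u_emb])
_ini_demo = keras.Model([_clicked_vecs, _long_u_emb], _user_vec)  # output: (batch, _gru_unit)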
def _build_lstur(self):
    """The main function to create LSTUR's logic. The core of LSTUR
    is a user encoder and a news encoder.

    Returns:
        object: a model used for training.
        object: a model used for evaluation and inference.
    """
    hparams = self.hparams

    his_input_title = keras.Input(
        shape=(hparams.his_size, hparams.title_size), dtype="int32"
    )
    pred_input_title = keras.Input(
        shape=(hparams.npratio + 1, hparams.title_size), dtype="int32"
    )
    pred_input_title_one = keras.Input(
        shape=(1, hparams.title_size), dtype="int32"
    )
    pred_title_reshape = layers.Reshape((hparams.title_size,))(pred_input_title_one)
    user_indexes = keras.Input(shape=(1,), dtype="int32")

    embedding_layer = layers.Embedding(
        self.word2vec_embedding.shape[0],
        hparams.word_emb_dim,
        weights=[self.word2vec_embedding],
        trainable=True,
    )

    titleencoder = self._build_newsencoder(embedding_layer)
    self.userencoder = self._build_userencoder(titleencoder, type=hparams.type)
    self.newsencoder = titleencoder

    user_present = self.userencoder([his_input_title, user_indexes])
    news_present = layers.TimeDistributed(self.newsencoder)(pred_input_title)
    news_present_one = self.newsencoder(pred_title_reshape)

    preds = layers.Dot(axes=-1)([news_present, user_present])
    preds = layers.Activation(activation="softmax")(preds)

    pred_one = layers.Dot(axes=-1)([news_present_one, user_present])
    pred_one = layers.Activation(activation="sigmoid")(pred_one)

    model = keras.Model([user_indexes, his_input_title, pred_input_title], preds)
    scorer = keras.Model(
        [user_indexes, his_input_title, pred_input_title_one], pred_one
    )
    return model, scorer
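# --- Scoring-head sketch (not part of the original source) ---------------------
# The training model above scores npratio + 1 candidates per impression with a
# dot product against the user vector followed by a softmax; the scorer applies
# a sigmoid to a single candidate. The standalone head below mirrors that
# wiring with hypothetical sizes.
from tensorflow import keras
from tensorflow.keras import layers

_npratio, _dim = 4, 32
_news_present = keras.Input(shape=(_npratio + 1, _dim))   # candidate news vectors
_user_present = keras.Input(shape=(_dim,))                # user vector
_scores = layers.Dot(axes=-1)([_news_present, _user_present])
_scores = layers.Activation("softmax")(_scores)           # (batch, _npratio + 1)
_head_demo = keras.Model([_news_present, _user_present], _scores)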
def get_encoder_model(cfg):
    encoder_inputs = layers.Input(shape=(cfg.len_input_seq,), name='Encoder-Input')
    x = layers.Embedding(cfg.num_input_tokens,
                         cfg.latent_dim,
                         name='Encoder-Embedding',
                         mask_zero=False)(encoder_inputs)
    x = layers.BatchNormalization(name='Encoder-Batchnorm-1')(x)
    _, state_h = layers.GRU(cfg.latent_dim,
                            return_state=True,
                            name='Encoder-Last-GRU')(x)
    encoder_model = models.Model(inputs=encoder_inputs,
                                 outputs=state_h,
                                 name='Encoder-Model')
    encoder_outputs = encoder_model(encoder_inputs)
    return encoder_model, encoder_inputs, encoder_outputs
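# --- Wiring sketch (not part of the original source) ---------------------------
# get_encoder_model and get_model are meant to be used together: build the
# encoder first, then hand its input tensor and final GRU state to the decoder.
# The config class below is hypothetical; its attribute names are the ones the
# two functions read, and the values are illustrative only. The imports match
# the `layers`/`models` names the functions use.
from tensorflow.keras import layers, models

class _DemoCfg:
    len_input_seq = 50
    num_input_tokens = 5000
    num_output_tokens = 5000
    latent_dim = 256

_encoder_model, _encoder_inputs, _encoder_outputs = get_encoder_model(_DemoCfg)
_seq2seq = get_model(_DemoCfg, _encoder_inputs, _encoder_outputs)
_seq2seq.compile(optimizer='adam', loss='sparse_categorical_crossentropy')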
def __init__(self, word_embedding, data, use_cudnn_lstm=False,
             plot_model_architecture=True):
    self.hidden_units = 300
    self.embed_model = word_embedding
    self.input_dim = word_embedding.embed_dim
    self.vocab_size = data.vocab_size
    self.left = data.premise
    self.right = data.hypothesis
    self.max_len = data.max_len
    self.dense_units = 32
    self.name = '{}_glove{}_lstm{}_dense{}'.format(
        str(int(time.time())), self.input_dim, self.hidden_units,
        self.dense_units)

    # Build the embedding matrix from the pretrained word vectors.
    embedding_matrix = np.zeros((self.vocab_size, self.input_dim))
    for word, i in data.vocab:
        embedding_vector = self.embed_model.get_vector(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    embed = layers.Embedding(
        input_dim=self.vocab_size,
        output_dim=self.input_dim,
        embeddings_initializer=Constant(embedding_matrix),
        input_length=self.max_len,
        mask_zero=True,
        trainable=False)  #embed.trainable=False

    if use_cudnn_lstm:
        lstm = layers.CuDNNLSTM(self.hidden_units,
                                input_shape=(None, self.input_dim),
                                unit_forget_bias=True,
                                kernel_initializer='he_normal',
                                kernel_regularizer='l2',
                                name='lstm_layer')
    else:
        lstm = layers.LSTM(self.hidden_units,
                           input_shape=(None, self.input_dim),
                           unit_forget_bias=True,
                           activation='relu',
                           kernel_initializer='he_normal',
                           kernel_regularizer='l2',
                           name='lstm_layer')

    left_input = Input(shape=(self.max_len,), name='input_1')   # shape must be a tuple
    right_input = Input(shape=(self.max_len,), name='input_2')

    embed_left = embed(left_input)
    embed_right = embed(right_input)
    print('embed:', embed_right.shape)

    # The same LSTM is shared by both branches (Siamese weights).
    left_output = lstm(embed_left)
    right_output = lstm(embed_right)
    print('lstm:', right_output.shape)

    l1_norm = lambda x: 1 - K.abs(x[0] - x[1])
    merged = layers.Lambda(function=l1_norm,
                           output_shape=lambda x: x[0],
                           name='L1_distance')([left_output, right_output])
    #merged = layers.concatenate([left_output, right_output])
    #lstm_2 = layers.LSTM(hidden_units, unit_forget_bias=True,
    #                     activation='relu', kernel_regularizer='l2',
    #                     name='lstm_layer2')(merged)
    print('merged:', merged.shape)

    dense_1 = layers.Dense(self.dense_units, activation='relu')(merged)
    print('dense1:', dense_1.shape)
    output = layers.Dense(3, activation='softmax', name='output_layer')(dense_1)
    print('output:', output.shape)

    self.model = Model(inputs=[left_input, right_input], outputs=output)
    self.compile()
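# --- Siamese-merge sketch (not part of the original source) --------------------
# The constructor above shares one LSTM across both inputs and merges the two
# sentence vectors with an element-wise 1 - |a - b| similarity before the
# classifier. The toy-sized standalone model below mirrors that pattern; all
# names and sizes are hypothetical.
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K

_max_len, _vocab, _emb_dim, _units = 20, 1000, 50, 64
_left = keras.Input(shape=(_max_len,))
_right = keras.Input(shape=(_max_len,))
_embed = layers.Embedding(_vocab, _emb_dim, mask_zero=True)
_lstm = layers.LSTM(_units)                              # shared weights
_merged = layers.Lambda(lambda t: 1 - K.abs(t[0] - t[1]))(
    [_lstm(_embed(_left)), _lstm(_embed(_right))])
_logits = layers.Dense(3, activation='softmax')(
    layers.Dense(32, activation='relu')(_merged))
_siamese_demo = keras.Model([_left, _right], _logits)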