# Shared imports assumed for the snippets below (tf.keras / TensorFlow Probability);
# the CuDNNLSTM branch in the last snippet additionally requires the legacy Keras / TF1 API.
import time

import numpy as np
import tensorflow_probability as tfp
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.initializers import Constant
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

tfpl = tfp.layers


def get_simple_rnn_model(event_dim, is_Training, temperature=1):
    # input shape: (None, EVENT_DIM)
    #   None      -> variable sequence length (all sequences within one batch share the same length)
    #   EVENT_DIM -> dimensionality of one event
    layer_one_args = {
        'units': 128,
        'input_shape': (None, event_dim),
        'return_sequences': True,
        'dropout': 0.5,
        'recurrent_dropout': 0.5,
    }
    layer_two_args = {
        'units': 128,
        'return_sequences': True,
        'dropout': 0.5,
        'recurrent_dropout': 0.5,
    }

    # For generation we predict one event at a time, so both LSTM layers become
    # stateful and operate on single-step batches of size 1.
    if not is_Training:
        layer_one_args['input_shape'] = (1, event_dim)
        layer_one_args['batch_input_shape'] = (1, 1, event_dim)
        layer_one_args['stateful'] = True
        layer_two_args['stateful'] = True

    model = keras.Sequential()
    model.add(layers.LSTM(**layer_one_args))
    # second LSTM layer
    model.add(layers.LSTM(**layer_two_args))
    # scale the LSTM output by 1/temperature before the final softmax layer
    model.add(layers.Lambda(lambda x: x / temperature))
    model.add(layers.Dense(units=event_dim, activation='softmax'))
    return model
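
# Illustrative usage sketch (not part of the original snippet): build a training model,
# then a stateful single-step copy for generation. EVENT_DIM, the optimizer/loss choice,
# and the weight transfer are assumptions about how the two variants are meant to be used.
EVENT_DIM = 128

train_model = get_simple_rnn_model(EVENT_DIM, is_Training=True)
train_model.compile(optimizer='adam', loss='categorical_crossentropy')
# ... fit train_model on (batch, time, EVENT_DIM) sequences ...

generation_model = get_simple_rnn_model(EVENT_DIM, is_Training=False, temperature=0.8)
generation_model.set_weights(train_model.get_weights())  # reuse the trained weights
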
def rnn_model(params, training_dr_lstm=True, training_dr_ll=True):
    """RNN model for text."""
    input_shape = (params['fix_len'],)
    seq_input = layers.Input(shape=input_shape)
    # vocab+1 because of padding
    seq_emb = layers.Embedding(
        params['vocab_size'] + 1,
        params['emb_size'],
        input_length=params['fix_len'])(seq_input)
    lstm_out = layers.LSTM(
        params['hidden_lstm_size'],
        dropout=params['dropout_rate_lstm'])(seq_emb, training=training_dr_lstm)
    out = layers.Dropout(
        rate=params['dropout_rate'],
        seed=params['random_seed'])(lstm_out, training=training_dr_ll)

    if params['variational']:
        # Scale the KL loss by the number of training examples:
        # a larger training dataset depends less on the prior.
        def scaled_kl_fn(p, q, _):
            return tfp.distributions.kl_divergence(q, p) / params['n_train']

        logits = tfpl.DenseReparameterization(
            params['n_class_in'],
            activation=None,
            kernel_divergence_fn=scaled_kl_fn,
            bias_posterior_fn=tfpl.util.default_mean_field_normal_fn(),
            name='last_layer')(out)
    else:
        logits = layers.Dense(
            params['n_class_in'],
            activation=None,
            kernel_regularizer=regularizers.l2(params['reg_weight']),
            bias_regularizer=regularizers.l2(params['reg_weight']),
            name='last_layer')(out)

    probs = layers.Softmax(axis=1)(logits)
    return models.Model(seq_input, probs, name='rnn')
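
# Illustrative sketch of the `params` dictionary rnn_model expects; the keys are taken from
# the function body above, but every value here is an assumption, not from the source.
example_params = {
    'fix_len': 100,            # padded sequence length
    'vocab_size': 20000,       # vocabulary size (embedding table has vocab_size + 1 rows for padding)
    'emb_size': 128,           # embedding dimension
    'hidden_lstm_size': 64,    # LSTM hidden units
    'dropout_rate_lstm': 0.1,  # dropout inside the LSTM layer
    'dropout_rate': 0.5,       # dropout before the last layer
    'random_seed': 42,
    'variational': False,      # True -> Bayesian last layer (tfp DenseReparameterization)
    'n_train': 25000,          # number of training examples, scales the KL term
    'n_class_in': 2,           # number of output classes
    'reg_weight': 1e-4,        # L2 weight for the deterministic last layer
}
model = rnn_model(example_params)
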
def __init__(self, word_embedding, data, use_cudnn_lstm=False, plot_model_architecture=True):
    self.hidden_units = 300
    self.embed_model = word_embedding
    self.input_dim = word_embedding.embed_dim
    self.vocab_size = data.vocab_size
    self.left = data.premise
    self.right = data.hypothesis
    self.max_len = data.max_len
    self.dense_units = 32
    self.name = '{}_glove{}_lstm{}_dense{}'.format(
        str(int(time.time())), self.input_dim, self.hidden_units, self.dense_units)

    # Build the frozen embedding matrix from the pre-trained word vectors.
    embedding_matrix = np.zeros((self.vocab_size, self.input_dim))
    for word, i in data.vocab:
        embedding_vector = self.embed_model.get_vector(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    embed = layers.Embedding(
        input_dim=self.vocab_size,
        output_dim=self.input_dim,
        embeddings_initializer=Constant(embedding_matrix),
        input_length=self.max_len,
        mask_zero=True,
        trainable=False)
    #embed.trainable=False

    if use_cudnn_lstm:
        lstm = layers.CuDNNLSTM(self.hidden_units,
                                input_shape=(None, self.input_dim),
                                unit_forget_bias=True,
                                kernel_initializer='he_normal',
                                kernel_regularizer='l2',
                                name='lstm_layer')
    else:
        lstm = layers.LSTM(self.hidden_units,
                           input_shape=(None, self.input_dim),
                           unit_forget_bias=True,
                           activation='relu',
                           kernel_initializer='he_normal',
                           kernel_regularizer='l2',
                           name='lstm_layer')

    left_input = Input(shape=(self.max_len,), name='input_1')
    right_input = Input(shape=(self.max_len,), name='input_2')

    embed_left = embed(left_input)
    embed_right = embed(right_input)
    print('embed:', embed_right.shape)

    # The same (shared) LSTM encodes both the premise and the hypothesis.
    left_output = lstm(embed_left)
    right_output = lstm(embed_right)
    print('lstm:', right_output.shape)

    # Element-wise similarity of the two sentence encodings.
    l1_norm = lambda x: 1 - K.abs(x[0] - x[1])
    merged = layers.Lambda(function=l1_norm,
                           output_shape=lambda x: x[0],
                           name='L1_distance')([left_output, right_output])
    #merged = layers.concatenate([left_output, right_output])
    #lstm_2 = layers.LSTM(hidden_units, unit_forget_bias=True,
    #                     activation='relu', kernel_regularizer='l2', name='lstm_layer2')(merged)
    print('merged:', merged.shape)

    dense_1 = layers.Dense(self.dense_units, activation='relu')(merged)
    print('dense1:', dense_1.shape)
    output = layers.Dense(3, activation='softmax', name='output_layer')(dense_1)
    print('output:', output.shape)

    self.model = Model(inputs=[left_input, right_input], outputs=output)
    self.compile()