Example #1
from tensorflow import keras
from tensorflow.keras import layers


def get_simple_rnn_model(event_dim, is_Training, temperature=1):
    # input_shape: (None,         : variable sequence length (all sequences within one batch share the same length)
    #               EVENT_DIM)    : dimensionality of one event
    layer_one_args = {
        'units': 128,
        'input_shape': (None, event_dim),
        'return_sequences': True,
        'dropout': 0.5,
        'recurrent_dropout': 0.5,
    }
    layer_two_args = {
        'units': 128,
        'return_sequences': True,
        'dropout': 0.5,
        'recurrent_dropout': 0.5,
    }
    # for generation we predict one event at a time with a stateful model
    if not is_Training:
        # a fixed batch size of 1 is required, so use batch_input_shape
        # (it takes precedence over input_shape, which is dropped here)
        layer_one_args.pop('input_shape')
        layer_one_args['batch_input_shape'] = (1, 1, event_dim)
        layer_one_args['stateful'] = True
        layer_two_args['stateful'] = True

    model = keras.Sequential()
    model.add(layers.LSTM(**layer_one_args))
    # second LSTM layer
    model.add(layers.LSTM(**layer_two_args))
    # divide the activations by the sampling temperature before the softmax output
    model.add(layers.Lambda(lambda x: x / temperature))
    # probability distribution over the event vocabulary at every time step
    model.add(layers.Dense(units=event_dim, activation='softmax'))

    return model
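
A minimal usage sketch (not part of the original example): EVENT_DIM, train_x and train_y are assumed placeholders. The model is first trained in batch mode, then rebuilt as a stateful generator that reuses the trained weights and samples one event at a time.

import numpy as np

EVENT_DIM = 128  # assumed dimensionality of one event

training_model = get_simple_rnn_model(EVENT_DIM, is_Training=True)
training_model.compile(optimizer='adam', loss='categorical_crossentropy')
# train_x: (batch, time, EVENT_DIM) one-hot events,
# train_y: one-hot target event for every time step
# training_model.fit(train_x, train_y, epochs=10)

# stateful copy for generation, reusing the trained weights
generation_model = get_simple_rnn_model(EVENT_DIM, is_Training=False,
                                        temperature=1.2)
generation_model.set_weights(training_model.get_weights())
generation_model.reset_states()

generated = []
event = np.zeros((1, 1, EVENT_DIM), dtype='float32')  # empty seed event
for _ in range(100):
    probs = generation_model.predict(event)[0, -1].astype('float64')
    probs /= probs.sum()  # guard against float32 rounding in np.random.choice
    next_index = np.random.choice(EVENT_DIM, p=probs)
    generated.append(next_index)
    event = np.zeros((1, 1, EVENT_DIM), dtype='float32')
    event[0, 0, next_index] = 1.0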
Example #2
import tensorflow_probability as tfp
from tensorflow.keras import layers, models, regularizers
from tensorflow_probability import layers as tfpl


def rnn_model(params, training_dr_lstm=True, training_dr_ll=True):
    """RNN model for text."""
    input_shape = (params['fix_len'],)
    seq_input = layers.Input(shape=input_shape)
    # vocab+1 because of padding
    seq_emb = layers.Embedding(params['vocab_size'] + 1,
                               params['emb_size'],
                               input_length=params['fix_len'])(seq_input)
    lstm_out = layers.LSTM(params['hidden_lstm_size'],
                           dropout=params['dropout_rate_lstm'])(
                               seq_emb, training=training_dr_lstm)
    out = layers.Dropout(rate=params['dropout_rate'],
                         seed=params['random_seed'])(lstm_out,
                                                     training=training_dr_ll)
    if params['variational']:
        # Scale the KL loss by the number of training examples;
        # a larger training set relies less on the prior.
        def scaled_kl_fn(p, q, _):
            return tfp.distributions.kl_divergence(q, p) / params['n_train']

        logits = tfpl.DenseReparameterization(
            params['n_class_in'],
            activation=None,
            kernel_divergence_fn=scaled_kl_fn,
            bias_posterior_fn=tfpl.util.default_mean_field_normal_fn(),
            name='last_layer')(out)
    else:
        logits = layers.Dense(
            params['n_class_in'],
            activation=None,
            kernel_regularizer=regularizers.l2(params['reg_weight']),
            bias_regularizer=regularizers.l2(params['reg_weight']),
            name='last_layer')(out)
    probs = layers.Softmax(axis=1)(logits)
    return models.Model(seq_input, probs, name='rnn')
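
A hedged usage sketch; the parameter values below are illustrative assumptions, only the keys are taken from rnn_model above.

# illustrative values; only the keys come from rnn_model
params = {
    'fix_len': 100,
    'vocab_size': 20000,
    'emb_size': 128,
    'hidden_lstm_size': 256,
    'dropout_rate_lstm': 0.3,
    'dropout_rate': 0.5,
    'random_seed': 42,
    'variational': False,   # True selects the DenseReparameterization head
    'n_train': 25000,
    'n_class_in': 2,
    'reg_weight': 1e-4,
}

model = rnn_model(params, training_dr_lstm=True, training_dr_ll=True)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.summary()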
Example #3

import time

import numpy as np
from keras import backend as K
from keras import layers
from keras.initializers import Constant
from keras.layers import Input
from keras.models import Model


# The class statement is missing from the original snippet;
# the name SiameseLSTM is assumed here.
class SiameseLSTM:
    def __init__(self,
                 word_embedding,
                 data,
                 use_cudnn_lstm=False,
                 plot_model_architecture=True):
        self.hidden_units = 300
        self.embed_model = word_embedding
        self.input_dim = word_embedding.embed_dim
        self.vocab_size = data.vocab_size
        self.left = data.premise
        self.right = data.hypothesis
        self.max_len = data.max_len
        self.dense_units = 32
        self.name = '{}_glove{}_lstm{}_dense{}'.format(str(int(time.time())),
                                                       self.input_dim,
                                                       self.hidden_units,
                                                       self.dense_units)

        # build the embedding matrix from the pretrained word vectors
        embedding_matrix = np.zeros((self.vocab_size, self.input_dim))
        for word, i in data.vocab:
            embedding_vector = self.embed_model.get_vector(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector

        embed = layers.Embedding(
            input_dim=self.vocab_size,
            output_dim=self.input_dim,
            embeddings_initializer=Constant(embedding_matrix),
            input_length=self.max_len,
            mask_zero=True,
            trainable=False)
        #embed.trainable=False

        if use_cudnn_lstm:
            lstm = layers.CuDNNLSTM(self.hidden_units,
                                    input_shape=(None, self.input_dim),
                                    unit_forget_bias=True,
                                    kernel_initializer='he_normal',
                                    kernel_regularizer='l2',
                                    name='lstm_layer')
        else:
            lstm = layers.LSTM(self.hidden_units,
                               input_shape=(None, self.input_dim),
                               unit_forget_bias=True,
                               activation='relu',
                               kernel_initializer='he_normal',
                               kernel_regularizer='l2',
                               name='lstm_layer')
        left_input = Input(shape=(self.max_len,), name='input_1')
        right_input = Input(shape=(self.max_len,), name='input_2')

        embed_left = embed(left_input)
        embed_right = embed(right_input)

        print('embed:', embed_right.shape)

        left_output = lstm(embed_left)
        right_output = lstm(embed_right)
        print('lstm:', right_output.shape)
        # element-wise similarity of the two encodings: 1 - |left - right|
        l1_norm = lambda x: 1 - K.abs(x[0] - x[1])
        merged = layers.Lambda(function=l1_norm,
                               output_shape=lambda x: x[0],
                               name='L1_distance')([left_output, right_output])
        #merged = layers.concatenate([left_output, right_output])
        #lstm_2 = layers.LSTM(hidden_units, unit_forget_bias=True,
        #                      activation = 'relu', kernel_regularizer='l2', name='lstm_layer2' )(merged)
        print('merged:', merged.shape)
        dense_1 = layers.Dense(self.dense_units, activation='relu')(merged)
        print('dense1:', dense_1.shape)
        output = layers.Dense(3, activation='softmax',
                              name='output_layer')(dense_1)
        print('output:', output.shape)
        self.model = Model(inputs=[left_input, right_input], outputs=output)

        self.compile()
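
A hedged usage sketch; the class name SiameseLSTM, the loader helpers and the data attributes used below are assumptions, since only the constructor body appears in the snippet above.

# hypothetical usage; glove and nli_data stand in for the (unshown)
# word-embedding wrapper and dataset object expected by the constructor
glove = load_glove_embedding('glove.840B.300d.txt')  # assumed helper
nli_data = load_nli_dataset(max_len=42)              # assumed helper

siamese = SiameseLSTM(word_embedding=glove, data=nli_data,
                      use_cudnn_lstm=False)

# the underlying Keras model takes the two padded index sequences
preds = siamese.model.predict([nli_data.premise, nli_data.hypothesis])
print(preds.shape)  # (num_pairs, 3) class probabilities

Sharing a single Embedding and a single LSTM instance for both inputs ties their weights, so premise and hypothesis are encoded by the same network before the element-wise L1-style merge.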