Example 1
def build_lstm(output_dim, embeddings):

    loss_function = "categorical_crossentropy"

    # this is the placeholder tensor for the input sequences
    sequence = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype="int32")

    # this embedding layer will transform the sequences of integers
    embedded = Embedding(embeddings.shape[0],
                         embeddings.shape[1],
                         input_length=MAX_SEQUENCE_LENGTH,
                         weights=[embeddings],
                         trainable=True)(sequence)

    # 4 convolution layers (each 1000 filters)
    cnn = [
        Convolution1D(filter_length=filters,
                      nb_filter=1000,
                      border_mode="same") for filters in [2, 3, 5, 7]
    ]
    # concatenate
    merged_cnn = merge([layer(embedded) for layer in cnn], mode="concat")
    # create attention vector from max-pooled convoluted
    maxpool = Lambda(lambda x: keras_backend.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    attention_vector = maxpool(merged_cnn)

    forwards = AttentionLSTM(64, attention_vector)(embedded)
    backwards = AttentionLSTM(64, attention_vector,
                              go_backwards=True)(embedded)

    # concatenate the outputs of the 2 LSTM layers
    bi_lstm = merge([forwards, backwards], mode="concat", concat_axis=-1)

    after_dropout = Dropout(0.5)(bi_lstm)

    # softmax output layer
    output = Dense(output_dim=output_dim, activation="softmax")(after_dropout)

    # the complete model
    model = Model(input=sequence, output=output)

    # try using different optimizers and different optimizer configs
    model.compile("adagrad", loss_function, metrics=["accuracy"])

    return model
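
A minimal usage sketch for build_lstm, with random stand-in data (the pretrained embedding matrix, class count and training set below are assumptions; MAX_SEQUENCE_LENGTH is the module-level constant the function already reads):

import numpy as np

vocab_size, embed_dim, n_classes = 5000, 300, 17
pretrained = np.random.rand(vocab_size, embed_dim).astype("float32")
X_train = np.random.randint(0, vocab_size, size=(256, MAX_SEQUENCE_LENGTH))
y_train = np.eye(n_classes)[np.random.randint(0, n_classes, size=256)]  # one-hot labels

model = build_lstm(output_dim=n_classes, embeddings=pretrained)
model.fit(X_train, y_train, batch_size=32, nb_epoch=2, validation_split=0.1)
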
Example 2
def lstm_attention(X_train, y_train, X_test, y_test, vocab_size):
    X_train = sequence.pad_sequences(X_train, maxlen=MAX_LEN)
    X_test = sequence.pad_sequences(X_test, maxlen=MAX_LEN)

    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Build model...')
    model = Sequential()

    # data
    data = Input(shape=(MAX_LEN, ), dtype='int32', name='data')

    # embedding
    embedding = Embedding(vocab_size,
                          EMBED_SIZE,
                          input_length=MAX_LEN,
                          dropout=0.2)
    data_embedding = embedding(data)

    # dropout
    dropout = Dropout(0.25)
    data_dropout = dropout(data_embedding)

    # rnn
    rnn = RNN(HIDDEN_SIZE)

    data_rnn = rnn(data_dropout)  # apply the RNN layer defined above
    #data_dropout = dropout(data_rnn)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    data_pool = maxpool(data_dropout)

    rnn = AttentionLSTM(HIDDEN_SIZE, data_pool)
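
    # The original snippet is truncated here. A plausible continuation (an
    # assumption modelled on the other examples in this collection, not the
    # original code) applies the attention LSTM, adds a classifier and trains:
    data_attention = rnn(data_dropout)
    output = Dense(1, activation='sigmoid')(data_attention)

    model = Model(input=data, output=output)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    model.fit(X_train, y_train, batch_size=32, nb_epoch=5,
              validation_data=(X_test, y_test))
    return model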
Example 3
    def build(self):
        question = self.question
        answer = self.get_answer()

        # add embedding layers
        weights = self.model_params.get('initial_embed_weights', None)
        weights = weights if weights is None else [weights]
        embedding = Embedding(
            input_dim=self.config['n_words'],
            output_dim=self.model_params.get('n_embed_dims', 256),
            # weights=weights,
            mask_zero=True)
        question_embedding = embedding(question)
        answer_embedding = embedding(answer)

        # turn off layer updating
        # embedding.params = []
        # embedding.updates = []

        # question rnn part
        f_rnn = LSTM(self.model_params.get('n_lstm_dims', 141),
                     return_sequences=True,
                     dropout_U=0.2,
                     consume_less='mem')
        b_rnn = LSTM(self.model_params.get('n_lstm_dims', 141),
                     return_sequences=True,
                     dropout_U=0.2,
                     consume_less='mem',
                     go_backwards=True)
        question_f_rnn = f_rnn(question_embedding)
        question_b_rnn = b_rnn(question_embedding)

        # maxpooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]))
        avepool = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]))

        # otherwise, it will raise an exception like:
        # Layer lambda_1 does not support masking,
        # but was passed an input_mask: Elemwise{neq,no_inplace}.0
        maxpool.__setattr__('supports_masking', True)
        avepool.__setattr__('supports_masking', True)

        question_pool = merge(
            [maxpool(question_f_rnn),
             maxpool(question_b_rnn)],
            mode='concat',
            concat_axis=-1)

        # answer rnn part
        f_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141),
                              question_pool,
                              return_sequences=True,
                              consume_less='mem',
                              single_attention_param=True)
        b_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141),
                              question_pool,
                              return_sequences=True,
                              consume_less='mem',
                              go_backwards=True,
                              single_attention_param=True)
        answer_f_rnn = f_rnn(answer_embedding)
        answer_b_rnn = b_rnn(answer_embedding)
        answer_pool = merge([maxpool(answer_f_rnn),
                             maxpool(answer_b_rnn)],
                            mode='concat',
                            concat_axis=-1)

        return question_pool, answer_pool
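
The build() above returns pooled question and answer tensors rather than a compiled model. A sketch of one common way to score a question/answer pair from those tensors (assuming question_pool and answer_pool are the values returned by build(); this is not the author's training code):

from keras import backend as K
from keras.layers import merge

# cosine similarity between the pooled question and answer representations
similarity = merge(
    [question_pool, answer_pool],
    mode=lambda x: K.sum(K.l2_normalize(x[0], axis=-1) *
                         K.l2_normalize(x[1], axis=-1),
                         axis=-1, keepdims=True),
    output_shape=lambda shapes: (shapes[0][0], 1))
# `similarity` can then feed a ranking or binary-relevance loss.
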
Example 4
'''
The Embedding layer can only be used as the first layer of the model.
Inputs: the maximum number of words (i.e. the dictionary size) and the
output dimension of the sentence-vector representation.
# weights=[weights]
'''
weights = np.load('word2vec_100_dim.embeddings')  # shape: (22353, 100)
model.add(Embedding(input_dim=MAX_NB_WORDS, output_dim=100, weights=[weights]))
'''
Output shape: if return_sequences=True, the output is a 3D tensor
(nb_samples, timesteps, output_dim); otherwise it is a 2D tensor
(nb_samples, output_dim). Feeding the 3D output directly into a Dense layer raises:
Exception: Input 0 is incompatible with layer dense_1: expected ndim=2, found ndim=3
'''
# lstm = LSTM(128, W_regularizer=l2(0.01), return_sequences=True)
# model.add(AttentionLSTMWrapper(lstm, single_attention_param=True))
model.add(
    AttentionLSTM(100, W_regularizer=l2(0.01), dropout_W=0.2, dropout_U=0.2))
model.add(Activation('tanh'))

model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit to match binary_crossentropy
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()  # print a summary of the model

######################################
# train the LSTM_ATTNets model
######################################

print('Train...')
print('\tHere, batch_size =', BATCH_SIZE, ", epoch =", EPOCH, ", lr =", LR)
# early_stopping = EarlyStopping(monitor='val_loss', patience=2)
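
The script stops just before the training call. A plausible continuation (an assumption based on the constants printed above; X_train and y_train are assumed to hold the padded sequences and labels) enables the commented-out early stopping and fits the model:

from keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='val_loss', patience=2)
model.fit(X_train, y_train,
          batch_size=BATCH_SIZE,
          nb_epoch=EPOCH,              # Keras 1.x spelling of `epochs`
          validation_split=0.1,
          callbacks=[early_stopping])
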
Example 5
def get_attention_lstm(word_index_to_embeddings_map,
                       max_len,
                       rich_context: bool = False,
                       **kwargs):
    # converting embeddings to numpy 2d array: shape = (vocabulary_size, 300)
    embeddings = np.asarray([
        np.array(x, dtype=np.float32)
        for x in word_index_to_embeddings_map.values()
    ])
    print('embeddings.shape', embeddings.shape)

    lstm_size = kwargs.get('lstm_size')
    dropout = kwargs.get('dropout')
    assert lstm_size
    assert dropout

    # define basic four input layers - for warrant0, warrant1, reason, claim
    sequence_layer_warrant0_input = Input(shape=(max_len, ),
                                          dtype='int32',
                                          name="sequence_layer_warrant0_input")
    sequence_layer_warrant1_input = Input(shape=(max_len, ),
                                          dtype='int32',
                                          name="sequence_layer_warrant1_input")
    sequence_layer_reason_input = Input(shape=(max_len, ),
                                        dtype='int32',
                                        name="sequence_layer_reason_input")
    sequence_layer_claim_input = Input(shape=(max_len, ),
                                       dtype='int32',
                                       name="sequence_layer_claim_input")
    sequence_layer_debate_input = Input(shape=(max_len, ),
                                        dtype='int32',
                                        name="sequence_layer_debate_input")

    # now define embedded layers of the input
    embedded_layer_warrant0_input = Embedding(
        embeddings.shape[0],
        embeddings.shape[1],
        input_length=max_len,
        weights=[embeddings],
        mask_zero=True)(sequence_layer_warrant0_input)
    embedded_layer_warrant1_input = Embedding(
        embeddings.shape[0],
        embeddings.shape[1],
        input_length=max_len,
        weights=[embeddings],
        mask_zero=True)(sequence_layer_warrant1_input)
    embedded_layer_reason_input = Embedding(
        embeddings.shape[0],
        embeddings.shape[1],
        input_length=max_len,
        weights=[embeddings],
        mask_zero=True)(sequence_layer_reason_input)
    embedded_layer_claim_input = Embedding(
        embeddings.shape[0],
        embeddings.shape[1],
        input_length=max_len,
        weights=[embeddings],
        mask_zero=True)(sequence_layer_claim_input)
    embedded_layer_debate_input = Embedding(
        embeddings.shape[0],
        embeddings.shape[1],
        input_length=max_len,
        weights=[embeddings],
        mask_zero=True)(sequence_layer_debate_input)

    bidi_lstm_layer_reason = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM Reason')(embedded_layer_reason_input)
    bidi_lstm_layer_claim = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM Claim')(embedded_layer_claim_input)
    # add context to the attention layer
    bidi_lstm_layer_debate = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM Context')(embedded_layer_debate_input)

    if rich_context:
        # merge reason and claim
        context_concat = merge([
            bidi_lstm_layer_reason, bidi_lstm_layer_claim,
            bidi_lstm_layer_debate
        ],
                               mode='concat')
    else:
        context_concat = merge([bidi_lstm_layer_reason, bidi_lstm_layer_claim],
                               mode='concat')

    # max-pooling
    max_pool_lambda_layer = Lambda(
        lambda x: keras.backend.max(x, axis=1, keepdims=False),
        output_shape=lambda x: (x[0], x[2]))
    max_pool_lambda_layer.supports_masking = True
    attention_vector = max_pool_lambda_layer(context_concat)

    attention_warrant0 = AttentionLSTM(
        lstm_size, attention_vector)(embedded_layer_warrant0_input)
    attention_warrant1 = AttentionLSTM(
        lstm_size, attention_vector)(embedded_layer_warrant1_input)

    # combine the two warrant attention vectors (merge defaults to element-wise sum)
    dropout_layer = Dropout(dropout)(merge(
        [attention_warrant0, attention_warrant1]))

    # and add one extra layer with ReLU
    dense1 = Dense(int(lstm_size / 2), activation='relu')(dropout_layer)
    output_layer = Dense(1, activation='sigmoid')(dense1)

    model = Model([
        sequence_layer_warrant0_input, sequence_layer_warrant1_input,
        sequence_layer_reason_input, sequence_layer_claim_input,
        sequence_layer_debate_input
    ],
                  output=output_layer)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    from keras.utils.visualize_util import plot
    plot(model, show_shapes=True, to_file='/tmp/model-att.png')

    # from keras.utils.visualize_util import plot
    # plot(model, show_shapes=True, to_file='/tmp/attlstm.png')

    return model
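
Because the model has five named inputs, training data must be passed as a list in the same order as the Model input list. A minimal usage sketch (the padded integer matrices and binary labels are assumed names):

model = get_attention_lstm(word_index_to_embeddings_map, max_len,
                           lstm_size=64, dropout=0.5)
model.fit([warrant0_train, warrant1_train, reason_train,
           claim_train, debate_train],
          labels_train,
          nb_epoch=5, batch_size=32, validation_split=0.1)
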
Example 6
    def build(self):
        question = self.question
        answer = self.get_answer()

        # add embedding layers
        weights = self.model_params.get('initial_embed_weights', None)
        weights = weights if weights is None else [weights]
        embedding = Embedding(input_dim=self.config['n_words'],
                              output_dim=self.model_params.get(
                                  'n_embed_dims', 100),
                              weights=weights,
                              mask_zero=True)
        question_embedding = embedding(question)
        answer_embedding = embedding(answer)

        # turn off layer updating
        # embedding.params = []
        # embedding.updates = []

        # dropout
        dropout = Dropout(0.25)
        question_dropout = dropout(question_embedding)
        answer_dropout = dropout(answer_embedding)

        # question rnn part
        f_rnn = LSTM(self.model_params.get('n_lstm_dims', 141),
                     return_sequences=True,
                     dropout_U=0.2,
                     consume_less='mem')
        b_rnn = LSTM(self.model_params.get('n_lstm_dims', 141),
                     return_sequences=True,
                     dropout_U=0.2,
                     consume_less='mem',
                     go_backwards=True)
        question_f_rnn = f_rnn(question_dropout)
        question_b_rnn = b_rnn(question_dropout)
        question_f_dropout = dropout(question_f_rnn)
        question_b_dropout = dropout(question_b_rnn)

        # maxpooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]))
        question_pool = merge(
            [maxpool(question_f_dropout),
             maxpool(question_b_dropout)],
            mode='concat',
            concat_axis=-1)

        # answer rnn part
        f_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141),
                              question_pool,
                              single_attn=True,
                              dropout_U=0.2,
                              return_sequences=True,
                              consume_less='mem')
        b_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141),
                              question_pool,
                              single_attn=True,
                              dropout_U=0.2,
                              return_sequences=True,
                              consume_less='mem',
                              go_backwards=True)
        answer_f_rnn = f_rnn(answer_dropout)
        answer_b_rnn = b_rnn(answer_dropout)
        answer_f_dropout = dropout(answer_f_rnn)
        answer_b_dropout = dropout(answer_b_rnn)
        answer_pool = merge(
            [maxpool(answer_f_dropout),
             maxpool(answer_b_dropout)],
            mode='concat',
            concat_axis=-1)

        # activation
        activation = Activation('tanh')
        question_output = activation(question_pool)
        answer_output = activation(answer_pool)

        return question_output, answer_output
Example 7
def get_model(name,
              X_train,
              y_train,
              embeddings,
              batch_size,
              nb_epoch,
              max_len,
              max_features,
              nb_classes=17):
    print('Building model', name)

    # get correct loss
    loss_function = 'binary_crossentropy'

    if name == 'LSTM+ATT':
        # this is the placeholder tensor for the input sequences
        sequence = Input(shape=(max_len, ), dtype='int32')
        # this embedding layer will transform the sequences of integers
        # into vectors of size 128
        embedded = Embedding(embeddings.shape[0],
                             embeddings.shape[1],
                             input_length=max_len,
                             weights=[embeddings])(sequence)

        # 4 convolution layers (each 1000 filters)
        cnn = [
            Convolution1D(filter_length=filters,
                          nb_filter=1000,
                          border_mode='same') for filters in [2, 3, 5, 7]
        ]

        # concatenate
        question = merge([layer(embedded) for layer in cnn], mode='concat')

        # create attention vector from max-pooled convoluted
        maxpool = Lambda(
            lambda x: keras_backend.max(x, axis=1, keepdims=False),
            output_shape=lambda x: (x[0], x[2]))
        attention_vector = maxpool(question)

        forwards = AttentionLSTM(64, attention_vector)(embedded)
        backwards = AttentionLSTM(64, attention_vector,
                                  go_backwards=True)(embedded)

        # concatenate the outputs of the 2 LSTMs
        answer_rnn = merge([forwards, backwards],
                           mode='concat',
                           concat_axis=-1)

        after_dropout = Dropout(0.5)(answer_rnn)
        # we have 17 classes
        output = Dense(nb_classes, activation='sigmoid')(after_dropout)

        model = Model(input=sequence, output=output)

        # try using different optimizers and different optimizer configs
        model.compile('adam', loss_function, metrics=[loss_function])
        # model.compile('adam', 'hinge', metrics=['hinge'])

        print("Layers: ", model.layers)
        for layer in model.layers:
            if isinstance(layer, AttentionLSTM):
                print(type(layer.attention_vec))
                # print('Attention vector shape:', layer.attention_vec.shape)  # not informative: the shape is symbolic here

        model.fit(X_train,
                  y_train,
                  batch_size=batch_size,
                  nb_epoch=nb_epoch,
                  validation_split=0.1,
                  verbose=1)

        return model

    if name == 'LSTM':
        # this is the placeholder tensor for the input sequences
        sequence = Input(shape=(max_len, ), dtype='int32')
        # this embedding layer will transform the sequences of integers
        # into vectors of size 128
        embedded = Embedding(embeddings.shape[0],
                             embeddings.shape[1],
                             input_length=max_len,
                             weights=[embeddings])(sequence)

        # apply forwards and backward LSTM
        forwards = LSTM(64)(embedded)
        backwards = LSTM(64, go_backwards=True)(embedded)

        # concatenate the outputs of the 2 LSTMs
        answer_rnn = merge([forwards, backwards],
                           mode='concat',
                           concat_axis=-1)

        after_dropout = Dropout(0.5)(answer_rnn)
        # we have 17 classes
        output = Dense(nb_classes, activation='sigmoid')(after_dropout)

        model = Model(input=sequence, output=output)

        # try using different optimizers and different optimizer configs
        model.compile('adam', loss_function, metrics=[loss_function])

        model.fit(X_train,
                  y_train,
                  batch_size=batch_size,
                  nb_epoch=nb_epoch,
                  validation_split=0.1,
                  verbose=0)

        return model

    if name == 'MLP':
        model = Sequential()
        model.add(Dense(512, input_shape=(max_len, )))
        model.add(Activation('relu'))
        model.add(Dropout(0.5))
        model.add(Dense(nb_classes))
        model.add(Activation('softmax'))
        model.compile(loss=loss_function,
                      optimizer='adam',
                      metrics=[loss_function])

        model.fit(X_train,
                  y_train,
                  nb_epoch=nb_epoch,
                  batch_size=batch_size,
                  validation_split=0.1,
                  verbose=0)

        return model
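
A hypothetical driver for get_model (the data variables are illustrative; get_model compiles and fits internally, so it only needs the training data and hyper-parameters):

model = get_model('LSTM+ATT', X_train, y_train, embeddings,
                  batch_size=32, nb_epoch=5,
                  max_len=100, max_features=embeddings.shape[0])
predictions = model.predict(X_test, batch_size=32)
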
Example 8
    def build(self):
        question, answer = self._get_inputs()

        # add embedding layers
        embedding = Embedding(self.config['n_words'],
                              self.model_params.get('n_embed_dims', 141))
        question_embedding = embedding(question)

        # separate embedding layer for the answer, initialised from the
        # question embedding's weights
        a_embedding = Embedding(self.config['n_words'],
                                self.model_params.get('n_embed_dims', 141))
        answer_embedding = a_embedding(answer)

        a_embedding.set_weights(embedding.get_weights())

        # dropout
        dropout = Dropout(0.5)
        question_dropout = dropout(question_embedding)
        answer_dropout = dropout(answer_embedding)

        # rnn
        forward_lstm = LSTM(self.config.get('n_lstm_dims', 141),
                            consume_less='mem',
                            return_sequences=True)
        backward_lstm = LSTM(self.config.get('n_lstm_dims', 141),
                             consume_less='mem',
                             return_sequences=True)
        question_lstm = merge(
            [forward_lstm(question_dropout),
             backward_lstm(question_dropout)],
            mode='concat',
            concat_axis=-1)

        # dropout
        question_dropout = dropout(question_lstm)

        # maxpooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]))
        question_pool = maxpool(question_dropout)

        # activation
        activation = Activation('tanh')
        question_output = activation(question_pool)

        question_model = Model(input=[question], output=[question_output])

        # attentional rnn
        forward_lstm = AttentionLSTM(self.config.get('n_lstm_dims', 141),
                                     question_output,
                                     consume_less='mem',
                                     return_sequences=True)
        backward_lstm = AttentionLSTM(self.config.get('n_lstm_dims', 141),
                                      question_output,
                                      consume_less='mem',
                                      return_sequences=True)
        answer_lstm = merge(
            [forward_lstm(answer_dropout),
             backward_lstm(answer_dropout)],
            mode='concat',
            concat_axis=-1)

        # dropout
        answer_dropout = dropout(answer_lstm)

        # maxpooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]))
        answer_pool = maxpool(answer_dropout)

        # activation
        activation = Activation('tanh')
        answer_output = activation(answer_pool)

        answer_model = Model(input=[question, answer], output=[answer_output])

        return question_model, answer_model
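
The method above returns two Keras models rather than raw tensors. After training, candidate answers can be ranked by encoding them and comparing the vectors with cosine similarity (a sketch with assumed names: question_ids is a (1, max_len) padded question, candidate_ids an (n_candidates, max_len) matrix of padded answers):

import numpy as np

q_vec = question_model.predict(question_ids)[0]
a_vecs = answer_model.predict([np.repeat(question_ids, len(candidate_ids), axis=0),
                               candidate_ids])

def cosine(u, v):
    return float(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v) + 1e-8))

best = int(np.argmax([cosine(q_vec, a) for a in a_vecs]))
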
    def build(self):
        question = self.question
        answer = self.get_answer()

        # add embedding layers
        embedding = Embedding(self.config['n_words'],
                              self.model_params.get('n_embed_dims', 100))
        question_embedding = embedding(question)
        answer_embedding = embedding(answer)

        # turn off layer updating
        embedding.params = []
        embedding.updates = []

        # dropout
        dropout = Dropout(0.25)
        question_dropout = dropout(question_embedding)
        answer_dropout = dropout(answer_embedding)

        # question rnn part
        f_rnn = LSTM(self.model_params.get('n_lstm_dims', 141),
                     return_sequences=True)
        b_rnn = LSTM(self.model_params.get('n_lstm_dims', 141),
                     return_sequences=True,
                     go_backwards=True)
        question_rnn = merge(
            [f_rnn(question_dropout),
             b_rnn(question_dropout)],
            mode='concat',
            concat_axis=-1)
        question_dropout = dropout(question_rnn)

        # regularize
        regularize = ActivityRegularization(l2=0.0001)
        question_dropout = regularize(question_dropout)

        # could add convolution layer here (as in paper)

        # maxpooling
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]))
        question_pool = maxpool(question_dropout)

        # answer rnn part
        f_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141),
                              question_pool,
                              return_sequences=True)
        b_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141),
                              question_pool,
                              return_sequences=True,
                              go_backwards=True)
        # f_rnn = LSTM(self.model_params.get('n_lstm_dims', 141), return_sequences=True)
        # b_rnn = LSTM(self.model_params.get('n_lstm_dims', 141), return_sequences=True, go_backwards=True)
        answer_rnn = merge([f_rnn(answer_dropout),
                            b_rnn(answer_dropout)],
                           mode='concat',
                           concat_axis=-1)
        answer_dropout = dropout(answer_rnn)
        answer_dropout = regularize(answer_dropout)
        answer_pool = maxpool(answer_dropout)

        # activation
        activation = Activation('tanh')
        question_output = activation(question_pool)
        answer_output = activation(answer_pool)

        return question_output, answer_output