Esempio n. 1
0
	def build(self):
		"""Build the shared CNN encoder for the question and answer inputs.

		Both inputs pass through the same layers: an embedding initialised
		from the array stored at ``config['initial_embed_weights']``, a
		time-distributed 200-unit tanh ``Dense`` layer, four ``Conv1D`` layers
		(kernel sizes 2, 3, 5, 7; 1000 filters each) whose outputs are
		concatenated on the last axis, and a max over axis 1.

		Returns:
			Tuple ``(question_pool, answer_pool)`` of pooled tensors.

		Raises:
			AssertionError: If ``config['questions_len']`` differs from
				``config['answers_len']``.
		"""
		assert self.config['questions_len'] == self.config['answers_len']

		question = self.questions
		answer = self.get_answer()

		# One embedding layer is shared by both branches.
		weights = np.load(self.config['initial_embed_weights'])
		embedding = Embedding(
			input_dim=self.config['n_words'],
			output_dim=weights.shape[1],
			weights=[weights])
		question_embedding = embedding(question)
		answer_embedding = embedding(answer)

		# Input-side hidden layer, applied per position.
		hidden_layer = TimeDistributed(Dense(200, activation='tanh'))
		question_hl = hidden_layer(question_embedding)
		answer_hl = hidden_layer(answer_embedding)

		# One-dimensional convolutions of several kernel sizes.
		cnns = [
			Conv1D(
				kernel_size=kernel_size,
				filters=1000,
				activation='tanh',
				padding='same')
			for kernel_size in [2, 3, 5, 7]
		]

		# Convolution outputs, concatenated along the last axis.
		question_cnn = concatenate([cnn(question_hl) for cnn in cnns], axis=-1)
		answer_cnn = concatenate([cnn(answer_hl) for cnn in cnns], axis=-1)

		# Max over axis 1; output shape drops that axis.
		maxpool = Lambda(
			lambda x: K.max(x, axis=1, keepdims=False),
			output_shape=lambda x: (x[0], x[2]))
		maxpool.supports_masking = True

		# Pool the convolution outputs. The previous code pooled the hidden
		# layer outputs instead, which left the CNN features unused.
		question_pool = maxpool(question_cnn)
		answer_pool = maxpool(answer_cnn)

		return question_pool, answer_pool
Esempio n. 2
0
    def build(self):
        """Encode question and answer with shared multi-width convolutions.

        Both inputs go through one shared embedding, four shared
        ``Convolution1D`` layers (filter lengths 2, 3, 5, 7; 500 filters
        each) merged in ``concat`` mode, a max over axis 1 and a shared
        100-unit tanh ``Dense`` layer.

        Returns:
            Tuple ``(question_pool, answer_pool)`` of encoded tensors.
        """
        assert self.config['question_len'] == self.config['answer_len']

        question_in = self.question
        answer_in = self.get_answer()

        # Embedding initialised from the array stored at the configured path.
        embed_matrix = np.load(self.config['initial_embed_weights'])
        embed = Embedding(input_dim=self.config['n_words'],
                          output_dim=embed_matrix.shape[1],
                          weights=[embed_matrix])
        q_embedded = embed(question_in)
        a_embedded = embed(answer_in)

        # Build every convolution first, then apply them branch by branch.
        conv_layers = []
        for width in [2, 3, 5, 7]:
            conv_layers.append(Convolution1D(filter_length=width,
                                             nb_filter=500,
                                             activation='tanh',
                                             border_mode='same'))
        q_conv = merge([conv(q_embedded) for conv in conv_layers],
                       mode='concat')
        a_conv = merge([conv(a_embedded) for conv in conv_layers],
                       mode='concat')

        # Max over axis 1, followed by the shared dense encoder.
        axis1_max = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                           output_shape=lambda x: (x[0], x[2]))
        axis1_max.supports_masking = True
        encoder = Dense(100, activation='tanh')

        q_encoded = encoder(axis1_max(q_conv))
        a_encoded = encoder(axis1_max(a_conv))
        return q_encoded, a_encoded
Esempio n. 3
0
def cit_nocit_rnn_rnn_cnn(max_sentence_len, max_words):
    """Build and compile an embedding -> two LSTMs -> CNNs -> max classifier.

    Args:
        max_sentence_len: Length of the input sequence; used for the input
            shape and the embedding's ``input_length``.
        max_words: First positional argument (``input_dim``) of the
            ``Embedding`` layer.

    Returns:
        A compiled Keras ``Model`` with a 2-unit output layer.
    """
    tokens = Input(shape=(max_sentence_len, ))
    embedded = Embedding(max_words, 128,
                         input_length=max_sentence_len)(tokens)

    # Two LSTMs over the same embedding; the second uses go_backwards=True.
    forward_seq = LSTM(128, return_sequences=True)(embedded)
    backward_seq = LSTM(128, return_sequences=True,
                        go_backwards=True)(embedded)
    rnn_features = concatenate([forward_seq, backward_seq], axis=-1)

    conv_layers = []
    for width in [1, 2, 3, 5]:
        conv_layers.append(
            Conv1D(500, width, activation='tanh', padding='same'))
    conv_features = concatenate(
        [conv(rnn_features) for conv in conv_layers])

    # Max over axis 1; output shape drops that axis.
    axis1_max = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                       output_shape=lambda x: (x[0], x[2]))
    axis1_max.supports_masking = True
    pooled = axis1_max(conv_features)

    # NOTE(review): a sigmoid output is paired with categorical_crossentropy
    # below; softmax is the usual pairing -- confirm this is intended.
    scores = Dense(2, activation='sigmoid')(pooled)

    model = Model(inputs=tokens, outputs=scores)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
Esempio n. 4
0
	def build(self):
		"""Encode question and answer with two shared LSTMs and a max over axis 1.

		Both inputs share one embedding and the same pair of 141-unit LSTMs
		(the second built with ``go_backwards=True``). Each LSTM output is
		reduced with a max over axis 1 and the two results are merged in
		``concat`` mode on the last axis.

		Returns:
			Tuple ``(question_pool, answer_pool)``.
		"""
		question_in = self.questions
		answer_in = self.get_answer()

		embed_matrix = np.load(self.config['initial_embed_weights'])
		embed = Embedding(
			input_dim=self.config['n_words'],
			output_dim=embed_matrix.shape[1],
			weights=[embed_matrix])
		q_embedded = embed(question_in)
		a_embedded = embed(answer_in)

		forward_lstm = LSTM(141, return_sequences=True, consume_less='mem')
		backward_lstm = LSTM(
			141,
			return_sequences=True,
			consume_less='mem',
			go_backwards=True)

		q_forward = forward_lstm(q_embedded)
		q_backward = backward_lstm(q_embedded)

		# Pooling: max over axis 1, output shape drops that axis.
		axis1_max = Lambda(
			lambda x: K.max(x, axis=1, keepdims=False),
			output_shape=lambda x: (x[0], x[2]))
		axis1_max.supports_masking = True
		q_pooled = merge(
			[axis1_max(q_forward), axis1_max(q_backward)],
			mode='concat',
			concat_axis=-1)

		a_forward = forward_lstm(a_embedded)
		a_backward = backward_lstm(a_embedded)
		a_pooled = merge(
			[axis1_max(a_forward), axis1_max(a_backward)],
			mode='concat',
			concat_axis=-1)

		return q_pooled, a_pooled
    def build(self):
        """Encode question and answer as a max over axis 1 of their embeddings.

        The question and the answer each get their own ``Embedding`` layer,
        sized and initialised from the arrays loaded via
        ``config.initial_question_weights()`` and
        ``config.initial_answer_weights()``.

        Returns:
            Tuple ``(question_pool, answer_pool)``.
        """
        question_in = self.question
        answer_in = self.get_answer()

        # Question-side embedding.
        q_matrix = np.load(self.config.initial_question_weights())
        q_rows, q_cols = q_matrix.shape[0], q_matrix.shape[1]
        q_embed = Embedding(input_dim=q_rows,
                            output_dim=q_cols,
                            weights=[q_matrix])
        q_vectors = q_embed(question_in)

        # Answer-side embedding.
        a_matrix = np.load(self.config.initial_answer_weights())
        a_rows, a_cols = a_matrix.shape[0], a_matrix.shape[1]
        a_embed = Embedding(input_dim=a_rows,
                            output_dim=a_cols,
                            weights=[a_matrix])
        a_vectors = a_embed(answer_in)

        # Max over axis 1; output shape drops that axis.
        axis1_max = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                           output_shape=lambda x: (x[0], x[2]))
        axis1_max.supports_masking = True

        q_pooled = axis1_max(q_vectors)
        a_pooled = axis1_max(a_vectors)
        return q_pooled, a_pooled
Esempio n. 6
0
    def build(self):
        """Encode the question with two LSTMs and the answer with attention.

        The question is embedded, run through two 141-unit LSTMs (the second
        with ``go_backwards=True``), reduced by a max over axis 1 and merged.
        The same LSTMs are then wrapped in ``AttentionLSTMWrapper`` together
        with the pooled question tensor and applied to the answer embedding.

        Returns:
            Tuple ``(question_pool, answer_pool)``.
        """
        question_in = self.question
        answer_in = self.get_answer()

        # Shared embedding (masking is left disabled, as before).
        embed_matrix = np.load(self.config['initial_embed_weights'])
        embed = Embedding(input_dim=self.config['n_words'],
                          output_dim=embed_matrix.shape[1],
                          weights=[embed_matrix])
        q_embedded = embed(question_in)
        a_embedded = embed(answer_in)

        # Question branch.
        forward_lstm = LSTM(141, return_sequences=True, consume_less='mem')
        backward_lstm = LSTM(141,
                             return_sequences=True,
                             consume_less='mem',
                             go_backwards=True)
        q_forward = forward_lstm(q_embedded)
        q_backward = backward_lstm(q_embedded)

        axis1_max = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                           output_shape=lambda x: (x[0], x[2]))
        axis1_max.supports_masking = True
        q_pooled = merge([axis1_max(q_forward), axis1_max(q_backward)],
                         mode='concat',
                         concat_axis=-1)

        # Answer branch: the same LSTMs, wrapped with the pooled question.
        from attention_lstm import AttentionLSTMWrapper
        attn_forward = AttentionLSTMWrapper(forward_lstm,
                                            q_pooled,
                                            single_attention_param=True)
        attn_backward = AttentionLSTMWrapper(backward_lstm,
                                             q_pooled,
                                             single_attention_param=True)

        a_forward = attn_forward(a_embedded)
        a_backward = attn_backward(a_embedded)
        a_pooled = merge([axis1_max(a_forward), axis1_max(a_backward)],
                         mode='concat',
                         concat_axis=-1)

        return q_pooled, a_pooled
Esempio n. 7
0
    def build(self):
        """Build the attention-based question/answer encoder.

        Question side: shared embedding, two 141-unit LSTMs (one created with
        ``go_backwards=True``), max over axis 1 of each output, merged in
        ``concat`` mode. Answer side: the same two LSTMs wrapped in
        ``AttentionLSTMWrapper`` with the pooled question tensor, then the
        same pooling and merge.

        Returns:
            Tuple ``(question_pool, answer_pool)``.
        """
        q_input = self.question
        a_input = self.get_answer()

        pretrained = np.load(self.config['initial_embed_weights'])
        shared_embedding = Embedding(
            input_dim=self.config['n_words'],
            output_dim=pretrained.shape[1],
            weights=[pretrained],
        )
        q_emb = shared_embedding(q_input)
        a_emb = shared_embedding(a_input)

        lstm_fwd = LSTM(141, return_sequences=True, consume_less='mem')
        lstm_bwd = LSTM(141, return_sequences=True, consume_less='mem',
                        go_backwards=True)
        q_fwd_seq = lstm_fwd(q_emb)
        q_bwd_seq = lstm_bwd(q_emb)

        # Max over axis 1; output shape drops that axis.
        reduce_max = Lambda(
            lambda x: K.max(x, axis=1, keepdims=False),
            output_shape=lambda x: (x[0], x[2]),
        )
        reduce_max.supports_masking = True

        q_parts = [reduce_max(q_fwd_seq), reduce_max(q_bwd_seq)]
        question_vector = merge(q_parts, mode='concat', concat_axis=-1)

        # The answer LSTMs receive the pooled question via the wrapper.
        from attention_lstm import AttentionLSTMWrapper
        wrapped_fwd = AttentionLSTMWrapper(
            lstm_fwd, question_vector, single_attention_param=True)
        wrapped_bwd = AttentionLSTMWrapper(
            lstm_bwd, question_vector, single_attention_param=True)

        a_fwd_seq = wrapped_fwd(a_emb)
        a_bwd_seq = wrapped_bwd(a_emb)
        a_parts = [reduce_max(a_fwd_seq), reduce_max(a_bwd_seq)]
        answer_vector = merge(a_parts, mode='concat', concat_axis=-1)

        return question_vector, answer_vector
Esempio n. 8
0
    def build(self):
        """Build the shared CNN encoder for question and answer.

        Pipeline for each input: shared embedding, four ``Convolution1D``
        layers (filter lengths 2, 3, 5, 7; 500 filters; tanh; ``same``
        border mode) merged in ``concat`` mode, max over axis 1, and a shared
        100-unit tanh ``Dense`` layer.

        Returns:
            Tuple ``(question_pool, answer_pool)``.
        """
        assert self.config['question_len'] == self.config['answer_len']

        q_input = self.question
        a_input = self.get_answer()

        pretrained = np.load(self.config['initial_embed_weights'])
        shared_embedding = Embedding(
            input_dim=self.config['n_words'],
            output_dim=pretrained.shape[1],
            weights=[pretrained],
        )
        q_emb = shared_embedding(q_input)
        a_emb = shared_embedding(a_input)

        convolutions = [
            Convolution1D(
                filter_length=size,
                nb_filter=500,
                activation='tanh',
                border_mode='same',
            )
            for size in [2, 3, 5, 7]
        ]
        q_maps = []
        for convolution in convolutions:
            q_maps.append(convolution(q_emb))
        q_features = merge(q_maps, mode='concat')
        a_maps = []
        for convolution in convolutions:
            a_maps.append(convolution(a_emb))
        a_features = merge(a_maps, mode='concat')

        # Max over axis 1; output shape drops that axis.
        reduce_max = Lambda(
            lambda x: K.max(x, axis=1, keepdims=False),
            output_shape=lambda x: (x[0], x[2]),
        )
        reduce_max.supports_masking = True
        projection = Dense(100, activation='tanh')

        question_vector = projection(reduce_max(q_features))
        answer_vector = projection(reduce_max(a_features))
        return question_vector, answer_vector
Esempio n. 9
0
	def build(self):
		"""Unfinished encoder: embedding, hidden layer, then a Conv2D stub.

		NOTE(review): this method cannot run as written. ``question_cnn`` and
		``answer_cnn`` are read below but never assigned in this method, and
		the ``Conv2D`` layer is created with an empty ``kernel_size`` tuple
		and is never applied to any tensor.

		Returns:
			Tuple ``(question_pool, answer_pool)`` once the convolution step
			is filled in.
		"""
		question = self.questions
		answer = self.get_answer()

		# Embedding initialised from the array stored at the configured path;
		# the same layer is applied to both inputs.
		weights = np.load(self.config['initial_embed_weights'])
		embedding = Embedding(input_dim = self.config['n_words'],
			                  output_dim = weights.shape[1],
			                  weights = [weights])
		question_embedding = embedding(question)
		answer_embedding = embedding(answer)

		# 200-unit tanh Dense layer, wrapped in TimeDistributed.
		hidden_layer = TimeDistributed(Dense(200,activation = 'tanh'))

		question_hl = hidden_layer(question_embedding)
		answer_hl = hidden_layer(answer_embedding)

		# NOTE(review): kernel_size is an empty tuple and `cnns` is unused.
		cnns = [Conv2D(filters = 3, 
			           kernel_size =(),
			           activation = 'tanh',
			           padding = 'same')]
		# TODO: question_cnn (and answer_cnn) still need to be computed here.

		# Max over axis 1; output shape drops that axis.
		maxpool = Lambda( lambda x: K.max(x,axis = 1,keepdims = False),output_shape = lambda x: (x[0],x[2]))
		maxpool.supports_masking = True

		# NOTE(review): both names below are undefined at this point.
		question_pool = maxpool(question_cnn)
		answer_pool = maxpool(answer_cnn)

		return question_pool,answer_pool
Esempio n. 10
0
	def BIGRUCNNmodel(self, weights, hidden_dim=128):
		"""Build the training and prediction models of a Bi-GRU + CNN ranker.

		A question and an answer (30 int32 positions each) share one frozen
		embedding, two stacked bidirectional GRUs and four ``Convolution1D``
		layers (filter lengths 1, 2, 3, 5; 300 filters each). The convolution
		outputs are merged, reduced with a max over axis 1 and compared with
		the similarity returned by ``self.get_cosine_similarity()``.

		Args:
			weights: Embedding array; ``len(weights)`` is used as
				``input_dim`` and ``weights.shape[1]`` as ``output_dim``.
			hidden_dim: Units of the first GRU; the second GRU uses
				``int(hidden_dim/2)``.

		Returns:
			Tuple ``(training_model, prediction_model)``, both compiled with
			Adam and a loss that returns ``y_pred`` unchanged.
		"""
		margin = 0.05
		question_steps = 30
		answer_steps = 30

		# Inputs: question, generic answer, and the good/bad answer pair.
		question = Input(
			shape=(question_steps,), dtype='int32', name='question_base')
		answer = Input(
			shape=(answer_steps,), dtype='int32', name='answer')
		answer_good = Input(
			shape=(answer_steps,), dtype='int32', name='answer_good_base')
		answer_bad = Input(
			shape=(answer_steps,), dtype='int32', name='answer_bad_base')

		# Frozen embedding shared by question and answer.
		shared_embedding = Embedding(
			input_dim=len(weights),
			output_dim=weights.shape[1],
			mask_zero=True,
			weights=[weights],
			trainable=False)
		question_embedded = shared_embedding(question)
		answer_embedded = shared_embedding(answer)

		# Two stacked bidirectional GRUs, each shared by both branches.
		first_bigru = Bidirectional(
			GRU(
				units=hidden_dim,
				dropout=0.2,
				recurrent_dropout=0.2,
				return_sequences=True,
				name='BIGRU'),
			merge_mode='concat')
		question_seq = first_bigru(question_embedded)
		answer_seq = first_bigru(answer_embedded)
		second_bigru = Bidirectional(
			GRU(
				units=int(hidden_dim/2),
				dropout=0.2,
				recurrent_dropout=0.2,
				return_sequences=True,
				name='BIGRU2'),
			merge_mode='concat')
		question_seq = second_bigru(question_seq)
		answer_seq = second_bigru(answer_seq)

		# Convolutions over the GRU outputs, flagged as mask-aware.
		conv_layers = []
		for width in [1, 2, 3, 5]:
			conv = Convolution1D(
				filter_length=width,
				nb_filter=300,
				activation='relu',
				border_mode='same',
				kernel_initializer='random_normal')
			conv.supports_masking = True
			conv_layers.append(conv)
		question_conv = merge(
			[conv(question_seq) for conv in conv_layers], mode='concat')
		answer_conv = merge(
			[conv(answer_seq) for conv in conv_layers], mode='concat')

		# Max over axis 1; output shape drops that axis.
		axis1_max = Lambda(
			lambda x: K.max(x, axis=1, keepdims=False),
			output_shape=lambda x: (x[0], x[2]))
		axis1_max.supports_masking = True
		question_vector = axis1_max(question_conv)
		answer_vector = axis1_max(answer_conv)

		# Similarity between the pooled question and answer.
		similarity = self.get_cosine_similarity()
		similarity_out = merge(
			[question_vector, answer_vector],
			mode=similarity,
			output_shape=lambda _: (None, 1))
		scorer = Model(
			inputs=[question, answer],
			outputs=similarity_out,
			name='bigru_convolution_model')
		print(scorer.summary())
		good_similarity = scorer([question, answer_good])
		bad_similarity = scorer([question, answer_bad])

		# Hinge-style loss: relu(margin - good + bad).
		loss = merge(
			[good_similarity, bad_similarity],
			mode=lambda x: K.relu(margin - x[0] + x[1]),
			output_shape=lambda x: x[0])

		# Both models use a loss that simply returns the model output.
		prediction_model = Model(
			inputs=[question, answer_good],
			outputs=good_similarity,
			name='prediction_model')
		prediction_model.compile(
			loss=lambda y_true, y_pred: y_pred, optimizer="adam")
		training_model = Model(
			inputs=[question, answer_good, answer_bad],
			outputs=loss,
			name='training_model')
		training_model.compile(
			loss=lambda y_true, y_pred: y_pred, optimizer="adam")

		print(training_model.summary())
		return training_model, prediction_model
    def get_convolutional_lstm_model(model_param, embedding_file, vocab_size):
        """Build the two-LSTM + CNN similarity model for a question/answer pair.

        Args:
            model_param: Object providing ``enc_timesteps`` and
                ``dec_timesteps``, used as the input lengths.
            embedding_file: Path passed to ``np.load`` to obtain the initial
                embedding array.
            vocab_size: ``input_dim`` of the shared ``Embedding`` layer.

        Returns:
            A Keras ``Model`` named ``'basic_model'`` taking
            ``[question, answer]`` and producing the similarity output.
        """
        hidden_dim = 200
        embed_matrix = np.load(embedding_file)

        question = Input(shape=(model_param.enc_timesteps, ),
                         dtype='int32',
                         name='question_base')
        answer = Input(shape=(model_param.dec_timesteps, ),
                       dtype='int32',
                       name='answer_good_base')

        shared_embedding = Embedding(input_dim=vocab_size,
                                     output_dim=embed_matrix.shape[1],
                                     weights=[embed_matrix])
        q_embedded = shared_embedding(question)
        a_embedded = shared_embedding(answer)

        # NOTE(review): neither LSTM sets go_backwards, so both read the
        # sequence in the same direction; confirm this is intended.
        first_lstm = LSTM(hidden_dim, return_sequences=True)
        second_lstm = LSTM(hidden_dim, return_sequences=True)

        q_first = first_lstm(q_embedded)
        q_second = second_lstm(q_embedded)
        q_sequence = merge([q_first, q_second],
                           mode='concat',
                           concat_axis=-1)

        a_first = first_lstm(a_embedded)
        a_second = second_lstm(a_embedded)
        a_sequence = merge([a_first, a_second],
                           mode='concat',
                           concat_axis=-1)

        conv_layers = []
        for width in [1, 2, 3, 5]:
            conv_layers.append(
                Convolution1D(filter_length=width,
                              nb_filter=500,
                              activation='tanh',
                              border_mode='same'))

        q_conv = merge([conv(q_sequence) for conv in conv_layers],
                       mode='concat')
        a_conv = merge([conv(a_sequence) for conv in conv_layers],
                       mode='concat')

        # Max over axis 1; output shape drops that axis.
        axis1_max = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                           output_shape=lambda x: (x[0], x[2]))
        axis1_max.supports_masking = True

        q_vector = axis1_max(q_conv)
        a_vector = axis1_max(a_conv)

        similarity = ModelFactory.get_similarity("cosine")
        similarity_out = merge([q_vector, a_vector],
                               mode=similarity,
                               output_shape=lambda _: (None, 1))
        return Model(input=[question, answer],
                     output=similarity_out,
                     name='basic_model')
    def build(self):
        """Encode the question with two LSTMs and the answer with attention.

        The question and answer use separate embeddings, initialised from
        ``config.initial_question_weights()`` and
        ``config.initial_answer_weights()``. The question runs through two
        141-unit LSTMs (the second with ``go_backwards=True``); each output
        is reduced by a max over axis 1 and the results are concatenated.
        The same LSTMs, wrapped in ``AttentionLSTMWrapper`` with the pooled
        question, then encode the answer the same way.

        Returns:
            Tuple ``(question_pool, answer_pool)``.
        """
        question_in = self.question
        answer_in = self.get_answer()

        # Question-side embedding.
        q_matrix = np.load(self.config.initial_question_weights())
        q_embed = Embedding(input_dim=q_matrix.shape[0],
                            output_dim=q_matrix.shape[1],
                            weights=[q_matrix])
        q_embedded = q_embed(question_in)

        # Answer-side embedding.
        a_matrix = np.load(self.config.initial_answer_weights())
        a_embed = Embedding(input_dim=a_matrix.shape[0],
                            output_dim=a_matrix.shape[1],
                            weights=[a_matrix])
        a_embedded = a_embed(answer_in)

        # Question branch.
        forward_lstm = LSTM(141, return_sequences=True, implementation=1)
        backward_lstm = LSTM(141,
                             return_sequences=True,
                             implementation=1,
                             go_backwards=True)
        q_forward = forward_lstm(q_embedded)
        q_backward = backward_lstm(q_embedded)

        axis1_max = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                           output_shape=lambda x: (x[0], x[2]))
        axis1_max.supports_masking = True
        q_parts = [axis1_max(q_forward), axis1_max(q_backward)]
        q_pooled = concatenate(q_parts)

        # Answer branch: the same LSTMs, wrapped with the pooled question.
        from attention_lstm import AttentionLSTMWrapper
        attn_forward = AttentionLSTMWrapper(forward_lstm,
                                            q_pooled,
                                            single_attention_param=True)
        attn_backward = AttentionLSTMWrapper(backward_lstm,
                                             q_pooled,
                                             single_attention_param=True)

        a_forward = attn_forward(a_embedded)
        a_backward = attn_backward(a_embedded)
        a_parts = [axis1_max(a_forward), axis1_max(a_backward)]
        a_pooled = concatenate(a_parts)

        return q_pooled, a_pooled
    def build(self):
        """Encode question and answer with two shared LSTMs followed by CNNs.

        The question and answer use separate embeddings. Both run through
        the same pair of 141-unit LSTMs (the second with
        ``go_backwards=True``), whose outputs are concatenated on the last
        axis and fed to four shared ``Conv1D`` layers (kernel sizes 1, 2, 3,
        5; 100 filters each). The concatenated convolution outputs are
        reduced with a max over axis 1.

        Returns:
            Tuple ``(question_pool, answer_pool)``.
        """
        question_in = self.question
        answer_in = self.get_answer()

        # Question-side embedding.
        q_matrix = np.load(self.config.initial_question_weights())
        q_embed = Embedding(input_dim=q_matrix.shape[0],
                            output_dim=q_matrix.shape[1],
                            weights=[q_matrix])
        q_embedded = q_embed(question_in)

        # Answer-side embedding.
        a_matrix = np.load(self.config.initial_answer_weights())
        a_embed = Embedding(input_dim=a_matrix.shape[0],
                            output_dim=a_matrix.shape[1],
                            weights=[a_matrix])
        a_embedded = a_embed(answer_in)

        forward_lstm = LSTM(141, return_sequences=True, implementation=1)
        backward_lstm = LSTM(141,
                             return_sequences=True,
                             implementation=1,
                             go_backwards=True)

        q_forward = forward_lstm(q_embedded)
        q_backward = backward_lstm(q_embedded)
        q_sequence = concatenate([q_forward, q_backward], axis=-1)

        a_forward = forward_lstm(a_embedded)
        a_backward = backward_lstm(a_embedded)
        a_sequence = concatenate([a_forward, a_backward], axis=-1)

        # Shared convolutions over the recurrent features.
        conv_layers = []
        for width in [1, 2, 3, 5]:
            conv_layers.append(Conv1D(kernel_size=width,
                                      filters=100,
                                      activation='tanh',
                                      padding='same'))
        q_conv = concatenate([conv(q_sequence) for conv in conv_layers])
        a_conv = concatenate([conv(a_sequence) for conv in conv_layers])

        # Max over axis 1; output shape drops that axis.
        axis1_max = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                           output_shape=lambda x: (x[0], x[2]))
        axis1_max.supports_masking = True

        q_pooled = axis1_max(q_conv)
        a_pooled = axis1_max(a_conv)
        return q_pooled, a_pooled
    def build(self):
        """Encode question and answer with a hidden layer followed by CNNs.

        The question and answer use separate embeddings but share a
        time-distributed 200-unit tanh ``Dense`` layer and four ``Conv1D``
        layers (kernel sizes 2, 3, 5, 7; 100 filters each). The concatenated
        convolution outputs are reduced with a max over axis 1.

        Returns:
            Tuple ``(question_pool, answer_pool)``.

        Raises:
            AssertionError: If ``config.question_len()`` differs from
                ``config.answer_len()``.
        """
        assert self.config.question_len() == self.config.answer_len()

        question_in = self.question
        answer_in = self.get_answer()

        # Question-side embedding.
        q_matrix = np.load(self.config.initial_question_weights())
        q_embed = Embedding(input_dim=q_matrix.shape[0],
                            output_dim=q_matrix.shape[1],
                            weights=[q_matrix])
        q_embedded = q_embed(question_in)

        # Answer-side embedding.
        a_matrix = np.load(self.config.initial_answer_weights())
        a_embed = Embedding(input_dim=a_matrix.shape[0],
                            output_dim=a_matrix.shape[1],
                            weights=[a_matrix])
        a_embedded = a_embed(answer_in)

        # Shared per-position hidden layer.
        hidden = TimeDistributed(Dense(200, activation='tanh'))
        q_hidden = hidden(q_embedded)
        a_hidden = hidden(a_embedded)

        # Shared convolutions over the hidden layer outputs.
        conv_layers = []
        for width in [2, 3, 5, 7]:
            conv_layers.append(Conv1D(kernel_size=width,
                                      filters=100,
                                      activation='tanh',
                                      padding='same'))
        q_conv = concatenate([conv(q_hidden) for conv in conv_layers])
        a_conv = concatenate([conv(a_hidden) for conv in conv_layers])

        # Max over axis 1; output shape drops that axis.
        axis1_max = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                           output_shape=lambda x: (x[0], x[2]))
        axis1_max.supports_masking = True

        q_pooled = axis1_max(q_conv)
        a_pooled = axis1_max(a_conv)
        return q_pooled, a_pooled
Esempio n. 15
0
	def build(self):
		"""Encode question and answer as a max over axis 1 of a shared embedding.

		The embedding is created with ``mask_zero=True`` and initialised
		from the array stored at ``config['initial_embed_weights']``.

		Returns:
			Tuple ``(question_pool, answer_pool)``.
		"""
		question_in = self.question
		answer_in = self.get_answer()

		embed_matrix = np.load(self.config['initial_embed_weights'])
		embed = Embedding(
			input_dim=self.config['n_words'],
			output_dim=embed_matrix.shape[1],
			mask_zero=True,
			weights=[embed_matrix])
		q_embedded = embed(question_in)
		a_embedded = embed(answer_in)

		# Max over axis 1; output shape drops that axis.
		axis1_max = Lambda(
			lambda x: K.max(x, axis=1, keepdims=False),
			output_shape=lambda x: (x[0], x[2]))
		axis1_max.supports_masking = True

		q_pooled = axis1_max(q_embedded)
		a_pooled = axis1_max(a_embedded)
		return q_pooled, a_pooled
Esempio n. 16
0
    def build(self):
        """Encode question and answer with two shared LSTMs followed by CNNs.

        Both inputs share one embedding, two 141-unit LSTMs whose outputs
        are merged on the last axis, and four ``Convolution1D`` layers
        (filter lengths 1, 2, 3, 5; 500 filters each). The merged
        convolution outputs are reduced with a max over axis 1.

        Returns:
            Tuple ``(question_pool, answer_pool)``.
        """
        question_in = self.question
        answer_in = self.get_answer()

        embed_matrix = np.load(self.config['initial_embed_weights'])
        embed = Embedding(input_dim=self.config['n_words'],
                          output_dim=embed_matrix.shape[1],
                          weights=[embed_matrix])
        q_embedded = embed(question_in)
        a_embedded = embed(answer_in)

        # NOTE(review): neither LSTM sets go_backwards, so both read the
        # sequence in the same direction; confirm this is intended.
        first_lstm = LSTM(141, return_sequences=True, consume_less='mem')
        second_lstm = LSTM(141, return_sequences=True, consume_less='mem')

        q_first = first_lstm(q_embedded)
        q_second = second_lstm(q_embedded)
        q_sequence = merge([q_first, q_second],
                           mode='concat',
                           concat_axis=-1)

        a_first = first_lstm(a_embedded)
        a_second = second_lstm(a_embedded)
        a_sequence = merge([a_first, a_second],
                           mode='concat',
                           concat_axis=-1)

        # Shared convolutions over the recurrent features.
        conv_layers = []
        for width in [1, 2, 3, 5]:
            conv_layers.append(
                Convolution1D(filter_length=width,
                              nb_filter=500,
                              activation='tanh',
                              border_mode='same'))
        q_conv = merge([conv(q_sequence) for conv in conv_layers],
                       mode='concat')
        a_conv = merge([conv(a_sequence) for conv in conv_layers],
                       mode='concat')

        # Max over axis 1; output shape drops that axis.
        axis1_max = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                           output_shape=lambda x: (x[0], x[2]))
        axis1_max.supports_masking = True

        q_pooled = axis1_max(q_conv)
        a_pooled = axis1_max(a_conv)
        return q_pooled, a_pooled
Esempio n. 17
0
def get_attention_vectors(bidi_layers, rich_context=True):
    """Return the two max-pooled attention vectors (for w0 and for w1).

    Each vector is the max over axis 1 of a concatenation of entries of
    ``bidi_layers``: entries 2 and 3, then entry 0 (first vector) or entry 1
    (second vector), and, when ``rich_context`` is true, entries 4 and 5.

    Args:
        bidi_layers: Indexable collection of tensors; indices 0-3 are always
            read, indices 4 and 5 only when ``rich_context`` is true.
        rich_context: Whether to append entries 4 and 5 to each concatenation.

    Returns:
        A two-element list ``[vector_for_w0, vector_for_w1]``.
    """
    # Max over axis 1; output shape drops that axis.
    axis1_max = Lambda(
        lambda x: keras.backend.max(x, axis=1, keepdims=False),
        output_shape=lambda x: (x[0], x[2]))
    axis1_max.supports_masking = True

    def pooled_for(slot):
        # Same entry order as before: 2, 3, slot, then optionally 4 and 5.
        parts = [bidi_layers[2], bidi_layers[3], bidi_layers[slot]]
        if rich_context:
            parts.append(bidi_layers[4])
            parts.append(bidi_layers[5])
        return axis1_max(concatenate(parts))

    vector_w0 = pooled_for(0)
    vector_w1 = pooled_for(1)
    return [vector_w0, vector_w1]
    def build(self):
        """Encode question and answer with two shared LSTMs followed by CNNs.

        Both inputs share one embedding, a pair of 141-unit LSTMs (the
        second with ``go_backwards=True``) whose outputs are concatenated on
        the last axis, and four ``Conv1D`` layers (kernel sizes 1, 2, 3, 5;
        500 filters each). The concatenated convolution outputs are reduced
        with a max over axis 1.

        Returns:
            Tuple ``(question_pool, answer_pool)``.
        """
        question_in = self.question
        answer_in = self.get_answer()

        embed_matrix = np.load(self.config['initial_embed_weights'])
        embed = Embedding(input_dim=self.config['n_words'],
                          output_dim=embed_matrix.shape[1],
                          weights=[embed_matrix])
        q_embedded = embed(question_in)
        a_embedded = embed(answer_in)

        forward_lstm = LSTM(141, return_sequences=True, implementation=1)
        backward_lstm = LSTM(141,
                             return_sequences=True,
                             implementation=1,
                             go_backwards=True)

        q_forward = forward_lstm(q_embedded)
        q_backward = backward_lstm(q_embedded)
        q_sequence = concatenate([q_forward, q_backward], axis=-1)

        a_forward = forward_lstm(a_embedded)
        a_backward = backward_lstm(a_embedded)
        a_sequence = concatenate([a_forward, a_backward], axis=-1)

        # Shared convolutions over the recurrent features.
        conv_layers = []
        for width in [1, 2, 3, 5]:
            conv_layers.append(Conv1D(kernel_size=width,
                                      filters=500,
                                      activation='tanh',
                                      padding='same'))
        q_conv = concatenate([conv(q_sequence) for conv in conv_layers],
                             axis=-1)
        a_conv = concatenate([conv(a_sequence) for conv in conv_layers],
                             axis=-1)

        # Max over axis 1; output shape drops that axis.
        axis1_max = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                           output_shape=lambda x: (x[0], x[2]))
        axis1_max.supports_masking = True

        q_pooled = axis1_max(q_conv)
        a_pooled = axis1_max(a_conv)
        return q_pooled, a_pooled
Esempio n. 19
0
    def build(self):
        """Encode question and answer as a max over axis 1 of a shared embedding.

        The embedding is created with ``mask_zero=True`` and initialised
        from the array stored at ``config['initial_embed_weights']``.

        Returns:
            Tuple ``(question_pool, answer_pool)``.
        """
        q_input = self.question
        a_input = self.get_answer()

        pretrained = np.load(self.config['initial_embed_weights'])
        shared_embedding = Embedding(
            input_dim=self.config['n_words'],
            output_dim=pretrained.shape[1],
            mask_zero=True,
            weights=[pretrained],
        )
        q_emb = shared_embedding(q_input)
        a_emb = shared_embedding(a_input)

        # Max over axis 1; output shape drops that axis.
        reduce_max = Lambda(
            lambda x: K.max(x, axis=1, keepdims=False),
            output_shape=lambda x: (x[0], x[2]),
        )
        reduce_max.supports_masking = True

        question_vector = reduce_max(q_emb)
        answer_vector = reduce_max(a_emb)
        return question_vector, answer_vector
Esempio n. 20
0
	def build(self):
		"""Encode question and answer with two shared LSTMs followed by CNNs.

		Both inputs share one embedding, a pair of 141-unit LSTMs (the
		second with ``go_backwards=True``) whose outputs are concatenated on
		the last axis, and four ``Conv1D`` layers (kernel sizes 1, 2, 3, 5;
		500 filters each) applied to those recurrent features. The
		concatenated convolution outputs are reduced with a max over axis 1.

		Returns:
			Tuple ``(question_pool, answer_pool)``.
		"""
		question = self.questions
		answer = self.get_answer()

		weights = np.load(self.config['initial_embed_weights'])
		embedding = Embedding(
			input_dim=self.config['n_words'],
			output_dim=weights.shape[1],
			weights=[weights])
		question_embedding = embedding(question)
		answer_embedding = embedding(answer)

		f_rnn = LSTM(141, return_sequences=True, implementation=1)
		b_rnn = LSTM(
			141,
			return_sequences=True,
			implementation=1,
			go_backwards=True)

		qf_rnn = f_rnn(question_embedding)
		qb_rnn = b_rnn(question_embedding)
		question_rnn = concatenate([qf_rnn, qb_rnn], axis=-1)

		af_rnn = f_rnn(answer_embedding)
		# Fix: the second answer pass previously reused f_rnn, so the answer
		# never went through b_rnn.
		ab_rnn = b_rnn(answer_embedding)
		answer_rnn = concatenate([af_rnn, ab_rnn], axis=-1)

		cnns = [
			Conv1D(
				kernel_size=kernel_size,
				filters=500,
				activation='tanh',
				padding='same')
			for kernel_size in [1, 2, 3, 5]
		]

		# Fix: convolve the LSTM features. The previous code convolved the
		# raw embeddings, which discarded the recurrent outputs.
		question_cnn = concatenate(
			[cnn(question_rnn) for cnn in cnns], axis=-1)
		answer_cnn = concatenate(
			[cnn(answer_rnn) for cnn in cnns], axis=-1)

		# Max over axis 1; output shape drops that axis.
		maxpool = Lambda(
			lambda x: K.max(x, axis=1, keepdims=False),
			output_shape=lambda x: (x[0], x[2]))
		maxpool.supports_masking = True
		question_pool = maxpool(question_cnn)
		answer_pool = maxpool(answer_cnn)

		return question_pool, answer_pool
    def build(self):
        """Encode question and answer with a hidden layer followed by CNNs.

        Both inputs share one embedding, a time-distributed 200-unit tanh
        ``Dense`` layer and four ``Conv1D`` layers (kernel sizes 2, 3, 5, 7;
        1000 filters each). The concatenated convolution outputs are reduced
        with a max over axis 1.

        Returns:
            Tuple ``(question_pool, answer_pool)``.

        Raises:
            AssertionError: If ``config['question_len']`` differs from
                ``config['answer_len']``.
        """
        assert self.config['question_len'] == self.config['answer_len']

        q_input = self.question
        a_input = self.get_answer()

        pretrained = np.load(self.config['initial_embed_weights'])
        shared_embedding = Embedding(
            input_dim=self.config['n_words'],
            output_dim=pretrained.shape[1],
            weights=[pretrained],
        )
        q_emb = shared_embedding(q_input)
        a_emb = shared_embedding(a_input)

        # Shared per-position hidden layer.
        hidden = TimeDistributed(Dense(200, activation='tanh'))
        q_hidden = hidden(q_emb)
        a_hidden = hidden(a_emb)

        # Shared convolutions over the hidden layer outputs.
        convolutions = []
        for size in [2, 3, 5, 7]:
            convolutions.append(
                Conv1D(
                    kernel_size=size,
                    filters=1000,
                    activation='tanh',
                    padding='same',
                )
            )
        q_features = concatenate(
            [convolution(q_hidden) for convolution in convolutions], axis=-1)
        a_features = concatenate(
            [convolution(a_hidden) for convolution in convolutions], axis=-1)

        # Max over axis 1; output shape drops that axis.
        reduce_max = Lambda(
            lambda x: K.max(x, axis=1, keepdims=False),
            output_shape=lambda x: (x[0], x[2]),
        )
        reduce_max.supports_masking = True

        question_vector = reduce_max(q_features)
        answer_vector = reduce_max(a_features)
        return question_vector, answer_vector
Esempio n. 22
0
    def build(self):
        """Build the shared two-LSTM + CNN encoder for question and answer.

        Pipeline for each input: shared embedding, two 141-unit LSTMs merged
        on the last axis, four ``Convolution1D`` layers (filter lengths 1, 2,
        3, 5; 500 filters each) merged in ``concat`` mode, and a max over
        axis 1.

        Returns:
            Tuple ``(question_pool, answer_pool)``.
        """
        q_input = self.question
        a_input = self.get_answer()

        pretrained = np.load(self.config['initial_embed_weights'])
        shared_embedding = Embedding(
            input_dim=self.config['n_words'],
            output_dim=pretrained.shape[1],
            weights=[pretrained],
        )
        q_emb = shared_embedding(q_input)
        a_emb = shared_embedding(a_input)

        # NOTE(review): neither LSTM sets go_backwards, so both read the
        # sequence in the same direction; confirm this is intended.
        lstm_a = LSTM(141, return_sequences=True, consume_less='mem')
        lstm_b = LSTM(141, return_sequences=True, consume_less='mem')

        q_seq_a = lstm_a(q_emb)
        q_seq_b = lstm_b(q_emb)
        q_recurrent = merge([q_seq_a, q_seq_b], mode='concat', concat_axis=-1)

        a_seq_a = lstm_a(a_emb)
        a_seq_b = lstm_b(a_emb)
        a_recurrent = merge([a_seq_a, a_seq_b], mode='concat', concat_axis=-1)

        convolutions = [
            Convolution1D(
                filter_length=size,
                nb_filter=500,
                activation='tanh',
                border_mode='same',
            )
            for size in [1, 2, 3, 5]
        ]
        q_maps = []
        for convolution in convolutions:
            q_maps.append(convolution(q_recurrent))
        q_features = merge(q_maps, mode='concat')
        a_maps = []
        for convolution in convolutions:
            a_maps.append(convolution(a_recurrent))
        a_features = merge(a_maps, mode='concat')

        # Max over axis 1; output shape drops that axis.
        reduce_max = Lambda(
            lambda x: K.max(x, axis=1, keepdims=False),
            output_shape=lambda x: (x[0], x[2]),
        )
        reduce_max.supports_masking = True

        question_vector = reduce_max(q_features)
        answer_vector = reduce_max(a_features)
        return question_vector, answer_vector
Esempio n. 23
0
def get_baseline_model(options: dict, embedding: np.ndarray):
    """Build and compile the baseline two-warrant classifier.

    Each warrant sequence is embedded, passed through a bidirectional LSTM
    and summarised by a second LSTM. The two summaries are concatenated and
    fed through dropout and two dense layers.

    Args:
        options: Hyper-parameters read with ``dict.get`` under the keys
            ``padding``, ``lstm_size``, ``dropout``, ``optimizer``, ``loss``,
            ``activation1`` and ``activation2``.
        embedding: 2-D embedding matrix; its two dimensions size the
            Embedding layers and it supplies their initial weights.

    Returns:
        The compiled Keras ``Model``. It declares six inputs (warrant0,
        warrant1, reason, claim, debate title, debate info); only the two
        warrant inputs are connected to the output.
    """
    max_len = options.get('padding')
    lstm_size = options.get('lstm_size')
    dropout = options.get('dropout')
    optimizer = options.get('optimizer')
    loss = options.get('loss')
    activation1 = options.get('activation1')
    activation2 = options.get('activation2')

    print('LSTM_01: embeddings.shape', embedding.shape)

    def sequence_input(suffix):
        # All inputs are int32 index sequences of length `max_len`.
        return Input(shape=(max_len, ),
                     dtype='int32',
                     name="sequence_layer_input_" + suffix)

    # The six declared input layers; the order here is the model's input order.
    sequence_layer_warrant0_input = sequence_input('warrant0')
    sequence_layer_warrant1_input = sequence_input('warrant1')
    sequence_layer_reason_input = sequence_input('reason')
    sequence_layer_claim_input = sequence_input('claim')
    sequence_layer_debate_title_input = sequence_input('debateTitle')
    sequence_layer_debate_info_input = sequence_input('debateInfo')

    def embed(tokens):
        # Each warrant gets its own Embedding layer initialised from
        # `embedding`; index 0 is treated as padding (mask_zero=True).
        return Embedding(embedding.shape[0],
                         embedding.shape[1],
                         input_length=max_len,
                         weights=[embedding],
                         mask_zero=True)(tokens)

    embedded_layer_warrant0_input = embed(sequence_layer_warrant0_input)
    embedded_layer_warrant1_input = embed(sequence_layer_warrant1_input)

    bidi_lstm_layer_warrant0 = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM_W0')(embedded_layer_warrant0_input)
    bidi_lstm_layer_warrant1 = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM_W1')(embedded_layer_warrant1_input)

    # Collapse each BiLSTM output sequence into a single vector with a
    # second, plain LSTM (no attention layer is involved in this baseline).
    summary_warrant0 = LSTM(lstm_size)(bidi_lstm_layer_warrant0)
    summary_warrant1 = LSTM(lstm_size)(bidi_lstm_layer_warrant1)

    # Concatenate both summaries and regularise with dropout.
    dropout_layer = Dropout(dropout)(concatenate(
        [summary_warrant0, summary_warrant1]))

    # One extra dense layer followed by the single-unit output layer.
    dense1 = Dense(int(lstm_size), activation=activation1)(dropout_layer)
    output_layer = Dense(1, activation=activation2)(dense1)

    model = Model(inputs=[
        sequence_layer_warrant0_input, sequence_layer_warrant1_input,
        sequence_layer_reason_input, sequence_layer_claim_input,
        sequence_layer_debate_title_input, sequence_layer_debate_info_input
    ],
                  outputs=output_layer)
    model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=['accuracy', dev_pred])

    return model
Esempio n. 24
0
def cnn_lstm_f1():
    """Build and compile a two-input BiLSTM + attention + CNN classifier.

    The vocabulary object is unpickled from ``vocab.data``. Both inputs are
    encoded with the same LSTMs, position-embedding layer, attention layer
    and convolution bank, pooled with a maximum over axis 1 and combined
    into a single sigmoid output.

    Returns:
        The compiled Keras ``Model`` (binary cross-entropy, Adam, accuracy).
    """
    with open('vocab.data', 'rb') as handle:
        vocab = pickle.load(handle)

    seq_len = 20
    question1 = Input(shape=(seq_len, ))
    question2 = Input(shape=(seq_len, ))

    def frozen_embedding(tokens):
        # Each input gets its own non-trainable Embedding layer, initialised
        # from the vocabulary's embedding matrix.
        layer = Embedding(vocab.nb_words + 1,
                          300,
                          weights=[vocab.embedding],
                          input_length=seq_len,
                          trainable=False)
        return layer(tokens)

    embedded1 = frozen_embedding(question1)
    embedded2 = frozen_embedding(question2)

    # Layers shared by both branches.
    forward_lstm = LSTM(30, return_sequences=True, implementation=1)
    backward_lstm = LSTM(30,
                         return_sequences=True,
                         implementation=1,
                         go_backwards=True)
    pos = Position_Embedding(mode='concat')
    att = Attention(20)

    def encode(embedded):
        # Per-branch batch norm, then the shared BiLSTM / position /
        # attention stack; the attention output is appended to its input.
        normalised = BatchNormalization()(embedded)
        fwd = forward_lstm(normalised)
        bwd = backward_lstm(normalised)
        states = concatenate([fwd, bwd], axis=-1)
        states = pos(states)
        attended = att(states)
        return concatenate([states, attended])

    encoded1 = encode(embedded1)
    encoded2 = encode(embedded2)

    # Shared convolution bank, one layer per kernel width.
    conv_bank = [
        Conv1D(kernel_size=width,
               filters=100,
               activation='tanh',
               padding='same') for width in (1, 2, 3, 5)
    ]
    conv1 = concatenate([conv(encoded1) for conv in conv_bank], axis=-1)
    conv2 = concatenate([conv(encoded2) for conv in conv_bank], axis=-1)

    # Maximum over axis 1; the layer is flagged as mask-aware.
    time_max = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                      output_shape=lambda x: (x[0], x[2]))
    time_max.supports_masking = True

    pooled1 = Dropout(0.05)(time_max(conv1))
    pooled2 = Dropout(0.05)(time_max(conv2))
    projected1 = Dense(100, activation='relu')(pooled1)
    projected2 = Dense(100, activation='relu')(pooled2)
    joined = concatenate([projected1, projected2])

    is_duplicate = Dense(1, activation='sigmoid')(joined)

    model = Model(inputs=[question1, question2], outputs=is_duplicate)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
Esempio n. 25
0
                                             unroll=self.layer.unroll,
                                             input_length=input_shape[1])
        if self.layer.stateful:
            self.updates = []
            for i in range(len(states)):
                self.updates.append((self.layer.states[i], states[i]))

        if self.layer.return_sequences:
            return outputs
        else:
            return last_output


# Module-level Lambda layer that takes the maximum over axis 1 of its input
# (the reduced axis is dropped); the declared output shape keeps entries 0
# and 2 of the input shape.
Maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                 output_shape=lambda x: (x[0], x[2]))
# Flag the layer as mask-aware so it can be applied to masked inputs.
Maxpool.supports_masking = True


def Encoder(hidden_size, activation=None, return_sequences=True, bidirectional=False, use_gru=True):
    """Set up a recurrent encoder closure.

    NOTE(review): the definition appears to be cut short in this file. Only
    the bidirectional GRU path is visible; there is no branch for the
    non-bidirectional or non-GRU cases and ``_encoder`` is never returned
    in the visible code.

    Args:
        hidden_size: Number of units passed to each GRU.
        activation: Callable applied to the concatenated GRU outputs; an
            ``ELU()`` instance is used when ``None``.
        return_sequences: Forwarded to the GRU layers.
        bidirectional: When true, a forward and a backward GRU are both run.
        use_gru: Guards the definition of the GRU-based closure.
    """
    # Default to an ELU layer when the caller supplies no activation.
    if activation is None:
        activation = ELU()
    if use_gru:
        def _encoder(x):
            if bidirectional:
                # Two linear-activation GRUs, one reading the input forwards
                # and one backwards; the non-linearity is applied afterwards.
                branch_1 = GRU(hidden_size, activation='linear',
                               return_sequences=return_sequences, go_backwards=False)(x)
                branch_2 = GRU(hidden_size, activation='linear',
                               return_sequences=return_sequences, go_backwards=True)(x)
                x = concatenate([branch_1, branch_2])
                x = activation(x)
                return x
Esempio n. 26
0
    def LSTMCNN4model(self, weights, hidden_dim=100):
        """Build the LSTM + CNN question/answer ranking models.

        A shared encoder (frozen embedding, two LSTMs, convolution bank,
        dropout, maximum over axis 1) is applied to a question and an
        answer, and the two pooled vectors are compared with the similarity
        returned by ``self.get_cosine_similarity()``. The training model
        outputs the hinge term ``relu(margin - good + bad)``.

        Args:
            weights: Pretrained embedding matrix; ``len(weights)`` sizes the
                vocabulary and ``weights.shape[1]`` the embedding width.
            hidden_dim: Number of units in each LSTM.

        Returns:
            A ``(training_model, prediction_model)`` tuple of compiled models.
        """
        margin = 0.05
        enc_timesteps = 30
        dec_timesteps = 30

        def token_input(timesteps, layer_name):
            # int32 index sequence of fixed length.
            return Input(shape=(timesteps, ), dtype='int32', name=layer_name)

        # Inputs of the inner scoring model and of the outer models.
        question = token_input(enc_timesteps, 'question_base')
        answer = token_input(dec_timesteps, 'answer')
        answer_good = token_input(dec_timesteps, 'answer_good_base')
        answer_bad = token_input(dec_timesteps, 'answer_bad_base')

        # Shared, frozen embedding with index 0 masked.
        shared_embedding = Embedding(input_dim=len(weights),
                                     output_dim=weights.shape[1],
                                     mask_zero=True,
                                     weights=[weights],
                                     trainable=False)
        embedded_question = shared_embedding(question)
        embedded_answer = shared_embedding(answer)

        # Two shared LSTMs with identical settings.
        lstm_kwargs = dict(return_sequences=True,
                           recurrent_dropout=0.2,
                           dropout=0.2)
        first_lstm = LSTM(hidden_dim, **lstm_kwargs)
        second_lstm = LSTM(hidden_dim, **lstm_kwargs)

        def recurrent_features(sequence):
            # Join the outputs of both LSTMs along the feature axis.
            out_first = first_lstm(sequence)
            out_second = second_lstm(sequence)
            return merge([out_first, out_second],
                         mode='concat',
                         concat_axis=-1)

        question_seq = recurrent_features(embedded_question)
        answer_seq = recurrent_features(embedded_answer)
        print(answer_seq)

        # Convolution bank over the recurrent features; every layer is
        # flagged as mask-aware so it accepts the masked sequences.
        conv_bank = [
            Convolution1D(filter_length=width,
                          nb_filter=500,
                          activation='tanh',
                          border_mode='same') for width in (1, 2, 3, 5)
        ]
        for conv in conv_bank:
            conv.supports_masking = True

        def conv_features(sequence):
            return merge([conv(sequence) for conv in conv_bank],
                         mode='concat')

        question_conv = conv_features(question_seq)
        answer_conv = conv_features(answer_seq)

        # A single Dropout layer is reused for both branches.
        shared_dropout = Dropout(0.2)
        question_conv = shared_dropout(question_conv)
        answer_conv = shared_dropout(answer_conv)

        # Maximum over axis 1; the layer is flagged as mask-aware.
        time_max = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                          output_shape=lambda x: (x[0], x[2]))
        time_max.supports_masking = True
        question_vec = time_max(question_conv)
        answer_vec = time_max(answer_conv)

        # Inner model: similarity score between a question and one answer.
        similarity = self.get_cosine_similarity()
        score = merge([question_vec, answer_vec],
                      mode=similarity,
                      output_shape=lambda _: (None, 1))
        lstm_convolution_model = Model(inputs=[question, answer],
                                       outputs=score,
                                       name='lstm_convolution_model')

        good_similarity = lstm_convolution_model([question, answer_good])
        bad_similarity = lstm_convolution_model([question, answer_bad])

        # Hinge term: relu(margin - good + bad).
        loss = merge([good_similarity, bad_similarity],
                     mode=lambda x: K.relu(margin - x[0] + x[1]),
                     output_shape=lambda x: x[0])

        # Both models are compiled with a loss that simply returns y_pred,
        # and they share one Adam optimizer instance.
        adam = Adam(lr=0.001)
        prediction_model = Model(inputs=[question, answer_good],
                                 outputs=good_similarity,
                                 name='prediction_model')
        prediction_model.compile(loss=lambda y_true, y_pred: y_pred,
                                 optimizer=adam)
        training_model = Model(inputs=[question, answer_good, answer_bad],
                               outputs=loss,
                               name='training_model')
        training_model.compile(loss=lambda y_true, y_pred: y_pred,
                               optimizer=adam)

        return training_model, prediction_model
Esempio n. 27
0
def attention_lstm():
    """Build and compile a two-input BiLSTM + attention classifier.

    The vocabulary object is unpickled from ``vocab.data``. Both inputs go
    through the same position-embedding layer, forward/backward LSTMs and
    attention layer, are pooled with a maximum over axis 1, projected by a
    dense layer and combined into a single sigmoid output.

    Returns:
        The compiled Keras ``Model`` (binary cross-entropy, Adam, accuracy).
    """
    # NOTE: pickle.load executes arbitrary code from the file; only use a
    # vocab.data that comes from a trusted source.
    with open('vocab.data', 'rb') as fin:
        vocab = pickle.load(fin)

    question1 = Input(shape=(15, ))
    question2 = Input(shape=(15, ))

    # Each input gets its own non-trainable Embedding layer, initialised
    # from the vocabulary's embedding matrix.
    q1 = Embedding(vocab.nb_words + 1,
                   300,
                   weights=[vocab.embedding],
                   input_length=15,
                   trainable=False)(question1)

    q2 = Embedding(vocab.nb_words + 1,
                   300,
                   weights=[vocab.embedding],
                   input_length=15,
                   trainable=False)(question2)

    # Layers shared by both branches.
    pos = Position_Embedding()
    f_rnn = LSTM(256, return_sequences=True, consume_less='mem')
    b_rnn = LSTM(256,
                 return_sequences=True,
                 consume_less='mem',
                 go_backwards=True)

    # Maximum over axis 1; the layer is flagged as mask-aware.
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True

    q1 = pos(q1)
    q2 = pos(q2)
    qf_rnn = f_rnn(q1)
    qb_rnn = b_rnn(q1)
    q1_rnn = concatenate([qf_rnn, qb_rnn], axis=-1)

    af_rnn = f_rnn(q2)
    ab_rnn = b_rnn(q2)
    q2_rnn = concatenate([af_rnn, ab_rnn], axis=-1)

    att = Attention(20)

    # The attention layer is called with the same tensor three times.
    q1_att = maxpool(att([q1_rnn, q1_rnn, q1_rnn]))
    q1 = Dense(200, activation='relu')(q1_att)

    # Use the same `att` layer for the second question; the previous code
    # called an undefined name `attention` here.
    q2_att = maxpool(att([q2_rnn, q2_rnn, q2_rnn]))
    q2 = Dense(200, activation='relu')(q2_att)

    merged = concatenate([q1, q2])
    merged = Dense(200, activation='relu')(merged)
    # Dropout rate 0 leaves activations unchanged; kept so the layer stack
    # stays the same.
    merged = Dropout(0)(merged)
    merged = BatchNormalization()(merged)

    is_duplicate = Dense(1, activation='sigmoid')(merged)

    model = Model(inputs=[question1, question2], outputs=is_duplicate)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
Esempio n. 28
0
def get_attention_lstm(word_index_to_embeddings_map,
                       max_len,
                       rich_context: bool = False,
                       **kwargs):
    """Build and compile the attention-LSTM warrant classifier.

    Reason, claim and debate sequences are embedded and passed through
    bidirectional LSTMs. Their concatenation is max-pooled over axis 1 into
    one vector, which is handed to an ``AttentionLSTM`` for each warrant.
    The two warrant encodings are merged and classified by two dense layers.

    Args:
        word_index_to_embeddings_map: Mapping whose values are the embedding
            vectors; they are stacked in iteration order into the matrix
            that initialises every Embedding layer.
        max_len: Length of every input sequence.
        rich_context: When true, the debate BiLSTM output is included in the
            pooled context in addition to reason and claim.
        **kwargs: Must contain truthy ``lstm_size`` and ``dropout`` values.

    Returns:
        The compiled Keras ``Model`` (binary cross-entropy, Adam, accuracy).
        As a side effect a plot of the model is written to
        ``/tmp/model-att.png``.

    Raises:
        AssertionError: If ``lstm_size`` or ``dropout`` is missing or falsy
            (note that this also rejects a dropout of 0).
    """
    # Stack the embedding vectors into one 2-D float32 array. `np.float32`
    # is used so the dtype resolves through the `np` name this block
    # already relies on, rather than a bare `float32`.
    embeddings = np.asarray([
        np.array(x, dtype=np.float32)
        for x in word_index_to_embeddings_map.values()
    ])
    print('embeddings.shape', embeddings.shape)

    lstm_size = kwargs.get('lstm_size')
    dropout = kwargs.get('dropout')
    assert lstm_size
    assert dropout

    def sequence_input(layer_name):
        # int32 index sequence of length `max_len`.
        return Input(shape=(max_len, ), dtype='int32', name=layer_name)

    # Five input layers: warrant0, warrant1, reason, claim and debate.
    sequence_layer_warrant0_input = sequence_input(
        "sequence_layer_warrant0_input")
    sequence_layer_warrant1_input = sequence_input(
        "sequence_layer_warrant1_input")
    sequence_layer_reason_input = sequence_input(
        "sequence_layer_reason_input")
    sequence_layer_claim_input = sequence_input("sequence_layer_claim_input")
    sequence_layer_debate_input = sequence_input(
        "sequence_layer_debate_input")

    def embed(tokens):
        # Every input gets its own Embedding layer, all initialised from
        # the same matrix; index 0 is masked.
        return Embedding(embeddings.shape[0],
                         embeddings.shape[1],
                         input_length=max_len,
                         weights=[embeddings],
                         mask_zero=True)(tokens)

    embedded_layer_warrant0_input = embed(sequence_layer_warrant0_input)
    embedded_layer_warrant1_input = embed(sequence_layer_warrant1_input)
    embedded_layer_reason_input = embed(sequence_layer_reason_input)
    embedded_layer_claim_input = embed(sequence_layer_claim_input)
    embedded_layer_debate_input = embed(sequence_layer_debate_input)

    bidi_lstm_layer_reason = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM Reason')(embedded_layer_reason_input)
    bidi_lstm_layer_claim = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM Claim')(embedded_layer_claim_input)
    # The debate BiLSTM is always built; it only feeds the context when
    # `rich_context` is set.
    bidi_lstm_layer_debate = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM Context')(embedded_layer_debate_input)

    # Context = reason + claim (+ debate when rich_context).
    context_parts = [bidi_lstm_layer_reason, bidi_lstm_layer_claim]
    if rich_context:
        context_parts.append(bidi_lstm_layer_debate)
    context_concat = merge(context_parts, mode='concat')

    # Maximum over axis 1; the layer is flagged as mask-aware.
    max_pool_lambda_layer = Lambda(
        lambda x: keras.backend.max(x, axis=1, keepdims=False),
        output_shape=lambda x: (x[0], x[2]))
    max_pool_lambda_layer.supports_masking = True
    attention_vector = max_pool_lambda_layer(context_concat)

    attention_warrant0 = AttentionLSTM(
        lstm_size, attention_vector)(embedded_layer_warrant0_input)
    attention_warrant1 = AttentionLSTM(
        lstm_size, attention_vector)(embedded_layer_warrant1_input)

    # Combine both warrant encodings.
    # NOTE(review): merge() is called without an explicit mode here, so the
    # library default applies (presumably 'sum' in Keras 1) even though the
    # original comment said "concatenate" -- confirm the intended mode.
    dropout_layer = Dropout(dropout)(merge(
        [attention_warrant0, attention_warrant1]))

    # One extra ReLU layer followed by the sigmoid output.
    dense1 = Dense(int(lstm_size / 2), activation='relu')(dropout_layer)
    output_layer = Dense(1, activation='sigmoid')(dense1)

    model = Model([
        sequence_layer_warrant0_input, sequence_layer_warrant1_input,
        sequence_layer_reason_input, sequence_layer_claim_input,
        sequence_layer_debate_input
    ],
                  output=output_layer)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    # Write a diagram of the model to disk.
    from keras.utils.visualize_util import plot
    plot(model, show_shapes=True, to_file='/tmp/model-att.png')

    return model
Esempio n. 29
0
    def get_lstm_cnn_model(embedding_file, vocab_size):
        """Build the LSTM + CNN question/answer ranking models.

        A shared encoder (embedding, two LSTMs, convolution bank, maximum
        over axis 1) is applied to a question and an answer, and the pooled
        vectors are compared with a normalised dot product. The training
        model outputs the hinge term ``relu(bad - good + margin)``.

        Args:
            embedding_file: Path of the ``.npy`` file holding the initial
                embedding matrix.
            vocab_size: Number of rows of the Embedding layer.

        Returns:
            A ``(training_model, prediction_model)`` tuple of compiled models.
        """
        margin = 0.2
        hidden_dim = 141
        enc_timesteps = 200
        dec_timesteps = 200
        weights = np.load(embedding_file)

        # Inputs of the inner scoring model and of the outer models.
        # NOTE(review): `answer` reuses the layer name 'answer_good_base'
        # that `answer_good` also carries; the two inputs end up in
        # different Model objects, but confirm the name is intended.
        question = Input(shape=(enc_timesteps, ),
                         dtype='int32',
                         name='question_base')
        answer = Input(shape=(dec_timesteps, ),
                       dtype='int32',
                       name='answer_good_base')
        answer_good = Input(shape=(dec_timesteps, ),
                            dtype='int32',
                            name='answer_good_base')
        answer_bad = Input(shape=(dec_timesteps, ),
                           dtype='int32',
                           name='answer_bad_base')

        # Embed question and answer with one shared layer. The leftover
        # debug print()/exit() that used to follow the question embedding
        # terminated the process before any model was built; it is removed.
        qa_embedding = Embedding(input_dim=vocab_size,
                                 output_dim=weights.shape[1],
                                 weights=[weights])
        question_embedding = qa_embedding(question)
        answer_embedding = qa_embedding(answer)

        # Two shared LSTMs whose outputs are concatenated feature-wise.
        # NOTE(review): both are configured identically (neither sets
        # go_backwards) -- confirm whether the second was meant to be
        # reversed.
        f_rnn = LSTM(hidden_dim, return_sequences=True)
        b_rnn = LSTM(hidden_dim, return_sequences=True)
        qf_rnn = f_rnn(question_embedding)
        qb_rnn = b_rnn(question_embedding)
        question_pool = concatenate([qf_rnn, qb_rnn], axis=-1)
        af_rnn = f_rnn(answer_embedding)
        ab_rnn = b_rnn(answer_embedding)
        answer_pool = concatenate([af_rnn, ab_rnn], axis=-1)

        # Shared convolution bank, one layer per entry of `filter_sizes`.
        filter_sizes = [2, 2]
        cnns = [
            Convolution1D(filters=500,
                          kernel_size=ngram_size,
                          activation='tanh',
                          padding='same') for ngram_size in filter_sizes
        ]

        question_cnn = concatenate([cnn(question_pool) for cnn in cnns])
        answer_cnn = concatenate([cnn(answer_pool) for cnn in cnns])

        # Maximum over axis 1; the layer is flagged as mask-aware.
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]))
        maxpool.supports_masking = True
        question_pool = maxpool(question_cnn)
        answer_pool = maxpool(answer_cnn)

        # Inner model: normalised dot product of the two pooled vectors.
        merged_model = Dot(axes=1,
                           normalize=True)([question_pool, answer_pool])
        lstm_convolution_model = Model(inputs=[question, answer],
                                       outputs=merged_model,
                                       name='lstm_convolution_model')
        good_similarity = lstm_convolution_model([question, answer_good])
        bad_similarity = lstm_convolution_model([question, answer_bad])

        # Hinge term: relu(bad - good + margin).
        loss = Lambda(lambda x: K.relu(x[1] - x[0] + margin))(
            [good_similarity, bad_similarity])

        # Both models are compiled with a loss that simply returns y_pred.
        prediction_model = Model(inputs=[question, answer_good],
                                 outputs=good_similarity,
                                 name='prediction_model')
        prediction_model.compile(loss=lambda y_true, y_pred: y_pred,
                                 optimizer="rmsprop")
        training_model = Model(inputs=[question, answer_good, answer_bad],
                               outputs=loss,
                               name='training_model')
        training_model.compile(loss=lambda y_true, y_pred: y_pred,
                               optimizer="rmsprop")

        return training_model, prediction_model
Esempio n. 30
0
def get_attention_lstm_intra_warrant_kb_pooled(word_index_to_embeddings_map,
                                               max_len,
                                               rich_context=False,
                                               lstm_size=32,
                                               warrant_lstm_size=32,
                                               dropout=0.1,
                                               kb_embeddings=None,
                                               fn_embeddings=None):
    """Build and compile the intra-warrant attention model with pooled extras.

    All five word inputs share one Embedding layer and each gets its own
    bidirectional LSTM. For each warrant, a vector is built by max-pooling
    (over axis 1) the concatenation of the reason, claim and *other*
    warrant BiLSTM outputs (plus debate when ``rich_context``). That vector
    is passed as ``constants`` to an ``AttentionLSTM`` over the warrant's
    own BiLSTM output. When ``kb_embeddings`` / ``fn_embeddings`` are given,
    four extra inputs per source are embedded, concatenated, sum-pooled,
    projected by a dense layer and appended to those constants.

    Args:
        word_index_to_embeddings_map: Mapping whose values are the word
            embedding vectors, stacked in iteration order.
        max_len: Length of every input sequence.
        rich_context: Include the debate BiLSTM output in the pooled vectors.
        lstm_size: Units of each BiLSTM direction; the extra-source dense
            layers have ``lstm_size * 2`` units.
        warrant_lstm_size: Units of the ``AttentionLSTM`` layers.
        dropout: Dropout rate applied before the dense classifier.
        kb_embeddings: Optional 2-D embedding matrix for the ``*_kb`` inputs.
        fn_embeddings: Optional 2-D embedding matrix for the ``*_fn`` inputs.

    Returns:
        The compiled Keras ``Model``. Inputs are ordered warrant0, warrant1,
        reason, claim, debate, then the four ``kb`` inputs (if any), then
        the four ``fn`` inputs (if any).
    """
    # Stack the embedding vectors into one 2-D float32 array.
    embeddings = np.asarray([
        np.array(x, dtype=np.float32)
        for x in word_index_to_embeddings_map.values()
    ])

    # Maximum over axis 1; flagged as mask-aware.
    max_pool_lambda_layer = Lambda(
        lambda x: keras.backend.max(x, axis=1, keepdims=False),
        output_shape=lambda x: (x[0], x[2]))
    max_pool_lambda_layer.supports_masking = True
    # Sum over axis 1; flagged as mask-aware.
    sum_pool_lambda_layer = Lambda(
        lambda x: keras.backend.sum(x, axis=1, keepdims=False),
        output_shape=lambda x: (x[0], x[2]))
    sum_pool_lambda_layer.supports_masking = True

    def sequence_input(layer_name):
        # int32 index sequence of length `max_len`.
        return Input(shape=(max_len, ), dtype='int32', name=layer_name)

    def pooled_extra_vectors(extra_embeddings, suffix):
        # Build the four inputs for one optional embedding source and the
        # pooled, dense-projected vector for each warrant.
        extra_inputs = [
            sequence_input('sequence_layer_{}_input_{}'.format(part, suffix))
            for part in ('warrant0', 'warrant1', 'reason', 'claim')
        ]
        emb_layer = Embedding(extra_embeddings.shape[0],
                              extra_embeddings.shape[1],
                              input_length=max_len,
                              name='{}_emb_layer'.format(suffix),
                              weights=[extra_embeddings],
                              mask_zero=True)
        warrant0, warrant1, reason, claim = [
            emb_layer(tokens) for tokens in extra_inputs
        ]
        # One dense layer shared by both warrants.
        dense = Dense(lstm_size * 2, activation='relu')
        vector_w0 = dense(
            sum_pool_lambda_layer(concatenate([reason, claim, warrant0])))
        vector_w1 = dense(
            sum_pool_lambda_layer(concatenate([reason, claim, warrant1])))
        return extra_inputs, vector_w0, vector_w1

    def join(tensors):
        # Keras 2 merge layers reject a one-element list, which is what the
        # default call (no kb/fn embeddings) produces -- pass a lone tensor
        # through unchanged instead of concatenating it.
        if len(tensors) == 1:
            return tensors[0]
        return concatenate(tensors)

    # Five word-level input layers: warrant0, warrant1, reason, claim, debate.
    sequence_layer_warrant0_input = sequence_input(
        "sequence_layer_warrant0_input")
    sequence_layer_warrant1_input = sequence_input(
        "sequence_layer_warrant1_input")
    sequence_layer_reason_input = sequence_input(
        "sequence_layer_reason_input")
    sequence_layer_claim_input = sequence_input("sequence_layer_claim_input")
    sequence_layer_debate_input = sequence_input(
        "sequence_layer_debate_input")

    # One word Embedding layer shared by all five inputs; index 0 is masked.
    word_emb_layer = Embedding(embeddings.shape[0],
                               embeddings.shape[1],
                               input_length=max_len,
                               name='word_emb',
                               weights=[embeddings],
                               mask_zero=True)
    embedded_layer_warrant0_input = word_emb_layer(
        sequence_layer_warrant0_input)
    embedded_layer_warrant1_input = word_emb_layer(
        sequence_layer_warrant1_input)
    embedded_layer_reason_input = word_emb_layer(sequence_layer_reason_input)
    embedded_layer_claim_input = word_emb_layer(sequence_layer_claim_input)
    embedded_layer_debate_input = word_emb_layer(sequence_layer_debate_input)

    # Optional extra sources: collect their inputs and per-warrant vectors.
    optional_inputs = []
    extra_constants_w0 = []
    extra_constants_w1 = []
    if kb_embeddings is not None:
        kb_inputs, kb_vector_w0, kb_vector_w1 = pooled_extra_vectors(
            kb_embeddings, 'kb')
        optional_inputs.extend(kb_inputs)
        extra_constants_w0.append(kb_vector_w0)
        extra_constants_w1.append(kb_vector_w1)
    if fn_embeddings is not None:
        fn_inputs, fn_vector_w0, fn_vector_w1 = pooled_extra_vectors(
            fn_embeddings, 'fn')
        optional_inputs.extend(fn_inputs)
        extra_constants_w0.append(fn_vector_w0)
        extra_constants_w1.append(fn_vector_w1)

    bidi_lstm_layer_warrant0 = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM-W0')(embedded_layer_warrant0_input)
    bidi_lstm_layer_warrant1 = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM-W1')(embedded_layer_warrant1_input)
    bidi_lstm_layer_reason = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM-Reason')(embedded_layer_reason_input)
    bidi_lstm_layer_claim = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM-Claim')(embedded_layer_claim_input)
    # The debate BiLSTM is always built; it only feeds the pooled vectors
    # when `rich_context` is set.
    bidi_lstm_layer_debate = Bidirectional(
        LSTM(lstm_size, return_sequences=True),
        name='BiDiLSTM-Context')(embedded_layer_debate_input)

    # Two pooled vectors: each warrant attends over reason, claim and the
    # *other* warrant (plus debate when rich_context).
    debate_part = [bidi_lstm_layer_debate] if rich_context else []
    attention_vector_for_w0 = max_pool_lambda_layer(
        concatenate([
            bidi_lstm_layer_reason, bidi_lstm_layer_claim,
            bidi_lstm_layer_warrant1
        ] + debate_part))
    attention_vector_for_w1 = max_pool_lambda_layer(
        concatenate([
            bidi_lstm_layer_reason, bidi_lstm_layer_claim,
            bidi_lstm_layer_warrant0
        ] + debate_part))

    attention_warrant0 = AttentionLSTM(warrant_lstm_size)(
        bidi_lstm_layer_warrant0,
        constants=join([attention_vector_for_w0] + extra_constants_w0))
    attention_warrant1 = AttentionLSTM(warrant_lstm_size)(
        bidi_lstm_layer_warrant1,
        constants=join([attention_vector_for_w1] + extra_constants_w1))

    # Concatenate the element-wise sum of both encodings with each encoding.
    dropout_layer = Dropout(dropout)(concatenate([
        add([attention_warrant0, attention_warrant1]), attention_warrant0,
        attention_warrant1
    ]))

    # One extra ReLU layer followed by the sigmoid output.
    dense1 = Dense(int(warrant_lstm_size / 2),
                   activation='relu')(dropout_layer)
    output_layer = Dense(1, activation='sigmoid')(dense1)

    model_inputs = [
        sequence_layer_warrant0_input, sequence_layer_warrant1_input,
        sequence_layer_reason_input, sequence_layer_claim_input,
        sequence_layer_debate_input
    ] + optional_inputs
    # `outputs=` is the Keras 2 keyword; the legacy `output=` spelling is
    # only accepted through a deprecation shim.
    model = Model(model_inputs, outputs=output_layer)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model
    def get_lstm_cnn_model(self,
                           embedding_file,
                           vocab_size,
                           margin=0.05,
                           hidden_dim=200,
                           enc_timesteps=150,
                           dec_timesteps=150):
        """
        Build the LSTM + CNN question/answer training and prediction models.

        A shared scoring sub-model embeds a question and an answer with the
        same embedding layer, runs both through the same pair of LSTMs and
        the same bank of 1-D convolutions, max-pools over the time axis and
        combines the two pooled vectors with the similarity function returned
        by ``self.get_cosine_similarity()``.

        Args:
            embedding_file (str): file loaded with ``np.load``; the loaded
                array initialises the embedding layer and its second
                dimension is used as the embedding size
            vocab_size (integer): size of the vocabulary (``input_dim`` of
                the embedding layer)
            margin (float): margin of the ranking loss
                ``relu(margin - good_similarity + bad_similarity)``
            hidden_dim (integer): number of units of each LSTM
            enc_timesteps (integer): length of the question input sequence
            dec_timesteps (integer): length of the answer input sequences

        Returns:
            training_model: model over ``[question, answer_good,
                answer_bad]`` whose output is the margin ranking loss; it is
                compiled with a loss that simply returns ``y_pred``, so the
                targets given at fit time do not enter the loss
            prediction_model: model over ``[question, answer_good]`` whose
                output is the similarity of the pair
        """

        weights = np.load(embedding_file)

        # initialize the question and answer shapes and datatype;
        # `answer` is the placeholder of the shared scoring sub-model, while
        # `answer_good` / `answer_bad` feed the outer models. Every input
        # gets its own layer name (the placeholder previously reused
        # 'answer_good_base').
        question = Input(shape=(enc_timesteps, ),
                         dtype='int32',
                         name='question_base')
        answer = Input(shape=(dec_timesteps, ),
                       dtype='int32',
                       name='answer_base')
        answer_good = Input(shape=(dec_timesteps, ),
                            dtype='int32',
                            name='answer_good_base')
        answer_bad = Input(shape=(dec_timesteps, ),
                           dtype='int32',
                           name='answer_bad_base')

        # embed the question and answers with one shared embedding layer
        qa_embedding = Embedding(input_dim=vocab_size,
                                 output_dim=weights.shape[1],
                                 weights=[weights])
        question_embedding = qa_embedding(question)
        answer_embedding = qa_embedding(answer)

        # pass the embeddings through two LSTMs shared by question and answer
        # NOTE(review): both LSTMs are constructed identically (neither sets
        # go_backwards), so this is two forward LSTMs rather than a true
        # bidirectional pair - confirm whether that is intended.
        f_rnn = LSTM(hidden_dim, return_sequences=True)
        b_rnn = LSTM(hidden_dim, return_sequences=True)
        qf_rnn = f_rnn(question_embedding)
        qb_rnn = b_rnn(question_embedding)
        question_rnn = merge([qf_rnn, qb_rnn], mode='concat', concat_axis=-1)
        af_rnn = f_rnn(answer_embedding)
        ab_rnn = b_rnn(answer_embedding)
        answer_rnn = merge([af_rnn, ab_rnn], mode='concat', concat_axis=-1)

        # pass the LSTM outputs through a bank of convolutions with
        # different filter lengths, shared by question and answer
        cnns = [
            Convolution1D(filter_length=filter_length,
                          nb_filter=500,
                          activation='tanh',
                          border_mode='same')
            for filter_length in [1, 2, 3, 5]
        ]
        question_cnn = merge([cnn(question_rnn) for cnn in cnns],
                             mode='concat')
        answer_cnn = merge([cnn(answer_rnn) for cnn in cnns], mode='concat')

        # apply max pooling over axis 1, dropping that axis
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]))
        maxpool.supports_masking = True
        question_pool = maxpool(question_cnn)
        answer_pool = maxpool(answer_cnn)

        # get the similarity score of the pooled question/answer vectors
        similarity = self.get_cosine_similarity()
        merged_model = merge([question_pool, answer_pool],
                             mode=similarity,
                             output_shape=lambda _: (None, 1))
        lstm_convolution_model = Model(inputs=[question, answer],
                                       outputs=merged_model,
                                       name='lstm_convolution_model')

        # reuse the same scoring sub-model for the good and the bad answer
        good_similarity = lstm_convolution_model([question, answer_good])
        bad_similarity = lstm_convolution_model([question, answer_bad])

        # compute the loss: positive only while the bad answer scores within
        # `margin` of (or above) the good answer
        loss = merge([good_similarity, bad_similarity],
                     mode=lambda x: K.relu(margin - x[0] + x[1]),
                     output_shape=lambda x: x[0])

        # return the training and prediction model; both are compiled with a
        # pass-through loss because the models already output the quantity
        # of interest
        prediction_model = Model(inputs=[question, answer_good],
                                 outputs=good_similarity,
                                 name='prediction_model')
        prediction_model.compile(loss=lambda y_true, y_pred: y_pred,
                                 optimizer="rmsprop")
        training_model = Model(inputs=[question, answer_good, answer_bad],
                               outputs=loss,
                               name='training_model')
        training_model.compile(loss=lambda y_true, y_pred: y_pred,
                               optimizer="rmsprop")

        return training_model, prediction_model