예제 #1
0
def HAN(embed_mat, MAX_LEN, MAX_SENTS, num_cls, gru_sz1=100, gru_sz2=100):
    """Build and compile a two-level (word -> sentence) attention classifier.

    Args:
        embed_mat: Initial embedding matrix; ``shape[0]`` and ``shape[1]``
            are used as the Embedding input and output dimensions.
        MAX_LEN: Number of token ids per sentence.
        MAX_SENTS: Number of sentences per input sample.
        num_cls: Number of units of the softmax output layer.
        gru_sz1: Units of the word-level LSTM (wrapped in Bidirectional).
        gru_sz2: Units of the sentence-level LSTM (wrapped in Bidirectional).

    Returns:
        The compiled model mapping a ``(MAX_SENTS, MAX_LEN)`` int32 input to
        ``num_cls`` softmax outputs.
    """
    n_rows = embed_mat.shape[0]
    n_cols = embed_mat.shape[1]
    word_embedding = Embedding(n_rows,
                               n_cols,
                               weights=[embed_mat],
                               input_length=MAX_LEN,
                               mask_zero=True,
                               trainable=True)

    # Word level: one sentence of token ids -> one attention-pooled vector.
    tokens_in = Input(shape=(MAX_LEN,), dtype='int32')
    token_vectors = word_embedding(tokens_in)
    token_states = Bidirectional(
        LSTM(gru_sz1, return_sequences=True))(token_vectors)
    sentence_vector = AttLayer(name='att_1')(token_states)
    sentence_model = Model(tokens_in, sentence_vector)

    # Sentence level: apply the word-level model to every sentence, then
    # encode and attention-pool the resulting sequence of sentence vectors.
    document_in = Input(shape=(MAX_SENTS, MAX_LEN), dtype='int32')
    sentence_vectors = TimeDistributed(sentence_model)(document_in)
    sentence_vectors = Masking(mask_value=0.)(sentence_vectors)
    sentence_states = Bidirectional(
        LSTM(gru_sz2, return_sequences=True))(sentence_vectors)
    document_vector = AttLayer(name='att_2')(sentence_states)

    class_probs = Dense(num_cls, activation='softmax',
                        name='twt_softmax')(document_vector)
    classifier = Model(document_in, class_probs)
    optimizer = Adagrad(lr=0.1)  # , clipvalue=5.0
    classifier.compile(loss='categorical_crossentropy',
                       optimizer=optimizer,
                       metrics=['acc'])
    return classifier
예제 #2
0
 def __init__(self,
              C=4,
              V=40000,
              MAX_SENT=20,
              MAX_LEN=100,
              name='hanmodel.h5'):
     """Build and compile the hierarchical attention model.

     Args:
         C: Number of units of the softmax output layer.
         V: Input dimension (vocabulary size) of the Embedding layer.
         MAX_SENT: Number of sentences per input sample.
         MAX_LEN: Number of token ids per sentence.
         name: Stored on ``self.name``.

     The compiled model is stored on ``self.model``.
     """
     self.name = name

     # Word level: token ids of one sentence -> attention-pooled vector.
     token_ids = Input(shape=(MAX_LEN, ), dtype='int32', name='input')
     # RNN layers support masking, so mask_zero can be enabled here.
     token_vectors = Embedding(V, 32, mask_zero=True)(token_ids)
     token_states = Bidirectional(GRU(64,
                                      return_sequences=True))(token_vectors)
     sentence_vector = AttLayer()(token_states)
     sentence_encoder = Model(token_ids, sentence_vector)

     # Sentence level: run the sentence encoder over every sentence.
     document_ids = Input(shape=(MAX_SENT, MAX_LEN),
                          dtype='int32',
                          name='sent_input')
     sentence_vectors = TimeDistributed(sentence_encoder)(document_ids)
     sentence_vectors = Masking()(sentence_vectors)
     sentence_states = Bidirectional(GRU(
         64, return_sequences=True))(sentence_vectors)
     document_vector = AttLayer()(sentence_states)

     hidden = Dense(128, activation='relu')(document_vector)
     # hidden = BatchNormalization()(hidden)
     class_probs = Dense(C, activation='softmax')(hidden)

     classifier = Model(document_ids, class_probs)
     classifier.compile('adam', 'categorical_crossentropy', metrics=['acc'])
     self.model = classifier
예제 #3
0
    def _init_layers(self):
        """Create the sentence-level and document-level layers on ``self``.

        The layers are only instantiated here; nothing is connected yet.
        """

        def make_encoder_stack(units):
            # One recurrent layer per hidden layer, optionally wrapped in
            # Bidirectional depending on self.bidirectional.
            stack = []
            for _ in range(self.hidden_layers):
                if self.bidirectional:
                    stack.append(Bidirectional(
                        self.recurrent_cell(units=units,
                                            return_sequences=True)))
                else:
                    stack.append(
                        self.recurrent_cell(units=units,
                                            return_sequences=True))
            return stack

        # Sentence-level model
        self.sent_input = Input(shape=(None,))
        self.sent_embedding = Embedding(input_dim=self.vocab_size + 1,
                                        output_dim=self.embedding_dim)
        self.sent_recurrent_cells = make_encoder_stack(self.hidden_dim)
        self.sent_att_layer = AttLayer(attention_dim=self.hidden_dim)

        # Document-level model
        self.doc_input = Input(shape=(None, None))
        self.doc_recurrent_cells = make_encoder_stack(self.top_hidden_dim)
        self.doc_att_layer = AttLayer(attention_dim=self.top_hidden_dim)
        self.out_layer = Dense(units=1, activation='sigmoid')
예제 #4
0
    def feed_forward(self, x, train_top):
        """Apply a densely connected stack of six Conv1D stages to ``x``.

        ``x`` is first projected by a Dense layer. Each stage applies ReLU and
        a width-3 'same'-padded Conv1D to the current tensor, then the current
        tensor becomes the concatenation of the original input and all stage
        outputs so far.

        Args:
            x: Input tensor.
            train_top: Passed as ``trainable`` to every Conv1D layer.

        Returns:
            The concatenation of an AttLayer summary of the original input and
            of each stage output, plus a global max-pool of the final
            concatenated tensor.
        """
        n_filters = 128
        n_stages = 6  # 6-layer network

        collected = [x]
        x = Dense(n_filters)(x)
        for _ in range(n_stages):
            stage_out = Activation('relu')(x)
            stage_out = Conv1D(n_filters, 3, padding='same',
                               trainable=train_top)(stage_out)
            collected.append(stage_out)
            # Every later stage sees the input and all previous stage outputs.
            x = Concatenate()(collected)

        summaries = [AttLayer()(feature) for feature in collected]
        summaries.append(GlobalMaxPool1D()(x))
        return Concatenate()(summaries)
예제 #5
0
 def feed_forward(self, x, train_top):
     """Encode ``x`` with a bidirectional GRU and pool it with AttLayer.

     Args:
         x: Input tensor.
         train_top: Passed as ``trainable`` to both the GRU and its
             Bidirectional wrapper.

     Returns:
         The output of ``AttLayer`` applied to the GRU output sequence.
     """
     recurrent = GRU(128, return_sequences=True, trainable=train_top)
     encoder = Bidirectional(recurrent, trainable=train_top)
     states = encoder(x)
     return AttLayer()(states)
예제 #6
0
    def build_ABLSTM(self, paramsObj, weight=None):
        """Build and compile an attention-based bidirectional GRU classifier.

        Args:
            paramsObj: Object providing ``use_word_embedding`` and
                ``train_embedding``.
            weight: Optional pre-trained embedding matrix. When it is
                ``None``/empty, or ``paramsObj.use_word_embedding`` is False,
                the Embedding layer is created without initial weights.

        Returns:
            The compiled ``Sequential`` model with ``config.ClassNum`` softmax
            outputs.
        """
        model = Sequential()

        # Embeddings. ``len(...) == 0`` (not truthiness) is used so that a
        # numpy matrix can be passed; ``== False`` is kept so non-bool flag
        # values behave exactly as before.
        if (weight is None or len(weight) == 0
                or paramsObj.use_word_embedding == False):
            embedding = Embedding(config.MAX_NUM_WORDS,
                                  config.EMBEDDING_DIM,
                                  input_length=config.MAX_SEQ_LENGTH)
        else:
            embedding = Embedding(config.MAX_NUM_WORDS,
                                  config.EMBEDDING_DIM,
                                  weights=[weight],
                                  input_length=config.MAX_SEQ_LENGTH,
                                  trainable=paramsObj.train_embedding)
        model.add(embedding)

        model.add(
            Bidirectional(
                GRU(128,
                    dropout=0.2,
                    recurrent_dropout=0.1,
                    return_sequences=True)))
        # TODO: add time steps again

        model.add(AttLayer())

        model.add(Dense(config.ClassNum, activation='softmax'))
        # A softmax head pairs with categorical cross-entropy, as in the other
        # builders of this file. For two classes this equals the previous
        # binary_crossentropy value; for more classes binary_crossentropy
        # (and the accuracy metric derived from it) would be misleading.
        model.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy'])

        return model
예제 #7
0
    def build_HAN1(self, paramsObj, weight=None):
        """Build and compile a hierarchical attention network.

        A sentence encoder (embedding -> bidirectional GRU -> per-step Dense
        -> AttLayer) is applied to each sentence of a dialog through
        ``TimeDistributed``; the same encoder pattern is then applied to the
        sequence of sentence vectors.

        Args:
            paramsObj: Object providing ``use_word_embedding`` and
                ``train_embedding``.
            weight: Optional pre-trained embedding matrix. When it is
                ``None``/empty, or ``paramsObj.use_word_embedding`` is False,
                the Embedding layer is created without initial weights.

        Returns:
            The compiled model with a 2-unit softmax output.
        """
        # Embeddings. ``len(...) == 0`` (not truthiness) is used so that a
        # numpy matrix can be passed; ``== False`` is kept so non-bool flag
        # values behave exactly as before.
        embedding_kwargs = {'input_length': config.MAX_SEQ_LENGTH}
        use_pretrained = not (weight is None or len(weight) == 0
                              or paramsObj.use_word_embedding == False)
        if use_pretrained:
            embedding_kwargs['weights'] = [weight]
            embedding_kwargs['trainable'] = paramsObj.train_embedding
        embedding_layer = Embedding(config.MAX_NUM_WORDS,
                                    config.EMBEDDING_DIM,
                                    **embedding_kwargs)

        # Sentence model: token ids of one sentence -> sentence vector.
        sentence_input = Input(shape=(config.MAX_SEQ_LENGTH, ),
                               dtype='int32',
                               name='sentence_input')
        embedding_sequences = embedding_layer(sentence_input)
        l_lstm = Bidirectional(GRU(100,
                                   return_sequences=True))(embedding_sequences)
        l_dense = TimeDistributed(Dense(200))(l_lstm)
        l_att = AttLayer()(l_dense)
        sentEncoder = Model(sentence_input, l_att)

        # Dialog model: encode every sentence, then the sentence sequence.
        dialog_input = Input(shape=(config.MAX_SENTS, config.MAX_SEQ_LENGTH),
                             dtype='int32')
        dialog_encoder = TimeDistributed(sentEncoder)(dialog_input)
        l_lstm_sent = Bidirectional(GRU(100,
                                        return_sequences=True))(dialog_encoder)
        l_dense_sent = TimeDistributed(Dense(200))(l_lstm_sent)
        l_att_sent = AttLayer()(l_dense_sent)

        # Output layer.
        # NOTE(review): the class count is hard-coded to 2 here - confirm it
        # should not follow config.ClassNum.
        preds = Dense(2, activation='softmax')(l_att_sent)
        model = Model(dialog_input, preds)

        model.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop',
                      metrics=['acc'])

        return model
    def build_functional_model(self):
        """Assemble, compile and summarise the hierarchical encoder-decoder.

        A word-level encoder (embedding -> dropout -> stacked recurrent
        layers -> AttLayer) is applied to every utterance of
        ``conversation_input`` through ``TimeDistributed``; an utterance-level
        recurrent layer then runs over the resulting sequence. The decoder
        embeds ``decoder_input`` with the same embedding layer, and
        ``self._dot_attention_block`` combines the context and decoder outputs
        before a linear projection to ``self.vocab_size`` logits.

        The compiled model is stored on ``self.model``; nothing is returned.
        """
        word_input = Input(shape=(None, ), name='word_input')
        decoder_input = Input(shape=(None, ), name='decoder_input')
        conversation_input = Input(shape=(None, None),
                                   name='conversation_input')

        # Word-level Encoder
        embed_layer = Embedding(input_dim=self.vocab_size,
                                output_dim=self.embedding_dim,
                                mask_zero=True,
                                name='embedding')
        word_encoder_layers = []
        for _ in range(self.num_encoder_layers):
            if self.encoder_type == 'bidi':
                layer = Bidirectional(
                    self.rec_cell(units=self.encoder_dim,
                                  return_sequences=True,
                                  return_state=False))
            else:
                layer = self.rec_cell(units=self.encoder_dim,
                                      return_sequences=True,
                                      return_state=False)
            word_encoder_layers.append(layer)
        word_att_layer = AttLayer(attention_dim=self.encoder_dim)
        # Utterance-level Encoder
        utt_encoder_layer = self.rec_cell(units=self.encoder_dim,
                                          return_sequences=True,
                                          return_state=True,
                                          name='utterance_rnn')

        # Build word-level encoder. Starting from the embedded input keeps
        # h_out defined even when no encoder layers are configured.
        word_embedded = embed_layer(word_input)
        h_out = Dropout(0.2)(word_embedded)
        for layer in word_encoder_layers:
            h_out = layer(h_out)
        h_att_word = word_att_layer(h_out)

        # `outputs=` is the supported keyword; `output=` is the legacy form.
        word_encoder = Model(inputs=word_input, outputs=h_att_word)

        # Build context-level encoder. With return_state=True the layer
        # returns the output sequence followed by its state tensor(s); only
        # the sequence is used, so index it rather than unpacking a fixed
        # number of states.
        context_encoder = TimeDistributed(word_encoder)(conversation_input)
        context_h_out = utt_encoder_layer(context_encoder)[0]

        # Decoder
        decoder_embed = embed_layer(decoder_input)
        decoder_embed = Dropout(0.2)(decoder_embed)
        decoder = self.rec_cell(units=self.decoder_dim, return_sequences=True)
        decoder_output = decoder(decoder_embed)
        decoder_combined_context = Lambda(
            self._dot_attention_block)([context_h_out, decoder_output])
        logits = Dense(units=self.vocab_size,
                       activation='linear',
                       name='logits')(decoder_combined_context)

        self.model = Model(inputs=[conversation_input, decoder_input],
                           outputs=logits)
        self.model.compile(optimizer=self.optimizer, loss=self.sparse_loss)
        self.model.summary()
    def __init__(self,
                 C=4,
                 V=40000,
                 MAX_LEN=600,
                 embed_matrix=None,
                 name='sscharmodel.h5',
                 PE=False,
                 train_embed=False):
        """Build and compile a multi-kernel Conv1D classifier.

        Args:
            C: Number of units of the softmax output layer.
            V: Embedding input dimension used when ``embed_matrix`` is None.
            MAX_LEN: Length of the int32 input sequence.
            embed_matrix: Optional initial embedding matrix; its shape gives
                the Embedding dimensions.
            name: Stored on ``self.name``.
            PE: When true, a second int32 input ('PE_in') is embedded and
                concatenated to the token embeddings.
            train_embed: ``trainable`` flag of the Embedding built from
                ``embed_matrix``.

        The compiled model is stored on ``self.model``.
        """
        self.MAX_LEN = MAX_LEN
        self.PE = PE
        self.name = name

        token_ids = Input(shape=(MAX_LEN, ), dtype='int32')
        model_inputs = token_ids

        # CNN layers do not support masking, so mask_zero=True is not used.
        if embed_matrix is not None:
            pretrained = Embedding(embed_matrix.shape[0],
                                   embed_matrix.shape[1],
                                   weights=[embed_matrix],
                                   trainable=train_embed)
            features = pretrained(token_ids)
        else:
            features = Embedding(V, 32)(token_ids)

        if self.PE:
            position_ids = Input(shape=(MAX_LEN, ),
                                 dtype='int32',
                                 name='PE_in')
            position_vectors = Embedding(self.MAX_LEN, 32,
                                         name='PE')(position_ids)
            features = Concatenate()([features, position_vectors])
            model_inputs = [token_ids, position_ids]

        # One Conv1D branch per kernel size; each branch is summarised by
        # max pooling, average pooling and AttLayer.
        branches = []
        for kernel_size in (2, 3, 4, 5):
            conv = Conv1D(128, kernel_size, activation='relu',
                          padding='same')(features)
            max_pooled = GlobalMaxPool1D()(conv)
            avg_pooled = GlobalAveragePooling1D()(conv)
            attended = AttLayer()(conv)
            branches.append(Concatenate()([max_pooled, avg_pooled, attended]))
        merged = Concatenate()(branches)
        # merged = BatchNormalization()(merged)
        hidden = Dense(128, activation='relu')(merged)
        # hidden = BatchNormalization()(hidden)
        class_probs = Dense(C, activation='softmax')(hidden)

        classifier = Model(model_inputs, class_probs)
        optimizer = Adagrad(lr=0.005)
        classifier.compile(optimizer,
                           'categorical_crossentropy',
                           metrics=['acc'])
        self.model = classifier
예제 #10
0
def RNNAtt(embed_mat, MAX_LEN, num_cls, rnn_sz=100):
    """Build and compile a bidirectional-LSTM classifier with AttLayer pooling.

    Args:
        embed_mat: Initial embedding matrix; the Embedding layer built from it
            is not trainable.
        MAX_LEN: Length of the int32 input sequence.
        num_cls: Number of units of the softmax output layer.
        rnn_sz: Units of the LSTM wrapped in Bidirectional.

    Returns:
        The compiled model.
    """
    frozen_embedding = Embedding(embed_mat.shape[0],
                                 embed_mat.shape[1],
                                 weights=[embed_mat],
                                 input_length=MAX_LEN,
                                 trainable=False)
    token_ids = Input(shape=(MAX_LEN, ), dtype='int32')
    token_vectors = frozen_embedding(token_ids)
    token_states = Bidirectional(
        LSTM(rnn_sz, return_sequences=True))(token_vectors)
    pooled = AttLayer()(token_states)
    class_probs = Dense(num_cls, activation='softmax')(pooled)

    classifier = Model(token_ids, class_probs)
    classifier.compile(loss='categorical_crossentropy',
                       optimizer='adagrad',
                       metrics=['acc'])
    return classifier
예제 #11
0
    def build_model(self):
        """Wire the layers created by ``_init_layers`` into ``self.model``.

        Pipeline: embedding -> optional dropout -> each layer of
        ``self.recurrent_cells`` in order -> a newly created ``AttLayer`` ->
        ``self.out_layer``. The model summary is printed. The model is not
        compiled here.
        """
        self._init_layers()
        in_layer = Input(shape=(None,))

        h_out = self.embedding_layer(in_layer)
        if self.embedding_dropout_rate > 0.:
            h_out = Dropout(rate=self.embedding_dropout_rate)(h_out)

        # Starting from the embedded input keeps h_out defined even when
        # self.recurrent_cells is empty (previously an unbound-name error).
        for layer in self.recurrent_cells:
            h_out = layer(h_out)

        # x, attn = self.attention_layer(h_out)
        # x = Dropout(rate=0.5)(x)
        # x = Lambda(lambda x: K.max(x, axis=1))(h_out)
        x = AttLayer(attention_dim=self.hidden_dim)(h_out)

        y_out = self.out_layer(x)
        model = Model(inputs=in_layer, outputs=y_out)
        model.summary()
        self.model = model
예제 #12
0
 def feed_input(input_x, sub_name):
     """Encode ``input_x`` with two embeddings and attention-pooled Conv1Ds.

     Two Embedding layers are initialised from ``embed_mat``: one frozen
     ('embed_s') and one trainable ('embed_d'). Their outputs are
     concatenated and fed to Conv1D branches with kernel sizes 2-5, each
     pooled by ``AttLayer``.

     Args:
         input_x: Input tensor of token ids.
         sub_name: Prefix for the two Embedding layer names.

     Returns:
         The batch-normalised concatenation of the pooled branches.
     """
     frozen = Embedding(input_dim=num_word,
                        output_dim=embed_dim,
                        name=sub_name + 'embed_s',
                        weights=[embed_mat],
                        trainable=False)(input_x)
     tuned = Embedding(input_dim=num_word,
                       output_dim=embed_dim,
                       name=sub_name + 'embed_d',
                       weights=[embed_mat],
                       trainable=True)(input_x)
     embedded = Concatenate()([frozen, tuned])

     # CNN model
     pooled_branches = []
     for kernel_size in (2, 3, 4, 5):
         conv = Conv1D(conv_dim, kernel_size, activation='relu')(embedded)
         # conv = GlobalMaxPool1D()(conv)
         pooled_branches.append(AttLayer()(conv))

     merged = Concatenate()(pooled_branches)
     return BatchNormalization()(merged)
예제 #13
0
    def __init__(self,
                 sequence_length,
                 num_classes,
                 vocab_size,
                 embedding_size,
                 hidden_size,
                 l2_reg_lambda=0.0):
        """Build the discriminator graph: encoder, output layer, loss, train op.

        Args:
            sequence_length: Second dimension of the ``input_x`` placeholder.
            num_classes: Second dimension of ``input_y`` and of the scores.
            vocab_size: Input dimension of the Embedding layer.
            embedding_size: Output dimension of the Embedding layer.
            hidden_size: Units of the GRU wrapped in Bidirectional.
            l2_reg_lambda: Weight of the L2 penalty on the output W and b.

        Sets ``input_x``, ``input_y``, ``dropout_keep_prob``, ``encoded_f``,
        ``scores``, ``ypred_for_auc``, ``predictions``, ``loss``, ``params``
        and ``train_op`` on ``self``.
        """
        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        # NOTE(review): this placeholder is not used anywhere else in this
        # method.
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")
        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        with tf.variable_scope('discriminator'):
            # Encoder: embedding, bidirectional GRU and attention are all
            # built inside this CPU device scope.
            with tf.device('/cpu:0'), tf.name_scope("embedding"):
                # Wrap the TF placeholder so the Keras layers can consume it.
                input = Input(tensor=self.input_x)
                embedding_layer = Embedding(vocab_size,
                                            embedding_size,
                                            input_length=sequence_length,
                                            mask_zero=True,
                                            trainable=True)
                lstm = Bidirectional(GRU(hidden_size, return_sequences=True))
                att = AttLayer(bias=True, name='att_1')
                encoded = embedding_layer(input)
                encoded = lstm(encoded)
                self.encoded_f = att(encoded)
            # Final (unnormalized) scores and predictions
            with tf.name_scope("output"):
                # assumes the attention output has hidden_size * 2 features
                # (forward + backward GRU) - TODO confirm against AttLayer
                W = tf.Variable(tf.truncated_normal(
                    [hidden_size * 2, num_classes], stddev=0.1),
                                name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_classes]),
                                name="b")
                l2_loss += tf.nn.l2_loss(W)
                l2_loss += tf.nn.l2_loss(b)
                self.scores = tf.nn.xw_plus_b(self.encoded_f,
                                              W,
                                              b,
                                              name="scores")
                self.ypred_for_auc = tf.nn.softmax(self.scores)
                self.predictions = tf.argmax(self.scores,
                                             1,
                                             name="predictions")

            # Mean softmax cross-entropy loss plus the weighted L2 penalty
            with tf.name_scope("loss"):
                losses = tf.nn.softmax_cross_entropy_with_logits(
                    logits=self.scores, labels=self.input_y)
                self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Only variables whose name contains 'discriminator' are trained.
        self.params = [
            param for param in tf.trainable_variables()
            if 'discriminator' in param.name
        ]
        d_optimizer = tf.train.AdamOptimizer(1e-4)
        # NOTE(review): aggregation_method=2 is a bare integer; presumably
        # tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N - confirm.
        grads_and_vars = d_optimizer.compute_gradients(self.loss,
                                                       self.params,
                                                       aggregation_method=2)
        self.train_op = d_optimizer.apply_gradients(grads_and_vars)
예제 #14
0
 def _init_layers(self):
     """Create the embedding, recurrent, attention and output layers on ``self``.

     The layers are only instantiated here; nothing is connected yet.
     """
     self.embedding_layer = Embedding(input_dim=self.vocab_size + 1,
                                      output_dim=self.embedding_dim,
                                      mask_zero=False)

     # One recurrent layer per hidden layer, optionally bidirectional.
     encoders = []
     for _ in range(self.hidden_layers):
         if self.bidirectional:
             encoders.append(Bidirectional(
                 self.recurrent_cell(units=self.hidden_dim,
                                     return_sequences=True)))
         else:
             encoders.append(
                 self.recurrent_cell(units=self.hidden_dim,
                                     return_sequences=True))
     self.recurrent_cells = encoders

     self.attention_layer = AttLayer(attention_dim=self.hidden_dim)
     self.out_layer = Dense(units=1, activation='sigmoid')
# Script fragment: builds the word-level sentence encoder and applies it to
# every sentence of a review. `embedded_sequences`, `sentence_input`,
# `review_input` and `gru_output_size` are defined before this fragment.
print('embedded_sequences SHAPE')
print(embedded_sequences.get_shape())

# Recurrent encoder: despite the `l_gru` name this is a MultiplicativeLSTM
# (not a bidirectional GRU), followed by a per-timestep Dense projection.
l_gru = MultiplicativeLSTM(gru_output_size, return_sequences=True)(embedded_sequences)
l_dense = TimeDistributed(Dense(units=gru_output_size))(l_gru) 

print('l_gru SHAPE')
print(l_gru.get_shape())
print('l_dense SHAPE')
print(l_dense.get_shape())


# Word-Level Attention Layer
l_att = AttLayer()(l_dense)

print('l_att SHAPE')
print(l_att.get_shape())

# The sentence encoder is compiled on its own (MSE loss, no metrics); below
# it is reused as a layer inside the review-level graph.
sentEncoder = Model(sentence_input, l_att)
sentEncoder.compile(
    optimizer=Adam(0.0001),
    loss='mse',
    metrics={},
)

# Apply the sentence encoder to each sentence of the review input.
review_encoder = TimeDistributed(sentEncoder)(review_input)

#sentEncoder.summary()
# NOTE(review): only the label is printed here; no shape is printed after it
# within this fragment.
print('l_att SHAPE')
예제 #16
0
# Script fragment: `reshape`, `fasttext`, `conv1`, `review_embedded`,
# `aux_input`, `age_input`, `dep_input` and the size/initialiser names used
# below are defined before this fragment.

# Convolution branch with kernel size 4 over `reshape`, max-pooled and flattened.
conv2 = Conv1D(embedding_dims, kernel_size=4, activation='tanh')(reshape)
conv2 = MaxPool1D(int((maxsents*maxlen)/5))(conv2)
conv2 = Flatten()(conv2)

# Same branch with kernel size 8.
conv3 = Conv1D(embedding_dims, kernel_size=8, activation='tanh')(reshape)
conv3 = MaxPool1D(int((maxsents*maxlen)/5))(conv3)
conv3 = Flatten()(conv3)

# Merge the previous `fasttext` tensor with the three conv branches; note that
# `fasttext` is rebound to the Dense projection of that merge.
concatenated_tensor = keras.layers.Concatenate(axis=1)([fasttext , conv1 , conv2 , conv3])
fasttext = Dense(units=embedding_dims, activation='tanh')(concatenated_tensor)

# Bidirectional GRU (word level, applied per sentence via TimeDistributed)
l_gru_sent = TimeDistributed(Bidirectional(GRU(gru_output_size, return_sequences=True)))(review_embedded)
# Repeat the document-level `fasttext` vector for every word position and
# append it to the GRU outputs.
# NOTE(review): the Reshape to (maxsents, maxlen, gru_output_size) presumably
# requires embedding_dims == gru_output_size - confirm.
l_gru_sent = keras.layers.Concatenate()( [ l_gru_sent , Reshape((maxsents,maxlen,gru_output_size))( keras.layers.RepeatVector(maxsents*maxlen)(fasttext) ) ] )
l_dense_sent = TimeDistributed(TimeDistributed(Dense(units=gru_output_size)))(l_gru_sent)
# Attention over the words of each sentence.
l_att_sent = TimeDistributed(AttLayer())(l_dense_sent)

# Bidirectional GRU (sentence level)
l_gru_review = Bidirectional(GRU(gru_output_size, return_sequences=True))(l_att_sent)
# Append the `fasttext` vector to every sentence position.
l_gru_review = keras.layers.Concatenate()( [ l_gru_review , keras.layers.RepeatVector(maxsents)(fasttext) ] )
l_dense_review = TimeDistributed(Dense(units=gru_output_size))(l_gru_review)
# Attention over the sentences of the review.
postp = AttLayer()(l_dense_review)

# Memory Mechanism: Dense layer initialised from `init_m_aux` (transposed)
# with a zero bias.
# NOTE(review): the bias has length gru_output_size + embedding_dims, so
# `final_output` presumably equals that sum - confirm.
aux_mem = Dense(units=(final_output), activation='tanh', weights=(init_m_aux.transpose(),np.zeros(gru_output_size+embedding_dims)), name='memory')(aux_input)
postp_aux = keras.layers.Concatenate( axis = 1 )( [ postp , fasttext , aux_mem , age_input , dep_input] )
postp = Dropout(0.05)(postp_aux)
postp = Dense(units=(final_output))(postp)

# Softmax/Sigmoid Output Layer (softmax here), initialised from
# `init_m_full` / `bias_full`.
preds = Dense(units=y_train.shape[1], activation='softmax', weights=[init_m_full, bias_full], name='main')(postp)
예제 #17
0
    def build_ABCNN(self, paramsObj, weight=None):
        """Build and compile an attention-based CNN text classifier.

        One Conv1D branch is created per entry of ``paramsObj.filter_size``.
        With the current local settings (attention scope 'outer', feature
        mode 'atten') the raw branch outputs are concatenated along axis 1,
        projected per time step and pooled by a single ``AttLayer``.

        Args:
            paramsObj: Object providing ``use_word_embedding``,
                ``train_embedding``, ``filter_size``, ``pool_size`` and
                ``num_filter``.
            weight: Optional pre-trained embedding matrix. When it is
                ``None``/empty, or ``paramsObj.use_word_embedding`` is False,
                the Embedding layer is created without initial weights.

        Returns:
            The compiled model with ``config.ClassNum`` softmax outputs.
        """
        # Embeddings. ``len(...) == 0`` (not truthiness) is used so that a
        # numpy matrix can be passed; ``== False`` is kept so non-bool flag
        # values behave exactly as before.
        embedding_kwargs = {'input_length': config.MAX_SEQ_LENGTH}
        use_pretrained = not (weight is None or len(weight) == 0
                              or paramsObj.use_word_embedding == False)
        if use_pretrained:
            embedding_kwargs['weights'] = [weight]
            embedding_kwargs['trainable'] = paramsObj.train_embedding
        embedding_layer = Embedding(config.MAX_NUM_WORDS,
                                    config.EMBEDDING_DIM,
                                    **embedding_kwargs)

        # Create Model
        main_input = Input(shape=(config.MAX_SEQ_LENGTH, ),
                           dtype='int32',
                           name='main_input')
        embedding_sequences = embedding_layer(main_input)

        # Local switches. 'inner' attends inside each conv branch, anything
        # else attends once after the branches are merged; feature_mode is
        # 'atten' or 'max'.
        attention_scope = 'outer'
        feature_mode = 'atten'
        padding = 'valid' if attention_scope == 'inner' else 'same'

        conv_att_features = []
        # pool_size and num_filter are zipped only to keep the iteration
        # count unchanged; a fixed filter count is used for every branch.
        nb_filter = 10
        for filter_length, _pool_size, _num_filter in zip(
                paramsObj.filter_size, paramsObj.pool_size,
                paramsObj.num_filter):
            conv_out = Conv1D(filters=nb_filter,
                              kernel_size=filter_length,
                              padding=padding,
                              activation='relu',
                              name='convLayer' +
                              str(filter_length))(embedding_sequences)

            if feature_mode == 'atten' and attention_scope == 'inner':
                # Per-branch attention.
                att_inpt = TimeDistributed(Dense(nb_filter))(conv_out)
                att_out = AttLayer(name='AttLayer' +
                                   str(filter_length))(att_inpt)
                conv_att_features.append(att_out)
            elif feature_mode == 'max':
                out = MaxPooling1D(name='maxPooling' + str(filter_length),
                                   pool_size=(config.MAX_SEQ_LENGTH -
                                              filter_length + 1))(conv_out)
                conv_att_features.append(out)
            else:
                conv_att_features.append(conv_out)

        # Decide on the number of features actually built (zip may stop
        # before filter_size is exhausted), not on len(filter_size).
        if len(conv_att_features) > 1:
            X = concatenate(conv_att_features, axis=1)
        else:
            X = conv_att_features[0]

        if feature_mode == 'max':
            X = Flatten()(X)
        if attention_scope == 'outer':
            X = TimeDistributed(Dense(len(paramsObj.filter_size) * nb_filter),
                                name='DenseTimeDistributed')(X)
            X = AttLayer(name='AttLayer')(X)

        # NOTE(review): 0.9 is an unusually high dropout rate - confirm it is
        # intended.
        X = Dropout(0.9)(X)

        # dense hidden layer
        hidden_dims = 100
        x = Dense(units=hidden_dims, activation='relu')(X)

        # output layer
        predictions = Dense(config.ClassNum, activation='softmax')(x)

        # build the model
        model = Model(main_input, predictions)
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])

        return model
    def build_model(self):
        """Build, compile and summarise the context/current/response model.

        The context and the current utterance are each embedded, passed
        through two stacked recurrent layers and pooled by an ``AttLayer``.
        The two pooled vectors form a length-2 sequence that is encoded by a
        top-level recurrent layer. The decoder runs over the embedded
        response, attends over the top-level encoder output with dot-product
        attention, and a linear Dense layer produces ``self.vocab_size``
        logits.

        The compiled model is stored on ``self.model``; nothing is returned.
        """
        context_input = Input(shape=(None, ), name='context_input')
        current_input = Input(shape=(None, ), name='current_input')
        response_input = Input(shape=(None, ), name='response_input')
        # Target tensor handed to compile() through `target_tensors`.
        decoder_target = tf.placeholder(shape=[None, None], dtype='int32')

        # One embedding layer is shared by context, current and response.
        embed_layer = Embedding(input_dim=self.vocab_size,
                                output_dim=self.embedding_dim,
                                mask_zero=True,
                                name='embedding')
        context_embed = embed_layer(context_input)
        current_embed = embed_layer(current_input)
        context_embed = Dropout(0.2)(context_embed)
        current_embed = Dropout(0.2)(current_embed)

        # ENCODER
        # NOTE(review): despite the "bidi" variable names, these layers are
        # plain self.rec_cell instances, not wrapped in Bidirectional.
        context_bidi_encoder1 = self.rec_cell(units=self.encoder_dim,
                                              return_sequences=True,
                                              name='context_encoder1')
        context_bidi_encoder2 = self.rec_cell(units=self.encoder_dim,
                                              return_sequences=True,
                                              return_state=False,
                                              name='context_encoder2')

        current_bidi_encoder1 = self.rec_cell(units=self.encoder_dim,
                                              return_sequences=True,
                                              return_state=False,
                                              name='current_encoder1')
        # Only this layer returns its final state; it seeds the decoder below.
        current_bidi_encoder2 = self.rec_cell(units=self.encoder_dim,
                                              return_sequences=True,
                                              return_state=True,
                                              name='current_encoder2')

        # Encode context
        context_encoded = context_bidi_encoder1(context_embed)
        context_encoded = context_bidi_encoder2(context_encoded)
        context_att_h = AttLayer(attention_dim=200)(context_encoded)
        # Add an axis at position 1 so the pooled vector can be concatenated
        # along axis 1 below.
        context_att_h = Lambda(lambda x: K.expand_dims(x, 1))(context_att_h)

        # Encode current utterance to respond to
        current_encoded = current_bidi_encoder1(current_embed)
        # Unpacking three values assumes self.rec_cell returns two state
        # tensors (LSTM-like) - TODO confirm.
        current_encoded, state_h, state_c = current_bidi_encoder2(
            current_encoded)
        # current_encoded, fwd_h, fwd_c, bwd_h, bwd_c = current_bidi_encoder1(current_embed)
        # state_h = Concatenate()([fwd_h, bwd_h])
        # state_c = Concatenate()([fwd_c, bwd_c])
        current_att_h = AttLayer(attention_dim=200)(current_encoded)
        current_att_h = Lambda(lambda x: K.expand_dims(x, 1))(current_att_h)

        # Two-step sequence [context, current], read in reverse order by the
        # top-level encoder (go_backwards=True).
        encoded_concat = Concatenate(axis=1, name='context_current_concat')(
            [context_att_h, current_att_h])
        encoder_output = self.rec_cell(
            units=self.encoder_dim,
            return_sequences=True,
            return_state=False,
            go_backwards=True,
            name='top_level_encoder')(encoded_concat)

        # DECODER
        rnn_decoder = self.rec_cell(units=self.decoder_dim,
                                    return_sequences=True,
                                    name='decoder1')

        decoder_embed = embed_layer(response_input)
        decoder_embed = Dropout(0.2)(decoder_embed)
        # NOTE(review): the initial state comes from an encoder_dim layer, so
        # this presumably requires decoder_dim == encoder_dim - confirm.
        decoder_output = rnn_decoder(decoder_embed,
                                     initial_state=[state_h, state_c])

        # Attention: dot-product scores between decoder and encoder outputs,
        # softmax-normalised, then used to weight the encoder outputs.
        attention = Dot(axes=[2, 2], name='decoder_encoder_dot')(
            [decoder_output, encoder_output])
        attention = Activation('softmax', name='attention_probs')(attention)
        context = Dot(axes=[2, 1],
                      name='att_encoder_context')([attention, encoder_output])
        decoder_combined_context = Concatenate(name='decoder_context_concat')(
            [context, decoder_output])

        logits_out = Dense(units=self.vocab_size,
                           activation='linear',
                           name='logits')(decoder_combined_context)

        self.model = Model(
            inputs=[context_input, current_input, response_input],
            outputs=logits_out)
        self.model.compile(loss=self.sparse_loss,
                           optimizer=self.optimizer,
                           target_tensors=[decoder_target])
        self.model.summary()
# Script fragment: word-level and sentence-level parts of a hierarchical
# attention model. `max_features`, `embedding_dims`, `maxlen`,
# `gru_output_size` and `review_input` are defined outside this fragment.

# Embedding Layer
embedding_layer = Embedding(max_features, embedding_dims, 
                            input_length=maxlen)

# WORD-LEVEL
sentence_input = Input(shape=(maxlen,), dtype='int32')
embedded_sequences = embedding_layer(sentence_input)


# Bidirectional GRU followed by a per-timestep Dense projection
l_gru = Bidirectional(GRU(gru_output_size, return_sequences=True))(embedded_sequences)
l_dense = TimeDistributed(Dense(units=gru_output_size))(l_gru) 

# Word-Level Attention Layer
l_att = AttLayer()(l_dense)

# The sentence encoder is compiled on its own (MSE loss, no metrics); below
# it is reused as a layer inside the review-level graph.
sentEncoder = Model(sentence_input, l_att)
sentEncoder.compile(
    optimizer=Adam(0.0001),
    loss='mse',
    metrics={},
)

# Apply the sentence encoder to each sentence of the review input.
review_encoder = TimeDistributed(sentEncoder)(review_input)


# SENTENCE_LEVEL
# Bidirectional GRU followed by a per-timestep Dense projection
l_gru_sent = Bidirectional(GRU(gru_output_size, return_sequences=True))(review_encoder)#(reshaped)
l_dense_sent = TimeDistributed(Dense(units=gru_output_size))(l_gru_sent)
예제 #20
0
    def build_hang(self, paramsObj, weight=None):
        """Build and compile a CNN classifier with attention pooling.

        The embedded (and dropped-out) input is fed to one Conv1D +
        MaxPooling1D branch per ``(filter_size, pool_size, num_filter)``
        triple; the branch outputs are concatenated along axis 1, projected
        per time step and pooled by ``AttLayer``.

        Args:
            paramsObj: Object providing ``use_word_embedding``,
                ``train_embedding``, ``dropout_rate``, ``filter_size``,
                ``pool_size``, ``num_filter`` and ``dense_layer_size``.
            weight: Optional pre-trained embedding matrix. When it is
                ``None``/empty, or ``paramsObj.use_word_embedding`` is False,
                the Embedding layer is created without initial weights.

        Returns:
            The compiled model with ``config.ClassNum`` softmax outputs.
        """
        # Embeddings. ``len(...) == 0`` (not truthiness) is used so that a
        # numpy matrix can be passed; ``== False`` is kept so non-bool flag
        # values behave exactly as before.
        embedding_kwargs = {'input_length': config.MAX_SEQ_LENGTH}
        use_pretrained = not (weight is None or len(weight) == 0
                              or paramsObj.use_word_embedding == False)
        if use_pretrained:
            embedding_kwargs['weights'] = [weight]
            embedding_kwargs['trainable'] = paramsObj.train_embedding
        embedding_layer = Embedding(config.MAX_NUM_WORDS,
                                    config.EMBEDDING_DIM,
                                    **embedding_kwargs)

        # Create Model
        main_input = Input(shape=(config.MAX_SEQ_LENGTH, ),
                           dtype='int32',
                           name='main_input')
        embedding_sequences = embedding_layer(main_input)
        embedding_sequences = Dropout(
            paramsObj.dropout_rate)(embedding_sequences)

        # One conv + max-pool branch per configured filter.
        conv_feature_list = []
        for filter_size, pool_size, num_filter in zip(paramsObj.filter_size,
                                                      paramsObj.pool_size,
                                                      paramsObj.num_filter):
            conv_layer = Conv1D(filters=num_filter,
                                kernel_size=filter_size,
                                strides=1,
                                padding='same',
                                activation='relu')(embedding_sequences)
            pool_layer = MaxPooling1D(pool_size=pool_size)(conv_layer)
            conv_feature_list.append(pool_layer)

        # NOTE(review): concatenating along axis 1 presumably requires every
        # branch to use the same num_filter - confirm.
        if len(conv_feature_list) == 1:
            out = conv_feature_list[0]
        else:
            out = concatenate(conv_feature_list, axis=1)

        X = TimeDistributed(Dense(
            len(paramsObj.filter_size) * paramsObj.pool_size[0]),
                            name='DenseTimeDistributed')(out)
        X = AttLayer(name='AttLayer')(X)

        # add dense layer to complete the model
        X = Dropout(paramsObj.dropout_rate)(X)
        X = Dense(paramsObj.dense_layer_size,
                  kernel_initializer='uniform',
                  activation='relu')(X)

        # output layer
        predictions = Dense(config.ClassNum, activation='softmax')(X)
        model = Model(main_input, predictions)

        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])

        return model
예제 #21
0
        return h2

    h2 = feed_input(input_x, 'term')
    h2c = feed_input(input_xc, 'char')
    h2 = Concatenate()([h2, h2c])


# The training set has 315364 samples, roughly 1000 * 256.
# Script fragment: `input_x`, `input_xc`, `num_word`, `embed_dim`,
# `embed_mat`, `rnn_unit_1`, `class_num` and `MODEL_PATH` are defined outside
# this fragment.

# A single trainable embedding layer, initialised from `embed_mat`, is shared
# by both inputs.
embed = Embedding(input_dim=num_word,
                  output_dim=embed_dim,
                  name='embed_s',
                  weights=[embed_mat],
                  trainable=True)
# Branch for input_x: LSTM over the embedded sequence, pooled by AttLayer.
x = embed(input_x)
h2_term = LSTM(rnn_unit_1, return_sequences=True)(x)
h2_term = AttLayer()(h2_term)
# Branch for input_xc: same structure with its own LSTM and AttLayer.
x_c = embed(input_xc)
h2_c = LSTM(rnn_unit_1, return_sequences=True)(x_c)
h2_c = AttLayer()(h2_c)
h2 = Concatenate()([h2_term, h2_c])

# Softmax classifier over the concatenated branch outputs.
pred = Dense(class_num, activation='softmax')(h2)
k_model = keras.Model([input_x, input_xc], pred)
opt = keras.optimizers.Adam(0.001)
k_model.compile(opt, 'categorical_crossentropy', [
    'acc',
])

# Training callbacks: early stopping, and a checkpoint that keeps only the
# best model under MODEL_PATH/model.h5.
earlystop = EarlyStopping(min_delta=0.01, patience=1)
save_best = ModelCheckpoint(os.path.join(MODEL_PATH, "model.h5"),
                            save_best_only=True)