# imports needed by this snippet (standalone Keras 2.x with CuDNN layers);
# `f1` is a custom metric defined elsewhere in the original project
from keras import backend as K
from keras import optimizers
from keras.models import Model
from keras.layers import (Input, Embedding, Dense, Dropout, SpatialDropout1D,
                          BatchNormalization, Bidirectional, CuDNNGRU, CuDNNLSTM,
                          Lambda, Concatenate, concatenate, add, GlobalMaxPool1D)


def rnn(embedding_matrix, config):
    if config['rnn'] == 'gru' and config['gpu']:
        encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'] * 2, return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'] * 4, return_sequences=True))
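        # note: this non-GRU branch stacks two CuDNNLSTM layers but uses a
        # CuDNNGRU for the third, and widens the deeper layers to 2x and 4x
        # the base rnn_output_size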

    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input(shape=(config['max_length'], ), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # shape: (batch, max_length, emb_dims)
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)
    q1_encoded = Dropout(0.2)(q1_encoded)
    q2_encoded = Dropout(0.2)(q2_encoded)
    # bidirectional (a second stacked layer, left commented out below)
    #     q1_encoded = encode2(q1_encoded)
    #     q2_encoded = encode2(q2_encoded)
    # resnet
    rnn_layer2_input1 = concatenate([q1_embed, q1_encoded])
    rnn_layer2_input2 = concatenate([q2_embed, q2_encoded])
    q1_encoded2 = encode2(rnn_layer2_input1)
    q2_encoded2 = encode2(rnn_layer2_input2)

    # add res shortcut
    res_block1 = add([q1_encoded, q1_encoded2])
    res_block2 = add([q2_encoded, q2_encoded2])
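    # the element-wise add lets layer 3 reuse layer-1 features directly, so
    # gradients can bypass layer 2 (the "res shortcut" named above)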
    rnn_layer3_input1 = concatenate([q1_embed, res_block1])
    rnn_layer3_input2 = concatenate([q2_embed, res_block2])
    #     rnn_layer3_input1 = concatenate([q1_embed,q1_encoded,q1_encoded2])
    #     rnn_layer3_input2 = concatenate([q2_embed,q2_encoded,q2_encoded2])
    q1_encoded3 = encode3(rnn_layer3_input1)
    q2_encoded3 = encode3(rnn_layer3_input2)
    #     merged1 = GlobalMaxPool1D()(q1_encoded3)
    #     merged2 = GlobalMaxPool1D()(q2_encoded3)
    #     q1_encoded = concatenate([q1_encoded, q1_encoded2], axis=-1)
    #     q2_encoded = concatenate([q2_encoded, q2_encoded2], axis=-1)

    #     merged1 = concatenate([q1_encoded2, q1_embed], axis=-1)
    #     merged2 = concatenate([q2_encoded2, q2_embed], axis=-1)
    #     # TODO add attention rep , maxpooling rep
    q1_encoded3 = concatenate([q1_encoded, q1_encoded2, q1_encoded3])
    q2_encoded3 = concatenate([q2_encoded, q2_encoded2, q2_encoded3])
    merged1 = GlobalMaxPool1D()(q1_encoded3)
    merged2 = GlobalMaxPool1D()(q2_encoded3)
    # avg1 = GlobalAvgPool1D()(q1_encoded3)
    # avg2 = GlobalAvgPool1D()(q2_encoded3)
    # merged1 = concatenate([max1,avg1])
    # merged2 = concatenate([max2,avg2])
    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([merged1, merged2])
    mul_rep = Lambda(lambda x: x[0] * x[1])([merged1, merged2])
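    # |a - b| and a * b are standard element-wise matching features for
    # Siamese models; both are symmetric in the two question encodings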
    #     jaccard_rep = Lambda(lambda x: x[0]*x[1]/(K.sum(x[0]**2,axis=1,keepdims=True)+K.sum(x[1]**2,axis=1,keepdims=True)-
    #                                               K.sum(K.abs(x[0]*x[1]),axis=1,keepdims=True)))([merged1,merged2])
    #     merged = Concatenate()([merged1, merged2, mul_rep, sub_rep,jaccard_rep])
    feature_input = Input(shape=(config['feature_length'], ))
    feature_dense = BatchNormalization()(feature_input)
    feature_dense = Dense(config['dense_dim'],
                          activation='relu')(feature_dense)

    merged = Concatenate()([merged1, merged2, mul_rep, sub_rep, feature_dense])
    # Classifier
    dense = Dropout(config['dense_dropout'])(merged)
    dense = BatchNormalization()(dense)
    dense = Dense(config['dense_dim'], activation='relu')(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = BatchNormalization()(dense)
    predictions = Dense(1, activation='sigmoid')(dense)
    model = Model(inputs=[q1, q2, feature_input], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
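
# A minimal usage sketch for rnn() (not from the original source). The config
# keys below are exactly the ones the function reads; the values are
# illustrative. Calling rnn() also requires the custom `f1` metric that the
# original project defines elsewhere.
import numpy as np

demo_config = {
    'rnn': 'gru', 'gpu': True,
    'rnn_output_size': 64,
    'max_length': 200,
    'embed_trainable': False,
    'spatial_dropout_rate': 0.2,
    'feature_length': 10,
    'dense_dim': 128,
    'dense_dropout': 0.3,
    'optimizer': 'adam',
    'learning_rate': 1e-3,
}
demo_embeddings = np.random.normal(size=(30000, 300)).astype('float32')
# model = rnn(demo_embeddings, demo_config); model.summary()
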
# `inp` and `max_features` are defined earlier in the original notebook,
# e.g. inp = Input(shape=(200,)); if printed out, `inp` looks like:
# Tensor("input_1:0", shape=(?, 200), dtype=float32)

embed_size = 128
x = Embedding(max_features, embed_size)(inp)
# this will have a shape of (None, 200, 128)

x = LSTM(60, return_sequences=True, name='lstm_layer')(x)
# this will have a shape of (None, 200, 60)
# the first number in (None, 200, 60) is the batch size,
# followed by the number of time steps and the output size per step
# (see https://www.kaggle.com/sbongo/for-beginners-tackling-toxic-using-keras)
# note: 60 is the number of LSTM units (the hidden-state size), not the number
# of layers; return_sequences=True keeps one 60-dim output per time step
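
# A quick standalone check of these shapes (illustrative, not part of the
# original notebook): build a tiny model and inspect its output shape.
_tiny_inp = Input(shape=(200,))
_tiny_x = Embedding(1000, 128)(_tiny_inp)            # (None, 200, 128)
_tiny_x = LSTM(60, return_sequences=True)(_tiny_x)   # (None, 200, 60)
print(Model(_tiny_inp, _tiny_x).output_shape)        # (None, 200, 60)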

x = GlobalMaxPool1D()(x)

x = Dropout(0.1)(x)

x = Dense(50, activation="relu")(x)

x = Dropout(0.1)(x)

x = Dense(6, activation="sigmoid")(x)

model = Model(inputs=inp, outputs=x)

model.compile(loss="binary_crossentropy",
              optimizer='adam',
              metrics=['accuracy'])
# `embedding_layer` is a frozen (trainable=False) pre-trained Embedding
# defined earlier in the original script

# 2) MODEL LAYERS............................................
# Creating a 1-D ConvNet with global max pooling.
# The input is of size N x T, so we pass T (MAX_SEQUENCE_LENGTH) to Input.
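# Shape flow through the stack below: each Conv1D with kernel size 3 (default
# 'valid' padding) shortens the sequence by 2 steps, each MaxPool1D(3) divides
# its length by 3, and GlobalMaxPool1D collapses the time axis entirely,
# leaving a single 128-dim vector per document for the Dense layers.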
input_ = Input(shape=(MAX_SEQUENCE_LENGTH, ))
layer = embedding_layer(input_)
layer = Conv1D(128, 3, activation='relu')(layer)
layer = Dropout(0.3)(layer)
layer = MaxPool1D(3)(layer)
layer = Conv1D(128, 3, activation='relu')(layer)
layer = Dropout(0.4)(layer)
layer = MaxPool1D(3)(layer)
layer = Conv1D(128, 3, activation='relu')(layer)
layer = GlobalMaxPool1D()(layer)
layer = Dense(128, activation='relu')(layer)
output = Dense(len(possible_labels), activation='sigmoid')(layer)

# ----------------- MODEL COMPILE -------------------------..
model = Model(input_, output)
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# ----------------- MODEL TRAINING --------------------------
train = model.fit(data,
                  targets,
                  batch_size=BATCH_SIZE,
                  epochs=EPOCH,
                  validation_split=VALIDATION_SPLIT)
Example 4
    def create_model(self):
        if self.config.model_type == 'LSTM':
            

            # model = Sequential()
            # model.add(Embedding(len(self.word_dict), self.config.max_words ,input_length = self.X.shape[1]))
            # model.add(LSTM(self.config.dim, return_sequences=True , recurrent_dropout=self.config.dropout))
            # model.add(Dropout(self.config.dropout))
            # model.add(LSTM(self.config.dim, return_sequences=True , recurrent_dropout=self.config.dropout ))
            # model.add(Dropout(self.config.dropout))
            # model.add(LSTM(self.config.dim , recurrent_dropout=self.config.dropout))
            # model.add(Dense(self.config.dim,activation='relu'))
            # model.add(Dense(3,activation='softmax'))

            # model = Sequential()
            # model.add(Embedding(len(self.word_dict), self.config.max_words ,input_length = self.X.shape[1]))
            # model.add(LSTM(self.config.dim, dropout=self.config.dropout , recurrent_dropout=self.config.dropout))
            # model.add(Dropout(self.config.dropout))
            # model.add(Dense(3,activation='softmax'))

            inp = Input(shape=(self.config.max_len,))
            x = Embedding(self.config.max_words, self.config.max_len,
                          input_length=self.X.shape[1])(inp)
            x = Bidirectional(LSTM(self.config.dim,
                                   return_sequences=True,
                                   dropout=self.config.dropout,
                                   recurrent_dropout=self.config.dropout,
                                   kernel_regularizer=l2(0.01),
                                   recurrent_regularizer=l2(0.01),
                                   bias_regularizer=l2(0.01)))(x)
            x = GlobalMaxPool1D()(x)
            x = Dense(self.config.dim, activation="sigmoid")(x)
            x = Dropout(self.config.dropout)(x)
            x = Dense(3, activation='softmax')(x)
            model = Model(inputs=inp, outputs=x)


            self.model = model

        if self.config.model_type == 'GRU':

            model = Sequential()
            model.add(Embedding(len(self.word_dict), self.config.max_words,
                                input_length=self.X.shape[1]))
            model.add(GRU(self.config.dim, return_sequences=True,
                          recurrent_dropout=self.config.dropout))
            model.add(Dropout(self.config.dropout))
            model.add(GRU(self.config.dim, return_sequences=True,
                          recurrent_dropout=self.config.dropout))
            model.add(Dropout(self.config.dropout))
            model.add(GRU(self.config.dim, recurrent_dropout=self.config.dropout))
            model.add(Dense(self.config.dim, activation='relu'))
            model.add(Dense(3, activation='softmax'))

            self.model = model

        if self.config.model_type == 'MLP':
            model = Sequential()
            model.add(Dense(len(self.word_dict), input_shape=(self.X.shape[1],),
                            activation="relu"))
            model.add(Dropout(self.config.dropout))
            model.add(Dense(self.config.dim, activation='relu'))
            model.add(Dropout(self.config.dropout))
            model.add(Dense(self.config.dim, activation="relu"))
            model.add(Dropout(self.config.dropout))
            model.add(Dense(self.config.dim, activation="relu"))
            model.add(Dense(3, activation='softmax'))

            self.model = model

        if self.config.debug:
            print(self.model.summary())
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, Conv1D, MaxPooling1D, GlobalMaxPool1D, Dense

max_features, max_len = 2000, 500

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

model = Sequential()

model.add(
    Embedding(max_features, 128, input_length=max_len, name='embedding_layer'))
model.add(Conv1D(32, 7, activation='relu'))
model.add(MaxPooling1D(5))
model.add(Conv1D(32, 7, activation='relu'))
model.add(GlobalMaxPool1D())

model.add(Dense(1, activation='sigmoid'))  # sigmoid output for binary_crossentropy

model.summary()

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# the callbacks list was truncated in the original snippet
model.fit(x_train,
          y_train,
          epochs=20,
          batch_size=128,
          validation_split=0.2)
Example 6
model = Sequential()
model.add(
    Embedding(max_features,
              embed_size,
              weights=[embedding_matrix],
              trainable=True,
              name='Word-Embedding-Layer'))
model.add(Dropout(0.4, name='Dropout-Regularization-1'))  # Best = 0.3
model.add(
    Bidirectional(LSTM(12,
                       return_sequences=True,
                       dropout=0.35,
                       recurrent_dropout=0.35,
                       kernel_initializer=glorot_normal(seed=None)),
                  name='BDLSTM'))  #Best = 300,0.25,0.25
model.add(GlobalMaxPool1D(name='Global-Max-Pool-1d'))
model.add(
    Dense(y_binary.shape[1], activation="softmax", name='FC-Output-Layer'))
model.compile(loss='categorical_crossentropy',
              optimizer='nadam',
              metrics=['mse', 'acc'])
# note: validation_split is ignored when validation_data is passed,
# so only (xtest, ytest) is used for validation here
history = model.fit(xtrain,
                    ytrain,
                    validation_data=(xtest, ytest),
                    batch_size=5000,
                    epochs=1,
                    callbacks=[early_stop],
                    verbose=1)
#history = model.fit(X_train, Y_train, epochs=42, batch_size=50, verbose=1)
print(history.history.keys())
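
# The keys printed above ('loss', 'val_loss', plus the compiled metrics) can be
# used to plot learning curves. A minimal sketch, assuming matplotlib is
# available; not part of the original script.
import matplotlib.pyplot as plt

plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.legend()
plt.show()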
Example 7
from tqdm.notebook import tqdm
from tensorflow import keras
from tensorflow.keras.layers import (Input, Embedding, LSTM, Bidirectional,
                                     GlobalMaxPool1D, Dense, Dropout)
from tensorflow.keras.models import Model
import pandas as pd
import numpy as np
import tensorflow as tf
import os
import re
#%%
train_data = pd.read_csv("c:/temp/train_data.csv", index_col='Unnamed: 0')
embedding_matrix = pd.read_csv('c:/temp/embedding_matrix.csv', index_col='Unnamed: 0')
target = pd.read_csv('c:/temp/target.csv', index_col='Unnamed: 0')

input_layer = Input(shape=(50,))
# Embedding weights must be numpy arrays, so take .values from the DataFrame
embedding_layer = Embedding(40000, 300, weights=[embedding_matrix.values])(input_layer)

LSTM_layer = Bidirectional(LSTM(128, return_sequences = True))(embedding_layer)
maxpool_layer = GlobalMaxPool1D()(LSTM_layer)

dense_layer_1 = Dense(64, activation="relu")(maxpool_layer)
dropout_1 = Dropout(0.5)(dense_layer_1)

dense_layer_2 = Dense(32, activation="relu")(dropout_1)
dropout_2 = Dropout(0.5)(dense_layer_2)

output_layer = Dense(1, activation="sigmoid")(dropout_2)

model = Model(input_layer,output_layer)

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
#%%
# Include the epoch in the file name (uses `str.format`)
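
# A minimal sketch of the checkpointing the comment above refers to (not from
# the original script): ModelCheckpoint substitutes {epoch} and logged metrics
# into the file name via str.format. The path and fit arguments below are
# illustrative only.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath='weights.{epoch:02d}-{val_loss:.2f}.hdf5',
    save_weights_only=True,
    verbose=1)
model.fit(train_data.values, target.values,
          validation_split=0.2, epochs=10, batch_size=64,
          callbacks=[checkpoint_cb])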
Example 8



print('Build model...')
comment_input = Input((maxlen,))

# we start off with an efficient embedding layer which maps
# our vocab indices into embedding_dims dimensions
comment_emb = Embedding(max_features, embedding_dims, input_length=maxlen)(comment_input)

# a SpatialDropout1D regularizes the embeddings, then GlobalMaxPool1D extracts
# the per-dimension maximum over all word positions in the document
comment_emb = SpatialDropout1D(0.25)(comment_emb)
max_emb = GlobalMaxPool1D()(comment_emb)

# normalized dense layer followed by dropout
main = BatchNormalization()(max_emb)
main = Dense(64)(main)
main = Dropout(0.5)(main)

# We project onto a six-unit output layer, and squash it with sigmoids:
output = Dense(6, activation='sigmoid')(main)

model = Model(inputs=comment_input, outputs=output)

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
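
# To make the pooling step concrete (a standalone check, not part of the
# original script): GlobalMaxPool1D turns a (batch, timesteps, features)
# tensor into (batch, features) by taking each feature's maximum over time.
import numpy as np
_demo = np.array([[[1., 5.], [3., 2.], [0., 4.]]])  # (batch=1, timesteps=3, features=2)
print(_demo.max(axis=1))  # [[3. 5.]] -- the same reduction GlobalMaxPool1D applies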
Example 9
    # OPTION 1: Conv1D, LSTM, or bidirectional LSTM
    # Bidirectional

    if (nn == 'bi'):
        main_embeddings_2 = Bidirectional(
            LSTM(embedding_dim, dropout=0.2,
                 recurrent_dropout=0.2))(main_embeddings_1)

    elif (nn == 'lstm'):
        main_embeddings_2 = LSTM(embedding_dim,
                                 dropout=0.2,
                                 recurrent_dropout=0.2)(main_embeddings_1)

    else:
        output_1 = Conv1D(128, 5, activation='relu')(main_embeddings_1)
        output_2 = GlobalMaxPool1D()(output_1)
        main_embeddings_2 = Dense(10, activation='relu')(output_2)

    # OPTION 2: sigmoid or softmax
    main_embeddings_3 = Dense(10, activation='softmax')(main_embeddings_2)

    merged = main_embeddings_3

    # Final predictions
    hidden1 = Dense(10, activation='relu')(merged)
    hidden2 = Dense(10, activation='relu')(hidden1)

    predictions = Dense(1, activation='sigmoid')(hidden2)

    #Create model
    model = Model(inputs=main_embeddings_input, outputs=predictions)
Example 10
    def test_deepin_fm(self):

        try:
            import nltk
            nltk.download('movie_reviews')
            from nltk.corpus import movie_reviews

        except ImportError:
            self.skipTest(
                "NLTK is not installed.  Reinstall with option 'tests'")

        # download some text data, process it, and create a feature
        # extraction layer to plug in
        print("processing text...")
        samplesize = 2000
        reviews = []
        labels = []
        for rf in movie_reviews.fileids():
            review = movie_reviews.open(rf).read()
            reviews.append(review)
            labels.append(rf.find('pos/') != -1)

        textdata = pd.DataFrame({
            'text': reviews,
            'pos': labels,
            'offset_': np.ones(len(reviews))
        })
        # pre-process text (as Ralph does: keep only consecutive runs of
        # alphabetical characters)
        textdata['cleantext'] = textdata['text'].map(
            lambda x: (" ".join(re.findall('[A-Za-z]+', x))).encode('utf8'))
        tokens = [i.lower().split(" ") for i in textdata['cleantext']]
        textdata['len'] = [len(t) for t in tokens]
        textdata.len.describe()
        textdata['cat1'] = np.random.randint(0, 9, size=samplesize)
        textdata['cat2'] = np.random.randint(0, 2, size=samplesize)
        textdata['real1'] = np.random.uniform(0, 1, size=samplesize)
        textdata['latenty'] = (textdata.cat1 - 2 * math.pi * textdata.cat2 +
                               textdata.real1 -
                               math.exp(1) * textdata.pos.astype('float') +
                               textdata.real1 * textdata.pos.astype('float') +
                               np.random.normal(size=samplesize))
        # convert to binary indicator
        textdata['y'] = (textdata['latenty'] > 0).astype('int')

        # sequence length cutoff is going to be 75th percentile
        cutoff = int(textdata.len.describe()['75%'])
        tokens = [r[0:min(len(r), cutoff)] for r in tokens]

        # build vocab
        vocab = set()
        for r in tokens:
            for w in r:
                vocab.add(w)

        vocabsize = len(vocab)
        vocab_indices = {}
        index = 1
        for v in vocab:
            vocab_indices[v] = index
            index += 1

        tokens_indexed = []
        for r in tokens:
            tokens_indexed.append([vocab_indices[w] for w in r])

        sequence_mat = sequence.pad_sequences(tokens_indexed,
                                              maxlen=cutoff,
                                              value=0,
                                              padding='post',
                                              truncating='post')
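
        # Small illustration (not part of the original test) of the padding /
        # truncation behavior used above: padding='post' zero-pads short rows
        # on the right and truncating='post' cuts long rows from the right, e.g.
        # sequence.pad_sequences([[1, 2], [1, 2, 3, 4]], maxlen=3,
        #                        padding='post', truncating='post')
        # -> [[1 2 0], [1 2 3]]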

        # build the feature extraction layer
        # do a CNN mimicking Ralph's architecture (but of significantly lower dimensionality)
        embed_dim = 10

        word_seq = Input(batch_shape=(None, sequence_mat.shape[1]),
                         name='wordind_seq')
        word_embeddings = Embedding(input_dim=vocabsize + 1,
                                    output_dim=1,
                                    input_length=cutoff,
                                    mask_zero=False)(word_seq)
        word_conv = Convolution1D(filters=10,
                                  kernel_size=3,
                                  activation='relu',
                                  use_bias=True)(word_embeddings)
        pooler = GlobalMaxPool1D()(word_conv)
        word_dense_layer = Dense(units=10, activation='relu')(pooler)
        word_final_layer = Dense(units=embed_dim,
                                 name='textfeats')(word_dense_layer)

        # collect relevant values for the deepFM model
        features = [['cat1'], ['cat2'], ['real1'], ['offset_'], ['textseq']]
        feature_dim = [
            len(textdata['cat1'].unique()),
            len(textdata['cat2'].unique()), 1, 1, embed_dim
        ]
        deep_inputs = [word_seq]
        deep_feature = [word_final_layer]
        deepin = [False, False, False, False, True]
        bias_only = [False, False, False, True, False]
        realvalued = [
            False, False, True, False, None
        ]  # doesn't matter what we assign to the deep feature, so just say None
        inputs = [
            textdata['cat1'], textdata['cat2'], textdata['real1'],
            pd.Categorical(textdata['offset_']).codes, sequence_mat
        ]

        # build deep-in FM
        difm_obj = DeepFM(features,
                          feature_dim,
                          realval=realvalued,
                          deepin_feature=deepin,
                          deepin_inputs=deep_inputs,
                          deepin_layers=deep_feature)

        tf.set_random_seed(1)
        np.random.seed(1)
        difm = difm_obj.build_model(embed_dim,
                                    deep_out=False,
                                    bias_only=bias_only,
                                    dropout_input=0,
                                    dropout_layer=0)
        print(difm.summary())
        earlyend = EarlyStopping(monitor='val_loss')
        difm.compile(loss='binary_crossentropy',
                     metrics=['accuracy'],
                     optimizer=tf.train.AdamOptimizer())

        try:
            from keras.utils import plot_model
            plot_model(difm, to_file="difm.png")
        except:
            pass

        difm.fit(x=inputs,
                 y=textdata['y'],
                 batch_size=100,
                 epochs=2,
                 verbose=1,
                 callbacks=[earlyend],
                 validation_split=.1,
                 shuffle=True)

        # now add a deep-out layer for the interactions
        tf.set_random_seed(1)
        np.random.seed(1)
        diofm = difm_obj.build_model(embed_dim, deep_out=True)
        # print diofm.summary()
        earlyend = EarlyStopping(monitor='val_loss')
        diofm.compile(loss='binary_crossentropy',
                      metrics=['accuracy'],
                      optimizer=tf.train.AdamOptimizer())

        diofm.fit(x=inputs,
                  y=textdata['y'],
                  batch_size=100,
                  epochs=100,
                  verbose=1,
                  callbacks=[earlyend],
                  validation_split=.1,
                  shuffle=True)