Example no. 1
    def get_model_defination(self, dataset, embeddings):
        # try:

        # Build the model
        print('Building the model...')

        main_input = Input(shape=[dataset.abs_len, dataset.maxlen],
                           dtype='int32',
                           name='input')  # (None, 36)
        char_input = Input(
            shape=[dataset.abs_len, dataset.maxlen, dataset.maxlen_word],
            dtype='int32',
            name='char_input')  # (None, 36, 25)
        # print 'passed checkpoint 1: input\n\n'
        # pdb.set_trace()
        main_input_r = Lambda(
            lambda x: K.reshape(x, shape=(-1, dataset.maxlen)))(main_input)

        char_input_r = Lambda(lambda x: K.reshape(
            x, shape=(-1, dataset.maxlen, dataset.maxlen_word)))(char_input)

        embeds, _, _ = embeddings.init_weights(dataset.idx2word)

        # print 'passed checkpoint 2: embedding init\n\n'

        embed = Embedding(input_dim=dataset.vocsize,
                          output_dim=embeddings.embed_dim,
                          input_length=dataset.maxlen,
                          weights=[embeds],
                          mask_zero=False,
                          name='embedding',
                          trainable=True)(main_input_r)

        embed = Lambda(lambda x: K.reshape(
            x,
            shape=[-1, dataset.abs_len, dataset.maxlen, embeddings.embed_dim])
                       )(embed)

        # embed = Dropout(0.5, name='embed_dropout')(embed)

        char_embed = Embedding(
            input_dim=dataset.charsize,
            output_dim=embeddings.char_embed_dim,
            embeddings_initializer='lecun_uniform',
            input_length=[dataset.maxlen, dataset.maxlen_word],
            mask_zero=False,
            name='char_embedding')(char_input_r)

        char_embed_shape = char_embed.shape
        char_embed = Lambda(lambda x: K.reshape(
            x, shape=(-1, dataset.maxlen_word, embeddings.char_embed_dim)))(
                char_embed)

        # pdb.set_trace()

        biLSTM_char_embed = Bidirectional(
            CuDNNLSTM(embeddings.char_embed_dim,
                      return_sequences=False))(char_embed)

        # fwd_state = GRU(150, return_state=True)(char_embed)[-2]
        # bwd_state = GRU(150, return_state=True, go_backwards=True)(char_embed)[-2]
        # biLSTM_char_embed = Concatenate(axis=-1)([fwd_state, bwd_state])
        char_embed = Lambda(
            lambda x: K.reshape(x,
                                shape=[
                                    -1, dataset.abs_len, char_embed_shape[1], 2
                                    * embeddings.char_embed_dim
                                ]))(biLSTM_char_embed)

        # char_embed = Dropout(0.5, name='char_embed_dropout')(char_embed)
        # pdb.set_trace()

        combined_embed = Concatenate(name='Sum')([embed, char_embed])

        combined_embed = Lambda(lambda x: K.reshape(
            x,
            shape=(-1, dataset.maxlen,
                   (2 * embeddings.char_embed_dim + embeddings.embed_dim))))(
                       combined_embed)

        # biLSTM_embed= Bidirectional(LSTM(64, return_sequences=True))(combined_embed)

        # with_atention = AttentionWithContext()(biLSTM_embed)

        # with_atention = Lambda(lambda x: K.reshape(x, shape=(-1, dataset.abs_len, 2*64 )))(with_atention)

        biLSTM = Bidirectional(CuDNNLSTM(
            64, return_sequences=False))(combined_embed)
        biLSTM = Dropout(0.5)(biLSTM)

        biLSTM_r = Lambda(lambda x: K.reshape(
            x, shape=(-1, dataset.abs_len, 2 * 64)))(biLSTM)

        norm = BatchNormalization()(biLSTM_r)
        feedforward = Dense(dataset.nclasses, name='feed_forward')(norm)

        final_output = CRF(dataset.nclasses,
                           learn_mode='marginal',
                           sparse_target=True)(feedforward)
        # final_output = Activation('softmax')(feedforward) # (None, 36, 5) # (None, 36, 5)

        # print 'passed checkpoint 7: Final_classifier\n\n'

        model = Model(inputs=[main_input, char_input],
                      outputs=final_output,
                      name='output')
        model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        # model.summary()
        # plot_model(model, to_file='model.png', show_shapes=True)

        # except Exception as e:
        # 	# print 'passed checkpoint E1\n\n'
        # 	# model.summary()
        # 	# plot_model(model, to_file='model.png', show_shapes=True)

        # 	traceback.print_exc()
        # 	pdb.set_trace()

        return model
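The method above uses layers that are imported elsewhere in the file. A minimal sketch of the imports it appears to rely on (assuming the CRF layer comes from keras-contrib, which the snippet does not confirm):

from keras.models import Model
from keras.layers import (Input, Lambda, Embedding, Dense, Dropout, Concatenate,
                          Bidirectional, CuDNNLSTM, BatchNormalization)
from keras import backend as K
from keras_contrib.layers import CRF  # assumed source of the CRF layer used above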
Example no. 2

# # Training

# In[ ]:


from keras.models import Sequential
from keras.layers import CuDNNLSTM, Dense, Bidirectional


# In[ ]:


model = Sequential()
model.add(Bidirectional(CuDNNLSTM(64, return_sequences=True),
                        input_shape=(30, 300)))
model.add(Bidirectional(CuDNNLSTM(64)))
model.add(Dense(1, activation="sigmoid"))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])


# In[ ]:


mg = batch_gen(train_df)
model.fit_generator(mg, epochs=20,
                    steps_per_epoch=1000)
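batch_gen is defined elsewhere in this notebook. A minimal sketch of such a generator, assuming train_df exposes a 'features' column of (30, 300) arrays and a binary 'target' column (both column names are hypothetical):

import numpy as np

def batch_gen(df, batch_size=128):
    # Hypothetical generator: yields (X, y) batches shaped (batch_size, 30, 300)
    # and (batch_size,), matching the Bidirectional(CuDNNLSTM) input above.
    while True:
        sample = df.sample(batch_size)
        X = np.stack(sample['features'].values)  # hypothetical column
        y = sample['target'].values               # hypothetical column
        yield X, y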
Example no. 3
print('Shape of embedding matrix:', embedding_matrix.shape)

# ### Initialization

print('Build model...')
model = Sequential()

model.add(
    Embedding(num_words,
              embedding_dim,
              weights=[embedding_matrix],
              input_length=MAX_SEQUENCE_LENGTH,
              trainable=False))
#model.add(Dropout(0.2))

model.add(CuDNNLSTM(128, return_sequences=True))
model.add(CuDNNLSTM(128))

model.add(Dense(128, activation='relu'))
model.add(Dense(NLABELS, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='rmsprop')

print(model.summary())

# ### Learning

epochs = 20
batch_size = 16

history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs)
Example no. 4
## Pad every sequence to the same length
train_seq_mat = sequence.pad_sequences(train_seq, maxlen=max_len)
val_seq_mat = sequence.pad_sequences(val_seq, maxlen=max_len)
test_seq_mat = sequence.pad_sequences(test_seq, maxlen=max_len)
print("Data converted to sequences")
print(train_seq_mat.shape)
print(val_seq_mat.shape)
print(test_seq_mat.shape)
print(train_seq_mat[:2])

#------------------------------- Step 5: build the BiLSTM model --------------------------
num_labels = 4
model = Sequential()
model.add(Embedding(max_words + 1, 128, input_length=max_len))
model.add(Bidirectional(CuDNNLSTM(128)))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(num_labels, activation='softmax'))
model.summary()
model.compile(
    loss="categorical_crossentropy",
    optimizer='adam',  # RMSprop()
    metrics=["accuracy"])

#------------------------------- Step 6: model training and prediction --------------------------
## Set flag to "train" for training first, then switch it to "test" for testing
flag = "test"
if flag == "train":
    print("Training the model")
    ## Train the model; stop when val_loss no longer improves (min_delta 0.0001)
Example no. 5
def get_model(training, img_h, nclass):
    input_shape = (None, img_h, 1)  # (128, 64, 1)
    #input_shape = (280, img_h, 1)
    # Make Networkw
    inputs = Input(name='the_input', shape=input_shape,
                   dtype='float32')  # (None, 128, 64, 1)
    #inner = resnet.ResNet50(include_top=False, weights = None, input_tensor = inputs)
    inner = shufflenet.ShuffleNet_V2(include_top=False,
                                     weights=None,
                                     input_tensor=inputs)
    # Convolution layer (VGG)
    # CNN to RNN
    #inner = Reshape(target_shape=((32, 2048)), name='reshape')(inner)  # (None, 32, 2048)
    inner = TimeDistributed(Flatten(), name='flatten')(inner)
    #inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)  # (None, 32, 64)

    lstm_unit_num = 256

    # RNN layer
    lstm_1 = CuDNNLSTM(lstm_unit_num,
                       return_sequences=True,
                       kernel_initializer='he_normal',
                       name='lstm1')(inner)  # (None, 32, 512)
    lstm_1b = CuDNNLSTM(lstm_unit_num,
                        return_sequences=True,
                        go_backwards=True,
                        kernel_initializer='he_normal',
                        name='lstm1_b')(inner)
    lstm1_merged = add([lstm_1, lstm_1b])  # (None, 32, 512)
    lstm1_merged = BatchNormalization()(lstm1_merged)

    #lstm1_merged = Dropout(0.1)(lstm1_merged)

    lstm_2 = CuDNNLSTM(lstm_unit_num,
                       return_sequences=True,
                       kernel_initializer='he_normal',
                       name='lstm2')(lstm1_merged)
    lstm_2b = CuDNNLSTM(lstm_unit_num,
                        return_sequences=True,
                        go_backwards=True,
                        kernel_initializer='he_normal',
                        name='lstm2_b')(lstm1_merged)
    lstm2_merged = concatenate([lstm_2, lstm_2b])  # (None, 32, 1024)
    lstm2_merged = BatchNormalization()(lstm2_merged)

    #lstm_merged = Dropout(0.1)(lstm_merged)

    # transforms RNN output to character activations:
    inner = Dense(nclass, kernel_initializer='he_normal',
                  name='dense2')(lstm2_merged)  #(None, 32, 63)
    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels', shape=[None],
                   dtype='float32')  # (None ,8)
    input_length = Input(name='input_length', shape=[1],
                         dtype='int64')  # (None, 1)
    label_length = Input(name='label_length', shape=[1],
                         dtype='int64')  # (None, 1)

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length,
                                   label_length])  #(None, 1)
    model = None
    if training:
        model = Model(inputs=[inputs, labels, input_length, label_length],
                      outputs=loss_out)
    else:
        model = Model(inputs=inputs, outputs=y_pred)
        return model, model
    model.summary()
    multi_model = multi_gpu_model(model, gpus=GPU_NUM)
    save_model = model
    ada = Adadelta()
    #multi_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adam', metrics=['accuracy'])
    multi_model.compile(loss={
        'ctc': lambda y_true, y_pred: y_pred
    },
                        optimizer=ada,
                        metrics=['accuracy'])
    return save_model, multi_model
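ctc_lambda_func is referenced but not defined in this snippet. A typical definition based on the standard Keras CTC recipe, shown here as an assumption rather than the original helper:

from keras import backend as K

def ctc_lambda_func(args):
    # Standard CTC-loss wrapper; the original helper is not shown in the snippet.
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)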
Example no. 6
X_test = X[test_idx]
y_test = ratings[test_idx]
val_ratio = 0.1

print('Training data size: {}'.format(X_train.shape))
print('Test data size: {}'.format(X_test.shape))
print('Validation ratio: {} % of training data'.format(val_ratio * 100))

vocab_size = 5000
embedding_size = 32

# define model
model = Sequential()
model.add(Embedding(vocab_size, embedding_size,
                    input_length=max_review_length))
model.add(CuDNNLSTM(128))
model.add(Dense(1, activation=None))

optim = optimizers.Adam(lr=0.001, decay=0.001)
model.compile(loss='mse', optimizer=optim, metrics=['mse'])
tensorboard = TensorBoard(log_dir='./logs', write_graph=True)
earlystopping = EarlyStopping(monitor='val_loss',
                              min_delta=0,
                              patience=2,
                              verbose=0,
                              mode='auto')

model.fit(X_train,
          y_train,
          batch_size=64,
          epochs=20)
Example no. 7
def build_model(size_embeddings,
                window_length,
                number_words,
                number_positions,
                number_labels,
                embeddings=None):

    LSTM_UNITS = 128
    DENSE_HIDDEN_UNITS = 4 * LSTM_UNITS

    if embeddings is not None:

        size_embeddings = embeddings.shape[1]
        embedding_layer = Embedding(number_words,
                                    size_embeddings,
                                    weights=[embeddings],
                                    input_length=window_length,
                                    trainable=False,
                                    name='embedded_words')
    else:
        embedding_layer = Embedding(number_words,
                                    size_embeddings,
                                    input_length=window_length,
                                    trainable=False,
                                    name='embedded_words')
    embedding_distance_layer = Embedding(number_positions,
                                         50,
                                         input_length=window_length,
                                         trainable=True,
                                         name='embedded_distances')

    sequence_sent_input = Input(shape=(window_length, ),
                                dtype='int32',
                                name='sequence_words')
    embedded_sent = embedding_layer(sequence_sent_input)
    embedded_sent = Dropout(0.3)(embedded_sent)

    sequence_dist_input = Input(shape=(window_length, ),
                                dtype='int32',
                                name='sequence_distances')
    embedded_dist = embedding_distance_layer(sequence_dist_input)

    merged = concatenate([embedded_sent, embedded_dist])

    x = SpatialDropout1D(0.3)(merged)
    x = Bidirectional(CuDNNLSTM(LSTM_UNITS, return_sequences=True))(x)
    x = Bidirectional(CuDNNLSTM(LSTM_UNITS, return_sequences=True))(x)

    hidden = concatenate([
        GlobalMaxPooling1D()(x),
        GlobalAveragePooling1D()(x),
    ])
    hidden = add(
        [hidden, Dense(DENSE_HIDDEN_UNITS, activation='relu')(hidden)])
    hidden = add(
        [hidden, Dense(DENSE_HIDDEN_UNITS, activation='relu')(hidden)])
    merged = Dense(number_labels, activation='softmax')(hidden)
    model = Model(inputs=[sequence_sent_input, sequence_dist_input],
                  outputs=[merged])

    model.summary()

    return model
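A brief usage sketch for build_model; the sizes below are placeholders, and the compile step assumes one-hot labels (neither is specified in the snippet):

# Hypothetical sizes: 300-d word vectors, 15-token windows, a 20k-word vocabulary,
# 30 distinct distance values, 10 output labels.
model = build_model(size_embeddings=300, window_length=15, number_words=20000,
                    number_positions=30, number_labels=10)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])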
Example no. 8
from keras.initializers import orthogonal

np.random.seed(123)

#%%
in_dim = LSTM_inputs_train_data.shape[2]
out_dim = num_classes
hidden_size = 125
batch_size = 302
epochs = 100

model = Sequential()
model.add(
    CuDNNLSTM(hidden_size,
              return_sequences=False,
              batch_input_shape=(None, time_length, in_dim),
              kernel_initializer=glorot_uniform(seed=123),
              recurrent_initializer=orthogonal(gain=1.0, seed=123)))
model.add(
    Dense(out_dim,
          activation='softmax',
          kernel_initializer=glorot_uniform(seed=123)))

Adamax = optimizers.Adamax(lr=0.002,
                           beta_1=0.9,
                           beta_2=0.999,
                           epsilon=None,
                           decay=0.0)
model.compile(loss='kullback_leibler_divergence',
              optimizer=Adamax,
              metrics=['categorical_accuracy'])
Example no. 9
valid_y = pd.DataFrame(all_answer[30000:35000].tolist(), columns=['label'])
#test_y = pd.DataFrame(all_answer[35000:].tolist(),columns=['label'])

train_y = pd.get_dummies(train_y['label']).values
valid_y = pd.get_dummies(valid_y['label']).values
#test_y = pd.get_dummies(test_y['label']).values

maxvalue = train_x.max()
minvalue = train_x.min()
div = maxvalue - minvalue
train_x = (train_x - minvalue) / div
valid_x = (valid_x - minvalue) / div
test_x = (test_x - minvalue) / div

model = Sequential()
model.add(CuDNNLSTM(100, input_shape=(train_x.shape[1], train_x.shape[2])))
#model.add(Dropout(0.2))
model.add(Dense(9, activation='softmax'))  # output layer
adam = optimizers.Adam(lr=0.005)
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])
# model.add(LSTM(n_neurons, batch_input_shape=(n_batch, X.shape[1], X.shape[2]), stateful=True,dropout=dropout))
model.summary()

# fit network
history = model.fit(train_x,
                    train_y,
                    epochs=50,
                    batch_size=72,
                    validation_data=(valid_x, valid_y))
Example no. 10
    def build_models(self):
        def last_image(tensor):
            return tensor[:, -1, :]

        image_input = Input(shape=(self.state_len, self.height, self.width, 3))
        xf = TimeDistributed(
            Conv2D(90, (9, 9), activation='relu', padding='same'))(image_input)
        xf = TimeDistributed(MaxPooling2D((2, 2)))(xf)
        xf = TimeDistributed(BatchNormalization())(xf)
        print(xf.shape)
        xf = TimeDistributed(
            Conv2D(60, (6, 6), activation='relu', padding='same'))(xf)
        xf = TimeDistributed(MaxPooling2D((2, 2)))(xf)
        xf = TimeDistributed(BatchNormalization())(xf)
        print(xf.shape)
        xf = TimeDistributed(
            Conv2D(60, (5, 5), activation='relu', padding='same'))(xf)
        xf = TimeDistributed(MaxPooling2D((3, 3)))(xf)
        features = TimeDistributed(Flatten())(xf)
        print('feature shape is: ', features.shape)
        feature_model = Model(image_input, features)

        state_current = Input(shape=(self.state_len, self.height, self.width,
                                     3),
                              name='state_current')
        state_next = Input(shape=(self.state_len, self.height, self.width, 3),
                           name='state_next')
        feature_current = feature_model(state_current)
        last_feature = Lambda(last_image)(feature_current)
        feature_dimension = int(last_feature.shape[1])

        feature_next = feature_model(state_next)
        inverse_input = Concatenate()([feature_current, feature_next])
        xi = TimeDistributed(Dense(feature_dimension,
                                   activation='relu'))(inverse_input)
        xi = TimeDistributed(Dense(feature_dimension, activation='relu'))(xi)
        xi = CuDNNLSTM(50, return_sequences=False)(inverse_input)
        xi = Dense(50, activation='relu')(xi)
        inverse_output = Dense(8, activation='softmax',
                               name='inverse_output')(xi)

        input_action = Input(shape=(8, ), name='action')
        recurrent_branch = CuDNNLSTM(50,
                                     return_sequences=False)(feature_current)

        forward_input = Concatenate()(
            [last_feature, input_action, recurrent_branch])

        xfo = Dense(feature_dimension, activation='relu')(forward_input)
        xfo = Dense(feature_dimension, activation='relu')(xfo)
        forward_output = Dense(feature_dimension, activation='relu')(xfo)
        last_feature_next = Lambda(last_image)(feature_next)

        icm = Model(inputs=[input_action, state_current, state_next],
                    outputs=[inverse_output])

        def icm_loss(ytrue, ypred):
            return self.beta * K.mean(
                0.5 * K.square(forward_output - last_feature_next), axis=1) + (
                    1 - self.beta) * K.categorical_crossentropy(ytrue, ypred)

        icm.compile(loss=icm_loss, optimizer=self.adam2, metrics=['accuracy'])

        ireward_output = Lambda(
            lambda x: K.mean(0.5 * K.square(x[0] - x[1]), axis=1))(
                [forward_output, last_feature_next])
        ireward = Model(inputs=[input_action, state_current, state_next],
                        outputs=ireward_output)

        #main_input = Input(shape=(self.state_len, self.height, self.width, 3))
        x = TimeDistributed(Dense(feature_dimension,
                                  activation='relu'))(feature_current)
        x = TimeDistributed(Dense(feature_dimension, activation='relu'))(x)
        reward_input = Input(shape=(8, ))
        x = Dense(50, activation='relu')(x)
        main_output = Dense(8, activation='softmax')(x)

        main_model = Model([state_current, reward_input], main_output)
        main_model.add_loss(self.sample_loss(main_output, reward_input))
        main_model.compile(optimizer=self.adam1)
        return main_model, icm, feature_model, ireward
Example no. 11
def create_rnn_model(rnnModel, rnn_type, inputSize, outputShape):
    """
    Create the RNN part of the network on top of an existing Keras model.

    Arguments:
        rnnModel: the Keras Sequential model to add layers to
        rnn_type: string, which recurrent cell to use: 'GRU' or 'LSTM'
        inputSize: training input shape as (time_length, features)
        outputShape: training output shape (h, w, colorChannel); colorChannel should be 1 here
    Returns:
        the model after the RNN and dense layers have been added
    """
    # Bail out if rnn_type or inputSize is missing:
    # if not (rnn_type and inputSize):
    #     sys.exit()

    if (rnn_type == 'GRU'):
        rnnModel.add(
            CuDNNGRU(units=64,
                     kernel_initializer='glorot_uniform',
                     recurrent_initializer='orthogonal',
                     bias_initializer='zeros',
                     kernel_regularizer=None,
                     recurrent_regularizer=None,
                     bias_regularizer=None,
                     activity_regularizer=None,
                     kernel_constraint=None,
                     recurrent_constraint=None,
                     bias_constraint=None,
                     return_sequences=True,
                     return_state=False,
                     stateful=False,
                     input_shape=inputSize))

    if (rnn_type == 'LSTM'):
        rnnModel.add(
            CuDNNLSTM(units=64,
                      kernel_initializer='glorot_uniform',
                      recurrent_initializer='orthogonal',
                      bias_initializer='zeros',
                      unit_forget_bias=True,
                      kernel_regularizer=None,
                      recurrent_regularizer=None,
                      bias_regularizer=None,
                      activity_regularizer=None,
                      kernel_constraint=None,
                      recurrent_constraint=None,
                      bias_constraint=None,
                      return_sequences=True,
                      return_state=False,
                      stateful=False,
                      input_shape=inputSize))
    print(np.prod(outputShape))

    # Can try leakyRelu here
    rnnModel.add(
        Dense(128, activation='relu',
              kernel_regularizer=regularizers.l2(0.01)))
    rnnModel.add(
        Dense(256, activation='relu',
              kernel_regularizer=regularizers.l2(0.01)))
    rnnModel.add(
        Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    rnnModel.add(Dense(outputShape[1], activation='relu'))

    rnnModel.compile(loss='mean_squared_error',
                     optimizer='Adam',
                     metrics=['accuracy'])

    return rnnModel
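A short usage sketch for create_rnn_model, assuming the module-level imports the function already relies on; the shapes are placeholders (the real time_length, feature count, and output shape come from the caller's data):

from keras.models import Sequential

# Hypothetical shapes: 50 timesteps of 10 features in, a 64x64x1 frame out.
rnn = create_rnn_model(Sequential(), 'LSTM', (50, 10), (64, 64, 1))
rnn.summary()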
            embedding_matrix[index] = embedding_vector


'''
model = Sequential()
model.add(Embedding(config.vocab_size, 100, input_length=config.maxlen, weights=[embedding_matrix], trainable=False))
model.add(Flatten())
model.add(Dense(100, activation="relu"))
model.add(Dense(100,activation="relu"))
model.add(Dropout(0.6))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
'''
model = Sequential()
model.add(Embedding(config.vocab_size, 100, input_length=config.maxlen))
model.add(CuDNNLSTM(config.hidden_dims))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
model.fit(X_train, y_train,
          batch_size=config.batch_size,
          epochs=config.epochs,
          validation_data=(X_test, y_test), callbacks=[WandbCallback()])




Example no. 13
def build_model2(lr=0.0,
                 lr_d=0.0,
                 units=0,
                 spatial_dr=0.0,
                 kernel_size1=3,
                 kernel_size2=2,
                 dense_units=128,
                 dr=0.1,
                 conv_size=32):
    file_path = "best_model.hdf5"
    check_point = ModelCheckpoint(file_path,
                                  monitor="val_loss",
                                  verbose=1,
                                  save_best_only=True,
                                  mode="min")
    early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=3)

    inp = Input(shape=(max_len, ))
    x = Embedding(19479,
                  embed_size,
                  weights=[embedding_matrix],
                  trainable=False)(inp)
    x1 = SpatialDropout1D(spatial_dr)(x)

    x_gru = Bidirectional(CuDNNGRU(units, return_sequences=True))(x1)
    x_lstm = Bidirectional(CuDNNLSTM(units, return_sequences=True))(x1)

    x_conv1 = Conv1D(conv_size,
                     kernel_size=kernel_size1,
                     padding='valid',
                     kernel_initializer='he_uniform')(x_gru)
    avg_pool1_gru = GlobalAveragePooling1D()(x_conv1)
    max_pool1_gru = GlobalMaxPooling1D()(x_conv1)

    x_conv2 = Conv1D(conv_size,
                     kernel_size=kernel_size2,
                     padding='valid',
                     kernel_initializer='he_uniform')(x_gru)
    avg_pool2_gru = GlobalAveragePooling1D()(x_conv2)
    max_pool2_gru = GlobalMaxPooling1D()(x_conv2)

    x_conv3 = Conv1D(conv_size,
                     kernel_size=kernel_size1,
                     padding='valid',
                     kernel_initializer='he_uniform')(x_lstm)
    avg_pool1_lstm = GlobalAveragePooling1D()(x_conv3)
    max_pool1_lstm = GlobalMaxPooling1D()(x_conv3)

    x_conv4 = Conv1D(conv_size,
                     kernel_size=kernel_size2,
                     padding='valid',
                     kernel_initializer='he_uniform')(x_lstm)
    avg_pool2_lstm = GlobalAveragePooling1D()(x_conv4)
    max_pool2_lstm = GlobalMaxPooling1D()(x_conv4)

    x = concatenate([
        avg_pool1_gru, max_pool1_gru, avg_pool2_gru, max_pool2_gru,
        avg_pool1_lstm, max_pool1_lstm, avg_pool2_lstm, max_pool2_lstm
    ])
    x = BatchNormalization()(x)
    x = Dropout(dr)(Dense(dense_units, activation='relu')(x))
    x = BatchNormalization()(x)
    x = Dropout(dr)(Dense(int(dense_units / 2), activation='relu')(x))
    x = Dense(5, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(lr=lr, decay=lr_d),
                  metrics=["accuracy"])
    history = model.fit(X_train,
                        y_ohe,
                        batch_size=128,
                        epochs=20,
                        validation_split=0.1,
                        verbose=1,
                        callbacks=[check_point, early_stop])
    model = load_model(file_path)
    return model
  def __init__(self, config, pretrained_embedding):
    self._input         = tf.placeholder(dtype=tf.int32,shape=[None,None],name='input')
    self._target        = tf.placeholder(dtype=tf.int32,shape=[None],name='target')
    self.batch_size     = config['batch_size']
    self.num_steps      = config['num_steps']
    self.embed_size     = config['embed_size']
    self.size           = config['hidden_size']
    self._lr            = config['lr']
    self.num_classes    = config['num_classes']
    self.keep_prob      = tf.Variable(config['keep_prob'],trainable=False)
    self.combine_mode   = config['combine_mode']
    self.weight_decay   = config['weight_decay']


    #
    # outputs = LSTMEncoderWithEmbedding(self._input,self.embed_size,self.size,\
    #                          config['vocab_size'],self.num_steps,\
    #                          self.keep_prob,embedding=pretrained_embedding,\
    #                          num_layers=config['num_layers'],\
    #                          variational_dropout=True,\
    #                          combine_mode='last').get_output()

    embed = Embedding(config['vocab_size']+1, self.embed_size)(self._input)
    outputs = tf.nn.dropout(embed,keep_prob=self.keep_prob)
    # outputs = Bidirectional(CuDNNLSTM(self.size,return_sequences=True))(outputs)
    # outputs = tf.nn.dropout(outputs,keep_prob=self.keep_prob)
    outputs = Bidirectional(CuDNNLSTM(self.size,return_sequences=True))(outputs)

    self.size = int(outputs.get_shape().as_list()[-1])
    if self.combine_mode =='weight':
        outputs = tf.reshape(outputs,[-1,self.size])
        weights = Dense(1,activation='tanh')(outputs)
        outputs = tf.multiply(outputs,weights)
        outputs = tf.reshape(outputs,[-1,self.num_steps,self.size])
        outputs = tf.reduce_sum(outputs,axis=1)
    elif self.combine_mode =='last':
        outputs = outputs[:,-1,:]
    elif self.combine_mode =='all':
        weights = Dense(1,activation='tanh')(outputs)
        outputs_weighted = tf.multiply(outputs,weights)
        outputs_weighted = tf.reshape(outputs_weighted,[-1,self.num_steps,2*self.size])
        outputs_weighted = tf.reduce_sum(outputs_weighted,axis=1)
        outputs_last = outputs[:,-1,:]
        outputs_mean = tf.reduce_mean(outputs,axis=1)
        outputs_max  = tf.reduce_max(outputs,axis=1)
        outputs_min  = tf.reduce_min(outputs,axis=1)
        outputs = tf.concat([outputs_last,outputs_mean,outputs_max,outputs_min,outputs_weighted],axis=-1)
#     outputs = tf.nn.dropout(outputs,keep_prob=self.keep_prob)

    embed_avg = tf.reduce_mean(embed,axis=1)
#     embed_max = tf.reduce_max(embed,axis=1)
#     embed_min = tf.reduce_min(embed,axis=1)
#     outputs = tf.concat([outputs,embed_avg,embed_min,embed_max],axis=-1)
    outputs = tf.concat([outputs,embed_avg]   ,axis=-1)
    # outputs = tf.contrib.layers.fully_connected(outputs,self.size)
#     outputs = tf.nn.dropout(outputs,keep_prob=self.keep_prob)
    # softmax_w = tf.get_variable("softmax_w", [self.size, self.num_classes], dtype=tf.float32)
    # softmax_b = tf.get_variable("softmax_b", [self.num_classes], dtype=tf.float32)
    # logits    = tf.matmul(outputs, softmax_w) + softmax_b
    logits = Dense(self.num_classes,activation=None)(outputs)


    # update the cost variables
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self._target,logits=logits)
    self.l2_loss =  sum(tf.nn.l2_loss(tf_var)
        for tf_var in tf.trainable_variables()
        )
    self._cost = cost = tf.reduce_mean(loss) + self.weight_decay*self.l2_loss

    self._lr = tf.Variable(self._lr, trainable=False)
    tvars    = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config['max_grad_norm'])
    optimizer = tf.train.AdamOptimizer(self._lr)
#     optimizer = tf.train.GradientDescentOptimizer(self._lr)

    self._train_op = optimizer.apply_gradients(zip(grads, tvars))

    self._new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
    self.predicted_class = tf.cast(tf.argmax(tf.nn.softmax(logits),axis=-1),tf.int32)
Example no. 15
def getModel(import_model,
             max_features,
             maxlen,
             embedding_size,
             lstm_size,
             forget_bias,
             recurrent_dropout,
             dropout,
             stateful,
             iLSTM,
             scale_amount,
             x_train,
             y_train,
             x_test,
             y_test,
             y_cell_train,
             y_cell_test,
             wi_size,
             batch_size,
             epochs,
             embedding_matrix,
             model_fn,
             file_name,
             dataset,
             use_wv,
             data_path,
             trainable,
             embedding_dropout,
             word_dropout,
             use_L2,
             use_decay,
             rewrite_scores,
             learn_rate,
             use_CNN,
             filters,
             kernel_size,
             pool_size,
             score_fn,
             scale_amount_2,
             two_step=None,
             prev_model=None,
             extra_output_layer=None):
    if two_step is not None and prev_model is None and iLSTM:
        epochs = 8
    print('Build model...')

    if dataset == 2:
        output_size = len(y_train[0])
        output_activation = "softmax"
        loss = "categorical_crossentropy"
    else:
        output_size = 1
        output_activation = "sigmoid"
        loss = "binary_crossentropy"

    if use_decay:
        optimizer = Adam(lr=learn_rate, decay=0.9999)  # removed clipping
    else:
        optimizer = Adam(lr=learn_rate)
    if iLSTM:
        if lstm_size > len(y_cell_test[0]):
            print(">>> Using spare metrics/nodes")
            iLSTM_loss = keras.losses.spare_mse
            iLSTM_metric = keras.metrics.sp_acc
        else:
            iLSTM_loss = "mse"
            iLSTM_metric = "accuracy"

    tensorboard = TensorBoard(log_dir='/home/tom/Desktop/Logs/' +
                              str(dataset) + "/" + file_name + '/',
                              histogram_freq=0,
                              write_graph=True,
                              write_images=True)

    model = None
    print("lstm size", lstm_size)

    if import_model is None and os.path.exists(
            model_fn) is False and prev_model is None:
        print("L0 Input layer", maxlen)
        sequence_input = Input(shape=(maxlen, ), dtype=np.int32)  #

        if word_dropout > 0.0:
            sequence_input = Dropout(word_dropout,
                                     input_shape=(maxlen, ),
                                     dtype=np.int32)
            prev_layer = sequence_input
        else:
            prev_layer = sequence_input

        if use_wv:
            print("L1 pre-trained word embeddings", wi_size, embedding_size,
                  maxlen, False, trainable)
            embedding_layer = Embedding(wi_size,
                                        embedding_size,
                                        weights=[embedding_matrix],
                                        input_length=maxlen,
                                        trainable=trainable)(prev_layer)
        else:
            print("L1 trainable embeddings", wi_size, embedding_size, maxlen,
                  True)
            embedding_layer = Embedding(wi_size,
                                        embedding_size,
                                        input_length=maxlen,
                                        trainable=True)(prev_layer)

        if embedding_dropout > 0.0:
            dropout_layer = Dropout(embedding_dropout)(embedding_layer)
            prev_lstm_layer = dropout_layer
        else:
            prev_lstm_layer = embedding_layer

        if use_CNN:
            prev_conv_layer = prev_lstm_layer
            conv = Conv1D(filters,
                          kernel_size,
                          padding='valid',
                          activation='relu',
                          strides=1)(prev_conv_layer)
            prev_lstm_layer = conv

        if iLSTM:
            if dropout > 0.0 or recurrent_dropout > 0.0:
                print("L2 dropout LSTM", lstm_size, forget_bias, dropout,
                      recurrent_dropout)
                hidden_layer, h_l2, cell_state = LSTM(
                    units=lstm_size,
                    dropout=dropout,
                    recurrent_dropout=recurrent_dropout,
                    unit_forget_bias=forget_bias,
                    return_state=True,
                    kernel_regularizer=l2(use_L2))(prev_lstm_layer)
            else:
                print("L2 no_dropout CuDNNLSTM", lstm_size, forget_bias)
                hidden_layer, h_l2, cell_state = CuDNNLSTM(
                    units=lstm_size,
                    unit_forget_bias=forget_bias,
                    return_state=True,
                    kernel_regularizer=l2(use_L2))(prev_lstm_layer)
        else:
            if dropout > 0.0 or recurrent_dropout > 0.0:
                print("L2 dropout LSTM", lstm_size, forget_bias, dropout,
                      recurrent_dropout)

                hidden_layer = LSTM(
                    units=lstm_size,
                    dropout=dropout,
                    recurrent_dropout=recurrent_dropout,
                    unit_forget_bias=forget_bias,
                    kernel_regularizer=l2(use_L2))(prev_lstm_layer)
            else:
                print("L2 no_dropout CuDNNLSTM", lstm_size, forget_bias)
                hidden_layer = CuDNNLSTM(
                    units=lstm_size,
                    unit_forget_bias=forget_bias,
                    kernel_regularizer=l2(use_L2))(prev_lstm_layer)

        if extra_output_layer:
            ex_output = Dense(lstm_size, activation="linear")(hidden_layer)
            hidden_layer = ex_output

        print("L3 output layer", output_size, output_activation)
        output_layer = Dense(output_size,
                             activation=output_activation)(hidden_layer)

        if iLSTM:

            if extra_output_layer:
                model = Model(sequence_input, [output_layer, ex_output])
            else:
                model = Model(sequence_input, [output_layer, h_l2])
            model.compile(
                loss=[loss, iLSTM_loss],
                optimizer=optimizer,
                metrics=[iLSTM_metric],
                loss_weights=[1.0 * scale_amount_2, 1.0 * scale_amount])
            print('Train...')
            model.fit(x_train, [y_train, y_cell_train],
                      batch_size=batch_size,
                      epochs=epochs,
                      validation_data=(x_test, [y_test, y_cell_test]),
                      callbacks=[tensorboard])

        else:
            model = Model(sequence_input, output_layer)
            model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

            print('Train...')
            model.fit(x_train,
                      y_train,
                      batch_size=batch_size,
                      epochs=epochs,
                      validation_data=(x_test, y_test),
                      callbacks=[tensorboard])

    elif prev_model is not None:
        print("Two step")
        model = prev_model
        model.compile(loss=[loss, iLSTM_loss],
                      optimizer=optimizer,
                      metrics=[iLSTM_metric],
                      loss_weights=[1.0 * two_step[1], 1.0 * two_step[0]])
        print('Train...')
        model.fit(x_train, [y_train, y_cell_train],
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_data=(x_test, [y_test, y_cell_test]),
                  callbacks=[tensorboard])
    elif import_model is not None:
        print("Loading model...")
        model = load_model(data_path + "model/" + import_model)
    elif rewrite_scores is True or os.path.exists(score_fn) is False:
        model = load_model(model_fn)
    else:
        model = None
    return model
Example no. 16
               name='3_conv_layer'))
    model.add(ELU())

    model.add(
        Conv2D(num_filters,
               kernel_size=size_of_kernel,
               strides=kernel_strides,
               kernel_initializer='glorot_normal',
               name='4_conv_layer'))
    model.add(ELU())

    model.add(Reshape((8, num_filters * num_sensors)))

    model.add(
        CuDNNLSTM(lstm_output,
                  kernel_initializer='glorot_normal',
                  return_sequences=True,
                  name='1_lstm_layer'))

    model.add(Dropout(dropout_prob, name='1_dropout_layer'))

    model.add(
        CuDNNLSTM(lstm_output,
                  kernel_initializer='glorot_normal',
                  return_sequences=False,
                  name='2_lstm_layer'))

    model.add(Dropout(dropout_prob, name='2_dropout_layer'))

    model.add(
        Dense(512,
              kernel_initializer='glorot_normal'))
valid = values[n_train_hours:8000, :]
test = values[8000:10000, :]
n_obs = n_hours * n_features
train_X, train_y = train[:, :n_obs], train[:, -n_features]
valid_X, valid_y = valid[:, :n_obs], valid[:, -n_features]
test_X, test_y = test[:, :n_obs], test[:, -n_features]
print(train_X.shape, len(train_X), train_y.shape)
train_X = train_X.reshape((train_X.shape[0], n_hours, n_features))
valid_X = valid_X.reshape((valid_X.shape[0], n_hours, n_features))
test_X = test_X.reshape((test_X.shape[0], n_hours, n_features))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

#---------------------------------------------------------------------------------------------------------------------------
# design network
model3 = Sequential()
model3.add(CuDNNLSTM(256, input_shape=(train_X.shape[1], train_X.shape[2])))
model3.add(Dense(1))

model3.compile(loss='mae', optimizer='Adam')

# fit network

history3 = model3.fit(train_X,
                      train_y,
                      epochs=Epoch,
                      batch_size=72,
                      validation_data=(test_X, test_y),
                      verbose=True,
                      shuffle=False)

'----------------------------'
Example no. 18
    y = np.array([
        lb[i * stride + input_dim // 2 - output_dim // 2:i * stride +
           input_dim // 2 - output_dim // 2 + output_dim]
        for i in range((len(lb) - input_dim) // stride)
    ])
    y = to_categorical(y, num_classes=2)
    y_train.append(y)

timestep = len(x_train[0])
x_train = np.array(x_train)
y_train = np.array(y_train)
'''build neural'''
input = Input(shape=(timestep, input_dim))
classifier = input
'''bidirectional LSTM'''
o1 = Bidirectional(CuDNNLSTM(filter_size, return_sequences=True))(classifier)
o1 = Dropout(0.2)(o1)
o1 = BatchNormalization()(o1)

o2 = Bidirectional(CuDNNLSTM(filter_size, return_sequences=True))(o1)
o2 = Add()([o1, o2])
o2 = Dropout(0.2)(o2)
o2 = BatchNormalization()(o2)

o3 = Bidirectional(CuDNNLSTM(filter_size, return_sequences=True))(o2)
o3 = Add()([o1, o2, o3])
o3 = Dropout(0.2)(o3)
o3 = BatchNormalization()(o3)
'''attention model'''
oa = TimeDistributed(Dense(filter_size * 2, activation='softmax'))(o3)
o3 = Multiply()([o3, oa])
# yxtay way - https://github.com/yxtay/char-rnn-text-generation

# model.add(Embedding(len(chars), 32, batch_input_shape=(128, maxlen)))
# model.add(Dropout(DROPOUT_VAL))
# model.add(LSTM(LSTM_DIM, return_sequences=False, stateful=True))

# 1 layer only

# model.add(LSTM(LSTM_DIM, input_shape=(maxlen, len(chars)), return_sequences=False))

# 3-layers LSTM

model.add(
    CuDNNLSTM(LSTM_DIM,
              input_shape=(maxlen, len(chars)),
              return_sequences=True))
model.add(Dropout(DROPOUT_VAL))
model.add(CuDNNLSTM(LSTM_DIM, return_sequences=True))
model.add(Dropout(DROPOUT_VAL))
model.add(CuDNNLSTM(LSTM_DIM))
model.add(Dropout(DROPOUT_VAL))

# model.add(Dense(LSTM_DIM))        # extra Dense
# model.add(Dropout(DROPOUT_VAL))   # extra Dense

model.add(Dense(len(chars)))
model.add(Activation("softmax"))

optimizer = Adam(lr=0.001, clipnorm=5.0, clipvalue=0.5)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
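The three-layer character model above assumes that model, chars, maxlen, LSTM_DIM, and DROPOUT_VAL were set up earlier in the script; a minimal sketch of that setup with placeholder values:

from keras.models import Sequential
from keras.layers import CuDNNLSTM, Dense, Dropout, Activation
from keras.optimizers import Adam

chars = sorted(set("abcdefghijklmnopqrstuvwxyz .,!?\n"))  # placeholder character vocabulary
maxlen = 40       # placeholder sequence length
LSTM_DIM = 512    # placeholder hidden size
DROPOUT_VAL = 0.2 # placeholder dropout rate

model = Sequential()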
Example no. 20
def get_layer(inp_a, inp_b, tk):
    def load_embedding(toka, max_features):
        def get_coefs(token, *arr):
            return token, np.asarray(arr, dtype='float32')

        embedding_index = dict(
            get_coefs(*o.strip().split(" "))
            for o in open(embedding_path, encoding="utf-8"))

        word_index = toka.word_index
        nub_words = min(max_features, len(word_index))
        embedding_matrix_ = np.zeros((nub_words + 1, embed_size))
        for word, i in word_index.items():
            if i >= max_features:
                continue
            embedding_vector = embedding_index.get(word)
            if embedding_vector is not None:
                embedding_matrix_[i] = embedding_vector
        return embedding_matrix_, nub_words

    def get_pooling(x):
        avg_pool_x = GlobalAveragePooling1D()(x)
        max_pool_x = GlobalMaxPooling1D()(x)
        return avg_pool_x, max_pool_x

    embedding_matrix, nb_words = load_embedding(tk, 100000)

    embed_layer_a = Embedding(nb_words + 1,
                              embed_size,
                              weights=[embedding_matrix],
                              trainable=False)
    embed_layer_b = Embedding(nb_words + 1,
                              embed_size,
                              weights=[embedding_matrix],
                              trainable=False)

    x_a = embed_layer_a(inp_a)
    x_b = embed_layer_b(inp_b)

    x_a = SpatialDropout1D(0.3)(x_a)
    x_b = SpatialDropout1D(0.3)(x_b)

    xc_a = Bidirectional(CuDNNLSTM(32, return_sequences=True))(x_a)
    xc_b = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x_b)

    xc_a_cons = Bidirectional(CuDNNLSTM(32, return_sequences=True))(x_a)
    xc_b_cons = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x_b)
    avg_pool_ac3, max_pool_ac3 = get_pooling(xc_a_cons)
    avg_pool_bc3, max_pool_bc3 = get_pooling(xc_b_cons)

    x_ac = concatenate([avg_pool_ac3, max_pool_ac3])
    x_ac = BatchNormalization()(x_ac)
    x_ac = Dropout(0.3)(Dense(32, activation='relu')(x_ac))

    x_bc = concatenate([avg_pool_bc3, max_pool_bc3])
    x_bc = BatchNormalization()(x_bc)
    x_bc = Dropout(0.3)(Dense(32, activation='relu')(x_bc))
    xm = Multiply()([x_ac, x_bc])
    xm = BatchNormalization()(xm)
    xm = Dropout(0.3)(Dense(32, activation='relu')(xm))

    x_a_c_3 = Conv1D(32,
                     kernel_size=3,
                     padding='valid',
                     kernel_initializer='he_uniform')(xc_a)
    x_b_c_3 = Conv1D(64,
                     kernel_size=3,
                     padding='valid',
                     kernel_initializer='he_uniform')(xc_b)

    avg_pool_a3, max_pool_a3 = get_pooling(x_a_c_3)
    avg_pool_b3, max_pool_b3 = get_pooling(x_b_c_3)

    x_a = concatenate([avg_pool_a3, max_pool_a3])
    x_a = BatchNormalization()(x_a)
    x_a = Dropout(0.3)(Dense(32, activation='relu')(x_a))

    x_b = concatenate([avg_pool_b3, max_pool_b3])
    x_b = BatchNormalization()(x_b)
    x_b = Dropout(0.3)(Dense(32, activation='relu')(x_b))

    # xm = Multiply()([x_a, x_b])
    # xm = BatchNormalization()(xm)
    # xm = Dropout(0.3)(Dense(32, activation='relu')(xm))

    # d1 = Dot(1)([x_a, x_e])
    # d2 = Dot(1)([x_a, x_e2])

    x = concatenate([x_a, x_b, xm])
    x = BatchNormalization()(x)
    x = Dropout(0.3)(Dense(32, activation='relu')(x))

    out = Dense(2, activation="sigmoid")(x)
    return out
fcst_test_X = fcst_input.reshape((fcst_input.shape[0], 1, fcst_input.shape[1]))
print("forecast data:", fcst_test_X.shape, fcst_labels.shape)

# set random seeds for model reproducibility as suggested in:
# https://keras.io/getting-started/faq/#how-can-i-obtain-reproducible-results-using-keras-during-development
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(42)
rn.seed(12345)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
tf.set_random_seed(1234)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

# define model
model = Sequential()
model.add(CuDNNLSTM(units=n_neurons, unit_forget_bias=True, bias_regularizer=L1L2(l1=0.01, l2=0.01)))
# model.add(LSTM(units=n_neurons, activation='tanh', input_shape=(None, train_X.shape[2]), use_bias=True,
#                bias_regularizer=L1L2(l1=0.01, l2=0.01)))  # This is hidden layer
model.add(Dropout(.355))
model.add(Dense(activation='linear', units=n_ahead-1, use_bias=True))  # this is output layer
adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(loss=rmse, optimizer=adam)
earlystop = keras.callbacks.EarlyStopping(monitor='loss', min_delta=0.00000001, patience=5, verbose=1, mode='auto')
history = model.fit(train_X, train_y, batch_size=n_batch, epochs=n_epochs, verbose=2, shuffle=False,
                    callbacks=[earlystop])

# plot model history
# plt.plot(history.history['loss'], label='train')
# plt.xlabel("Epochs")
# plt.ylabel("Loss")
# plt.tight_layout()
from keras.layers import GlobalMaxPool1D, SpatialDropout1D, MaxPooling1D, Flatten
from keras.layers import Bidirectional
from keras.models import Model

early_stop = EarlyStopping(monitor="val_loss",
                           mode="min",
                           patience=3,
                           verbose=1)

print("Building layers")
nb_epoch = 25
print('starting to stitch and compile  model')
# Embedding layer for text inputs
input_words = Input((max_len, ))
x_words = Embedding(vocab_size, 300, input_length=max_len)(input_words)
x_words = CuDNNLSTM(256, return_sequences=True)(x_words)
x_words = Dropout(0.20)(x_words)
x_words = Conv1D(128, 3, strides=1, activation='relu')(x_words)
x_words = GlobalMaxPool1D()(x_words)
x_words = Dropout(0.2)(x_words)

x = Dense(64, activation="relu")(x_words)
# x = Dropout(0.2)(x)
predictions = Dense(20, activation="softmax")(x)

model = Model(inputs=[input_words], outputs=predictions)
model.compile(optimizer='nadam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
print(model.summary())
Example no. 23
def build_model(lr=0.0, lr_d=0.0):
    inp_a = Input(shape=(max_len_a,))
    inp_b = Input(shape=(max_len_b,))
    x_a = Embedding(nb_words + 1, embed_size, weights=[embedding_matrix], trainable=False)(inp_a)
    x_b = Embedding(nb_words + 1, embed_size, weights=[embedding_matrix], trainable=False)(inp_b)
    x_a = SpatialDropout1D(0.3)(x_a)
    x_b = SpatialDropout1D(0.3)(x_b)

    xc_a = Bidirectional(CuDNNLSTM(64, return_sequences=True))(x_a)
    xc_b = Bidirectional(CuDNNLSTM(512, return_sequences=True))(x_b)

    xc_a_3 = Conv1D(16, kernel_size=3, padding='valid', kernel_initializer='he_uniform')(xc_a)
    xc_a_2 = Conv1D(16, kernel_size=2, padding='valid', kernel_initializer='he_uniform')(xc_a)

    xc_b_3 = Conv1D(64, kernel_size=3, padding='valid', kernel_initializer='he_uniform')(xc_b)
    xc_b_2 = Conv1D(64, kernel_size=2, padding='valid', kernel_initializer='he_uniform')(xc_b)

    avg_pool_a3 = GlobalAveragePooling1D()(xc_a_3)
    max_pool_a3 = GlobalMaxPooling1D()(xc_a_3)
    avg_pool_a2 = GlobalAveragePooling1D()(xc_a_2)
    max_pool_a2 = GlobalMaxPooling1D()(xc_a_2)

    avg_pool_b3 = GlobalAveragePooling1D()(xc_b_3)
    max_pool_b3 = GlobalMaxPooling1D()(xc_b_3)
    avg_pool_b2 = GlobalAveragePooling1D()(xc_b_2)
    max_pool_b2 = GlobalMaxPooling1D()(xc_b_2)

    x_a = concatenate([avg_pool_a3, max_pool_a3, avg_pool_a2, max_pool_a2])
    x_a = BatchNormalization()(x_a)
    x_a = Dropout(0.3)(Dense(32, activation='relu')(x_a))

    x_b = concatenate([avg_pool_b3, max_pool_b3, avg_pool_b2, max_pool_b2])
    x_b = BatchNormalization()(x_b)
    x_b = Dropout(0.1)(Dense(64, activation='relu')(x_b))

    x = concatenate([x_a, x_b])
    x = BatchNormalization()(x)
    x = Dropout(0.2)(Dense(64, activation='relu')(x))

    x = Dense(2, activation="sigmoid")(x)

    """:fine-tune"""
    model = Model(inputs=[inp_a, inp_b], outputs=x)
    model.trainable = True
    for layer in model.layers[:1]:
        layer.trainable = False
    model.summary()

    """:train"""
    model.compile(loss="binary_crossentropy", optimizer=Adam(lr=lr, decay=lr_d), metrics=["accuracy"])
    # model.fit_generator
    model.fit([X_train_a, X_train_b], y_ohe,
              batch_size=24,
              epochs=20,
              validation_split=0.3,
              verbose=1,
              class_weight='auto',
              callbacks=[check_point, early_stop, tb_cb])
    K.clear_session()
    tf.reset_default_graph()

    model = load_model(model_path)
    return model
Example no. 24
def build_LSTM_model(trainData, trainBatches, testData, testBatches, windowSize, class_count, numCalls, batch_size):
    # Specify number of units
    # https://stackoverflow.com/questions/37901047/what-is-num-units-in-tensorflow-basiclstmcell#39440218
    num_units = 128

    embedding_size = 256

    # set time steps to be 1, instead of windowSize
    time_step = 1

    # https://keras.io/callbacks/#earlystopping
    early_stop = cb.EarlyStopping(monitor='sparse_categorical_accuracy', min_delta = 0.0001, patience = 3)

    # reshape train data and test data
    # reshape dataset into the api of the previous [look_back] api's and Y is the label of this api
    api_count = numCalls + 1  # +1 because 0 is our padding number
    look_back = api_count
    trainX, trainY = create_dataset(trainData, look_back)
    testX, testY = create_dataset(testData, look_back)

    # reshape input to be [samples, time steps, features]
    
    trainX = numpy.reshape(trainX, (trainX.shape[0], time_step, trainX.shape[1]))
    testX = numpy.reshape(testX, (testX.shape[0], time_step, testX.shape[1]))

    model = Sequential()

    # We need to add an embedding layer because LSTM (at this moment) that the API call indices (numbers)
    # are of some mathematical significance. E.g., system call 2 is "closer" to system calls 3 and 4.
    # But system call numbers have nothing to do with their semantic meaning and relation to other
    # system calls. So we transform it using an embedding layer so the LSTM can figure these relationships
    # out for itself.
    # https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html

    # https://stackoverflow.com/questions/40695452/stateful-lstm-with-embedding-layer-shapes-dont-match
    # input one api at a time
    model.add(Embedding(input_dim=api_count, output_dim=embedding_size, input_length=time_step))

    # https://keras.io/layers/recurrent/#lstm
#   model.add(LSTM(num_units,input_shape=(windowSize, api_count),return_sequences=False))
    #TODO - GPU stuffs
    # input one api at a time, and look at previous [look_back] api's
    model.add(CuDNNLSTM(num_units,input_shape=(time_step, look_back),return_sequences=False))

    # NOTE:  If I want to add more layers
    # https://stackoverflow.com/questions/40331510/how-to-stack-multiple-lstm-in-keras

    # https://keras.io/layers/core/#dense
    model.add(Dense(128))
    # https://keras.io/activations/
    model.add(Activation('relu'))

    # https://keras.io/layers/core/#dropout
    model.add(Dropout(0.5))

    model.add(Dense(class_count, name='logits'))
    model.add(Activation('softmax'))

    # Which optimizer to use
    # https://keras.io/optimizers/
    opt = optimizers.RMSprop(lr=0.01,decay=0.001)

    # https://keras.io/models/model/#compile
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=opt,
        # Metrics to print
        # We use sparse_categorical_accuracy as opposed to categorical_accuracy
        # because: https://stackoverflow.com/questions/44477489/keras-difference-between-categorical-accuracy-and-sparse-categorical-accuracy
        # I.e., since we don't use hot-encoding, we use sparse_categorical_accuracy
        metrics=['sparse_categorical_accuracy'])

    # https://keras.io/models/model/#fit_generator
    hist = model.fit_generator(
        # Data to train
        (trainX, trainY), 
        # Use multiprocessing because python Threading isn't really
        # threading: https://docs.python.org/3/glossary.html#term-global-interpreter-lock
        use_multiprocessing = True,
        # Number of steps per epoch (this is how we train our large
        # number of samples dataset without running out of memory)
        steps_per_epoch = trainBatches,
        #TODO
        # Number of epochs
        epochs = 100,
        # Validation data (will not be trained on)
        validation_data = (testX, testY),
        validation_steps = testBatches,
        # Do not shuffle batches.
        shuffle = False,
        # List of callbacks to be called while training.
        callbacks = [early_stop])

    return model, hist
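Note that fit_generator expects a generator or Sequence, so passing the (trainX, trainY) tuple directly, as above, fails on most Keras versions. A minimal wrapper that could be passed in its place, assuming trainX and trainY are NumPy arrays:

def array_batch_gen(X, y, batch_size=64):
    # Hypothetical helper: turns in-memory arrays into an endless batch generator
    # suitable for model.fit_generator.
    n = len(X)
    while True:
        for start in range(0, n, batch_size):
            yield X[start:start + batch_size], y[start:start + batch_size]

# e.g. model.fit_generator(array_batch_gen(trainX, trainY), steps_per_epoch=trainBatches, ...)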
Example no. 25
	print(coord)
sys.exit(1)
'''

model = Sequential()

# model.add(BatchNormalization())

# model.add(Dense(outputs, input_shape=(time_window + 1, feature_count)))

# model.add(LSTM(units=256, input_shape=(time_window + 1, feature_count), return_sequences=True))
model.add(BatchNormalization(input_shape=(time_window + 1, feature_count)))
model.add(Dropout(0.2))
model.add(
    CuDNNLSTM(units=256,
              input_shape=(time_window + 1, feature_count),
              return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(outputs))
# model.add(CuDNNLSTM(units=outputs, return_sequences=False))
# model.add(CuDNNGRU(units=outputs, input_shape=(time_window + 1, feature_count), return_sequences=False))
# model.add(LSTM(units=outputs))

# model.add(Dense(outputs))
# model.add(Activation('softmax'))
# opt = RMSprop(0.001)
opt = SGD()
model.compile(loss='mean_squared_error', optimizer=opt, metrics=['accuracy'])

date = str(datetime.datetime.now().isoformat())
Example no. 26
def get_model(maxlen, max_features, embed_size, embedding_matrix, n_classes):
    sequence_input = Input(shape=(maxlen, ))

    # fast_embedding = tf.keras.layers.Embedding(max_features, embed_size,
    #                                           embeddings_initializer=tf.keras.initializers.Constant(fast_embedding_matrix),
    #                                           trainable=False)
    # glove_embedding = tf.keras.layers.Embedding(max_features,
    #                                             embed_size,
    #                                             embeddings_initializer=tf.keras.initializers.Constant(glove_embedding_matrix),
    #                                             trainable=False)
    #
    # embedding_model = tf.keras.Sequential([tf.keras.layers.Input(shape=(maxlen,), dtype='int32'),
    #                              DynamicMetaEmbedding([fast_embedding, glove_embedding])])

    embedding = Embedding(max_features,
                          embed_size,
                          weights=[embedding_matrix],
                          trainable=False)(sequence_input)

    # x = DynamicMetaEmbedding([fast_embedding, glove_embedding])()

    x = SpatialDropout1D(0.3)(embedding)
    x1 = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x)
    x2 = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x1)
    x3 = Conv1D(64,
                kernel_size=2,
                padding="valid",
                kernel_initializer="he_uniform")(x2)
    max_pool1 = GlobalMaxPooling1D()(x1)
    max_pool2 = GlobalMaxPooling1D()(x2)
    max_pool3 = GlobalMaxPooling1D()(x3)
    x = concatenate([max_pool1, max_pool2, max_pool3])

    # x1 = SpatialDropout1D(0.2)(x)
    #
    # x = Bidirectional(CuDNNGRU(256, return_sequences=True))(embedding)
    #
    # x = Conv1D(64, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(x)
    #
    # x = Bidirectional(CuDNNGRU(128, return_sequences=True))(x)
    #
    # x = Conv1D(64, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(x)
    #
    # y = Bidirectional(CuDNNLSTM(256, return_sequences=True))(embedding)
    #
    # y = Conv1D(64, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(y)
    #
    # y = Bidirectional(CuDNNLSTM(128, return_sequences=True))(y)
    #
    # y = Conv1D(64, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(y)
    #
    # avg_pool1 = GlobalAveragePooling1D()(x)
    #
    # max_pool1 = GlobalMaxPooling1D()(x)
    #
    # avg_pool2 = GlobalAveragePooling1D()(y)
    #
    # max_pool2 = GlobalMaxPooling1D()(y)
    #
    # x = concatenate([avg_pool1, max_pool1, avg_pool2, max_pool2])

    preds = Dense(n_classes, activation="softmax")(x)
    model = Model(sequence_input, preds)

    return model
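
A minimal usage sketch for get_model above; the sizes and the random embedding_matrix are illustrative stand-ins, not values from the original code.

import numpy as np

maxlen, max_features, embed_size, n_classes = 100, 20000, 300, 5  # illustrative
embedding_matrix = np.random.normal(size=(max_features, embed_size))

model = get_model(maxlen, max_features, embed_size, embedding_matrix, n_classes)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# X: padded token ids, shape (samples, maxlen); y: one-hot labels, shape (samples, n_classes)
# model.fit(X, y, batch_size=128, epochs=3, validation_split=0.1)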
Example n. 27
0
Y_train = training_dataframe['target'].values
Y_test = test_labels_dataframe['target'].values

max_length = 50
tokenizer = Tokenizer()
tokenizer.fit_on_texts(training_dataframe.tweet.values)
train_tweet_seq = tokenizer.texts_to_sequences(training_dataframe.tweet.values)
train_tweet_seq_padded = pad_sequences(train_tweet_seq, maxlen=max_length)
test_tweet_seq = tokenizer.texts_to_sequences(test_dataframe.tweet.values)
test_tweet_seq_padded = pad_sequences(test_tweet_seq, maxlen=max_length)

vocab_size = len(tokenizer.word_index) + 1

inputs = Input(shape=(max_length, ))
embedding_layer = Embedding(vocab_size, 20, input_length=max_length)(inputs)
x = CuDNNLSTM(64)(embedding_layer)
x = Dense(64, activation='relu')(x)
x = Dropout(0.8)(x)
num_classes = int(Y_train.max()) + 1  # matches the width of to_categorical(Y_train); not defined earlier in this snippet
predictions = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=[inputs], outputs=predictions)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['acc'])
epochs = 3

model_history = model.fit([train_tweet_seq_padded],
                          batch_size=128,
                          y=to_categorical(Y_train),
                          verbose=1,
                          epochs=epochs)
predicted = model.predict(test_tweet_seq_padded)
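
The softmax output can be reduced to hard class labels; a small follow-up sketch using the variables above, assuming Y_test holds the integer class ids aligned with the test tweets.

import numpy as np

# Each row of `predicted` is a probability distribution over the classes.
predicted_labels = np.argmax(predicted, axis=-1)
test_accuracy = np.mean(predicted_labels == Y_test)
print('test accuracy: %.4f' % test_accuracy)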
Example n. 28
0
def get_three_entrys_model(maxlen, max_features, embed_size, embedding_matrix,
                           n_classes):
    sequence_input = Input(shape=(maxlen, ))
    # small_sequence_input = Input(shape=(6,))
    features_input = Input(shape=(20, ))
    # hash_input = Input(shape=(max_features,))

    embedding_1 = Embedding(max_features,
                            embed_size,
                            weights=[embedding_matrix],
                            trainable=True,
                            name='embedding_layer')(sequence_input)

    x = SpatialDropout1D(0.3)(embedding_1)
    x1 = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x)
    x2 = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x1)
    x3 = Conv1D(64,
                kernel_size=2,
                padding="valid",
                kernel_initializer="he_uniform")(x2)
    x4 = Conv1D(64,
                kernel_size=2,
                padding="valid",
                kernel_initializer="he_uniform")(x1)

    max_pool1 = GlobalMaxPooling1D()(x1)
    max_pool2 = GlobalMaxPooling1D()(x2)
    max_pool3 = GlobalMaxPooling1D()(x3)
    max_pool4 = GlobalMaxPooling1D()(x4)

    # x1 = SpatialDropout1D(0.3)(embedding_1)
    #
    # x = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x1)
    #
    # x = Bidirectional(CuDNNLSTM(64, return_sequences=True))(x)
    #
    # x = AttentionWithContext()(x)
    # dense_attention = Dense(64, activation="relu")(x)

    # average_pool_attention = GlobalAveragePooling1D()(x)

    # x1 = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x)
    # x = Conv1D(64, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(x1)
    # max_pool1 = GlobalMaxPooling1D()(x)

    # embedding_2 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=False,
    #                         name='small_embedding_layer')(small_sequence_input)
    #
    x = SpatialDropout1D(0.3)(embedding_1)

    # x1 = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x)
    # x = Conv1D(64, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(x1)
    # max_pool2 = GlobalMaxPooling1D()(x)

    x1 = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x)
    x2 = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x1)
    x3 = Conv1D(64,
                kernel_size=2,
                padding="valid",
                kernel_initializer="he_uniform")(x2)

    avg_pool4 = GlobalAveragePooling1D()(x1)
    avg_pool5 = GlobalAveragePooling1D()(x2)
    max_pool6 = GlobalMaxPooling1D()(x2)
    max_pool7 = GlobalMaxPooling1D()(x3)

    x_concat = concatenate([
        max_pool1, max_pool2, max_pool3, max_pool4, avg_pool4, avg_pool5,
        max_pool6, max_pool7
    ])
    dense_1 = Dense(768, activation='relu')(x_concat)
    dense_2 = Dense(768, activation='relu')(x_concat)

    x_concat_2 = concatenate([x_concat, dense_1, dense_2])

    features_dense = Dense(768, activation="relu")(features_input)

    # hash_dense = Dense(512,activation='relu')(hash_input)

    x = concatenate([x_concat_2, features_dense])

    # x = concatenate([max_pool1, max_pool2,features_dense])
    # x = Dense(128, activation='relu')(concat)
    # x = Dropout(0.1)(x)
    # x = BatchNormalization()(x)
    #
    # x = concatenate([concat, x])

    preds = Dense(n_classes, activation="softmax")(x)
    model = Model(inputs=[sequence_input, features_input], outputs=preds)

    return model
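
Despite its name, get_three_entrys_model wires up two inputs (the padded token sequence and a 20-dimensional feature vector). A hedged compile-and-fit sketch follows, with illustrative sizes and a random embedding matrix standing in for pre-trained vectors.

import numpy as np

maxlen, max_features, embed_size, n_classes = 100, 20000, 300, 3  # illustrative
embedding_matrix = np.random.normal(size=(max_features, embed_size))

model = get_three_entrys_model(maxlen, max_features, embed_size,
                               embedding_matrix, n_classes)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# X_seq: (samples, maxlen) token ids; X_feat: (samples, 20) extra features;
# y: one-hot labels of shape (samples, n_classes)
# model.fit([X_seq, X_feat], y, batch_size=64, epochs=5)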
Example n. 29
0
def simple_LSTM_model(look_back):
	model = Sequential()
	model.add(CuDNNLSTM(64, input_shape=(look_back,1)))
	model.add(Dense(1,activation='sigmoid'))

	return model
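
The model above expects inputs of shape (samples, look_back, 1); here is a small sketch (the helper name make_windows is illustrative) of slicing a 1-D series into such sliding windows.

import numpy as np

def make_windows(series, look_back):
    # Build (samples, look_back, 1) inputs; the value right after each
    # window is used as its target.
    X, y = [], []
    for i in range(len(series) - look_back):
        X.append(series[i:i + look_back])
        y.append(series[i + look_back])
    return np.array(X)[..., np.newaxis], np.array(y)

# Hypothetical usage (binary_crossentropy assumes 0/1 targets, matching the sigmoid output):
# model = simple_LSTM_model(look_back=10)
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# X, y = make_windows(series, look_back=10)
# model.fit(X, y, epochs=20, batch_size=32)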
Example n. 30
0
    def get_model_defination(self, dataset, embeddings):
        try:

            # Build the model
            print('Building the model...')
            lstm_dim = 64

            ### token embedding layer
            # unused, left for compatibility
            char_input = Input(
                shape=[dataset.abs_len, dataset.maxlen, dataset.maxlen_word],
                dtype='int32',
                name='char_input')
            if self.embedding:
                main_input = Input(shape=[dataset.abs_len, dataset.maxlen],
                                   dtype='int32',
                                   name='input')  # (None, 35, 180)
                main_input_r = Lambda(lambda x: K.reshape(
                    x, shape=(-1, dataset.maxlen)))(main_input)
                embeds, _, _ = embeddings.init_weights(dataset.idx2word)
                embed = Embedding(input_dim=dataset.vocsize,
                                  output_dim=embeddings.embed_dim,
                                  input_length=dataset.maxlen,
                                  weights=[embeds],
                                  mask_zero=False,
                                  name='embedding',
                                  trainable=True)(main_input_r)
                token_embedding_layer = embed
            else:
                main_input = Input(shape=[dataset.abs_len, dataset.maxlen],
                                   dtype='float32',
                                   name='input')  # (None, 35, 180)
                main_input_r = Lambda(lambda x: K.reshape(
                    x, shape=(-1, dataset.maxlen, 1)))(main_input)
                token_embedding_layer = main_input_r

            ### sentence encoding layer
            if self.encoding:
                blstm_layer = Bidirectional(
                    CuDNNLSTM(lstm_dim,
                              return_sequences=True))(token_embedding_layer)
                attention_layer = SeqSelfAttention(
                    attention_activation='sigmoid')(blstm_layer)
                sentence_encoding_layer = attention_layer
            else:
                sentence_encoding_layer = token_embedding_layer

            ### context enriching layer
            if self.enriching:
                biLSTM = Bidirectional(
                    CuDNNLSTM(lstm_dim,
                              return_sequences=False))(sentence_encoding_layer)
                biLSTM = Dropout(0.5)(biLSTM)

                biLSTM_r = Lambda(lambda x: K.reshape(
                    x, shape=(-1, dataset.abs_len, 2 * lstm_dim)))(biLSTM)

                norm = BatchNormalization()(biLSTM_r)
                abstract_processing_layer = Dense(dataset.nclasses,
                                                  name='feed_forword')(norm)
            else:
                abs_layer_in = Lambda(lambda x: K.reshape(
                    x,
                    shape=(-1, dataset.abs_len, 2 * lstm_dim * dataset.maxlen))
                                      )(sentence_encoding_layer)
                feedforward = Dense(dataset.maxlen,
                                    name='feed_forword_1')(abs_layer_in)
                norm = BatchNormalization()(feedforward)
                abstract_processing_layer = Dense(dataset.nclasses,
                                                  name='feed_forword')(norm)

            ### label sequence optimization layer
            if self.optimazion:
                final_output = CRF(
                    dataset.nclasses,
                    learn_mode='marginal',
                    sparse_target=True)(abstract_processing_layer)
            else:
                final_output = Activation('softmax')(
                    abstract_processing_layer)  # (None, 35, 4)

            model = Model(inputs=[main_input, char_input],
                          outputs=final_output,
                          name='output')
            model.compile(optimizer='adam',
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])

            # model.summary()
            # plot_model(model, to_file='model.png', show_shapes=True)

        except Exception:

            # model.summary()
            # plot_model(model, to_file='model.png', show_shapes=True)

            # NOTE: if the exception fired before `model` was assigned,
            # the `return model` below will raise a NameError.
            traceback.print_exc()
            pdb.set_trace()

        return model