def get_model_defination(self, dataset, embeddings):
    # Build the model
    print('Building the model...')
    main_input = Input(shape=[dataset.abs_len, dataset.maxlen],
                       dtype='int32', name='input')  # (None, 36)
    char_input = Input(shape=[dataset.abs_len, dataset.maxlen, dataset.maxlen_word],
                       dtype='int32', name='char_input')  # (None, 36, 25)

    # Flatten the abstract dimension so each sentence is embedded independently.
    main_input_r = Lambda(
        lambda x: K.reshape(x, shape=(-1, dataset.maxlen)))(main_input)
    char_input_r = Lambda(
        lambda x: K.reshape(x, shape=(-1, dataset.maxlen, dataset.maxlen_word)))(char_input)

    embeds, _, _ = embeddings.init_weights(dataset.idx2word)
    embed = Embedding(input_dim=dataset.vocsize,
                      output_dim=embeddings.embed_dim,
                      input_length=dataset.maxlen,
                      weights=[embeds],
                      mask_zero=False,
                      name='embedding',
                      trainable=True)(main_input_r)
    embed = Lambda(lambda x: K.reshape(
        x, shape=[-1, dataset.abs_len, dataset.maxlen, embeddings.embed_dim]))(embed)
    # embed = Dropout(0.5, name='embed_dropout')(embed)

    char_embed = Embedding(input_dim=dataset.charsize,
                           output_dim=embeddings.char_embed_dim,
                           embeddings_initializer='lecun_uniform',
                           input_length=[dataset.maxlen, dataset.maxlen_word],
                           mask_zero=False,
                           name='char_embedding')(char_input_r)
    char_embed_shape = char_embed.shape
    char_embed = Lambda(lambda x: K.reshape(
        x, shape=(-1, dataset.maxlen_word, embeddings.char_embed_dim)))(char_embed)

    # Character-level BiLSTM: the final state is each word's character encoding.
    biLSTM_char_embed = Bidirectional(
        CuDNNLSTM(embeddings.char_embed_dim, return_sequences=False))(char_embed)
    # fwd_state = GRU(150, return_state=True)(char_embed)[-2]
    # bwd_state = GRU(150, return_state=True, go_backwards=True)(char_embed)[-2]
    # biLSTM_char_embed = Concatenate(axis=-1)([fwd_state, bwd_state])
    char_embed = Lambda(lambda x: K.reshape(
        x, shape=[-1, dataset.abs_len, char_embed_shape[1],
                  2 * embeddings.char_embed_dim]))(biLSTM_char_embed)
    # char_embed = Dropout(0.5, name='char_embed_dropout')(char_embed)

    # Concatenate word-level and character-level embeddings.
    combined_embed = Concatenate(name='Sum')([embed, char_embed])
    combined_embed = Lambda(lambda x: K.reshape(
        x, shape=(-1, dataset.maxlen,
                  2 * embeddings.char_embed_dim + embeddings.embed_dim)))(combined_embed)

    # biLSTM_embed = Bidirectional(LSTM(64, return_sequences=True))(combined_embed)
    # with_atention = AttentionWithContext()(biLSTM_embed)
    # with_atention = Lambda(lambda x: K.reshape(x, shape=(-1, dataset.abs_len, 2 * 64)))(with_atention)

    biLSTM = Bidirectional(CuDNNLSTM(64, return_sequences=False))(combined_embed)
    biLSTM = Dropout(0.5)(biLSTM)
    biLSTM_r = Lambda(lambda x: K.reshape(
        x, shape=(-1, dataset.abs_len, 2 * 64)))(biLSTM)
    norm = BatchNormalization()(biLSTM_r)
    feedforward = Dense(dataset.nclasses, name='feed_forword')(norm)
    final_output = CRF(dataset.nclasses, learn_mode='marginal',
                       sparse_target=True)(feedforward)  # (None, 36, 5)
    # final_output = Activation('softmax')(feedforward)  # (None, 36, 5)

    model = Model(inputs=[main_input, char_input],
                  outputs=final_output, name='output')
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # model.summary()
    # plot_model(model, to_file='model.png', show_shapes=True)
    return model
# # Training

# In[ ]:

from keras.models import Sequential
from keras.layers import CuDNNLSTM, Dense, Bidirectional

# In[ ]:

model = Sequential()
model.add(Bidirectional(CuDNNLSTM(64, return_sequences=True),
                        input_shape=(30, 300)))
model.add(Bidirectional(CuDNNLSTM(64)))
model.add(Dense(1, activation="sigmoid"))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# In[ ]:

mg = batch_gen(train_df)
model.fit_generator(mg, epochs=20, steps_per_epoch=1000)
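# batch_gen above is not defined in this notebook excerpt. A minimal sketch of
# such a generator, assuming train_df carries a pre-computed (30, 300) feature
# array per row in a 'features' column and a binary 'target' column (both
# column names are assumptions, not from the original):
import numpy as np

def batch_gen(df, batch_size=128):
    while True:
        batch = df.sample(batch_size)
        X = np.stack(batch['features'].values)  # (batch_size, 30, 300)
        y = batch['target'].values              # (batch_size,)
        yield X, y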
print('Shape of embedding matrix:', embedding_matrix.shape)

# ### Initialization

print('Build model...')
model = Sequential()
model.add(Embedding(num_words,
                    embedding_dim,
                    weights=[embedding_matrix],
                    input_length=MAX_SEQUENCE_LENGTH,
                    trainable=False))
# model.add(Dropout(0.2))
model.add(CuDNNLSTM(128, return_sequences=True))
model.add(CuDNNLSTM(128))
model.add(Dense(128, activation='relu'))
model.add(Dense(NLABELS, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
print(model.summary())

# ### Learning

epochs = 20
batch_size = 16
history = model.fit(x_train, y_train,  # the call was cut off here; y_train is the assumed label array
                    epochs=epochs,
                    batch_size=batch_size)
## Pad every sequence to the same length
train_seq_mat = sequence.pad_sequences(train_seq, maxlen=max_len)
val_seq_mat = sequence.pad_sequences(val_seq, maxlen=max_len)
test_seq_mat = sequence.pad_sequences(test_seq, maxlen=max_len)
print("Data converted to sequences")
print(train_seq_mat.shape)
print(val_seq_mat.shape)
print(test_seq_mat.shape)
print(train_seq_mat[:2])

#-------------------------------Step 5: build the BiLSTM model--------------------------
num_labels = 4
model = Sequential()
model.add(Embedding(max_words + 1, 128, input_length=max_len))
model.add(Bidirectional(CuDNNLSTM(128)))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(num_labels, activation='softmax'))
model.summary()
model.compile(loss="categorical_crossentropy",
              optimizer='adam',  # RMSprop()
              metrics=["accuracy"])

#-------------------------------Step 6: train and evaluate the model--------------------------
## Set flag to "train" first, then to "test" for evaluation
flag = "test"
if flag == "train":
    print("Training the model")
    ## Train the model; stop when val_loss no longer improves (min_delta 0.0001)
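    # The "train" branch is truncated above; a minimal sketch of what the
    # comment describes, assuming an EarlyStopping callback on val_loss with
    # min_delta=0.0001 (train_y / val_y label arrays are assumptions from the
    # elided earlier steps):
    from keras.callbacks import EarlyStopping
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.0001)
    model.fit(train_seq_mat, train_y,
              batch_size=128, epochs=10,
              validation_data=(val_seq_mat, val_y),
              callbacks=[early_stopping])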
def get_model(training, img_h, nclass):
    input_shape = (None, img_h, 1)  # (128, 64, 1)
    # input_shape = (280, img_h, 1)

    # Make network
    inputs = Input(name='the_input', shape=input_shape,
                   dtype='float32')  # (None, 128, 64, 1)
    # inner = resnet.ResNet50(include_top=False, weights=None, input_tensor=inputs)
    inner = shufflenet.ShuffleNet_V2(include_top=False, weights=None,
                                     input_tensor=inputs)

    # CNN to RNN
    # inner = Reshape(target_shape=((32, 2048)), name='reshape')(inner)  # (None, 32, 2048)
    inner = TimeDistributed(Flatten(), name='flatten')(inner)
    # inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)  # (None, 32, 64)

    lstm_unit_num = 256

    # RNN layer
    lstm_1 = CuDNNLSTM(lstm_unit_num, return_sequences=True,
                       kernel_initializer='he_normal', name='lstm1')(inner)  # (None, 32, 512)
    lstm_1b = CuDNNLSTM(lstm_unit_num, return_sequences=True, go_backwards=True,
                        kernel_initializer='he_normal', name='lstm1_b')(inner)
    lstm1_merged = add([lstm_1, lstm_1b])  # (None, 32, 512)
    lstm1_merged = BatchNormalization()(lstm1_merged)
    # lstm1_merged = Dropout(0.1)(lstm1_merged)

    lstm_2 = CuDNNLSTM(lstm_unit_num, return_sequences=True,
                       kernel_initializer='he_normal', name='lstm2')(lstm1_merged)
    lstm_2b = CuDNNLSTM(lstm_unit_num, return_sequences=True, go_backwards=True,
                        kernel_initializer='he_normal', name='lstm2_b')(lstm1_merged)
    lstm2_merged = concatenate([lstm_2, lstm_2b])  # (None, 32, 1024)
    lstm_merged = BatchNormalization()(lstm2_merged)
    # lstm_merged = Dropout(0.1)(lstm_merged)

    # Transforms RNN output to character activations:
    inner = Dense(nclass, kernel_initializer='he_normal',
                  name='dense2')(lstm2_merged)  # (None, 32, 63)
    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels', shape=[None], dtype='float32')     # (None, 8)
    input_length = Input(name='input_length', shape=[1], dtype='int64')  # (None, 1)
    label_length = Input(name='label_length', shape=[1], dtype='int64')  # (None, 1)

    # Keras doesn't currently support loss functions with extra parameters,
    # so CTC loss is implemented in a Lambda layer.
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,),
                      name='ctc')([y_pred, labels, input_length, label_length])  # (None, 1)

    if not training:
        model = Model(inputs=inputs, outputs=y_pred)
        return model, model

    model = Model(inputs=[inputs, labels, input_length, label_length],
                  outputs=loss_out)
    model.summary()
    multi_model = multi_gpu_model(model, gpus=GPU_NUM)
    save_model = model
    ada = Adadelta()
    # multi_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adam', metrics=['accuracy'])
    multi_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                        optimizer=ada, metrics=['accuracy'])
    return save_model, multi_model
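# ctc_lambda_func is referenced above but not defined in this excerpt. The
# standard Keras CTC wrapper looks like the sketch below; dropping the first
# two time steps is a common CRNN warm-up convention, not something this file
# confirms:
from keras import backend as K

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    y_pred = y_pred[:, 2:, :]  # assumed warm-up trim
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)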
X_test = X[test_idx]
y_test = ratings[test_idx]
val_ratio = 0.1

print('Training data size: {}'.format(X_train.shape))
print('Test data size: {}'.format(X_test.shape))
print('Validation ratio: {} % of training data'.format(val_ratio * 100))

vocab_size = 5000
embedding_size = 32

# define model
model = Sequential()
model.add(Embedding(vocab_size, embedding_size,
                    input_length=max_review_length))
model.add(CuDNNLSTM(128))
model.add(Dense(1, activation=None))

optim = optimizers.Adam(lr=0.001, decay=0.001)
# use the Adam instance defined above (the original passed the string 'adam',
# which silently ignored the custom learning rate and decay)
model.compile(loss='mse', optimizer=optim, metrics=['mse'])

tensorboard = TensorBoard(log_dir='./logs', write_graph=True)
earlystopping = EarlyStopping(monitor='val_loss', min_delta=0,
                              patience=2, verbose=0, mode='auto')

model.fit(X_train, y_train,
          batch_size=64, epochs=20,
          validation_split=val_ratio,             # assumed: the call was cut off here
          callbacks=[tensorboard, earlystopping])
def build_model(size_embeddings, window_length, number_words, number_positions,
                number_labels, embeddings=None):
    LSTM_UNITS = 128
    DENSE_HIDDEN_UNITS = 4 * LSTM_UNITS

    if embeddings is not None:
        size_embeddings = embeddings.shape[1]
        embedding_layer = Embedding(number_words,
                                    size_embeddings,
                                    weights=[embeddings],
                                    input_length=window_length,
                                    trainable=False,
                                    name='embedded_words')
    else:
        # No pretrained weights: these embeddings must be trainable (the
        # original passed trainable=False, which would freeze random vectors).
        embedding_layer = Embedding(number_words,
                                    size_embeddings,
                                    input_length=window_length,
                                    trainable=True,
                                    name='embedded_words')

    embedding_distance_layer = Embedding(number_positions,
                                         50,
                                         input_length=window_length,
                                         trainable=True,
                                         name='embedded_distances')

    sequence_sent_input = Input(shape=(window_length,), dtype='int32',
                                name='sequence_words')
    embedded_sent = embedding_layer(sequence_sent_input)
    embedded_sent = Dropout(0.3)(embedded_sent)

    sequence_dist_input = Input(shape=(window_length,), dtype='int32',
                                name='sequence_distances')
    embedded_dist = embedding_distance_layer(sequence_dist_input)

    merged = concatenate([embedded_sent, embedded_dist])
    x = SpatialDropout1D(0.3)(merged)
    x = Bidirectional(CuDNNLSTM(LSTM_UNITS, return_sequences=True))(x)
    x = Bidirectional(CuDNNLSTM(LSTM_UNITS, return_sequences=True))(x)

    hidden = concatenate([
        GlobalMaxPooling1D()(x),
        GlobalAveragePooling1D()(x),
    ])
    hidden = add([hidden, Dense(DENSE_HIDDEN_UNITS, activation='relu')(hidden)])
    hidden = add([hidden, Dense(DENSE_HIDDEN_UNITS, activation='relu')(hidden)])
    merged = Dense(number_labels, activation='softmax')(hidden)

    model = Model(inputs=[sequence_sent_input, sequence_dist_input],
                  outputs=[merged])
    model.summary()
    return model
from keras.models import Sequential
from keras.layers import CuDNNLSTM, Dense
from keras.initializers import glorot_uniform, orthogonal
from keras import optimizers
import numpy as np

np.random.seed(123)

#%%
in_dim = LSTM_inputs_train_data.shape[2]
out_dim = num_classes
hidden_size = 125
batch_size = 302
epochs = 100

model = Sequential()
model.add(CuDNNLSTM(hidden_size,
                    return_sequences=False,
                    batch_input_shape=(None, time_length, in_dim),
                    kernel_initializer=glorot_uniform(seed=123),
                    recurrent_initializer=orthogonal(gain=1.0, seed=123)))
model.add(Dense(out_dim,
                activation='softmax',
                kernel_initializer=glorot_uniform(seed=123)))

# lower-cased so the variable no longer shadows optimizers.Adamax
adamax = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999,
                           epsilon=None, decay=0.0)
model.compile(loss='kullback_leibler_divergence',
              optimizer=adamax,
              metrics=['categorical_accuracy'])
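# The snippet defines batch_size and epochs but stops before training; a
# minimal sketch of the fit call, assuming LSTM_targets_train_data holds the
# matching one-hot labels (the name is an assumption):
history = model.fit(LSTM_inputs_train_data, LSTM_targets_train_data,
                    batch_size=batch_size, epochs=epochs,
                    validation_split=0.1, shuffle=True)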
valid_y = pd.DataFrame(all_answer[30000:35000].tolist(), columns=['label'])
# test_y = pd.DataFrame(all_answer[35000:].tolist(), columns=['label'])
train_y = pd.get_dummies(train_y['label']).values
valid_y = pd.get_dummies(valid_y['label']).values
# test_y = pd.get_dummies(test_y['label']).values

# min-max scale all splits using the training statistics
maxvalue = train_x.max()
minvalue = train_x.min()
div = maxvalue - minvalue
train_x = (train_x - minvalue) / div
valid_x = (valid_x - minvalue) / div
test_x = (test_x - minvalue) / div

model = Sequential()
model.add(CuDNNLSTM(100, input_shape=(train_x.shape[1], train_x.shape[2])))
# model.add(Dropout(0.2))
model.add(Dense(9, activation='softmax'))  # output layer
adam = optimizers.Adam(lr=0.005)  # prefer the class name over the lowercase alias
model.compile(loss='categorical_crossentropy', optimizer=adam,
              metrics=['accuracy'])
# model.add(LSTM(n_neurons, batch_input_shape=(n_batch, X.shape[1], X.shape[2]), stateful=True, dropout=dropout))
model.summary()

# fit network
history = model.fit(train_x, train_y,
                    epochs=50, batch_size=72,
                    validation_data=(valid_x, valid_y))
def build_models(self):
    def last_image(tensor):
        return tensor[:, -1, :]

    image_input = Input(shape=(self.state_len, self.height, self.width, 3))
    xf = TimeDistributed(
        Conv2D(90, (9, 9), activation='relu', padding='same'))(image_input)
    xf = TimeDistributed(MaxPooling2D((2, 2)))(xf)
    xf = TimeDistributed(BatchNormalization())(xf)
    print(xf.shape)
    xf = TimeDistributed(
        Conv2D(60, (6, 6), activation='relu', padding='same'))(xf)
    xf = TimeDistributed(MaxPooling2D((2, 2)))(xf)
    xf = TimeDistributed(BatchNormalization())(xf)
    print(xf.shape)
    xf = TimeDistributed(
        Conv2D(60, (5, 5), activation='relu', padding='same'))(xf)
    xf = TimeDistributed(MaxPooling2D((3, 3)))(xf)
    features = TimeDistributed(Flatten())(xf)
    print('feature shape is: ', features.shape)
    feature_model = Model(image_input, features)

    state_current = Input(shape=(self.state_len, self.height, self.width, 3),
                          name='state_current')
    state_next = Input(shape=(self.state_len, self.height, self.width, 3),
                       name='state_next')
    feature_current = feature_model(state_current)
    last_feature = Lambda(last_image)(feature_current)
    feature_dimension = int(last_feature.shape[1])
    feature_next = feature_model(state_next)

    # Inverse model: predict the action from consecutive states.
    inverse_input = Concatenate()([feature_current, feature_next])
    xi = TimeDistributed(Dense(feature_dimension, activation='relu'))(inverse_input)
    xi = TimeDistributed(Dense(feature_dimension, activation='relu'))(xi)
    # NOTE: the LSTM below reads inverse_input directly, so the two
    # TimeDistributed Dense layers above are effectively unused (kept as in
    # the original).
    xi = CuDNNLSTM(50, return_sequences=False)(inverse_input)
    xi = Dense(50, activation='relu')(xi)
    inverse_output = Dense(8, activation='softmax', name='inverse_output')(xi)

    # Forward model: predict the next feature vector from state and action.
    input_action = Input(shape=(8,), name='action')
    recurrent_branch = CuDNNLSTM(50, return_sequences=False)(feature_current)
    forward_input = Concatenate()([last_feature, input_action, recurrent_branch])
    xfo = Dense(feature_dimension, activation='relu')(forward_input)
    xfo = Dense(feature_dimension, activation='relu')(xfo)
    forward_output = Dense(feature_dimension, activation='relu')(xfo)
    last_feature_next = Lambda(last_image)(feature_next)

    icm = Model(inputs=[input_action, state_current, state_next],
                outputs=[inverse_output])

    def icm_loss(ytrue, ypred):
        return self.beta * K.mean(
            0.5 * K.square(forward_output - last_feature_next), axis=1) + (
                1 - self.beta) * K.categorical_crossentropy(ytrue, ypred)

    icm.compile(loss=icm_loss, optimizer=self.adam2, metrics=['accuracy'])

    # Intrinsic reward: the forward model's prediction error.
    ireward_output = Lambda(
        lambda x: K.mean(0.5 * K.square(x[0] - x[1]), axis=1))(
            [forward_output, last_feature_next])
    ireward = Model(inputs=[input_action, state_current, state_next],
                    outputs=ireward_output)

    # main_input = Input(shape=(self.state_len, self.height, self.width, 3))
    x = TimeDistributed(Dense(feature_dimension, activation='relu'))(feature_current)
    x = TimeDistributed(Dense(feature_dimension, activation='relu'))(x)
    reward_input = Input(shape=(8,))
    x = Dense(50, activation='relu')(x)
    main_output = Dense(8, activation='softmax')(x)
    main_model = Model([state_current, reward_input], main_output)
    main_model.add_loss(self.sample_loss(main_output, reward_input))
    main_model.compile(optimizer=self.adam1)
    return main_model, icm, feature_model, ireward
def create_rnn_model(rnnModel, rnn_type, inputSize, outputShape):
    """
    Set up the RNN network.

    Arguments:
        rnnModel: a Keras Sequential model to add layers to
        rnn_type: string, which recurrent cell to use: 'GRU' or 'LSTM'
        inputSize: training input shape (time_length, features)
        outputShape: training output shape (h, w, colorChannel);
            colorChannel should be 1 here

    Returns:
        the model with all layers added and compiled
    """
    # Bail out if rnn_type or inputSize is missing
    # if not (rnn_type and inputSize):
    #     sys.exit()
    if rnn_type == 'GRU':
        rnnModel.add(
            CuDNNGRU(units=64,
                     kernel_initializer='glorot_uniform',
                     recurrent_initializer='orthogonal',
                     bias_initializer='zeros',
                     kernel_regularizer=None,
                     recurrent_regularizer=None,
                     bias_regularizer=None,
                     activity_regularizer=None,
                     kernel_constraint=None,
                     recurrent_constraint=None,
                     bias_constraint=None,
                     return_sequences=True,
                     return_state=False,
                     stateful=False,
                     input_shape=inputSize))
    if rnn_type == 'LSTM':
        rnnModel.add(
            CuDNNLSTM(units=64,
                      kernel_initializer='glorot_uniform',
                      recurrent_initializer='orthogonal',
                      bias_initializer='zeros',
                      unit_forget_bias=True,
                      kernel_regularizer=None,
                      recurrent_regularizer=None,
                      bias_regularizer=None,
                      activity_regularizer=None,
                      kernel_constraint=None,
                      recurrent_constraint=None,
                      bias_constraint=None,
                      return_sequences=True,
                      return_state=False,
                      stateful=False,
                      input_shape=inputSize))

    print(np.prod(outputShape))
    # Could try LeakyReLU here
    rnnModel.add(Dense(128, activation='relu',
                       kernel_regularizer=regularizers.l2(0.01)))
    rnnModel.add(Dense(256, activation='relu',
                       kernel_regularizer=regularizers.l2(0.01)))
    rnnModel.add(Dense(64, activation='relu',
                       kernel_regularizer=regularizers.l2(0.01)))
    rnnModel.add(Dense(outputShape[1], activation='relu'))
    rnnModel.compile(loss='mean_squared_error',
                     optimizer='Adam',
                     metrics=['accuracy'])
    return rnnModel
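# Example usage (a sketch; the shapes below are assumptions for illustration,
# not values from this file):
from keras.models import Sequential

rnn = create_rnn_model(Sequential(), 'LSTM',
                       inputSize=(100, 8),      # 100 time steps, 8 features
                       outputShape=(1, 64, 1))  # Dense head predicts 64 values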
embedding_matrix[index] = embedding_vector

'''
model = Sequential()
model.add(Embedding(config.vocab_size, 100, input_length=config.maxlen,
                    weights=[embedding_matrix], trainable=False))
model.add(Flatten())
model.add(Dense(100, activation="relu"))
model.add(Dense(100, activation="relu"))
model.add(Dropout(0.6))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
'''

model = Sequential()
model.add(Embedding(config.vocab_size, 100, input_length=config.maxlen))
model.add(CuDNNLSTM(config.hidden_dims))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(X_train, y_train,
          batch_size=config.batch_size,
          epochs=config.epochs,
          validation_data=(X_test, y_test),
          callbacks=[WandbCallback()])
def build_model2(lr=0.0, lr_d=0.0, units=0, spatial_dr=0.0, kernel_size1=3,
                 kernel_size2=2, dense_units=128, dr=0.1, conv_size=32):
    file_path = "best_model.hdf5"
    check_point = ModelCheckpoint(file_path, monitor="val_loss", verbose=1,
                                  save_best_only=True, mode="min")
    early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=3)

    inp = Input(shape=(max_len, ))
    x = Embedding(19479, embed_size, weights=[embedding_matrix],
                  trainable=False)(inp)
    x1 = SpatialDropout1D(spatial_dr)(x)

    x_gru = Bidirectional(CuDNNGRU(units, return_sequences=True))(x1)
    x_lstm = Bidirectional(CuDNNLSTM(units, return_sequences=True))(x1)

    x_conv1 = Conv1D(conv_size, kernel_size=kernel_size1, padding='valid',
                     kernel_initializer='he_uniform')(x_gru)
    avg_pool1_gru = GlobalAveragePooling1D()(x_conv1)
    max_pool1_gru = GlobalMaxPooling1D()(x_conv1)

    x_conv2 = Conv1D(conv_size, kernel_size=kernel_size2, padding='valid',
                     kernel_initializer='he_uniform')(x_gru)
    avg_pool2_gru = GlobalAveragePooling1D()(x_conv2)
    max_pool2_gru = GlobalMaxPooling1D()(x_conv2)

    x_conv3 = Conv1D(conv_size, kernel_size=kernel_size1, padding='valid',
                     kernel_initializer='he_uniform')(x_lstm)
    avg_pool1_lstm = GlobalAveragePooling1D()(x_conv3)
    max_pool1_lstm = GlobalMaxPooling1D()(x_conv3)

    x_conv4 = Conv1D(conv_size, kernel_size=kernel_size2, padding='valid',
                     kernel_initializer='he_uniform')(x_lstm)
    avg_pool2_lstm = GlobalAveragePooling1D()(x_conv4)
    max_pool2_lstm = GlobalMaxPooling1D()(x_conv4)

    x = concatenate([
        avg_pool1_gru, max_pool1_gru, avg_pool2_gru, max_pool2_gru,
        avg_pool1_lstm, max_pool1_lstm, avg_pool2_lstm, max_pool2_lstm
    ])
    x = BatchNormalization()(x)
    x = Dropout(dr)(Dense(dense_units, activation='relu')(x))
    x = BatchNormalization()(x)
    x = Dropout(dr)(Dense(int(dense_units / 2), activation='relu')(x))
    x = Dense(5, activation="sigmoid")(x)

    model = Model(inputs=inp, outputs=x)
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(lr=lr, decay=lr_d),
                  metrics=["accuracy"])
    history = model.fit(X_train, y_ohe, batch_size=128, epochs=20,
                        validation_split=0.1, verbose=1,
                        callbacks=[check_point, early_stop])
    model = load_model(file_path)
    return model
def __init__(self, config, pretrained_embedding):
    self._input = tf.placeholder(dtype=tf.int32, shape=[None, None], name='input')
    self._target = tf.placeholder(dtype=tf.int32, shape=[None], name='target')
    self.batch_size = config['batch_size']
    self.num_steps = config['num_steps']
    self.embed_size = config['embed_size']
    self.size = config['hidden_size']
    self._lr = config['lr']
    self.num_classes = config['num_classes']
    self.keep_prob = tf.Variable(config['keep_prob'], trainable=False)
    self.combine_mode = config['combine_mode']
    self.weight_decay = config['weight_decay']

    # outputs = LSTMEncoderWithEmbedding(self._input, self.embed_size, self.size,
    #                                    config['vocab_size'], self.num_steps,
    #                                    self.keep_prob, embedding=pretrained_embedding,
    #                                    num_layers=config['num_layers'],
    #                                    variational_dropout=True,
    #                                    combine_mode='last').get_output()
    embed = Embedding(config['vocab_size'] + 1, self.embed_size)(self._input)
    outputs = tf.nn.dropout(embed, keep_prob=self.keep_prob)
    # outputs = Bidirectional(CuDNNLSTM(self.size, return_sequences=True))(outputs)
    # outputs = tf.nn.dropout(outputs, keep_prob=self.keep_prob)
    outputs = Bidirectional(CuDNNLSTM(self.size, return_sequences=True))(outputs)
    self.size = int(outputs.get_shape().as_list()[-1])  # now 2 * hidden_size

    if self.combine_mode == 'weight':
        outputs = tf.reshape(outputs, [-1, self.size])
        weights = Dense(1, activation='tanh')(outputs)
        outputs = tf.multiply(outputs, weights)
        outputs = tf.reshape(outputs, [-1, self.num_steps, self.size])
        outputs = tf.reduce_sum(outputs, axis=1)
    elif self.combine_mode == 'last':
        outputs = outputs[:, -1, :]
    elif self.combine_mode == 'all':
        weights = Dense(1, activation='tanh')(outputs)
        outputs_weighted = tf.multiply(outputs, weights)
        outputs_weighted = tf.reshape(outputs_weighted,
                                      [-1, self.num_steps, 2 * self.size])
        outputs_weighted = tf.reduce_sum(outputs_weighted, axis=1)
        outputs_last = outputs[:, -1, :]
        outputs_mean = tf.reduce_mean(outputs, axis=1)
        outputs_max = tf.reduce_max(outputs, axis=1)
        outputs_min = tf.reduce_min(outputs, axis=1)
        outputs = tf.concat([outputs_last, outputs_mean, outputs_max,
                             outputs_min, outputs_weighted], axis=-1)

    # outputs = tf.nn.dropout(outputs, keep_prob=self.keep_prob)
    embed_avg = tf.reduce_mean(embed, axis=1)
    # embed_max = tf.reduce_max(embed, axis=1)
    # embed_min = tf.reduce_min(embed, axis=1)
    # outputs = tf.concat([outputs, embed_avg, embed_min, embed_max], axis=-1)
    outputs = tf.concat([outputs, embed_avg], axis=-1)
    # outputs = tf.contrib.layers.fully_connected(outputs, self.size)
    # outputs = tf.nn.dropout(outputs, keep_prob=self.keep_prob)

    # softmax_w = tf.get_variable("softmax_w", [self.size, self.num_classes], dtype=tf.float32)
    # softmax_b = tf.get_variable("softmax_b", [self.num_classes], dtype=tf.float32)
    # logits = tf.matmul(outputs, softmax_w) + softmax_b
    logits = Dense(self.num_classes, activation=None)(outputs)

    # update the cost variables
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self._target,
                                                          logits=logits)
    self.l2_loss = sum(tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())
    self._cost = cost = tf.reduce_mean(loss) + self.weight_decay * self.l2_loss

    self._lr = tf.Variable(self._lr, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config['max_grad_norm'])
    optimizer = tf.train.AdamOptimizer(self._lr)
    # optimizer = tf.train.GradientDescentOptimizer(self._lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))

    self._new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
    self.predicted_class = tf.cast(tf.argmax(tf.nn.softmax(logits), axis=-1),
                                   tf.int32)
def getModel(import_model, max_features, maxlen, embedding_size, lstm_size,
             forget_bias, recurrent_dropout, dropout, stateful, iLSTM,
             scale_amount, x_train, y_train, x_test, y_test, y_cell_train,
             y_cell_test, wi_size, batch_size, epochs, embedding_matrix,
             model_fn, file_name, dataset, use_wv, data_path, trainable,
             embedding_dropout, word_dropout, use_L2, use_decay,
             rewrite_scores, learn_rate, use_CNN, filters, kernel_size,
             pool_size, score_fn, scale_amount_2, two_step=None,
             prev_model=None, extra_output_layer=None):
    if two_step is not None and prev_model is None and iLSTM:
        epochs = 8

    print('Build model...')
    if dataset == 2:
        output_size = len(y_train[0])
        output_activation = "softmax"
        loss = "categorical_crossentropy"
    else:
        output_size = 1
        output_activation = "sigmoid"
        loss = "binary_crossentropy"

    if use_decay:
        optimizer = Adam(lr=learn_rate, decay=0.9999)  # removed clipping
    else:
        optimizer = Adam(lr=learn_rate)

    if iLSTM:
        if lstm_size > len(y_cell_test[0]):
            print(">>> Using spare metrics/nodes")
            iLSTM_loss = keras.losses.spare_mse    # custom loss patched into keras.losses
            iLSTM_metric = keras.metrics.sp_acc    # custom metric patched into keras.metrics
        else:
            iLSTM_loss = "mse"
            iLSTM_metric = "accuracy"

    tensorboard = TensorBoard(log_dir='/home/tom/Desktop/Logs/' + str(dataset) +
                              "/" + file_name + '/',
                              histogram_freq=0,
                              write_graph=True,
                              write_images=True)
    model = None
    print("lstm size", lstm_size)

    if import_model is None and os.path.exists(model_fn) is False and prev_model is None:
        print("L0 Input layer", maxlen)
        sequence_input = Input(shape=(maxlen,), dtype=np.int32)
        # The original reassigned sequence_input to a Dropout *layer* here,
        # which breaks the Model input; applying the dropout to the input
        # tensor instead preserves the apparent intent.
        if word_dropout > 0.0:
            prev_layer = Dropout(word_dropout)(sequence_input)
        else:
            prev_layer = sequence_input

        if use_wv:
            print("L1 pre-trained word embeddings", wi_size, embedding_size,
                  maxlen, False, trainable)
            embedding_layer = Embedding(wi_size,
                                        embedding_size,
                                        weights=[embedding_matrix],
                                        input_length=maxlen,
                                        trainable=trainable)(prev_layer)
        else:
            print("L1 trainable embeddings", wi_size, embedding_size, maxlen, True)
            embedding_layer = Embedding(wi_size,
                                        embedding_size,
                                        input_length=maxlen,
                                        trainable=True)(prev_layer)

        if embedding_dropout > 0.0:
            prev_lstm_layer = Dropout(embedding_dropout)(embedding_layer)
        else:
            prev_lstm_layer = embedding_layer

        if use_CNN:
            prev_lstm_layer = Conv1D(filters,
                                     kernel_size,
                                     padding='valid',
                                     activation='relu',
                                     strides=1)(prev_lstm_layer)

        if iLSTM:
            if dropout > 0.0 or recurrent_dropout > 0.0:
                print("L2 dropout LSTM", lstm_size, forget_bias, dropout,
                      recurrent_dropout)
                hidden_layer, h_l2, cell_state = LSTM(
                    units=lstm_size,
                    dropout=dropout,
                    recurrent_dropout=recurrent_dropout,
                    unit_forget_bias=forget_bias,
                    return_state=True,
                    kernel_regularizer=l2(use_L2))(prev_lstm_layer)
            else:
                print("L2 no_dropout CuDNNLSTM", lstm_size, forget_bias)
                hidden_layer, h_l2, cell_state = CuDNNLSTM(
                    units=lstm_size,
                    unit_forget_bias=forget_bias,
                    return_state=True,
                    kernel_regularizer=l2(use_L2))(prev_lstm_layer)
        else:
            if dropout > 0.0 or recurrent_dropout > 0.0:
                print("L2 dropout LSTM", lstm_size, forget_bias, dropout,
                      recurrent_dropout)
                hidden_layer = LSTM(units=lstm_size,
                                    dropout=dropout,
                                    recurrent_dropout=recurrent_dropout,
                                    unit_forget_bias=forget_bias,
                                    kernel_regularizer=l2(use_L2))(prev_lstm_layer)
            else:
                print("L2 no_dropout CuDNNLSTM", lstm_size, forget_bias)
                hidden_layer = CuDNNLSTM(
                    units=lstm_size,
                    unit_forget_bias=forget_bias,
                    kernel_regularizer=l2(use_L2))(prev_lstm_layer)

        if extra_output_layer:
            ex_output = Dense(lstm_size, activation="linear")(hidden_layer)
            hidden_layer = ex_output

        print("L3 output layer", output_size, output_activation)
        output_layer = Dense(output_size, activation=output_activation)(hidden_layer)

        if iLSTM:
            if extra_output_layer:
                model = Model(sequence_input, [output_layer, ex_output])
            else:
                model = Model(sequence_input, [output_layer, h_l2])
            model.compile(loss=[loss, iLSTM_loss],
                          optimizer=optimizer,
                          metrics=[iLSTM_metric],
                          loss_weights=[1.0 * scale_amount_2, 1.0 * scale_amount])
            print('Train...')
            model.fit(x_train, [y_train, y_cell_train],
                      batch_size=batch_size,
                      epochs=epochs,
                      validation_data=(x_test, [y_test, y_cell_test]),
                      callbacks=[tensorboard])
        else:
            model = Model(sequence_input, output_layer)
            model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
            print('Train...')
            model.fit(x_train, y_train,
                      batch_size=batch_size,
                      epochs=epochs,
                      validation_data=(x_test, y_test),
                      callbacks=[tensorboard])
    elif prev_model is not None:
        print("Two step")
        model = prev_model
        model.compile(loss=[loss, iLSTM_loss],
                      optimizer=optimizer,
                      metrics=[iLSTM_metric],
                      loss_weights=[1.0 * two_step[1], 1.0 * two_step[0]])
        print('Train...')
        model.fit(x_train, [y_train, y_cell_train],
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_data=(x_test, [y_test, y_cell_test]),
                  callbacks=[tensorboard])
    elif import_model is not None:
        print("Loading model...")
        model = load_model(data_path + "model/" + import_model)
    elif rewrite_scores is True or os.path.exists(score_fn) is False:
        model = load_model(model_fn)
    else:
        model = None
    return model
        name='3_conv_layer'))
model.add(ELU())
model.add(
    Conv2D(num_filters,
           kernel_size=size_of_kernel,
           strides=kernel_strides,
           kernel_initializer='glorot_normal',
           name='4_conv_layer'))
model.add(ELU())
model.add(Reshape((8, num_filters * num_sensors)))
model.add(
    CuDNNLSTM(lstm_output,
              kernel_initializer='glorot_normal',
              return_sequences=True,
              name='1_lstm_layer'))
model.add(Dropout(dropout_prob, name='1_dropout_layer'))
model.add(
    CuDNNLSTM(lstm_output,
              kernel_initializer='glorot_normal',
              return_sequences=False,
              name='2_lstm_layer'))
model.add(Dropout(dropout_prob, name='2_dropout_layer'))
model.add(
    Dense(512,
          kernel_initializer='glorot_normal',
valid = values[n_train_hours:8000, :]
test = values[8000:10000, :]
n_obs = n_hours * n_features
train_X, train_y = train[:, :n_obs], train[:, -n_features]
valid_X, valid_y = valid[:, :n_obs], valid[:, -n_features]
test_X, test_y = test[:, :n_obs], test[:, -n_features]
print(train_X.shape, len(train_X), train_y.shape)

# reshape input to be 3D: [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], n_hours, n_features))
valid_X = valid_X.reshape((valid_X.shape[0], n_hours, n_features))
test_X = test_X.reshape((test_X.shape[0], n_hours, n_features))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

#---------------------------------------------------------------------------------------------------------------------------
# design network
model3 = Sequential()
model3.add(CuDNNLSTM(256, input_shape=(train_X.shape[1], train_X.shape[2])))
model3.add(Dense(1))
model3.compile(loss='mae', optimizer='Adam')

# fit network
history3 = model3.fit(train_X, train_y,
                      epochs=Epoch,
                      batch_size=72,
                      validation_data=(test_X, test_y),
                      verbose=True,
                      shuffle=False)
'----------------------------'
y = np.array([
    lb[i * stride + input_dim // 2 - output_dim // 2:
       i * stride + input_dim // 2 - output_dim // 2 + output_dim]
    for i in range((len(lb) - input_dim) // stride)
])
y = to_categorical(y, num_classes=2)
y_train.append(y)

timestep = len(x_train[0])
x_train = np.array(x_train)
y_train = np.array(y_train)

'''build neural network'''
input = Input(shape=(timestep, input_dim))
classifier = input

'''bidirectional LSTM with residual connections'''
o1 = Bidirectional(CuDNNLSTM(filter_size, return_sequences=True))(classifier)
o1 = Dropout(0.2)(o1)
o1 = BatchNormalization()(o1)
o2 = Bidirectional(CuDNNLSTM(filter_size, return_sequences=True))(o1)
o2 = Add()([o1, o2])
o2 = Dropout(0.2)(o2)
o2 = BatchNormalization()(o2)
o3 = Bidirectional(CuDNNLSTM(filter_size, return_sequences=True))(o2)
o3 = Add()([o1, o2, o3])
o3 = Dropout(0.2)(o3)
o3 = BatchNormalization()(o3)

'''attention model'''
oa = TimeDistributed(Dense(filter_size * 2, activation='softmax'))(o3)
o3 = Multiply()([o3, oa])
# yxtay way - https://github.com/yxtay/char-rnn-text-generation
# model.add(Embedding(len(chars), 32, batch_input_shape=(128, maxlen)))
# model.add(Dropout(DROPOUT_VAL))
# model.add(LSTM(LSTM_DIM, return_sequences=False, stateful=True))

# 1 layer only
# model.add(LSTM(LSTM_DIM, input_shape=(maxlen, len(chars)), return_sequences=False))

# 3-layer LSTM
model = Sequential()  # added: the excerpt called model.add without constructing the model first
model.add(CuDNNLSTM(LSTM_DIM,
                    input_shape=(maxlen, len(chars)),
                    return_sequences=True))
model.add(Dropout(DROPOUT_VAL))
model.add(CuDNNLSTM(LSTM_DIM, return_sequences=True))
model.add(Dropout(DROPOUT_VAL))
model.add(CuDNNLSTM(LSTM_DIM))
model.add(Dropout(DROPOUT_VAL))
# model.add(Dense(LSTM_DIM))        # extra Dense
# model.add(Dropout(DROPOUT_VAL))   # extra Dense
model.add(Dense(len(chars)))
model.add(Activation("softmax"))

optimizer = Adam(lr=0.001, clipnorm=5.0, clipvalue=0.5)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
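# For generation, char-RNN setups like this usually pair the softmax output
# with a temperature-sampling helper. A minimal sketch (not part of the
# original snippet):
import numpy as np

def sample(preds, temperature=1.0):
    # Reweight the softmax distribution by temperature and draw one index.
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds + 1e-8) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    return np.argmax(np.random.multinomial(1, preds, 1))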
def get_layer(inp_a, inp_b, tk):
    def load_embedding(toka, max_features):
        def get_coefs(token, *arr):
            return token, np.asarray(arr, dtype='float32')

        embedding_index = dict(
            get_coefs(*o.strip().split(" "))
            for o in open(embedding_path, encoding="utf-8"))
        word_index = toka.word_index
        nub_words = min(max_features, len(word_index))
        embedding_matrix_ = np.zeros((nub_words + 1, embed_size))
        for word, i in word_index.items():
            if i >= max_features:
                continue
            embedding_vector = embedding_index.get(word)
            if embedding_vector is not None:
                embedding_matrix_[i] = embedding_vector
        return embedding_matrix_, nub_words

    def get_pooling(x):
        avg_pool_x = GlobalAveragePooling1D()(x)
        max_pool_x = GlobalMaxPooling1D()(x)
        return avg_pool_x, max_pool_x

    embedding_matrix, nb_words = load_embedding(tk, 100_000)
    embed_layer_a = Embedding(nb_words + 1, embed_size,
                              weights=[embedding_matrix], trainable=False)
    embed_layer_b = Embedding(nb_words + 1, embed_size,
                              weights=[embedding_matrix], trainable=False)
    x_a = embed_layer_a(inp_a)
    x_b = embed_layer_b(inp_b)
    x_a = SpatialDropout1D(0.3)(x_a)
    x_b = SpatialDropout1D(0.3)(x_b)

    xc_a = Bidirectional(CuDNNLSTM(32, return_sequences=True))(x_a)
    xc_b = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x_b)
    xc_a_cons = Bidirectional(CuDNNLSTM(32, return_sequences=True))(x_a)
    xc_b_cons = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x_b)

    avg_pool_ac3, max_pool_ac3 = get_pooling(xc_a_cons)
    avg_pool_bc3, max_pool_bc3 = get_pooling(xc_b_cons)
    x_ac = concatenate([avg_pool_ac3, max_pool_ac3])
    x_ac = BatchNormalization()(x_ac)
    x_ac = Dropout(0.3)(Dense(32, activation='relu')(x_ac))
    x_bc = concatenate([avg_pool_bc3, max_pool_bc3])
    x_bc = BatchNormalization()(x_bc)
    x_bc = Dropout(0.3)(Dense(32, activation='relu')(x_bc))

    xm = Multiply()([x_ac, x_bc])
    xm = BatchNormalization()(xm)
    xm = Dropout(0.3)(Dense(32, activation='relu')(xm))

    x_a_c_3 = Conv1D(32, kernel_size=3, padding='valid',
                     kernel_initializer='he_uniform')(xc_a)
    x_b_c_3 = Conv1D(64, kernel_size=3, padding='valid',
                     kernel_initializer='he_uniform')(xc_b)
    avg_pool_a3, max_pool_a3 = get_pooling(x_a_c_3)
    avg_pool_b3, max_pool_b3 = get_pooling(x_b_c_3)
    x_a = concatenate([avg_pool_a3, max_pool_a3])
    x_a = BatchNormalization()(x_a)
    x_a = Dropout(0.3)(Dense(32, activation='relu')(x_a))
    x_b = concatenate([avg_pool_b3, max_pool_b3])
    x_b = BatchNormalization()(x_b)
    x_b = Dropout(0.3)(Dense(32, activation='relu')(x_b))

    # xm = Multiply()([x_a, x_b])
    # xm = BatchNormalization()(xm)
    # xm = Dropout(0.3)(Dense(32, activation='relu')(xm))
    # d1 = Dot(1)([x_a, x_e])
    # d2 = Dot(1)([x_a, x_e2])

    x = concatenate([x_a, x_b, xm])
    x = BatchNormalization()(x)
    x = Dropout(0.3)(Dense(32, activation='relu')(x))
    out = Dense(2, activation="sigmoid")(x)
    return out
fcst_test_X = fcst_input.reshape((fcst_input.shape[0], 1, fcst_input.shape[1]))
print("forecast data:", fcst_test_X.shape, fcst_labels.shape)

# set random seeds for model reproducibility as suggested in:
# https://keras.io/getting-started/faq/#how-can-i-obtain-reproducible-results-using-keras-during-development
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(42)
rn.seed(12345)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)
tf.set_random_seed(1234)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

# define model
model = Sequential()
model.add(CuDNNLSTM(units=n_neurons,
                    unit_forget_bias=True,
                    bias_regularizer=L1L2(l1=0.01, l2=0.01)))
# model.add(LSTM(units=n_neurons, activation='tanh', input_shape=(None, train_X.shape[2]), use_bias=True,
#                bias_regularizer=L1L2(l1=0.01, l2=0.01)))  # hidden layer
model.add(Dropout(.355))
model.add(Dense(activation='linear', units=n_ahead - 1, use_bias=True))  # output layer
adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                             epsilon=None, decay=0.0, amsgrad=False)
model.compile(loss=rmse, optimizer=adam)
earlystop = keras.callbacks.EarlyStopping(monitor='loss',
                                          min_delta=0.00000001,
                                          patience=5,
                                          verbose=1,
                                          mode='auto')
history = model.fit(train_X, train_y,
                    batch_size=n_batch,
                    epochs=n_epochs,
                    verbose=2,
                    shuffle=False,
                    callbacks=[earlystop])

# plot model history
# plt.plot(history.history['loss'], label='train')
# plt.xlabel("Epochs")
# plt.ylabel("Loss")
# plt.tight_layout()
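# The rmse passed to model.compile above is a custom loss not defined in this
# excerpt. The usual Keras-backend implementation looks like this sketch
# (K is already bound via K.set_session above):
def rmse(y_true, y_pred):
    # root-mean-squared error per sample
    return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))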
from keras.layers import GlobalMaxPool1D, SpatialDropout1D, MaxPooling1D, Flatten
from keras.layers import Bidirectional
from keras.models import Model

early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=3, verbose=1)

print("Building layers")
nb_epoch = 25
print('starting to stitch and compile model')

# Embedding layer for text inputs
input_words = Input((max_len, ))
x_words = Embedding(vocab_size, 300, input_length=max_len)(input_words)
x_words = CuDNNLSTM(256, return_sequences=True)(x_words)
x_words = Dropout(0.20)(x_words)
x_words = Conv1D(128, 3, strides=1, activation='relu')(x_words)
x_words = GlobalMaxPool1D()(x_words)
x_words = Dropout(0.2)(x_words)

x = Dense(64, activation="relu")(x_words)
# x = Dropout(0.2)(x)
predictions = Dense(20, activation="softmax")(x)

model = Model(inputs=[input_words], outputs=predictions)
model.compile(optimizer='nadam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
print(model.summary())
def build_model(lr=0.0, lr_d=0.0):
    inp_a = Input(shape=(max_len_a,))
    inp_b = Input(shape=(max_len_b,))
    x_a = Embedding(nb_words + 1, embed_size, weights=[embedding_matrix],
                    trainable=False)(inp_a)
    x_b = Embedding(nb_words + 1, embed_size, weights=[embedding_matrix],
                    trainable=False)(inp_b)
    x_a = SpatialDropout1D(0.3)(x_a)
    x_b = SpatialDropout1D(0.3)(x_b)

    xc_a = Bidirectional(CuDNNLSTM(64, return_sequences=True))(x_a)
    xc_b = Bidirectional(CuDNNLSTM(512, return_sequences=True))(x_b)

    xc_a_3 = Conv1D(16, kernel_size=3, padding='valid',
                    kernel_initializer='he_uniform')(xc_a)
    xc_a_2 = Conv1D(16, kernel_size=2, padding='valid',
                    kernel_initializer='he_uniform')(xc_a)
    xc_b_3 = Conv1D(64, kernel_size=3, padding='valid',
                    kernel_initializer='he_uniform')(xc_b)
    xc_b_2 = Conv1D(64, kernel_size=2, padding='valid',
                    kernel_initializer='he_uniform')(xc_b)

    avg_pool_a3 = GlobalAveragePooling1D()(xc_a_3)
    max_pool_a3 = GlobalMaxPooling1D()(xc_a_3)
    avg_pool_a2 = GlobalAveragePooling1D()(xc_a_2)
    max_pool_a2 = GlobalMaxPooling1D()(xc_a_2)
    avg_pool_b3 = GlobalAveragePooling1D()(xc_b_3)
    max_pool_b3 = GlobalMaxPooling1D()(xc_b_3)
    avg_pool_b2 = GlobalAveragePooling1D()(xc_b_2)
    max_pool_b2 = GlobalMaxPooling1D()(xc_b_2)

    x_a = concatenate([avg_pool_a3, max_pool_a3, avg_pool_a2, max_pool_a2])
    x_a = BatchNormalization()(x_a)
    x_a = Dropout(0.3)(Dense(32, activation='relu')(x_a))
    x_b = concatenate([avg_pool_b3, max_pool_b3, avg_pool_b2, max_pool_b2])
    x_b = BatchNormalization()(x_b)
    x_b = Dropout(0.1)(Dense(64, activation='relu')(x_b))

    x = concatenate([x_a, x_b])
    x = BatchNormalization()(x)
    x = Dropout(0.2)(Dense(64, activation='relu')(x))
    x = Dense(2, activation="sigmoid")(x)

    # fine-tune: freeze the first layer only
    model = Model(inputs=[inp_a, inp_b], outputs=x)
    model.trainable = True
    for layer in model.layers[:1]:
        layer.trainable = False
    model.summary()

    # train
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(lr=lr, decay=lr_d),
                  metrics=["accuracy"])
    # model.fit_generator
    model.fit([X_train_a, X_train_b], y_ohe,
              batch_size=24, epochs=20, validation_split=0.3,
              verbose=1, class_weight='auto',
              callbacks=[check_point, early_stop, tb_cb])
    K.clear_session()
    tf.reset_default_graph()
    model = load_model(model_path)
    return model
def build_LSTM_model(trainData, trainBatches, testData, testBatches, windowSize,
                     class_count, numCalls, batch_size):
    # Specify number of units
    # https://stackoverflow.com/questions/37901047/what-is-num-units-in-tensorflow-basiclstmcell#39440218
    num_units = 128
    embedding_size = 256

    # set time steps to be 1, instead of windowSize
    time_step = 1

    # https://keras.io/callbacks/#earlystopping
    early_stop = cb.EarlyStopping(monitor='sparse_categorical_accuracy',
                                  min_delta=0.0001, patience=3)

    # +1 because 0 is our padding number
    # (moved above its first use; the original read api_count before assigning it)
    api_count = numCalls + 1

    # reshape the dataset so each sample is the previous [look_back] API calls
    # and Y is the label of the next call
    look_back = api_count
    trainX, trainY = create_dataset(trainData, look_back)
    testX, testY = create_dataset(testData, look_back)

    # reshape input to be [samples, time steps, features]
    trainX = numpy.reshape(trainX, (trainX.shape[0], time_step, trainX.shape[1]))
    testX = numpy.reshape(testX, (testX.shape[0], time_step, testX.shape[1]))

    model = Sequential()

    # We need an embedding layer because the LSTM would otherwise assume the
    # API call indices (numbers) have mathematical significance, e.g. that
    # system call 2 is "closer" to system calls 3 and 4. But system call
    # numbers have nothing to do with their semantic meaning or relation to
    # other system calls, so we transform them with an embedding layer and let
    # the LSTM figure these relationships out for itself.
    # https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html
    # https://stackoverflow.com/questions/40695452/stateful-lstm-with-embedding-layer-shapes-dont-match
    # input one api at a time
    model.add(Embedding(input_dim=api_count, output_dim=embedding_size,
                        input_length=time_step))

    # https://keras.io/layers/recurrent/#lstm
    # model.add(LSTM(num_units, input_shape=(windowSize, api_count), return_sequences=False))
    # input one api at a time, and look at the previous [look_back] api's
    model.add(CuDNNLSTM(num_units, input_shape=(time_step, look_back),
                        return_sequences=False))

    # NOTE: If I want to add more layers
    # https://stackoverflow.com/questions/40331510/how-to-stack-multiple-lstm-in-keras

    # https://keras.io/layers/core/#dense
    model.add(Dense(128))
    # https://keras.io/activations/
    model.add(Activation('relu'))
    # https://keras.io/layers/core/#dropout
    model.add(Dropout(0.5))
    model.add(Dense(class_count, name='logits'))
    model.add(Activation('softmax'))

    # Which optimizer to use
    # https://keras.io/optimizers/
    opt = optimizers.RMSprop(lr=0.01, decay=0.001)

    # https://keras.io/models/model/#compile
    # We use sparse_categorical_accuracy rather than categorical_accuracy
    # because the labels are not one-hot encoded:
    # https://stackoverflow.com/questions/44477489/keras-difference-between-categorical-accuracy-and-sparse-categorical-accuracy
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=opt,
                  metrics=['sparse_categorical_accuracy'])

    # https://keras.io/models/model/#fit_generator
    hist = model.fit_generator(
        # Data to train
        (trainX, trainY),
        # Use multiprocessing because Python threading isn't really threading:
        # https://docs.python.org/3/glossary.html#term-global-interpreter-lock
        use_multiprocessing=True,
        # Number of steps per epoch (this is how we train a large dataset
        # without running out of memory)
        steps_per_epoch=trainBatches,
        epochs=100,
        # Validation data (will not be trained on)
        validation_data=(testX, testY),
        validation_steps=testBatches,
        # Do not shuffle batches.
        shuffle=False,
        # List of callbacks to be called while training.
        callbacks=[early_stop])

    return model, hist
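# create_dataset is called above but not defined in this excerpt. The classic
# look-back windowing helper is a reasonable sketch, assuming `data` is a 1-D
# sequence of API-call indices:
def create_dataset(data, look_back=1):
    # X holds windows of the previous look_back calls; Y holds the next call.
    X, Y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:i + look_back])
        Y.append(data[i + look_back])
    return numpy.array(X), numpy.array(Y)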
    print(coord)
    sys.exit(1)
'''

model = Sequential()
# model.add(BatchNormalization())
# model.add(Dense(outputs, input_shape=(time_window + 1, feature_count)))
# model.add(LSTM(units=256, input_shape=(time_window + 1, feature_count), return_sequences=True))
model.add(BatchNormalization(input_shape=(time_window + 1, feature_count)))
model.add(Dropout(0.2))
model.add(CuDNNLSTM(units=256,
                    input_shape=(time_window + 1, feature_count),
                    return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(outputs))
# model.add(CuDNNLSTM(units=outputs, return_sequences=False))
# model.add(CuDNNGRU(units=outputs, input_shape=(time_window + 1, feature_count), return_sequences=False))
# model.add(LSTM(units=outputs))
# model.add(Dense(outputs))
# model.add(Activation('softmax'))

# opt = RMSprop(0.001)
opt = SGD()
model.compile(loss='mean_squared_error', optimizer=opt, metrics=['accuracy'])

date = str(datetime.datetime.now().isoformat())
def get_model(maxlen, max_features, embed_size, embedding_matrix, n_classes):
    sequence_input = Input(shape=(maxlen, ))
    # fast_embedding = tf.keras.layers.Embedding(max_features, embed_size,
    #                                            embeddings_initializer=tf.keras.initializers.Constant(fast_embedding_matrix),
    #                                            trainable=False)
    # glove_embedding = tf.keras.layers.Embedding(max_features, embed_size,
    #                                             embeddings_initializer=tf.keras.initializers.Constant(glove_embedding_matrix),
    #                                             trainable=False)
    # embedding_model = tf.keras.Sequential([tf.keras.layers.Input(shape=(maxlen,), dtype='int32'),
    #                                        DynamicMetaEmbedding([fast_embedding, glove_embedding])])
    embedding = Embedding(max_features, embed_size, weights=[embedding_matrix],
                          trainable=False)(sequence_input)
    # x = DynamicMetaEmbedding([fast_embedding, glove_embedding])()
    x = SpatialDropout1D(0.3)(embedding)
    x1 = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x)
    x2 = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x1)
    x3 = Conv1D(64, kernel_size=2, padding="valid",
                kernel_initializer="he_uniform")(x2)
    max_pool1 = GlobalMaxPooling1D()(x1)
    max_pool2 = GlobalMaxPooling1D()(x2)
    max_pool3 = GlobalMaxPooling1D()(x3)
    x = concatenate([max_pool1, max_pool2, max_pool3])

    # x1 = SpatialDropout1D(0.2)(x)
    # x = Bidirectional(CuDNNGRU(256, return_sequences=True))(embedding)
    # x = Conv1D(64, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(x)
    # x = Bidirectional(CuDNNGRU(128, return_sequences=True))(x)
    # x = Conv1D(64, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(x)
    # y = Bidirectional(CuDNNLSTM(256, return_sequences=True))(embedding)
    # y = Conv1D(64, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(y)
    # y = Bidirectional(CuDNNLSTM(128, return_sequences=True))(y)
    # y = Conv1D(64, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(y)
    # avg_pool1 = GlobalAveragePooling1D()(x)
    # max_pool1 = GlobalMaxPooling1D()(x)
    # avg_pool2 = GlobalAveragePooling1D()(y)
    # max_pool2 = GlobalMaxPooling1D()(y)
    # x = concatenate([avg_pool1, max_pool1, avg_pool2, max_pool2])

    preds = Dense(n_classes, activation="softmax")(x)
    model = Model(sequence_input, preds)
    return model
Y_train = training_dataframe['target'].values
Y_test = test_labels_dataframe['target'].values

max_length = 50
tokenizer = Tokenizer()
tokenizer.fit_on_texts(training_dataframe.tweet.values)
train_tweet_seq = tokenizer.texts_to_sequences(training_dataframe.tweet.values)
train_tweet_seq_padded = pad_sequences(train_tweet_seq, maxlen=max_length)
test_tweet_seq = tokenizer.texts_to_sequences(test_dataframe.tweet.values)
test_tweet_seq_padded = pad_sequences(test_tweet_seq, maxlen=max_length)
vocab_size = len(tokenizer.word_index) + 1

inputs = Input(shape=(max_length, ))
embedding_layer = Embedding(vocab_size, 20, input_length=max_length)(inputs)
x = CuDNNLSTM(64)(embedding_layer)
x = Dense(64, activation='relu')(x)
x = Dropout(0.8)(x)
predictions = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=[inputs], outputs=predictions)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['acc'])

epochs = 3
model_history = model.fit([train_tweet_seq_padded],
                          y=to_categorical(Y_train),
                          batch_size=128,
                          verbose=1,
                          epochs=epochs)
predicted = model.predict(test_tweet_seq_padded)
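# Turning the softmax outputs into class labels and scoring them against
# Y_test (a sketch; it assumes Y_test holds integer class ids, matching the
# to_categorical encoding used for training):
predicted_labels = predicted.argmax(axis=1)
accuracy = (predicted_labels == Y_test).mean()
print('test accuracy: {:.4f}'.format(accuracy))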
def get_three_entrys_model(maxlen, max_features, embed_size, embedding_matrix,
                           n_classes):
    sequence_input = Input(shape=(maxlen, ))
    # small_sequence_input = Input(shape=(6,))
    features_input = Input(shape=(20, ))
    # hash_input = Input(shape=(max_features,))

    embedding_1 = Embedding(max_features, embed_size, weights=[embedding_matrix],
                            trainable=True,
                            name='embedding_layer')(sequence_input)
    x = SpatialDropout1D(0.3)(embedding_1)
    x1 = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x)
    x2 = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x1)
    x3 = Conv1D(64, kernel_size=2, padding="valid",
                kernel_initializer="he_uniform")(x2)
    x4 = Conv1D(64, kernel_size=2, padding="valid",
                kernel_initializer="he_uniform")(x1)
    max_pool1 = GlobalMaxPooling1D()(x1)
    max_pool2 = GlobalMaxPooling1D()(x2)
    max_pool3 = GlobalMaxPooling1D()(x3)
    max_pool4 = GlobalMaxPooling1D()(x4)

    # x1 = SpatialDropout1D(0.3)(embedding_1)
    # x = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x1)
    # x = Bidirectional(CuDNNLSTM(64, return_sequences=True))(x)
    # x = AttentionWithContext()(x)
    # dense_attention = Dense(64, activation="relu")(x)
    # average_pool_attention = GlobalAveragePooling1D()(x)
    # x1 = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x)
    # x = Conv1D(64, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(x1)
    # max_pool1 = GlobalMaxPooling1D()(x)
    # embedding_2 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=False,
    #                         name='small_embedding_layer')(small_sequence_input)
    # x = SpatialDropout1D(0.3)(embedding_1)
    # x1 = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x)
    # x = Conv1D(64, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(x1)
    # max_pool2 = GlobalMaxPooling1D()(x)

    # NOTE: this second stack rebinds x1/x2/x3, duplicating the BiLSTM/Conv
    # branch above with fresh weights (kept as in the original).
    x1 = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x)
    x2 = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x1)
    x3 = Conv1D(64, kernel_size=2, padding="valid",
                kernel_initializer="he_uniform")(x2)
    avg_pool4 = GlobalAveragePooling1D()(x1)
    avg_pool5 = GlobalAveragePooling1D()(x2)
    max_pool6 = GlobalMaxPooling1D()(x2)
    max_pool7 = GlobalMaxPooling1D()(x3)

    x_concat = concatenate([
        max_pool1, max_pool2, max_pool3, max_pool4,
        avg_pool4, avg_pool5, max_pool6, max_pool7
    ])
    dense_1 = Dense(768, activation='relu')(x_concat)
    dense_2 = Dense(768, activation='relu')(x_concat)
    x_concat_2 = concatenate([x_concat, dense_1, dense_2])

    features_dense = Dense(768, activation="relu")(features_input)
    # hash_dense = Dense(512, activation='relu')(hash_input)
    x = concatenate([x_concat_2, features_dense])
    # x = concatenate([max_pool1, max_pool2, features_dense])
    # x = Dense(128, activation='relu')(concat)
    # x = Dropout(0.1)(x)
    # x = BatchNormalization()(x)
    # x = concatenate([concat, x])
    preds = Dense(n_classes, activation="softmax")(x)
    model = Model(inputs=[sequence_input, features_input], outputs=preds)
    return model
def simple_LSTM_model(look_back):
    model = Sequential()
    model.add(CuDNNLSTM(64, input_shape=(look_back, 1)))
    model.add(Dense(1, activation='sigmoid'))
    return model
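# Example usage (a sketch; loss, optimizer, and look_back are assumptions,
# since this file does not show how the function is called):
model = simple_LSTM_model(look_back=10)
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
# model.fit(X, y, epochs=10, batch_size=32)  # X shaped (samples, 10, 1)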
def get_model_defination(self, dataset, embeddings):
    try:
        # Build the model
        print('Building the model...')
        lstm_dim = 64

        ### token embedding layer
        # unused, left for compatibility
        char_input = Input(
            shape=[dataset.abs_len, dataset.maxlen, dataset.maxlen_word],
            dtype='int32', name='char_input')
        if self.embedding:
            main_input = Input(shape=[dataset.abs_len, dataset.maxlen],
                               dtype='int32', name='input')  # (None, 35, 180)
            main_input_r = Lambda(lambda x: K.reshape(
                x, shape=(-1, dataset.maxlen)))(main_input)
            embeds, _, _ = embeddings.init_weights(dataset.idx2word)
            embed = Embedding(input_dim=dataset.vocsize,
                              output_dim=embeddings.embed_dim,
                              input_length=dataset.maxlen,
                              weights=[embeds],
                              mask_zero=False,
                              name='embedding',
                              trainable=True)(main_input_r)
            token_embedding_layer = embed
        else:
            main_input = Input(shape=[dataset.abs_len, dataset.maxlen],
                               dtype='float32', name='input')  # (None, 35, 180)
            main_input_r = Lambda(lambda x: K.reshape(
                x, shape=(-1, dataset.maxlen, 1)))(main_input)
            token_embedding_layer = main_input_r

        ### sentence encoding layer
        if self.encoding:
            blstm_layer = Bidirectional(
                CuDNNLSTM(lstm_dim, return_sequences=True))(token_embedding_layer)
            attention_layer = SeqSelfAttention(
                attention_activation='sigmoid')(blstm_layer)
            sentence_encoding_layer = attention_layer
        else:
            sentence_encoding_layer = token_embedding_layer

        ### context enriching layer
        if self.enriching:
            biLSTM = Bidirectional(
                CuDNNLSTM(lstm_dim, return_sequences=False))(sentence_encoding_layer)
            biLSTM = Dropout(0.5)(biLSTM)
            biLSTM_r = Lambda(lambda x: K.reshape(
                x, shape=(-1, dataset.abs_len, 2 * lstm_dim)))(biLSTM)
            norm = BatchNormalization()(biLSTM_r)
            abstract_processing_layer = Dense(dataset.nclasses,
                                              name='feed_forword')(norm)
        else:
            abs_layer_in = Lambda(lambda x: K.reshape(
                x, shape=(-1, dataset.abs_len,
                          2 * lstm_dim * dataset.maxlen)))(sentence_encoding_layer)
            feedforward = Dense(dataset.maxlen, name='feed_forword_1')(abs_layer_in)
            norm = BatchNormalization()(feedforward)
            abstract_processing_layer = Dense(dataset.nclasses,
                                              name='feed_forword')(norm)

        ### label sequence optimization layer
        if self.optimazion:
            final_output = CRF(dataset.nclasses,
                               learn_mode='marginal',
                               sparse_target=True)(abstract_processing_layer)
        else:
            final_output = Activation('softmax')(abstract_processing_layer)  # (None, 35, 4)

        model = Model(inputs=[main_input, char_input],
                      outputs=final_output, name='output')
        model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        # model.summary()
        # plot_model(model, to_file='model.png', show_shapes=True)
    except Exception as e:
        # model.summary()
        # plot_model(model, to_file='model.png', show_shapes=True)
        traceback.print_exc()
        pdb.set_trace()
    return model