def build_rnn_attention_model(self):
    """RNN encoder with Bahdanau attention; returns the uncompiled shared model."""
    self.network_type = 'rnn_attention_model'
    model_input, encoder_input, decoder_output = general_input(self)

    # Dense layers before/inside the RNN performed poorly, so the encoder
    # inputs are merged and batch-normalized directly.
    merged_encoder = layers.concatenate(encoder_input)
    normed_encoder = BatchNormalization()(merged_encoder)

    # CuDNNLSTM also returns the cell state; it is not needed here.
    enc_seq, enc_hidden, _ = CuDNNLSTM(
        1024, return_sequences=True, return_state=True)(normed_encoder)

    # Mimic one decoder step: the previous output / hidden state combined
    # with the encoder hidden state forms the attention query.  Note the
    # query is much smaller than the encoder states, which may be an issue.
    # The embedding adds an extra axis, hence the second LSTM to flatten it
    # before concatenating with the context vector.
    dec_seq, dec_hidden, _ = CuDNNLSTM(
        256, return_sequences=True, return_state=True)(decoder_output)
    dec_vector = CuDNNLSTM(256)(dec_seq)

    attention_query = layers.concatenate([enc_hidden, dec_hidden])
    context, _ = BahdanauAttention(512)(attention_query, enc_seq)

    # Many-to-one: the sequence is decoded exactly once, so a plain Dense
    # head is used instead of a recurrent decoder.
    merged = layers.concatenate([context, dec_vector])
    merged = layers.Dense(256, kernel_initializer='he_uniform')(merged)
    activated = layers.LeakyReLU(0.05)(merged)

    return Model(model_input, activated)
def build_model(self):
    """Build and compile the stacked CuDNN-LSTM network into ``self.model``.

    The stack is ``self.lstm_layers[:-1]`` sequence-returning LSTM layers
    (each followed by dropout + batch norm), a final non-sequence LSTM, and
    a small dense head.
    """
    self.model = Sequential()

    # Stacked LSTM layers: every layer except the last must return the full
    # sequence so the next LSTM can consume it.
    for i, layer_size in enumerate(self.lstm_layers[:-1]):
        if i == 0:
            # BUG FIX: CuDNNLSTM has no `input_size` kwarg (the original
            # call raised a TypeError), and the input declaration belongs
            # on the first layer only.  The Keras way is
            # `input_shape=(timesteps, features)`.
            # NOTE(review): assumes one feature per timestep with variable
            # sequence length — confirm against the training data shape.
            self.model.add(
                CuDNNLSTM(layer_size, input_shape=(None, 1),
                          return_sequences=True))
        else:
            self.model.add(CuDNNLSTM(layer_size, return_sequences=True))
        self.model.add(Dropout(self.dropout_rate))
        self.model.add(BatchNormalization())

    # Last LSTM layer collapses the sequence to a single vector.
    self.model.add(CuDNNLSTM(self.lstm_layers[-1]))
    self.model.add(Dropout(self.dropout_rate))
    self.model.add(BatchNormalization())

    # Dense output layers
    self.model.add(Dense(32, activation='relu'))
    self.model.add(Dropout(self.dropout_rate))
    self.model.add(Dense(1, activation='relu'))

    opt = tf.keras.optimizers.Adam(lr=self.learning_rate)
    self.model.compile(
        # TODO: a single 'relu' output unit is inconsistent with
        # sparse_categorical_crossentropy — revisit loss and metrics.
        loss='sparse_categorical_crossentropy',
        optimizer=opt,
        metrics=['accuracy'],
    )
def CuDNNLSTM_Autoencoder_GPU():
    """Sequence autoencoder: LSTM encoder -> repeated code vector -> LSTM decoder."""
    model = Sequential()
    # Encoder: each sample is a (timesteps, n_features) 2D array; this layer
    # compresses it into a single 128-dim encoded feature vector.
    model.add(CuDNNLSTM(128,
                        input_shape=(timesteps, n_features),
                        return_sequences=False))
    # Dropout regularization: 20% of units.
    model.add(Dropout(0.2))
    # The encoded vector is replicated once per timestep so the decoder LSTM
    # receives a sequence again.
    model.add(RepeatVector(timesteps))
    # Decoder: return_sequences=True yields one output vector per timestep.
    model.add(CuDNNLSTM(128, return_sequences=True))
    model.add(Dropout(0.2))
    # TimeDistributed output layer reconstructs n_features at every timestep
    # (requires return_sequences=True on the previous LSTM).
    model.add(TimeDistributed(Dense(n_features)))
    # Mean absolute error loss; Adam optimizer with its default learning rate.
    model.compile(loss='mae', optimizer=adam)
    return model
def build_stacked_rnn_model(self):
    """Stacked-LSTM encoder/decoder model; returned uncompiled so callers can extend it."""
    self.network_type = 'stacked_rnn_model'
    model_input, encoder_input, decoder_output = general_input(self)

    # Running separate RNNs per input worked poorly, so the encoder inputs
    # are concatenated first.
    merged_inputs = layers.concatenate(encoder_input)
    normed_inputs = BatchNormalization()(merged_inputs)

    # 512 -> 1024 units works best so far, at the cost of model size.
    enc = CuDNNLSTM(512, return_sequences=True)(normed_inputs)
    enc = CuDNNLSTM(1024)(enc)
    # A bidirectional LSTM was tried here and performed worse.

    dec = CuDNNLSTM(256, return_sequences=True)(decoder_output)
    dec = CuDNNLSTM(256)(dec)

    # Tensors of different origin are concatenated rather than added.
    features = layers.concatenate([dec, enc])
    features = layers.Dense(512, kernel_initializer='he_uniform')(features)
    features = layers.LeakyReLU(0.05)(features)
    features = layers.Dense(256, kernel_initializer='he_uniform')(features)
    output = layers.LeakyReLU(0.05)(features)

    # Deliberately NOT compiled here: the shared model is extended later.
    return Model(model_input, output)
def create_model_AZ_split_lstm(rnn_unit, concated_id_emb_dict):
    """Build and compile the 4-route BiLSTM age/gender model.

    BUG FIX: the model was built and compiled but never returned (the
    trailing comment said "return compiled model" yet the function always
    yielded None); ``return model`` added at the end.

    :param rnn_unit: unit count for the final LSTM block
    :param concated_id_emb_dict: id name -> pretrained embedding matrix
    :return: the compiled keras Model
    """
    # input layers, no masking
    inputs_dict = get_seq_input_layers(cols=EMB_keys2do)
    inputs_all = list(inputs_dict.values())

    # per-feature 1x1 conv filter widths (-1 disables the conv)
    conv1d_info_dict = {
        'creative_id': 256,
        'ad_id': 128,
        'advertiser_id': 128,
        'industry': 64,
        'product_category': 64,
        'product_id': 128,
        'time': 32,
        'click_times': -1
    }
    layers2concat = []
    for id_nm, emb_matrix in concated_id_emb_dict.items():
        if id_nm != 'click_times':
            print(id_nm, 'get embedding!')
            emb_layer = get_emb_layer(emb_matrix,
                                      trainable=TRAINABLE_DICT[id_nm])
            x = emb_layer(inputs_dict[id_nm])
            if conv1d_info_dict[id_nm] > -1:
                cov_layer = keras.layers.Conv1D(
                    filters=conv1d_info_dict[id_nm],
                    kernel_size=1,
                    activation='relu')
                x = cov_layer(x)
            layers2concat.append(x)

    # all embeddings concatenated
    concat_emb_w2v = keras.layers.concatenate(layers2concat)

    # 4 parallel bidirectional LSTM routes over the same embedding
    lstm_1 = keras.layers.Bidirectional(CuDNNLSTM(
        128, return_sequences=True))(concat_emb_w2v)
    lstm_3 = keras.layers.Bidirectional(CuDNNLSTM(
        64, return_sequences=True))(concat_emb_w2v)
    lstm_2 = keras.layers.Bidirectional(CuDNNLSTM(
        32, return_sequences=True))(concat_emb_w2v)
    lstm_4 = keras.layers.Bidirectional(CuDNNLSTM(
        128, return_sequences=True))(concat_emb_w2v)
    concat_emb_w2v = keras.layers.concatenate([lstm_1, lstm_2, lstm_3, lstm_4])

    # final LSTM + pooling block
    concat_all = LSTM_net_AZ_split_lstm(concat_emb_w2v, n_unit=rnn_unit)
    concat_all = keras.layers.Dropout(0.3)(concat_all)

    # dense head
    x = keras.layers.Dense(256)(concat_all)
    x = keras.layers.PReLU()(x)
    x = keras.layers.Dense(256)(x)
    x = keras.layers.PReLU()(x)
    outputs_all = keras.layers.Dense(NUM_CLASSES,
                                     activation='softmax',
                                     name='age_gender')(x)
    model = keras.Model(inputs_all, outputs_all)
    print(model.summary())

    # return compiled model tf 2.0
    model.compile(optimizer=keras.optimizers.Adam(lr=1e-3),
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])
    return model
def lstm_model():
    """Word-level language model: embedding -> two stacked LSTMs -> softmax over vocab."""
    net = tf.keras.models.Sequential([
        Embedding(word_len, 64, input_length=maxlen),
        CuDNNLSTM(512, return_sequences=True),
        CuDNNLSTM(512),
        Dense(word_len, activation='softmax'),
    ])
    net.compile(loss='sparse_categorical_crossentropy',
                optimizer=tf.keras.optimizers.RMSprop(lr=0.01))
    return net
def get_hegemax_model(seq_length, print_summary=True):
    """Two-tower CNN+LSTM driving model predicting steer, throttle and brake.

    Each tower has independent weights: one feeds the steering head, the
    other is shared by the throttle and brake heads.
    """
    forward_image_input = Input(shape=(seq_length, 160, 350, 3),
                                name="forward_image_input")
    info_input = Input(shape=(seq_length, 3), name="info_input")
    hlc_input = Input(shape=(seq_length, 6), name="hlc_input")

    def _vision_tower():
        # Crop the sky, normalize to [-0.5, 0.5], apply a per-timestep conv
        # stack, flatten, merge with the scalar inputs and summarize with an
        # LSTM.  Called twice so each tower gets fresh weights.
        y = TimeDistributed(Cropping2D(cropping=((50, 0), (0, 0))))(forward_image_input)
        y = TimeDistributed(Lambda(lambda t: ((t / 255.0) - 0.5)))(y)
        y = TimeDistributed(Conv2D(24, (5, 5), strides=(2, 2), activation="relu"))(y)
        y = TimeDistributed(Conv2D(36, (5, 5), strides=(2, 2), activation="relu"))(y)
        y = TimeDistributed(Conv2D(48, (5, 5), strides=(2, 2), activation="relu"))(y)
        y = TimeDistributed(Conv2D(64, (3, 3), strides=(2, 2), activation="relu"))(y)
        y = TimeDistributed(Conv2D(64, (3, 3), activation="relu"))(y)
        y = TimeDistributed(Conv2D(64, (3, 3), activation="relu"))(y)
        y = TimeDistributed(Flatten())(y)
        y = concatenate([y, info_input, hlc_input])
        y = TimeDistributed(Dense(100, activation="relu"))(y)
        return CuDNNLSTM(10, return_sequences=False)(y)

    steer_pred = Dense(10, activation="tanh", name="steer_pred")(_vision_tower())

    speed_features = _vision_tower()
    throttle_pred = Dense(1, name="throttle_pred")(speed_features)
    brake_pred = Dense(1, name="brake_pred")(speed_features)

    model = Model(inputs=[forward_image_input, info_input, hlc_input],
                  outputs=[steer_pred, throttle_pred, brake_pred])
    if print_summary:
        model.summary()
    return model
def lstm_model(maxlen, wl_chars):
    """Four-layer char-level CuDNN-LSTM with dropout; softmax over the character set."""
    vocab_size = len(wl_chars)
    net = Sequential()
    # A stateful return_state variant was tried and abandoned.
    net.add(CuDNNLSTM(512, return_sequences=True,
                      input_shape=(maxlen, vocab_size)))
    net.add(Dropout(0.2))
    net.add(CuDNNLSTM(256, return_sequences=True))
    net.add(Dropout(0.2))
    net.add(CuDNNLSTM(128, return_sequences=True))
    net.add(Dropout(0.2))
    net.add(CuDNNLSTM(64))
    net.add(Dropout(0.2))
    net.add(Dense(vocab_size, activation='softmax'))
    return net
def buildModel(self, model_path=None):
    """Fine-tune a saved model when one exists; otherwise train a new one.

    Both paths save to './model_tensorboard_2.h5', store the fit history on
    ``self.history`` and append the validation loss to CSV.

    :param model_path: path of a previously saved model; defaults to
        './model_tensorboard_2.h5'
    """
    try:
        if model_path is None:
            model_path = './model_tensorboard_2.h5'
        mymodel = load_model(model_path)
        print('retrain model...........')
        history = mymodel.fit(self.x_train, self.y_train, batch_size=50,
                              epochs=500, verbose=0, validation_split=0.2,
                              callbacks=[TensorBoard('./logs2')])
        self.history = history.history
        mymodel.save('./model_tensorboard_2.h5')
        self.model = mymodel
        self._write_val_loss_to_csv()
    except Exception:
        # BUG FIX: was a bare `except:` which also swallowed SystemExit and
        # KeyboardInterrupt.  Any failure to load/retrain (missing file,
        # incompatible weights, ...) falls back to training a fresh model.
        print('train new model.........')
        start = datetime.datetime.now()
        mymodel = Sequential()
        mymodel.add(CuDNNLSTM(50, input_shape=(20, 1), return_sequences=True))
        mymodel.add(Activation('sigmoid'))
        mymodel.add(BatchNormalization())
        mymodel.add(Dropout(0.2))
        mymodel.add(CuDNNLSTM(100, return_sequences=True))
        mymodel.add(Activation('sigmoid'))
        mymodel.add(BatchNormalization())
        mymodel.add(Dropout(0.2))
        mymodel.add(CuDNNLSTM(100))
        mymodel.add(Activation('tanh'))
        mymodel.add(BatchNormalization())
        mymodel.add(Dropout(0.2))
        mymodel.add(Dense(50, activation='sigmoid'))
        mymodel.add(BatchNormalization())
        mymodel.add(Dropout(0.2))
        mymodel.add(Dense(20, activation='sigmoid'))
        mymodel.add(BatchNormalization())
        mymodel.add(Dropout(0.2))
        mymodel.add(Dense(22, activation='relu'))
        mymodel.compile('adam', 'mae', metrics=['mae'])
        print(mymodel.summary())
        self.model = mymodel
        history = mymodel.fit(self.x_train, self.y_train, batch_size=50,
                              epochs=3000, verbose=2, validation_split=0.2,
                              callbacks=[TensorBoard()])
        self.history = history.history
        mymodel.save('./model_tensorboard_2.h5')
        end = datetime.datetime.now()
        print('耗时', end - start)
        self._write_val_loss_to_csv()
def build_train_rnn(x_train, x_test, y_train, y_test, epochs=250, batch_size=64):
    """Train a three-layer CuDNN-LSTM softmax classifier; returns the fit history."""
    clear_session()

    net = tf.keras.Sequential()
    net.add(CuDNNLSTM(units=64,
                      return_sequences=True,
                      input_shape=(x_train.shape[1:]),
                      kernel_initializer='random_uniform',
                      kernel_regularizer=tf.keras.regularizers.l2(l=1e-4)))
    # Ignore 20% of the units in both forward and backward propagation.
    net.add(tf.keras.layers.Dropout(0.2))
    net.add(CuDNNLSTM(units=64,
                      return_sequences=True,
                      kernel_initializer='random_uniform',
                      kernel_regularizer=tf.keras.regularizers.l2(l=1e-4)))
    net.add(tf.keras.layers.Dropout(0.2))
    net.add(CuDNNLSTM(units=64,
                      return_sequences=False,
                      kernel_initializer='random_uniform',
                      kernel_regularizer=tf.keras.regularizers.l2(l=1e-4)))
    net.add(tf.keras.layers.Dropout(0.2))
    net.add(tf.keras.layers.Dense(units=128,
                                  kernel_initializer='random_uniform'))
    net.add(tf.keras.layers.Dropout(0.2))
    net.add(tf.keras.layers.Dense(units=y_train.shape[1],
                                  activation='softmax',
                                  kernel_initializer='random_uniform'))

    net.compile(optimizer=tf.keras.optimizers.Adam(lr=1e-4, decay=1e-7),
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return net.fit(x=x_train, y=y_train,
                   validation_data=(x_test, y_test),
                   epochs=epochs, batch_size=batch_size)
def trans_net(inputs, masks, hidden_unit=128):
    """Transformer-style stage (masked MHA + FFN) fused with a BiLSTM branch."""
    inputs = tf.keras.layers.Dropout(0.3)(inputs)
    encodings = tf.keras.layers.Conv1D(filters=inputs.shape[-1],
                                       kernel_size=1,
                                       padding='same',
                                       activation='relu')(inputs)

    # Single stage; kept as a loop so stages can be stacked easily.
    for _ in range(1):
        # pre-norm
        encodings = LayerNormalization()(encodings)
        # masked multi-head attention
        mha_out = MultiHeadAttention(8, encodings.shape[-1] // 8)(
            [encodings, encodings, encodings, masks])
        # residual add
        mha_out = mha_out + encodings
        # position-wise feed-forward
        ffn = PositionWiseFeedForward(encodings.shape[-1], hidden_unit)
        ffn_out = ffn(mha_out)
        # BiLSTM branch over the normalized encodings
        x = tf.keras.layers.Bidirectional(
            CuDNNLSTM(hidden_unit, return_sequences=True))(encodings)
        # linear projection back to the encoding width
        x = tf.keras.layers.Conv1D(filters=encodings.shape[-1],
                                   kernel_size=1,
                                   padding='same',
                                   activation='relu')(x)
        # three-way residual add, then norm
        x = x + mha_out + ffn_out
        x = LayerNormalization()(x)
    return x
def get_bidirectional_cudnn_model(self, pre_embeddings, dp_rate=-1.0, use_lstm=False):
    """Bidirectional CuDNN RNN + attention classifier (fast cuDNN kernels).

    :param pre_embeddings: pretrained embedding matrix (kept frozen)
    :param dp_rate: dropout rate; <= 0 disables dropout
    :param use_lstm: True -> LSTM units, False -> GRU units
    :return: (model, function returning the attention weights for an input)
    """
    # Frozen pretrained embeddings; a multichannel setup (as in the original
    # paper) would also be possible here.
    embedding_layer = Embedding(self.max_features,         # vocabulary size
                                self.embedding_dims,       # embedding dimension
                                weights=[pre_embeddings],  # pretrained vectors
                                input_length=self.maxlen,  # max sentence length
                                trainable=False)           # keep vectors fixed
    seq_input = Input((self.maxlen,))
    embedded = embedding_layer(seq_input)

    rnn = CuDNNLSTM(RNN_DIM, return_sequences=True) if use_lstm \
        else CuDNNGRU(RNN_DIM, return_sequences=True)
    x = Bidirectional(rnn)(embedded)

    # add none or one of the configured attention layers
    x, atten_layer = self.do_attention(x)
    fn = kb.function([seq_input], [atten_layer.att_weights])

    if dp_rate > 0:
        x = Dropout(dp_rate)(x)
    output = Dense(self.class_num, activation=self.last_activation)(x)
    return Model(inputs=seq_input, outputs=output), fn
def LSTM_net_AZ_split_lstm(emb_layer, n_unit=128):
    """BiLSTM followed by multi-width conv filters and pooled feature concat."""
    seq = keras.layers.Bidirectional(
        CuDNNLSTM(n_unit, return_sequences=True))(emb_layer)

    # conv feature filters with increasing receptive fields
    conv_a = keras.layers.Conv1D(filters=128, kernel_size=2,
                                 padding='same', activation='relu')(seq)
    conv_b = keras.layers.Conv1D(filters=64, kernel_size=4,
                                 padding='same', activation='relu')(seq)
    conv_c = keras.layers.Conv1D(filters=32, kernel_size=8,
                                 padding='same', activation='relu')(seq)

    # pooled summaries: max over the raw BiLSTM output plus per-conv pools
    pooled = [
        keras.layers.GlobalMaxPooling1D()(seq),
        keras.layers.GlobalAveragePooling1D()(conv_a),
        keras.layers.GlobalAveragePooling1D()(conv_b),
        keras.layers.GlobalMaxPooling1D()(conv_c),
    ]
    return keras.layers.concatenate(pooled)
def model(embedding_size, n_a):
    """BiLSTM sequence model: per-timestep sigmoid over a small tanh hidden layer."""
    X = Input(batch_shape=(batch_size, None, embedding_size))
    # Functional API needs explicit inputs, just like any function.
    hidden = Bidirectional(CuDNNLSTM(units=n_a, return_sequences=True))(X)
    hidden = Dense(16, activation="tanh")(hidden)
    yhat = Dense(1, activation="sigmoid")(hidden)
    return Model(inputs=X, outputs=yhat)
def build_model():
    """Char model: one CuDNN-LSTM over (maxlen, len(chars)) windows -> softmax."""
    net = tf.keras.models.Sequential([
        CuDNNLSTM(128, input_shape=(maxlen, len(chars))),
        Dense(len(chars), activation='softmax'),
    ])
    net.compile(loss='categorical_crossentropy',
                optimizer=tf.keras.optimizers.RMSprop(lr=0.01))
    return net
def get_cnn_rnn_model(self, pre_embeddings, dp_rate=0.0, use_lstm=False, filter_sizes=[2, 3, 4]):
    """Conv1D + max-pool front end feeding a CuDNN recurrent classifier.

    :param pre_embeddings: pretrained embedding matrix (kept frozen)
    :param dp_rate: dropout rate applied after pooling; <= 0 disables it
    :param use_lstm: True -> LSTM unit, False -> GRU unit
    :param filter_sizes: NOTE(review): unused in the body — kept only for
        interface compatibility with callers
    :return: the assembled (uncompiled) model
    """
    embedding_layer = Embedding(self.max_features,         # vocabulary size
                                self.embedding_dims,       # embedding dimension
                                weights=[pre_embeddings],  # pretrained vectors
                                input_length=self.maxlen,  # max sentence length
                                trainable=False)           # keep vectors fixed
    seq_input = Input((self.maxlen,))
    embedded = embedding_layer(seq_input)

    # convolution + pooling front end
    features = Conv1D(NUM_FILTERS, 3, padding='valid', activation='relu')(embedded)
    features = MaxPooling1D()(features)
    if dp_rate > 0:
        features = Dropout(dp_rate)(features)

    recurrent = CuDNNLSTM(RNN_DIM) if use_lstm else CuDNNGRU(RNN_DIM)
    encoded = recurrent(features)
    output = Dense(self.class_num, activation=self.last_activation)(encoded)
    return Model(inputs=seq_input, outputs=output)
def get_cudnn_version_model(self, pre_embeddings, dp_rate=-1.0, use_lstm=False):
    """Single CuDNN RNN classifier over frozen pretrained embeddings.

    :param pre_embeddings: pretrained embedding matrix (kept frozen)
    :param dp_rate: dropout rate; <= 0 disables dropout
    :param use_lstm: True -> LSTM unit, False -> GRU unit
    :return: the assembled (uncompiled) model
    """
    embedding_layer = Embedding(self.max_features,         # vocabulary size
                                self.embedding_dims,       # embedding dimension
                                weights=[pre_embeddings],  # pretrained vectors
                                input_length=self.maxlen,  # max sentence length
                                trainable=False)           # keep vectors fixed
    seq_input = Input((self.maxlen,))
    embedded = embedding_layer(seq_input)

    recurrent = CuDNNLSTM(RNN_DIM) if use_lstm else CuDNNGRU(RNN_DIM)
    encoded = recurrent(embedded)
    if dp_rate > 0:
        encoded = Dropout(dp_rate)(encoded)
    output = Dense(self.class_num, activation=self.last_activation)(encoded)
    return Model(inputs=seq_input, outputs=output)
def init_model(self, input_shape, num_classes, **kwargs):
    """Build and compile a BiLSTM + self-attention classifier into ``self._model``."""
    inputs = Input(shape=input_shape)
    x = Bidirectional(CuDNNLSTM(64, name='blstm_1', return_sequences=True),
                      merge_mode='concat')(inputs)
    x = Activation('tanh')(x)
    x = SpatialDropout1D(0.5)(x)
    x = Attention(8, 16)([x, x, x])
    x = GlobalMaxPool1D()(x)
    x = Dropout(rate=0.5)(x)
    x = Dense(units=256, activation='relu')(x)
    outputs = Dense(units=num_classes, activation='softmax')(x)

    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(lr=1e-3,
                                beta_1=0.9,
                                beta_2=0.999,
                                epsilon=1e-08,
                                decay=0.0002,
                                amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def Transformer_net_AZ_trans_lstm(inputs, hidden_unit=512):
    """Unmasked transformer stage whose output is fused with a BiLSTM branch."""
    inputs = keras.layers.SpatialDropout1D(0.3)(inputs)
    # 1x1 conv feature filter
    encodings = keras.layers.Conv1D(filters=inputs.shape[-1].value,
                                    kernel_size=1,
                                    padding='same',
                                    activation='relu')(inputs)

    # Single stage; kept as a loop so stages can be stacked easily.
    for _ in range(1):
        # pre-norm
        encodings = LayerNormalization()(encodings)
        # multi-head attention without masking
        mha_out = MultiHeadAttention(8, encodings.shape[-1].value // 8,
                                     masking=False,
                                     _masking_num=-2 ** 32 - 1,
                                     masking_type='NOMASK')(
            [encodings, encodings, encodings])
        # residual add
        mha_out = mha_out + encodings
        # position-wise feed-forward
        ffn = PositionWiseFeedForward(encodings.shape[-1].value, hidden_unit)
        ffn_out = ffn(mha_out)
        # BiLSTM branch over the normalized encodings
        x = keras.layers.Bidirectional(
            CuDNNLSTM(256, return_sequences=True))(encodings)
        # linear projection back to the encoding width
        x = keras.layers.Conv1D(filters=encodings.shape[-1].value,
                                kernel_size=1,
                                padding='same',
                                activation='relu')(x)
        # residual adds, then norm
        x = x + mha_out
        x = x + ffn_out
        x = LayerNormalization()(x)
    return x
def init_model(self, input_shape, num_classes, **kwargs):
    """Build and compile a BiLSTM + attention classifier with dual pooling."""
    inputs = Input(shape=input_shape)
    seq = Bidirectional(CuDNNLSTM(96, name='blstm1', return_sequences=True),
                        merge_mode='concat')(inputs)
    seq = SpatialDropout1D(0.1)(seq)
    seq = Attention(8, 16)([seq, seq, seq])

    # max- and average-pooled summaries, concatenated
    max_pooled = GlobalMaxPool1D()(seq)
    avg_pooled = GlobalAvgPool1D()(seq)
    features = Concatenate(axis=-1)([max_pooled, avg_pooled])

    hidden = Dense(units=128, activation='elu')(features)
    hidden = Dense(units=64, activation='elu')(hidden)
    hidden = Dropout(rate=0.4)(hidden)
    outputs = Dense(units=num_classes, activation='softmax')(hidden)

    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(lr=1e-3,
                                beta_1=0.9,
                                beta_2=0.999,
                                epsilon=1e-08,
                                decay=0.0002,
                                amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def build_discriminator(self):
    """GAN discriminator: stacked (Bi)LSTMs + dense head, sigmoid validity score."""
    net = Sequential()
    net.add(CuDNNLSTM(512, input_shape=self.seq_shape, return_sequences=True))
    net.add(Bidirectional(CuDNNLSTM(512)))
    net.add(Dense(512))
    net.add(LeakyReLU(alpha=0.2))
    net.add(Dense(256))
    net.add(LeakyReLU(alpha=0.2))
    net.add(Dense(1, activation='sigmoid'))
    net.summary()

    # Wrap the Sequential stack in a functional Model over a sequence input.
    seq = Input(shape=self.seq_shape)
    validity = net(seq)
    return Model(seq, validity)
def build_LSTM_model(max_features=10000):
    """Binary classifier: embedding -> CuDNN-LSTM -> sigmoid, compiled with RMSprop."""
    net = Sequential([
        Embedding(max_features, 32),
        CuDNNLSTM(32),
        Dense(1, activation='sigmoid'),
    ])
    net.compile(optimizer='rmsprop',
                loss='binary_crossentropy',
                metrics=['acc'])
    return net
def build_model_chars(num_chars, lstm_units=256, optimiser='adam'):
    """Char-level seq2seq with attention; returns (train, encoder, decoder) models.

    The encoder LSTM emits per-timestep hidden states plus its final
    (hidden, cell) state; the decoder LSTM starts from the encoder states and
    its outputs are combined with attention over the encoder states before
    the softmax.  Separate inference-time encoder/decoder models share the
    trained layers.
    """
    # --- training graph -------------------------------------------------
    encoder_input = Input(shape=(None, num_chars), name='encoder_input')
    encoder_outputs, state_h, state_c = CuDNNLSTM(
        lstm_units, return_sequences=True, return_state=True,
        name="encoder_lstm")(encoder_input)
    encoder_states = [state_h, state_c]

    decoder_input = Input(shape=(None, num_chars), name='decoder_input')
    decoder_lstm = CuDNNLSTM(lstm_units, return_sequences=True,
                             return_state=True, name="decoder_lstm")
    decoder_outputs, _, _ = decoder_lstm(decoder_input,
                                         initial_state=encoder_states)

    attention = Attention(name='attention')
    atten_output = attention([decoder_outputs, encoder_outputs])
    # Attention loses the static shape; restore it so downstream layers build.
    atten_output.set_shape([None, None, lstm_units])

    concat = Concatenate()
    decoder_dense = Dense(num_chars,
                          input_shape=[None, None, lstm_units],
                          activation='softmax',
                          name='softmax_output')
    decoder_output = decoder_dense(concat([decoder_outputs, atten_output]))

    model = Model([encoder_input, decoder_input], decoder_output)
    model.compile(optimizer=optimiser, loss='categorical_crossentropy')

    # --- inference-time encoder ----------------------------------------
    encoder_model = Model(encoder_input,
                          [state_h, state_c, encoder_outputs])

    # --- inference-time decoder (fed states step by step) ---------------
    decoder_state_input_h = Input(shape=(lstm_units,))
    decoder_state_input_c = Input(shape=(lstm_units,))
    decoder_state_input_enc = Input(shape=[None, lstm_units])

    decoder_outputs, state_h, state_c = decoder_lstm(
        decoder_input,
        initial_state=[decoder_state_input_h, decoder_state_input_c])
    dec_attention = attention([decoder_outputs, decoder_state_input_enc])
    decoder_outputs = decoder_dense(concat([decoder_outputs, dec_attention]))

    dec_state_inputs = [decoder_state_input_h,
                        decoder_state_input_c,
                        decoder_state_input_enc]
    decoder_model = Model([decoder_input] + dec_state_inputs,
                          [decoder_outputs] + [state_h, state_c])

    return model, encoder_model, decoder_model
def create_vae(input_timesteps, latent_dim, reparameterize_layer):
    """Recurrent VAE-style world model over (timesteps, 5) inputs.

    Outputs per timestep: observation params (3*2), reward params (1*2),
    next-latent params (latent_dim*2), plus the latent distribution
    parameters and the reparameterized latent sample.
    """
    timestep_data = Input(shape=(input_timesteps, 5), name='timestep_data')
    # The action is sliced out of the last feature column rather than taken
    # as a second input.
    action_data = Lambda(lambda x: x[:, :, -1:],
                         output_shape=(1, ),
                         name='action_data')(timestep_data)

    rnn_state = Bidirectional(CuDNNLSTM(units=10,
                                        input_shape=(input_timesteps, 5),
                                        return_sequences=True),
                              name='rnn_state')(timestep_data)

    # Latent distribution parameters (mean + log-var -> latent_dim * 2),
    # zero-initialized and norm-constrained.
    latent_state = TimeDistributed(Dense(
        units=latent_dim * 2,
        bias_initializer=K.initializers.zeros(),
        kernel_initializer=K.initializers.zeros(),
        kernel_constraint=K.constraints.max_norm(0.5)),
        name='latent_state')(rnn_state)
    latent_sample = TimeDistributed(Lambda(reparameterize_layer,
                                           output_shape=(latent_dim, )),
                                    name='latent_sample')(latent_state)

    def _head(x, hidden_names, out_units, out_name):
        # Small per-timestep MLP head: named relu hidden layers + linear out.
        for nm in hidden_names:
            x = TimeDistributed(Dense(units=100, activation='relu'),
                                name=nm)(x)
        return TimeDistributed(Dense(units=out_units), name=out_name)(x)

    observations = _head(latent_sample, ['obs1', 'obs2', 'obs3'],
                         3 * 2, 'obs_out')
    rewards = _head(latent_sample, ['rew1', 'rew2', 'rew3'],
                    1 * 2, 'rew_out')

    state_action = Concatenate()([latent_sample, action_data])
    next_state = _head(state_action, ['ns1'],
                       latent_dim * 2, 'next_state')

    # Create VAE model
    return Model(inputs=timestep_data,
                 outputs=[observations, rewards, next_state,
                          latent_state, latent_sample])
def get_model(hyperparameters, predictors, targets):
    """Three-layer CuDNN-LSTM sequence-to-sequence regressor (MAE loss)."""
    regularizer = l2(0.01)
    optimizer = Adam(lr=hyperparameters['learning_rate'])

    net = Sequential()
    net.add(CuDNNLSTM(units=30,
                      input_shape=(hyperparameters['input_sequence_length'],
                                   len(predictors)),
                      return_sequences=True,
                      kernel_regularizer=regularizer))
    net.add(GaussianNoise(1e-4))
    net.add(BatchNormalization())
    net.add(CuDNNLSTM(units=20,
                      return_sequences=True,
                      kernel_regularizer=regularizer))
    net.add(GaussianNoise(1e-4))
    net.add(BatchNormalization())
    net.add(CuDNNLSTM(units=10,
                      kernel_regularizer=regularizer,
                      return_sequences=False))
    net.add(GaussianNoise(1e-4))
    net.add(BatchNormalization())
    # Flat prediction, reshaped to (output_sequence_length, n_targets).
    net.add(Dense(hyperparameters['output_sequence_length'] * len(targets),
                  activation='relu'))
    net.add(Reshape((hyperparameters['output_sequence_length'], len(targets))))
    net.compile(optimizer=optimizer, loss='mean_absolute_error')
    return net
def create_model(num_frame, num_joint, num_output):
    """LSTM classifier over (num_frame, num_joint) sequences -> softmax."""
    net = Sequential()
    net.add(CuDNNLSTM(50,
                      input_shape=(num_frame, num_joint),
                      return_sequences=False))
    # Dropout gives the model more chances to learn independent representations.
    net.add(Dropout(0.4))
    net.add(Dense(60))
    net.add(Dropout(0.4))
    net.add(Dense(num_output, activation='softmax'))
    return net
def init_model(self, input_shape, num_classes, **kwargs):
    """LSTM + attention classifier; pooling strategy depends on class count."""
    inputs = Input(shape=input_shape)
    sequence_len = input_shape[0]
    # Pick the LSTM width from {32, 64, 128, 256, 512} closest to the
    # sequence length.
    lstm_units_array = np.array([32, 64, 128, 256, 512])
    lstm_units = lstm_units_array[np.argmin(
        np.abs(lstm_units_array - sequence_len))]

    seq = CuDNNLSTM(lstm_units, return_sequences=True)(inputs)
    seq = Activation('tanh')(seq)

    if num_classes >= 20:
        if num_classes < 30:
            dropped = SpatialDropout1D(0.5)(seq)
            attended = Attention(8, 16)([dropped, dropped, dropped])
        else:
            # No dropout: keep more information for many-class problems.
            attended = Attention(8, 16)([seq, seq, seq])
        # k-max pooling over time.
        k_num = 10
        kmax = Lambda(lambda x: tf.reshape(
            tf.nn.top_k(tf.transpose(x, [0, 2, 1]), k=k_num, sorted=True)[0],
            shape=[-1, k_num, 128]))(attended)
        features = Dropout(rate=0.5)(Flatten()(kmax))
    else:
        dropped = SpatialDropout1D(0.5)(seq)
        attended = Attention(8, 16)([dropped, dropped, dropped])
        features = Dropout(rate=0.5)(GlobalMaxPool1D()(attended))

    hidden = Dense(units=256, activation='relu')(features)
    outputs = Dense(units=num_classes, activation='softmax')(hidden)

    model = TFModel(inputs=inputs, outputs=outputs)
    # Label smoothing softens the one-hot targets.
    loss_fun = CategoricalCrossentropy(label_smoothing=0.2)
    optimizer = optimizers.Nadam(lr=0.002,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 epsilon=None,
                                 schedule_decay=0.004)
    model.compile(optimizer=optimizer,
                  loss=loss_fun,
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def Create_pretrained_model(dim, n_sequence, n_channels, n_output):
    """MobileNetV2 frame encoder + LSTM video classifier, compiled with SGD."""
    net = Sequential()
    # Per-frame ImageNet-pretrained feature extractor, pooled to a vector.
    net.add(TimeDistributed(MobileNetV2(weights='imagenet', include_top=False),
                            input_shape=(n_sequence, *dim, n_channels)))
    net.add(TimeDistributed(GlobalAveragePooling2D()))
    net.add(CuDNNLSTM(64, return_sequences=False))
    net.add(Dense(64, activation='relu'))
    net.add(Dropout(0.5))
    net.add(Dense(24, activation='relu'))
    net.add(Dropout(0.5))
    net.add(Dense(n_output, activation='softmax'))
    net.compile(optimizer='sgd',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
    return net
def init_model(self, input_shape, num_classes, **kwargs):
    """Build and compile a BiLSTM + attention classifier into ``self._model``.

    The BiLSTM width is picked from {32, 64, 128, 256, 512} as the value
    closest to the input sequence length.
    """
    inputs = Input(shape=input_shape)
    sequence_len = input_shape[0]
    lstm_units_array = np.array([32, 64, 128, 256, 512])
    lstm_units = lstm_units_array[np.argmin(
        np.abs(lstm_units_array - sequence_len))]

    lstm_1 = Bidirectional(CuDNNLSTM(lstm_units, name='blstm_1',
                                     return_sequences=True),
                           merge_mode='concat')(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    dropout1 = SpatialDropout1D(0.5)(activation_1)
    # BUG FIX: the original `if lstm_units <= 128: ... else: ...` had two
    # byte-identical branches; collapsed into the single equivalent call.
    attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
    pool_1 = GlobalMaxPool1D()(attention_1)
    dropout2 = Dropout(rate=0.5)(pool_1)
    dense_1 = Dense(units=256, activation='relu')(dropout2)
    outputs = Dense(units=num_classes, activation='softmax')(dense_1)

    model = TFModel(inputs=inputs, outputs=outputs)
    # Label smoothing softens the one-hot targets.
    loss_fun = CategoricalCrossentropy(label_smoothing=0.2)
    optimizer = optimizers.Adam(lr=1e-3,
                                beta_1=0.9,
                                beta_2=0.999,
                                epsilon=1e-08,
                                decay=0.0002,
                                amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss=loss_fun,
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def init_model(self, input_shape, num_classes, **kwargs):
    """128-unit LSTM + attention classifier; k-max pooling when classes >= 20."""
    inputs = Input(shape=input_shape)
    seq = CuDNNLSTM(128, return_sequences=True)(inputs)
    seq = Activation('tanh')(seq)

    if num_classes >= 20:
        if num_classes < 30:
            dropped = SpatialDropout1D(0.5)(seq)
            attended = Attention(8, 16)([dropped, dropped, dropped])
        else:
            # No dropout: keep more information when classifying many classes.
            attended = Attention(8, 16)([seq, seq, seq])
        # k-max pooling over time.
        k_num = 10
        kmax = Lambda(lambda x: tf.reshape(
            tf.nn.top_k(tf.transpose(x, [0, 2, 1]), k=k_num, sorted=True)[0],
            shape=[-1, k_num, 128]))(attended)
        features = Dropout(rate=0.5)(Flatten()(kmax))
    else:
        dropped = SpatialDropout1D(0.5)(seq)
        attended = Attention(8, 16)([dropped, dropped, dropped])
        features = Dropout(rate=0.5)(GlobalMaxPool1D()(attended))

    hidden = Dense(units=256, activation='softplus')(features)
    outputs = Dense(units=num_classes, activation='softmax')(hidden)

    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Nadam(lr=0.002,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 epsilon=None,
                                 schedule_decay=0.004)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True