def __init__(self, past_measurement_dimensions, future_measurements_dimensions, hidden_dim, action_dimension, drop_prob=0.2):
    """Build the policy network; it predicts only the next quality.

    :param past_measurement_dimensions: feature count per past-measurement timestep
    :param future_measurements_dimensions: feature count per future-measurement timestep
    :param hidden_dim: width of the GRUs and of the hidden dense layer
    :param action_dimension: number of discrete actions (softmax outputs)
    :param drop_prob: dropout rate applied inside the GRUs
    """
    # Two variable-length sequence inputs, each summarised to a single vector by a GRU.
    self.policy_past_input = Input(shape=(None, past_measurement_dimensions))
    self.policy_future_input = Input(shape=(None, future_measurements_dimensions))
    self.policy_past_GRU = GRU(units=hidden_dim, return_sequences=False, dropout=drop_prob)(self.policy_past_input)
    self.policy_future_GRU = GRU(units=hidden_dim, return_sequences=False, dropout=drop_prob)(self.policy_future_input)
    # Dense head: hidden relu layer followed by a softmax over the actions.
    self.policy_dense1 = Dense(units=hidden_dim, activation="relu")
    self.policy_dense2 = Dense(units=action_dimension, activation="softmax")
    merged = concatenate([self.policy_past_GRU, self.policy_future_GRU])
    hidden = self.policy_dense1(merged)
    self.policy_action_output = self.policy_dense2(hidden)
    self.model = Model(inputs=[self.policy_past_input, self.policy_future_input], outputs=self.policy_action_output)
    self.model.compile(loss="categorical_crossentropy", optimizer="adam")
def decoder(statesize, embeddingsize, maxnumwords, transfer_layer_output):
    """Build a 3-layer GRU word decoder seeded from transfer values.

    :param statesize: GRU state size (also the width of the transfer map)
    :param embeddingsize: word-embedding dimension
    :param maxnumwords: vocabulary size for the embedding and output layers
    :param transfer_layer_output: tensor whose second dimension gives the
        transfer-value size
    :return: Keras Model mapping [transfer_values, token ids] -> per-step
        linear scores over the vocabulary
    """
    transfer_values_size = K.int_shape(transfer_layer_output)[1]
    transfer_values_input = Input(shape=(transfer_values_size, ), name='transfer_values_input')
    # BUG FIX: the body previously referenced undefined globals (state_size,
    # embedding_size, num_words) instead of the actual parameters
    # (statesize, embeddingsize, maxnumwords), raising NameError unless
    # same-named globals happened to exist.
    decoder_transfer_map = Dense(statesize, activation='tanh', name='decoder_transfer_map')
    decoder_input = Input(shape=(None, ), name='decoder_input')
    decoder_embedding = Embedding(input_dim=maxnumwords, output_dim=embeddingsize, name='decoder_embedding')
    decoder_gru1 = GRU(statesize, name='decoder_gru1', return_sequences=True)
    decoder_gru2 = GRU(statesize, name='decoder_gru2', return_sequences=True)
    decoder_gru3 = GRU(statesize, name='decoder_gru3', return_sequences=True)
    decoder_dense = Dense(maxnumwords, activation='linear', name='decoder_output')
    # Map transfer values into the GRU state space; every GRU layer is seeded
    # with the same initial state.
    initial_state = decoder_transfer_map(transfer_values_input)
    net = decoder_input
    net = decoder_embedding(net)
    net = decoder_gru1(net, initial_state=initial_state)
    net = decoder_gru2(net, initial_state=initial_state)
    net = decoder_gru3(net, initial_state=initial_state)
    decoder_output = decoder_dense(net)
    decoder_model = Model(inputs=[transfer_values_input, decoder_input], outputs=[decoder_output])
    return decoder_model
def define_attention_model(src_vocab, tar_vocab, src_timesteps, tar_timesteps, n_units):
    """GRU encoder-decoder with attention for sequence-to-sequence prediction.

    :param src_vocab: source one-hot vocabulary size
    :param tar_vocab: target one-hot vocabulary size
    :param src_timesteps: encoder sequence length
    :param tar_timesteps: target sequence length (decoder sees tar_timesteps - 1)
    :param n_units: GRU hidden size
    :return: uncompiled Keras Model [encoder_inputs, decoder_inputs] -> softmax
    """
    encoder_inputs = Input(shape=(src_timesteps, src_vocab), name='encoder_inputs')
    decoder_inputs = Input(shape=(tar_timesteps - 1, tar_vocab), name='decoder_inputs')
    encoder_gru = GRU(n_units, return_sequences=True, return_state=True, name='encoder_gru')
    encoder_out, encoder_state = encoder_gru(encoder_inputs)
    # Decoder is seeded with the encoder's final state.
    decoder_gru = GRU(n_units, return_sequences=True, return_state=True, name='decoder_gru')
    decoder_out, decoder_state = decoder_gru(decoder_inputs, initial_state=encoder_state)
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])
    decoder_concat_input = Concatenate(
        axis=-1, name='concat_layer')([decoder_out, attn_out])
    # Per-timestep softmax over the target vocabulary.
    dense = Dense(tar_vocab, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(decoder_concat_input)
    # BUG FIX: decoder_pred depends on BOTH encoder_inputs and decoder_inputs;
    # building the Model with only encoder_inputs leaves decoder_inputs
    # disconnected and Keras rejects the graph.
    model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)
    return model
def _create(self):
    """Assemble the dual-sequence classifier: two embedded GRU event streams
    plus time-of-day / day-of-week one-hot inputs, merged into a dense
    softmax head. Stores the model on ``self.model`` and returns it.
    """
    EMBEDDING_DIMS = 50
    GRU_DIMS = 128
    DROPOUT_GRU = 0.0
    RNN_DROPOUT = 0.0
    print('Creating Model...')
    # Integer-id sequences for the "play" and "save" event streams.
    input_play = Input(shape=(SEQ_LEN, ), dtype='int32', name='input_play')
    input_save = Input(shape=(SEQ_LEN, ), dtype='int32', name='input_save')
    # Shared embedding; Keras requires 2 extra rows for the "other" class.
    embedding_layer = Embedding(input_dim=(EMBEDDING_CLASSES + 2),
                                output_dim=EMBEDDING_DIMS,
                                input_length=SEQ_LEN,
                                mask_zero=False,
                                trainable=True,
                                name='emb')
    play_emb = embedding_layer(input_play)
    save_emb = embedding_layer(input_save)
    # One GRU per stream, then average over time into fixed-size encodings.
    play_seq = GRU(GRU_DIMS, return_sequences=True, dropout=DROPOUT_GRU,
                   recurrent_dropout=RNN_DROPOUT, name='gru1')(play_emb)
    save_seq = GRU(GRU_DIMS, return_sequences=True, dropout=DROPOUT_GRU,
                   recurrent_dropout=RNN_DROPOUT, name='gru2')(save_emb)
    play_encoded = GlobalAveragePooling1D(name='globplay')(play_seq)
    save_encoded = GlobalAveragePooling1D(name='globsave')(save_seq)
    # One-hot context features.
    ohe_tod = Input(shape=(TOTAL_TOD_BINS, ), name='time_of_day_ohe')
    ohe_dow = Input(shape=(TOTAL_DOW_BINS, ), name='day_of_wk_ohe')
    print('Merging features...')
    merged = concatenate([play_encoded, save_encoded, ohe_tod, ohe_dow], axis=1, name='concat')
    # Fully connected classification head.
    hidden = Dense(1024, activation='relu', name='main_dense')(merged)
    pred = Dense(TARGET_CLASSES, activation='softmax', name='output')(hidden)
    self.model = Model(inputs=[input_play, input_save, ohe_tod, ohe_dow], outputs=[pred])
    print(self.model.summary())
    return self.model
def __init__(self, past_measurement_dimensions, future_measurements_dimensions, hidden_dim, drop_prob=0.2):
    """V(S) value-function approximator over past and future measurement sequences.

    :param past_measurement_dimensions: feature count per past timestep
    :param future_measurements_dimensions: feature count per future timestep
    :param hidden_dim: base width; dense stack narrows to hidden_dim // 4
    :param drop_prob: dropout rate inside the GRUs
    """
    past_in = Input(shape=(None, past_measurement_dimensions))
    future_in = Input(shape=(None, future_measurements_dimensions))
    self.discriminator_past_GRU = GRU(units=hidden_dim, return_sequences=False, dropout=drop_prob)
    self.discriminator_future_GRU = GRU(units=hidden_dim, return_sequences=False, dropout=drop_prob)
    self.discriminator_dense_1 = Dense(units=hidden_dim // 2, activation="relu")
    self.discriminator_dense_2 = Dense(units=hidden_dim // 2, activation="relu")
    self.discriminator_dense_3 = Dense(units=hidden_dim // 4, activation="relu")
    # Single linear output: the state value.
    self.discriminator_dense_final = Dense(units=1)
    # Summarise both sequences, then funnel through the dense stack.
    features = concatenate([self.discriminator_past_GRU(past_in),
                            self.discriminator_future_GRU(future_in)])
    for dense_layer in (self.discriminator_dense_1, self.discriminator_dense_2, self.discriminator_dense_3):
        features = dense_layer(features)
    linear_output_layer = self.discriminator_dense_final(features)
    self.model = Model(inputs=[past_in, future_in], outputs=linear_output_layer)
    self.model.compile(loss="mse", optimizer="adam")
def get_rnn_model(window_size, features, pred_length):
    """Two stacked GRUs plus a dense head for multi-step regression.

    Every trainable layer carries L2(0.01) kernel/bias regularization and is
    followed by batch norm and 50% dropout. Compiled with Adam(lr=0.001),
    MSE loss and MAE metric.

    :param window_size: input sequence length
    :param features: features per timestep
    :param pred_length: number of linear outputs
    :return: compiled Keras model
    """
    def l2reg():
        # A fresh regularizer instance per use, matching the original
        # one-object-per-layer construction.
        return regularizers.l2(0.01)

    inputs = Input(shape=(window_size, features))
    x = GRU(128, kernel_regularizer=l2reg(), bias_regularizer=l2reg(),
            return_sequences=True, input_shape=(window_size, features))(inputs)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = GRU(128, kernel_regularizer=l2reg(), bias_regularizer=l2reg())(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu', kernel_regularizer=l2reg(), bias_regularizer=l2reg())(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    preds = Dense(pred_length, activation='linear', kernel_regularizer=l2reg(), bias_regularizer=l2reg())(x)
    model = Model(inputs=inputs, outputs=preds)
    model.compile(optimizer=optimizers.Adam(lr=0.001), loss='mse', metrics=['mae'])
    return model
def create_kaggle_model(fingerprint_input, model_settings, is_training):
    """Build a conv + bidirectional-GRU keyword-spotting graph (TF1/Keras hybrid).

    :param fingerprint_input: flat audio-feature tensor, reshaped below to
        (batch, time, mel, 1)
    :param model_settings: dict providing 'dct_coefficient_count',
        'spectrogram_length' and 'label_count'
    :param is_training: when True, a dropout placeholder is created and returned
    :return: logits tensor; plus the dropout_prob placeholder when is_training
    """
    if is_training:
        # Placeholder is created and returned, but note it is never wired into
        # any dropout op in this graph.
        dropout_prob = tf.placeholder(tf.float32, name='dropout_prob')
    input_frequency_size = model_settings['dct_coefficient_count']  # NOTE: unused below
    input_time_size = model_settings['spectrogram_length']
    mel_bins = 80
    input_shape = [input_time_size, mel_bins, 1]
    fingerprint_4d = tf.reshape(fingerprint_input, [-1] + input_shape)
    conv_filters = 32
    # Strided conv downsamples time by 2 and mel by 8.
    x = Conv2D(filters=conv_filters, kernel_size=[5, 20], strides=[2, 8],
               padding='same', use_bias=False, input_shape=input_shape)(fingerprint_4d)
    x = tf.layers.BatchNormalization(scale=False)(x)
    x = Activation('relu')(x)
    # print(x.get_shape().as_list())
    # Fold (mel, channels) into one feature axis. The hard-coded (49, 320)
    # assumes input_time_size and mel_bins produce exactly 49x10x32 after the
    # conv — TODO confirm against model_settings.
    x = Reshape((49, 320))(x)
    rnn_size = 256
    x = Bidirectional(GRU(rnn_size, return_sequences=True, unroll=True))(x)
    x = Bidirectional(GRU(rnn_size, return_sequences=True, unroll=True))(x)
    x = Dense(rnn_size, activation='relu')(x)
    x = Flatten()(x)
    label_count = model_settings['label_count']
    # Final linear projection to per-class logits (no softmax here).
    final_fc = Dense(label_count)(x)
    if is_training:
        return final_fc, dropout_prob
    else:
        return final_fc
def __init__(self, past_measurement_dimensions, future_measurements_dimensions, hidden_dim, action_dimension, drop_prob=0.2):
    """General purpose keras GRU classifer (real-vs-fake discriminator).

    :param past_measurement_dimensions: feature count per past timestep
    :param future_measurements_dimensions: feature count per future timestep
    :param hidden_dim: width of the GRUs and of the hidden dense layers
    :param action_dimension: size of the action vector joined mid-network
    :param drop_prob: dropout rate inside the GRUs
    """
    past_in = Input(shape=(None, past_measurement_dimensions))
    future_in = Input(shape=(None, future_measurements_dimensions))
    action_in = Input(shape=(action_dimension,))
    self.discriminator_past_GRU = GRU(units=hidden_dim, return_sequences=False, dropout=drop_prob)
    self.discriminator_future_GRU = GRU(units=hidden_dim, return_sequences=False, dropout=drop_prob)
    self.discriminator_dense_1 = Dense(units=hidden_dim, activation="relu")
    self.discriminator_dense_2 = Dense(units=hidden_dim, activation="relu")
    # We predict two -> Is it real or not
    self.discriminator_dense_final = Dense(units=2, activation="softmax")
    # Summarise both sequences with the GRUs, then join and process.
    features = concatenate([self.discriminator_past_GRU(past_in),
                            self.discriminator_future_GRU(future_in)])
    features = self.discriminator_dense_1(features)
    # The action vector enters after the first dense layer.
    features = concatenate([features, action_in])
    features = self.discriminator_dense_2(features)
    discriminator_likelihood = self.discriminator_dense_final(features)
    self.model = Model(inputs=[past_in, future_in, action_in],
                       outputs=discriminator_likelihood)
    self.model.compile(loss="categorical_crossentropy", optimizer="adam")
def get_multilayer_model(self, pre_embeddings, dp_rate=0.0, use_lstm=False):
    """Construct a 3-layer LSTM or GRU classifier.

    :param pre_embeddings: pre-trained embedding matrix (kept frozen)
    :param dp_rate: recurrent dropout rate on the last recurrent layer
    :param use_lstm: use LSTM units when True, otherwise GRU units
    :return: the assembled (uncompiled) Sequential model
    """
    # Frozen embedding lookup: vocabulary size x embedding dims, fixed
    # per-sentence length, weights taken from the pre-trained vectors.
    embedding_layer = Embedding(self.max_features,
                                self.embedding_dims,
                                weights=[pre_embeddings],
                                input_length=self.maxlen,
                                trainable=False)
    # Same topology either way; only the recurrent cell type differs.
    rnn = LSTM if use_lstm else GRU
    model = Sequential()
    model.add(embedding_layer)
    # Two sequence-returning layers of dimension RNN_DIM feeding a final
    # layer that emits a single RNN_DIM vector.
    model.add(rnn(RNN_DIM, return_sequences=True))
    model.add(rnn(RNN_DIM, return_sequences=True))
    model.add(rnn(RNN_DIM, recurrent_dropout=dp_rate))
    model.add(Dense(self.class_num, activation=self.last_activation))
    return model
def makeGGDDModel(TSLen, nbOfFeat, batch_size=None, lrPar=0.001, u1=32, u2=64, d1=32, d2=64):
    """GRU-GRU-Dense-Dense regressor with a single linear output.

    :param TSLen: timesteps per series
    :param nbOfFeat: features per timestep
    :param batch_size: fixed batch size, or None for variable
    :param lrPar: RMSProp learning rate
    :param u1, u2: units in the first and second GRU
    :param d1, d2: units in the two dense layers
    :return: compiled tf.keras model (MAE loss)
    """
    source = Input(shape=(TSLen, nbOfFeat), batch_size=batch_size, dtype=tf.float32, name='Input')
    # Recurrent stack: sequences flow through GRU1 into GRU2's final state.
    x = GRU(u1, name='GRU1', dropout=0.1, recurrent_dropout=0.5, return_sequences=True)(source)
    x = GRU(u2, name='GRU2', dropout=0.1, recurrent_dropout=0.5)(x)
    # Dense stack down to the single regression output.
    x = Dense(d1, name='Dense1')(x)
    x = Dense(d2, name='Dense2')(x)
    predicted_var = Dense(1, name='Output')(x)
    model = tf.keras.Model(inputs=[source], outputs=[predicted_var])
    model.compile(optimizer=tf.train.RMSPropOptimizer(learning_rate=lrPar), loss='mae')
    return model
def makeGRUGRUModel(TSLen, nbOfFeat, batch_size=None, lrPar=0.001, u1=32, u2=64, d1=1):
    """Two stacked GRU layers (dropout=0.1, recurrent_dropout=0.5) topped by a
    1-unit regression head, with at most one extra dense layer before it.

    :param TSLen: timesteps per series
    :param nbOfFeat: features per timestep
    :param batch_size: fixed batch size, or None for variable
    :param lrPar: RMSProp learning rate
    :param u1, u2: units in the first and second GRU
    :param d1: width of the optional hidden dense layer; values <= 1 skip it
    :return: compiled tf.keras model (MAE loss)
    """
    source = Input(shape=(TSLen, nbOfFeat), batch_size=batch_size, dtype=tf.float32, name='Input')
    gru1 = GRU(u1, name='GRU1', dropout=0.1, recurrent_dropout=0.5, return_sequences=True)(source)
    gru2 = GRU(u2, name='GRU2', dropout=0.1, recurrent_dropout=0.5)(gru1)
    # BUG FIX: with d1 < 1 both original branches (d1 == 1 / d1 > 1) were
    # skipped, leaving `predicted_var` unbound and raising NameError below.
    # Treat anything <= 1 as "no hidden dense layer" — identical to the old
    # behaviour for d1 == 1 and d1 > 1.
    if d1 > 1:
        dense1 = Dense(d1, name='Dense1')(gru2)
        predicted_var = Dense(1, name='Output')(dense1)
    else:
        predicted_var = Dense(1, name='Output')(gru2)
    model = tf.keras.Model(inputs=[source], outputs=[predicted_var])
    model.compile(optimizer=tf.train.RMSPropOptimizer(learning_rate=lrPar), loss='mae')
    return model
def train_rnn(filename):
    """Train a 4-layer stacked-GRU binary classifier on tokenized text.

    Loads the data splits and pre-trained paragram embeddings, fits for
    4 epochs, saves the model to disk and returns it.

    :param filename: dataset file passed through to my_data.generate_rnn
    :return: the trained Keras model
    """
    X_train_token, X_dev_token, X_test_token, y_train, y_dev, y_test, tokenizer = my_data.generate_rnn(
        filename)
    paragram_embeddings = load_para(tokenizer.word_index)
    model = Sequential()
    # Frozen pre-trained embeddings feeding progressively narrower GRUs.
    model.add(Embedding(weights=[paragram_embeddings],
                        trainable=False,
                        input_dim=num_words,
                        output_dim=embedding_size,
                        input_length=max_tokens))
    model.add(GRU(units=32, return_sequences=True))
    model.add(GRU(units=16, dropout=0.5, return_sequences=True))
    model.add(GRU(units=8, return_sequences=True))
    model.add(GRU(units=4))
    # Single sigmoid output for the binary target.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=1e-3),
                  metrics=['AUC', 'accuracy'])
    model.summary()
    model.fit(np.array(X_train_token),
              y_train,
              validation_data=(np.array(X_dev_token), y_dev),
              epochs=4,
              batch_size=500)
    save_model(model, path + 'rnn_model_ref.h5')
    logging.info('train complete')
    return model
def create_model(self):
    """Build the attention seq2seq training model plus separate encoder and
    decoder models for inference.

    Side effects: sets ``self.model``, ``self.encoder_model`` and
    ``self.decoder_model``; prints the embedding size and the model summary.
    The inference models reuse the SAME layer objects as the training model,
    so trained weights are shared.
    """
    hidden_size = 256
    enc_timesteps = self.max_encoder_seq_length
    #timesteps = self.max_encoder_seq_length
    #perhaps making timesteps size of max sequence length would work?????""
    dec_timesteps = self.max_decoder_seq_length
    print(f"embedding size: {self.glove_model.embedding_size}")
    # encoder_inputs = Input(shape=(None, self.glove_model.embedding_size), name='encoder_inputs')
    # decoder_inputs = Input(shape=(None, self.num_decoder_tokens), name='decoder_inputs')
    encoder_inputs = Input(shape=(enc_timesteps, self.glove_model.embedding_size), name='encoder_inputs')
    decoder_inputs = Input(shape=(dec_timesteps, self.num_decoder_tokens), name='decoder_inputs')
    # Encoder: bidirectional GRU returning per-step outputs and both final states.
    encoder_gru = Bidirectional(GRU(hidden_size, return_sequences=True, return_state=True, name='encoder_gru'), name='bidirectional_encoder')
    encoder_out, encoder_fwd_state, encoder_back_state = encoder_gru(encoder_inputs)
    # Decoder: one GRU of width 2*hidden_size, seeded with the concatenation
    # of the encoder's forward and backward final states.
    decoder_gru = GRU(hidden_size*2, return_sequences=True, return_state=True, name='decoder_gru')
    decoder_out, decoder_state = decoder_gru(
        decoder_inputs,
        initial_state=Concatenate(axis=-1)([encoder_fwd_state, encoder_back_state])
    )
    # Attention over encoder outputs, conditioned on decoder outputs.
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])
    # Concat attention context and decoder GRU output per timestep.
    decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_out, attn_out])
    # Per-timestep softmax over the decoder token vocabulary.
    dense = Dense(self.num_decoder_tokens, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(decoder_concat_input)
    # Full training model.
    self.model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)
    self.model.compile(optimizer=tf.train.RMSPropOptimizer(learning_rate=0.01), loss='categorical_crossentropy')
    self.model.summary()
    """ Inference model """
    batch_size = 1
    """ Encoder (Inference) model """
    # Same encoder layers, fixed batch size of 1 for step-wise decoding.
    encoder_inf_inputs = Input(batch_shape=(batch_size, enc_timesteps, self.glove_model.embedding_size), name='encoder_inf_inputs')
    encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state = encoder_gru(encoder_inf_inputs)
    self.encoder_model = Model(inputs=encoder_inf_inputs, outputs=[encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state])
    """ Decoder (Inference) model """
    # One token at a time; the recurrent state is fed back in explicitly.
    decoder_inf_inputs = Input(batch_shape=(batch_size, 1, self.num_decoder_tokens), name='decoder_word_inputs')
    # NOTE(review): this uses dec_timesteps, but at training time the attention
    # layer consumes encoder outputs of length enc_timesteps — this looks like
    # it should be enc_timesteps; confirm against the inference caller.
    encoder_inf_states = Input(batch_shape=(batch_size, dec_timesteps, 2*hidden_size), name='encoder_inf_states')
    decoder_init_state = Input(batch_shape=(batch_size, 2*hidden_size), name='decoder_init')
    decoder_inf_out, decoder_inf_state = decoder_gru(
        decoder_inf_inputs, initial_state=decoder_init_state)
    attn_inf_out, attn_inf_states = attn_layer([encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
    decoder_inf_pred = TimeDistributed(dense)(decoder_inf_concat)
    self.decoder_model = Model(inputs=[encoder_inf_states, decoder_init_state, decoder_inf_inputs],
                               outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_state])
def define_nmt(hidden_size, batch_size, en_timesteps, en_vsize, fr_timesteps, fr_vsize):
    """ Defining a NMT model.

    Builds an attention-based EN->FR translation model and, reusing the same
    layer objects (shared weights), separate encoder and decoder models for
    step-wise inference.

    :param hidden_size: GRU hidden size (per direction)
    :param batch_size: fixed batch size, or falsy for variable-size inputs
    :param en_timesteps, en_vsize: source sequence length and one-hot vocab size
    :param fr_timesteps, fr_vsize: target sequence length and one-hot vocab size
    :return: (full_model, encoder_model, decoder_model)
    """
    # Define an input sequence and process it; a fixed batch_shape is used
    # only when a batch size was supplied.
    if batch_size:
        encoder_inputs = Input(batch_shape=(batch_size, en_timesteps, en_vsize), name='encoder_inputs')
        decoder_inputs = Input(batch_shape=(batch_size, fr_timesteps - 1, fr_vsize), name='decoder_inputs')
    else:
        encoder_inputs = Input(shape=(en_timesteps, en_vsize), name='encoder_inputs')
        decoder_inputs = Input(shape=(fr_timesteps - 1, fr_vsize), name='decoder_inputs')
    # Encoder: bidirectional GRU returning per-step outputs and both final states.
    encoder_gru = Bidirectional(GRU(hidden_size, return_sequences=True, return_state=True, name='encoder_gru'), name='bidirectional_encoder')
    encoder_out, encoder_fwd_state, encoder_back_state = encoder_gru(encoder_inputs)
    # Decoder: also bidirectional; seeded with the encoder's [fwd, back] states.
    decoder_gru = Bidirectional(GRU(hidden_size, return_sequences=True, return_state=True, name='decoder_gru'), name='bidirectional_decoder')
    decoder_out, decoder_fwd_state, decoder_back_state = decoder_gru(decoder_inputs, initial_state=[encoder_fwd_state, encoder_back_state])
    # Attention over encoder outputs, conditioned on decoder outputs.
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])
    # Concat attention context and decoder GRU output per timestep.
    decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_out, attn_out])
    # Per-timestep softmax over the target vocabulary.
    dense = Dense(fr_vsize, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(decoder_concat_input)
    # Full training model.
    full_model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)
    full_model.compile(optimizer='adam', loss='categorical_crossentropy')
    full_model.summary()
    """ Inference model """
    batch_size = 1
    """ Encoder (Inference) model """
    # Same encoder layers, fixed batch size of 1 for step-wise decoding.
    encoder_inf_inputs = Input(batch_shape=(batch_size, en_timesteps, en_vsize), name='encoder_inf_inputs')
    encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state = encoder_gru(encoder_inf_inputs)
    encoder_model = Model(inputs=encoder_inf_inputs, outputs=[encoder_inf_out, encoder_inf_fwd_state, encoder_inf_back_state])
    """ Decoder (Inference) model """
    # One target token at a time; both directional states are fed back in.
    decoder_inf_inputs = Input(batch_shape=(batch_size, 1, fr_vsize), name='decoder_word_inputs')
    encoder_inf_states = Input(batch_shape=(batch_size, en_timesteps, 2*hidden_size), name='encoder_inf_states')
    decoder_init_fwd_state = Input(batch_shape=(batch_size, hidden_size), name='decoder_fwd_init')
    decoder_init_back_state = Input(batch_shape=(batch_size, hidden_size), name='decoder_back_init')
    decoder_inf_out, decoder_inf_fwd_state, decoder_inf_back_state = decoder_gru(decoder_inf_inputs, initial_state=[decoder_init_fwd_state, decoder_init_back_state])
    attn_inf_out, attn_inf_states = attn_layer([encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
    decoder_inf_pred = TimeDistributed(dense)(decoder_inf_concat)
    decoder_model = Model(inputs=[encoder_inf_states, decoder_init_fwd_state, decoder_init_back_state, decoder_inf_inputs],
                          outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_fwd_state, decoder_inf_back_state])
    return full_model, encoder_model, decoder_model
def deep_rnnblocks(inputdim, inputshape):
    """Pick a pair of bidirectional GRU blocks sized by the input dimension.

    The first block returns sequences and carries the input_shape; the second
    returns a single vector and always has twice the units of the first
    (10/20, 15/30, 20/40 or 30/60 as inputdim grows).

    :param inputdim: feature dimensionality used to choose the layer widths
    :param inputshape: input_shape for the first (named 'input') block
    :return: tuple of two Bidirectional(GRU) layers
    """
    # Thresholds are exclusive upper bounds; anything >= 6 gets the largest size.
    for bound, units in ((2, 10), (4, 15), (6, 20)):
        if inputdim < bound:
            break
    else:
        units = 30
    return (Bidirectional(GRU(units, return_sequences=True), input_shape=inputshape, name='input'),
            Bidirectional(GRU(units * 2, return_sequences=False)))
def traning():
    '''
    Training entry point: build the vocabulary embedding matrix, fit a
    stacked-GRU sentiment classifier (3 classes) on the sentence data, and
    evaluate on the last 10 samples.
    :return: None (prints the evaluation accuracy)
    '''
    num_of_word = 50   # sequence length: words per sentence
    words_use = 40000  # vocabulary size kept in the embedding matrix
    # Initialise the data source and the pre-trained word2vec model.
    data = Data(path="../data/")
    w2v = Word2Vec(len=num_of_word, path="../third/models")
    # Build the training sequences and their class labels.
    sentences = data.negative_data + data.neural_data + data.positive_data
    traning_seq = w2v.convert_sentences_to_seqences(sentences)
    traning_res = data.create_classs()
    print(traning_seq)
    print(traning_res)
    # Build the input-layer embedding matrix (probe one word for the dims).
    embding_dimen = w2v.w2v_model["银行"].shape[0]
    embding_mat = np.zeros((words_use, embding_dimen))
    # BUG FIX: this loop previously ran over range(num_of_word) (the sequence
    # length, 50), leaving all but the first 50 of the 40000 vocabulary rows
    # zero; it must fill one row per kept vocabulary word.
    for i in range(words_use):
        embding_mat[i, :] = w2v.w2v_model[w2v.w2v_model.index2word[i]]
    embding_mat = embding_mat.astype('float32')
    # Map out-of-vocabulary token ids to row 0.
    traning_seq[traning_seq >= words_use] = 0
    # Build the model: frozen embeddings -> two GRUs -> 3-way softmax.
    model = Sequential()
    model.add(
        Embedding(words_use,
                  embding_dimen,
                  weights=[embding_mat],
                  input_length=num_of_word,
                  trainable=False))
    model.add(GRU(units=32, return_sequences=True))
    model.add(GRU(units=16, return_sequences=False))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    # Train on everything but the last 10 samples (held out for testing).
    model.fit(traning_seq[:-10],
              traning_res[:-10],
              validation_split=0.1,
              epochs=40,
              batch_size=128)
    # Evaluate on the held-out last 10 samples.
    result = model.evaluate(traning_seq[-10:], traning_res[-10:])
    print('Accuracy:{0:.3%}'.format(result[1]))
def build_model(time_steps, num_classes, inputdim):
    """Bidirectional two-layer GRU classifier with a softmax head.

    :param time_steps: input sequence length
    :param num_classes: number of output classes
    :param inputdim: features per timestep
    :return: compiled Keras model (categorical crossentropy, Adam, accuracy)
    """
    model = Sequential([
        # First block returns sequences so the second can consume them.
        Bidirectional(GRU(10, return_sequences=True), input_shape=(time_steps, inputdim)),
        Bidirectional(GRU(20, return_sequences=False)),
        Dropout(0.5),
        Dense(num_classes),
        Activation('softmax'),
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()
    return model
def build_model(self):
    """
    Function to build the seq2seq model used.
    :return: Encoder model, decoder model (used for predicting) and full model (used for training).
    """
    # Define model inputs for the encoder/decoder stack
    x_enc = Input(shape=(self.seq_len_in, self.input_feature_amount), name="x_enc")
    x_dec = Input(shape=(self.seq_len_out, self.output_feature_amount), name="x_dec")
    # Add noise to the teacher-forced decoder inputs (regularization; active
    # only during training).
    x_dec_t = GaussianNoise(0.2)(x_dec)
    # Encoder GRU: returns per-step outputs (for attention) and the final state.
    encoder_gru = GRU(self.state_size, return_sequences=True, return_state=True, name="encoder_gru")
    encoder_out, encoder_state = encoder_gru(x_enc)
    # Decoder GRU, seeded with the encoder's final state.
    decoder_gru = GRU(self.state_size, return_state=True, return_sequences=True, name="decoder_gru")
    dec_intermediates, decoder_state = decoder_gru(x_dec_t, initial_state=encoder_state)
    # Attention over encoder outputs, conditioned on decoder outputs.
    attn_layer = AttentionLayer(name="attention_layer")
    attn_out, attn_states = attn_layer([encoder_out, dec_intermediates])
    # Concatenate decoder output and attention context per timestep.
    decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([dec_intermediates, attn_out])
    # Linear per-timestep projection to the output features.
    dense = Dense(self.output_feature_amount, activation='linear', name='output_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(decoder_concat_input)
    # Full encoder/decoder training model.
    encdecmodel = tsModel(inputs=[x_enc, x_dec], outputs=decoder_pred)
    # Separate encoder model for inference; reuses the trained encoder layers.
    encoder_inf_inputs = Input(shape=(self.seq_len_in, self.input_feature_amount), name="encoder_inf_inputs")
    encoder_inf_out, encoder_inf_state = encoder_gru(encoder_inf_inputs)
    encoder_model = tsModel(inputs=encoder_inf_inputs, outputs=[encoder_inf_out, encoder_inf_state])
    # Separate decoder model for inference: one timestep at a time, with the
    # encoder outputs and the recurrent state passed in explicitly.
    decoder_inf_inputs = Input(shape=(1, self.output_feature_amount), name="decoder_inputs")
    encoder_inf_states = Input(shape=(self.seq_len_in, self.state_size), name="encoder_inf_states")
    decoder_init_state = Input(shape=(self.state_size,), name="decoder_init")
    decoder_inf_out, decoder_inf_state = decoder_gru(decoder_inf_inputs, initial_state=decoder_init_state)
    attn_inf_out, attn_inf_states = attn_layer([encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
    decoder_inf_pred = TimeDistributed(dense)(decoder_inf_concat)
    decoder_model = tsModel(inputs=[encoder_inf_states, decoder_init_state, decoder_inf_inputs],
                            outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_state])
    return encoder_model, decoder_model, encdecmodel
def gru(inputs, num_layers=None, num_units=None, direction='bidirectional', dropout=None, seqlens=None, scope="gru", reuse=None, use_gpu=True):
    '''Applies a GRU.
    Args:
      inputs: A 3d tensor with shape of [N, T, C].
      num_layers: An int. Number of stacked GRU layers.
      num_units: An int. The number of hidden units; defaults to C (the last
        input dimension) when None.
      direction: 'bidirectional' (outputs concatenated) or anything else for
        unidirectional.
      dropout: Dropout rate; forced to 0.0 when num_layers == 1.
      seqlens: Unused here — TODO confirm whether callers expect masking.
      scope: Optional scope for `variable_scope`.
      reuse: Boolean, whether to reuse the weights of a previous layer
        by the same name.
      use_gpu: When True, uses the fused CudnnGRU implementation.
    Returns:
      If bidirection is True, a 3d tensor with shape of [N, T, 2*num_units],
      otherwise [N, T, num_units].
    '''
    from tensorflow.python.keras.layers import GRU, Bidirectional, RNN
    with tf.variable_scope(scope, reuse=reuse):
        if num_units is None:
            # BUG FIX: as_list is a method; the original wrote
            # `as_list[-1]`, indexing the bound-method object itself,
            # which raises TypeError at runtime.
            num_units = inputs.get_shape().as_list()[-1]
        if num_layers == 1:
            dropout = 0.0
        if use_gpu == True:
            # Fused multi-layer GPU kernel.
            gru_cell = tf.contrib.cudnn_rnn.CudnnGRU(num_layers, num_units, dropout=dropout, direction=direction)
            outputs, _ = gru_cell(inputs)
        else:
            # CPU path: stack Keras GRU layers manually.
            outputs = inputs
            for layer in range(num_layers):
                if direction == "bidirectional":
                    outputs = Bidirectional(layer=GRU(units=num_units, dropout=dropout, return_sequences=True), merge_mode='concat')(outputs)
                else:
                    gru_layer = GRU(units=num_units, dropout=dropout, return_sequences=True)
                    outputs = gru_layer(outputs)
        return outputs
def best_model():
    """Grid-search dropout rate x epoch count for the GRU sentiment model.

    Trains one model per dropout rate and evaluates it after each additional
    block of epochs, then reports the (epoch, dropout) pair with the lowest
    evaluation score. Uses module-level X_train/y_train/X_test/y_test and
    vocab_size/embedding_dim/max_length.

    :return: (epoch, dropout) of the lowest-scoring configuration
    """
    epochs = [5, 10, 15, 20]
    dropout_rate = [0.1, 0.2, 0.3]
    list_of_all_scores = list()
    list_of_scores = list()
    list_of_dropout = list()
    list_of_all_dropouts = list()
    list_of_epochs = list()
    for i in dropout_rate:
        model = Sequential()
        model.add(
            Embedding(vocab_size, embedding_dim, input_length=max_length))
        model.add(GRU(50, return_sequences=True))
        model.add(GRU(1, return_sequences=False))
        model.add(Dropout(i))
        model.add(
            Dense(1,
                  kernel_regularizer=regularizers.l2(0.01),
                  activation='sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['acc'])
        list_of_dropout.append(i)
        for e in epochs:
            # Record the configuration aligned with list_of_all_scores.
            list_of_all_dropouts.append(i)
            list_of_epochs.append(e)
            # NOTE: fit is cumulative on the same model, so each evaluation
            # reflects the total epochs trained so far for this dropout.
            model.fit(X_train,
                      y_train,
                      epochs=e,
                      batch_size=128,
                      verbose=1,
                      validation_split=0.2)
            score = model.evaluate(X_test, y_test, verbose=1)
            list_of_all_scores.append(score)
            if score not in list_of_scores:
                list_of_scores.append(score)
            #print('Dropout:', i, '\n', 'Epoch:', e, '\n', 'Score:', float(score))
    lowest = min(list_of_all_scores)
    # BUG FIX: the index must be looked up in list_of_all_scores, which is
    # aligned with list_of_epochs / list_of_all_dropouts; the original used
    # the de-duplicated list_of_scores, which can point at the wrong
    # epoch/dropout pair whenever duplicate scores occur.
    num = list_of_all_scores.index(lowest)
    epoch = list_of_epochs[num]
    dropout = list_of_all_dropouts[num]
    print('Lowest score:', lowest, 'Epoch:', epoch, 'Dropout', dropout)
    return epoch, dropout
def NRSCDecoder(x, is_training=False, layer='gru', direction='bidirectional', num_layers=2, hidden_units=400, dropout=0.5):
    """Definition of Neural Network Decoder for rate-1/2 (2 parity bits/ message bit)
    Recursive Systematic Convolutional Codes, aka "N-RSC" Decoder.

    Args:
      x: - tf.Tensor - shape [batch, sequence_length, 2] represents the noisy signals.
      is_training: - a boolean
      layer: str - type of rnn layer (only 'gru' or 'lstm')
      direction: str 'bidirectional' or 'unidirectional'
      num_layers - int - number of hidden layers
      hidden_units: int - number of hidden units per layer
      dropout: -float - drop out rate during training
    Returns:
      x - tf. Tensor - shape [batch, sequence_length, 1] decoded output
    Raise:
      ValueError: if `layer` or `direction` is invalid input
    """
    # Local import in the same style as this file's other helpers.
    from tensorflow.python.keras.layers import LSTM
    for _ in range(num_layers):
        if layer == 'gru':
            inner_layer = GRU(units=hidden_units,
                              return_sequences=True,
                              trainable=is_training,
                              recurrent_dropout=dropout)
        elif layer == 'lstm':
            # BUG FIX: this branch previously instantiated a GRU, so
            # layer='lstm' silently built the wrong cell type.
            inner_layer = LSTM(units=hidden_units,
                               return_sequences=True,
                               trainable=is_training,
                               recurrent_dropout=dropout)
        else:
            raise ValueError('Invalid `layer` parameter' '(only GRU or LSTM).')
        if direction == 'bidirectional':
            x = Bidirectional(inner_layer)(x)
        elif direction == 'unidirectional':
            x = inner_layer(x)
        else:
            raise ValueError('Invalid `direction` parameter' '(only bidirectional or unidirectional).')
        # Batch norm between recurrent layers stabilises training.
        x = BatchNormalization(trainable=is_training)(x)
    # Per-timestep sigmoid: one decoded bit per message position.
    x = TimeDistributed(
        Dense(units=1, activation='sigmoid', trainable=is_training))(x)
    return x
def bidirectional_lstm(inputs):
    """Run inputs through two stacked bidirectional GRU layers.

    Despite the function name, the recurrent cells are GRUs; the second
    layer uses twice the base unit count. Both layers return sequences.

    :param inputs: 3-D tensor [batch, time, features]
    :return: 3-D tensor of per-timestep outputs from the second layer
    """
    with tf.variable_scope('bidirection_gru', reuse=tf.AUTO_REUSE):
        first = Bidirectional(GRU(units=DEFINES.num_units, return_sequences=True))
        second = Bidirectional(GRU(units=DEFINES.num_units * 2, return_sequences=True))
        return second(first(inputs))
def set_up_model(self):
    """Build separate encoder and decoder models for a question-generation
    seq2seq with Bahdanau attention.

    The encoder sums the embeddings of the answer and source token streams
    and runs them through a GRU. The decoder takes the previous hidden state
    and the encoder outputs as explicit inputs, computes an attention context,
    prepends it to the embedded question tokens, and produces a softmax over
    the vocabulary plus the new GRU hidden state.

    :return: (encoder, decoder) compiled Keras models
    """
    # Token-id inputs for the two encoder streams.
    encoder_input_answers = Input(shape=(None,))
    encoder_input_sources = Input(shape=(None,))
    # NOTE(review): get_embeddings_layer is called twice, so the two streams
    # appear to use two separate embedding layer instances — confirm whether
    # sharing was intended.
    encoder_embedding_answers = get_embeddings_layer(self.embeddings_matrix)(encoder_input_answers)
    encoder_embedding_source = get_embeddings_layer(self.embeddings_matrix)(encoder_input_sources)
    # Element-wise sum of the two embedded streams feeds the encoder GRU.
    encoder_input = Add()([encoder_embedding_answers, encoder_embedding_source])
    encoder_gru = GRU(self.gru_hidden_states, return_sequences=True, return_state=True)
    encoder_output, state_h_encoder = encoder_gru(encoder_input)
    # encoder_states = [state_h, state_c]
    # Decoder inputs: question token ids, the previous decoder hidden state,
    # and the full encoder output sequence (both passed in explicitly so the
    # decoder can be driven step-by-step at inference time).
    decoder_input_questions = Input(shape=(None,))
    decoder_embedding_questions = get_embeddings_layer(self.embeddings_matrix)(decoder_input_questions)
    decoder_hidden_states_input = Input(shape=(self.gru_hidden_states,))
    # print(decoder_hidden_states_input.shape)
    decoder_encoder_output_input = Input(shape=(None, encoder_output.shape[2]))
    # print(decoder_encoder_output_input.shape)
    # Bahdanau attention over the encoder outputs, queried by the hidden state.
    attention_layer = BahdanauAttentionLayer(self.attention_layer_units)
    context_vector, attention_weights = attention_layer((decoder_hidden_states_input, decoder_encoder_output_input))
    # context_vector = tf.reshape(context_vector,
    #     [tf.shape(decoder_embedding_questions)[0], tf.shape(decoder_embedding_questions)[1], context_vector.shape[1]])
    # print(context_vector.shape )
    # print(decoder_embedding_questions.shape)
    # Prepend the attention context (as one extra "timestep") to the embedded
    # question tokens. NOTE: raw tf ops inside a Keras graph — fragile across
    # TF/Keras versions.
    decoder_inputs = tf.concat([tf.expand_dims(context_vector, 1), decoder_embedding_questions], axis=-1)
    # print(decoder_inputs.shape)
    # decoder_input_attention = tf.concat([context_vector, decoder_embedding_questions],axis=-1)
    decoder_gru = GRU(self.gru_hidden_states, return_sequences=True, return_state=True)
    decoder__gru_outputs, decoder_gru_hidden = decoder_gru(decoder_inputs)
    # Flatten (batch, time) so the dense softmax applies per timestep.
    decoder__gru_outputs = tf.reshape(decoder__gru_outputs, (-1, decoder__gru_outputs.shape[2]))
    # decoder_outputs_flattend = Flatten()(decoder_outputs)
    decoder_dense = Dense(self.vocabulary_size, activation='softmax')
    decoder_outputs = decoder_dense(decoder__gru_outputs)
    # model = Model([encoder_input_answers, encoder_input_sources, decoder_input_questions], decoder_outputs)
    # model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
    # model.summary()
    encoder = Model([encoder_input_answers, encoder_input_sources], [encoder_output, state_h_encoder])
    encoder.compile(optimizer='rmsprop', loss='categorical_crossentropy')
    encoder.summary()
    decoder = Model([decoder_input_questions, decoder_hidden_states_input, decoder_encoder_output_input], [decoder_outputs, decoder_gru_hidden])
    decoder.compile(optimizer='rmsprop', loss='categorical_crossentropy')
    decoder.summary()
    return encoder, decoder
def model(hidden_size, batch_size, en_timesteps, en_vsize, fr_timesteps, fr_vsize):
    """Build a bidirectional-GRU encoder / GRU decoder seq2seq model.

    :param hidden_size: GRU units per direction of the encoder
    :param batch_size: fixed batch size baked into the input shapes
    :param en_timesteps: source (en) sequence length
    :param en_vsize: source vocabulary size (one-hot depth)
    :param fr_timesteps: target (fr) sequence length; the decoder sees fr_timesteps - 1 steps
    :param fr_vsize: target vocabulary size (one-hot depth)
    :return: compiled Keras model mapping [encoder_inputs, decoder_inputs] -> per-step softmax
    """
    print(
        "hidden_size:%r, batch_size:%r, en_timesteps:%r, en_vsize:%r, fr_timesteps:%r, fr_vsize:%r"
        % (hidden_size, batch_size, en_timesteps, en_vsize, fr_timesteps, fr_vsize))

    enc_in = Input(batch_shape=(batch_size, en_timesteps, en_vsize), name='encoder_inputs')
    dec_in = Input(batch_shape=(batch_size, fr_timesteps - 1, fr_vsize), name='decoder_inputs')

    # All-zero timesteps are treated as padding.
    enc_masked = Masking(0)(enc_in)
    dec_masked = Masking(0)(dec_in)

    # Encoder: bidirectional GRU; we only need its two final states.
    bi_gru = Bidirectional(
        GRU(units=hidden_size, return_sequences=True, return_state=True, name='encoder_gru'),
        name='bidirectional_encoder')
    _, fwd_state, back_state = bi_gru(enc_masked)

    # Decoder: one GRU whose width matches the two concatenated encoder states.
    dec_gru = GRU(units=hidden_size * 2, return_sequences=True, return_state=True,
                  name='decoder_gru')
    dec_out, _ = dec_gru(inputs=dec_masked,
                         initial_state=Concatenate(axis=-1)([fwd_state, back_state]))

    # Per-timestep softmax over the target vocabulary.
    projection = TimeDistributed(
        Dense(fr_vsize, activation='softmax', name='softmax_layer'),
        name='time_distributed_layer')
    decoder_pred = projection(dec_out)

    full_model = Model(inputs=[enc_in, dec_in], outputs=decoder_pred)
    full_model.compile(optimizer='adam', loss='categorical_crossentropy')
    full_model.summary()
    return full_model
def get_decoder(self, encoder_output, dim, state_size=128, embedding_size=128):
    """Build the decoder half of a captioning-style network.

    :param encoder_output: tensor used as the initial GRU state for the training graph
    :param dim: vocabulary size (embedding input dim and output layer width)
    :param state_size: GRU state width
    :param embedding_size: token embedding width
    :return: (decoder_initial_state, decoder_input, decoder_output, connect_decoder)
             where connect_decoder(initial_state) re-wires the same layers for inference
    """
    decoder_initial_state = Input(shape=(state_size, ), name='decoder_initial_state')
    decoder_input = Input(shape=(None, ), name='decoder_input')

    embedding = Embedding(input_dim=dim, output_dim=embedding_size,
                          name='decoder_embedding')
    gru_stack = [
        GRU(state_size, name='decoder_gru1', return_sequences=True),
        GRU(state_size, name='decoder_gru2', return_sequences=True),
        GRU(state_size, name='decoder_gru3', return_sequences=True),
    ]
    projection = Dense(dim, activation='linear', name='decoder_output')

    def connect_decoder(initial_state):
        # Wire decoder_input through embedding, the GRU stack (each seeded with
        # the same initial state), and the final linear projection.
        x = embedding(decoder_input)
        for gru in gru_stack:
            x = gru(x, initial_state=initial_state)
        return projection(x)

    decoder_output = connect_decoder(initial_state=encoder_output)
    return decoder_initial_state, decoder_input, decoder_output, connect_decoder
def CRNN(input_shape):
    """Build a CRNN: two conv/pool blocks, reshape to a sequence, BiGRU, per-step sigmoid.

    :param input_shape: shape of the spectrogram-like input (relies on the
                        module-level ``n_mel`` for the reshape — 600 timesteps assumed)
    :return: uncompiled Keras model
    """
    net_input = Input(input_shape, dtype='float', name='Input_Tr')

    # Conv block 1: 32 filters, pool only along the frequency axis.
    x = Conv2D(32, kernel_size=3, strides=1, padding='SAME')(net_input)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((1, 4))(x)
    x = Dropout(0.5)(x)

    # Conv block 2: 64 filters, same pooling scheme.
    x = Conv2D(64, kernel_size=3, strides=1, padding='SAME')(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((1, 4))(x)
    x = Dropout(0.5)(x)
    print(x.shape)

    # Flatten channels x remaining mel bins into per-timestep feature vectors.
    x = Reshape((600, 64 * int(round(n_mel / 4 / 4))))(x)
    print(x.shape)

    x = Bidirectional(GRU(64, return_sequences=True, activation='tanh'))(x)
    output = TimeDistributed(Dense(1, activation='sigmoid'))(x)

    return Model(inputs=[net_input], outputs=[output])
def get_bidirectional_model(self, pre_embeddings, dp_rate=0.0, use_lstm=False):
    """
    Build a Sequential model: frozen pre-trained embeddings -> bidirectional
    RNN -> dense classifier, following the standard keras construction steps.
    :param pre_embeddings: pre-trained embedding matrix (vocab x dims)
    :param dp_rate: recurrent drop out rate
    :param use_lstm: utilize LSTM or GRU unit
    :return: the model
    """
    # Embedding layer initialised from the pre-trained vectors and kept frozen
    # (trainable=False) during training.
    embedding = Embedding(self.max_features,
                          self.embedding_dims,
                          weights=[pre_embeddings],
                          input_length=self.maxlen,
                          trainable=False)

    # Pick the recurrent cell class; construction args are identical either way.
    rnn_cls = LSTM if use_lstm else GRU

    model = Sequential()
    model.add(embedding)
    model.add(Bidirectional(rnn_cls(RNN_DIM, recurrent_dropout=dp_rate)))
    model.add(Dense(self.class_num, activation=self.last_activation))
    return model
def get_naive_version_model(self, pre_embeddings, dp_rate=0.0, use_lstm=False):
    """
    Build the classifier with the functional API instead of Sequential —
    a different construction route from the common way shown in the keras manual.
    Are these two ways of constructing a model equivalent?
    :param pre_embeddings: pre-trained embedding matrix (vocab x dims)
    :param dp_rate: recurrent drop out rate
    :param use_lstm: utilize LSTM or GRU unit
    :return: the model
    """
    embedding_layer = Embedding(self.max_features,        # vocabulary size
                                self.embedding_dims,      # word-vector dimensionality
                                weights=[pre_embeddings], # pre-trained vectors
                                input_length=self.maxlen, # max sentence length
                                trainable=False)          # keep embeddings frozen

    # Renamed from 'input' to avoid shadowing the builtin input().
    inputs = Input((self.maxlen,))
    embedding = embedding_layer(inputs)
    if use_lstm:
        x = LSTM(RNN_DIM, recurrent_dropout=dp_rate)(embedding)
    else:
        x = GRU(RNN_DIM, recurrent_dropout=dp_rate)(embedding)
    output = Dense(self.class_num, activation=self.last_activation)(x)
    model = Model(inputs=inputs, outputs=output)
    return model
def bd_model(input_shape, output_sequence_length, english_vocab_size, spanish_vocab_size):
    """
    Build a bidirectional RNN model on x and y.

    Docstring fixed: it previously documented a nonexistent ``french_vocab_size``
    parameter and omitted ``spanish_vocab_size``.

    :param input_shape: Tuple of input shape (batch dim included; only [1:] is used)
    :param output_sequence_length: Length of output sequence (unused — kept for
                                   signature compatibility with sibling builders)
    :param english_vocab_size: Number of unique English words in the dataset
                               (unused — kept for signature compatibility)
    :param spanish_vocab_size: Number of unique Spanish words in the dataset
    :return: Keras model built, but not trained
    """
    model = Sequential()
    # Bidirectional GRU over the input sequence, keeping per-step outputs.
    model.add(
        Bidirectional(GRU(128, return_sequences=True),
                      input_shape=input_shape[1:]))
    model.add(TimeDistributed(Dense(512, activation='relu')))
    # Per-timestep softmax over the Spanish vocabulary.
    model.add(TimeDistributed(Dense(spanish_vocab_size, activation='softmax')))

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])
    return model
def Create_CNN(self):
    """
    Build the RCNN classifier: parallel CNN branches and bidirectional-GRU
    branches over shared embeddings, concatenated and projected to per-class
    sigmoid probabilities.

    Fix: the final Dense layer previously hard-coded 6 output units while the
    separate_label_layer branch produces self.num_classes outputs — both
    branches now use self.num_classes.

    :return: compiled Keras model (binary crossentropy, adam)
    """
    inp = Input(shape=(self.max_len, ))
    embedding = Embedding(self.max_token, self.embedding_dim,
                          weights=[self.embedding_weight],
                          trainable=not self.fix_wv_model)
    x = embedding(inp)
    if self.emb_dropout > 0:
        x = SpatialDropout1D(self.emb_dropout)(x)

    # Convolutional branches, one per configured (positive) filter size.
    cnn_list = []
    for filter_size in self.filter_size:
        if filter_size > 0:
            cnn_list.append(self.ConvBlock(x, filter_size))

    # Recurrent branches, one per configured (positive) context-vector dim.
    rnn_list = []
    for rnn_unit in self.context_vector_dim:
        if rnn_unit > 0:
            rnn_maps = Bidirectional(
                GRU(rnn_unit, return_sequences=True,
                    dropout=self.rnn_input_dropout,
                    recurrent_dropout=self.rnn_state_dropout))(x)
            rnn_list.append(self.pooling_blend(rnn_maps))

    conc_list = cnn_list + rnn_list
    if len(conc_list) == 1:
        # NOTE(review): identity Lambda over a one-element list — presumably
        # only to attach the 'RCNN_CONC' name; confirm it yields a tensor.
        conc = Lambda(lambda x: x, name='RCNN_CONC')(conc_list)
    else:
        conc = Concatenate(name='RCNN_CONC')(conc_list)

    if self.separate_label_layer:
        # One sigmoid head per class, concatenated into a single output tensor.
        for i in range(self.num_classes):
            full_connect = self.full_connect_layer(conc)
            proba = Dense(1, activation="sigmoid")(full_connect)
            if i == 0:
                outp = proba
            else:
                outp = concatenate([outp, proba], axis=1)
    else:
        if self.hidden_dim[0] > 0:
            full_connect = self.full_connect_layer(conc)
        else:
            full_connect = conc
        # Single multi-label head; width matches the per-class branch above.
        outp = Dense(self.num_classes, activation="sigmoid")(full_connect)

    model = Model(inputs=inp, outputs=outp)
    model.compile(optimizer="adam", loss="binary_crossentropy",
                  metrics=["accuracy"])
    return model