# Consolidated imports for the snippets below (best-effort: the original
# functions come from several sources and mix TF1- and TF2-era Keras APIs).
# `Attention`, `slice`, and `where` are project-specific helpers assumed to be
# defined elsewhere; a hypothetical sketch of `slice` follows build_model.
import os

import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.initializers import RandomUniform
from tensorflow.keras.layers import (
    GRU, LSTM, Activation, BatchNormalization, Bidirectional, Concatenate,
    Conv1D, Conv2D, Dense, Dropout, Embedding, GlobalAveragePooling1D,
    GlobalMaxPool1D, GlobalMaxPooling1D, Input, Lambda, MaxPooling1D,
    SpatialDropout1D, concatenate)
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.utils import plot_model, to_categorical
# The CuDNN* layers exist only under the TF1 compatibility API; in TF 2.x the
# plain GRU/LSTM layers pick the cuDNN kernel automatically when possible.
from tensorflow.compat.v1.keras.layers import CuDNNGRU, CuDNNLSTM


def build_model(units, inputs_dim, output="regression", sparse_dim=None,
                with_ts=False, ts_maxlen=0):
    assert output in ("regression", "binary_clf"), "This output type is not supported."
    sparse_dim = sparse_dim or []  # avoid a mutable default argument
    assert len(sparse_dim) == inputs_dim[1], "Dimensions do not match."

    # Inputs for basic features.
    inputs1 = Input(shape=(inputs_dim[0],), name="basic_input")
    x1 = Dense(units, kernel_regularizer='l2', activation="relu")(inputs1)

    # Inputs for long one-hot features: embed each sparse column, then concatenate.
    inputs2 = Input(shape=(inputs_dim[1],), name="one_hot_input")
    for i in range(len(sparse_dim)):
        tmp = Embedding(sparse_dim[i], units, mask_zero=True)(slice(inputs2, i))
        x2 = tmp if i == 0 else Concatenate()([x2, tmp])
    x2 = tf.reshape(x2, [-1, units * inputs_dim[1]])
    x = Concatenate()([x1, x2])

    # Optional time-series branch.
    if with_ts:
        inputs3 = Input(shape=(None, inputs_dim[2]), name="ts_input")
        x3 = LSTM(units, input_shape=(ts_maxlen, inputs_dim[2]),
                  return_sequences=False)(inputs3)
        x = Concatenate()([x, x3])

    x = Dense(units, kernel_regularizer='l2', activation="relu")(x)
    x = Dropout(0.5)(x)
    x = Dense(units, kernel_regularizer='l2', activation="relu")(x)
    x = Dropout(0.5)(x)

    model_inputs = [inputs1, inputs2, inputs3] if with_ts else [inputs1, inputs2]
    if output == "regression":
        x = Dense(1, kernel_regularizer='l2')(x)
        model = Model(inputs=model_inputs, outputs=x)
        model.compile(optimizer='adam', loss='mean_squared_error')
    else:  # binary_clf
        x = Dense(1, kernel_regularizer='l2', activation="sigmoid")(x)
        model = Model(inputs=model_inputs, outputs=x)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
    # model.summary()
    return model
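# build_model above (and several snippets below) call a project-specific
# `slice` helper that is not included in this file. The sketch below is
# hypothetical, inferred from the positional call `slice(inputs2, i)`: it picks
# column i of a 2-D tensor while keeping a length-1 axis for the downstream
# Embedding. It also matches the `Lambda(slice, arguments={"index": t})` usage
# further down, but not the `arguments={'h1': ...}` calls in seq2seq, so the
# real helper's signature likely differs. It shadows the builtin `slice`, as
# the original code assumes.
def slice(x, index):
    # (batch, n) -> (batch, 1): keep a time axis of length one.
    return x[:, index:index + 1]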
def enc_dec(src_max_len, tgt_max_len, src_token_size, tgt_token_size, latent_dim=128):
    """Get the empty encoder and decoder."""
    # src_max_len / tgt_max_len are unused: both models accept variable-length input.
    rd = RandomUniform(minval=-0.08, maxval=0.08, seed=None)

    # Encoder: embedding + GRU returning both the full sequence and the last state.
    encoder_inputs = Input(shape=(None,), name='encoder_inputs')
    encoder_embedding = Embedding(src_token_size, latent_dim,
                                  embeddings_initializer=rd, mask_zero=True,
                                  name='encoder_emb')(encoder_inputs)
    encoder_time, encoder_state_h = GRU(latent_dim, kernel_initializer=rd,
                                        bias_initializer=rd, return_state=True,
                                        return_sequences=True,
                                        name='forward')(encoder_embedding)
    encoder_model = Model(encoder_inputs, [encoder_state_h, encoder_time])

    # Decoder layers.
    decoder_inputs = Input(shape=(None,), name='decoder_inputs')
    decoder_embedding = Embedding(tgt_token_size, latent_dim,
                                  embeddings_initializer=rd, name='decoder_emb')
    decoder_gru = GRU(latent_dim, kernel_initializer=rd, bias_initializer=rd,
                      return_sequences=True, return_state=True, name='decoder_gru')
    decoder_dense = Dense(tgt_token_size, kernel_initializer=rd,
                          bias_initializer=rd, activation='softmax',
                          name='output_dense')

    # Single-step decoder model driven by an externally supplied state.
    decoder_state_input_h = Input(shape=(latent_dim,), name='decoder_input_h')
    decoder_outputs, state_h = decoder_gru(decoder_embedding(decoder_inputs),
                                           initial_state=decoder_state_input_h)
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model([decoder_inputs, decoder_state_input_h],
                          [decoder_outputs, state_h])
    return encoder_model, decoder_model
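# A minimal smoke test for enc_dec; the vocabulary sizes, batch size, and
# sequence lengths below are illustrative only.
def _demo_enc_dec():
    enc, dec = enc_dec(src_max_len=20, tgt_max_len=20,
                       src_token_size=1000, tgt_token_size=1000, latent_dim=128)
    src = np.random.randint(1, 1000, size=(2, 7))       # (batch, src_time); 0 is masked
    state_h, enc_seq = enc.predict(src)                 # (2, 128), (2, 7, 128)
    tgt_step = np.random.randint(1, 1000, size=(2, 1))  # one decoder timestep
    probs, state_h = dec.predict([tgt_step, state_h])   # (2, 1, 1000), (2, 128)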
def build_decoder_model_without_argmax(seq2seq, input_t, output_t):
    # Same decoder structure as the seq2seq model, but with all initializers removed.
    input_state = Input(shape=(seq2seq.units,), name="decoder_state")
    decoder_inputs = Input(shape=(None,), name="decoder_input")
    decoder_embedding = Embedding(seq2seq.tgt_token_size, seq2seq.units,
                                  name="decoder_emb")
    decoder_gru = GRU(seq2seq.units, return_sequences=True, return_state=True,
                      name="decoder_gru")
    decoder_dense = Dense(seq2seq.tgt_token_size, activation="softmax",
                          name="output_dense")

    state = input_state
    for t in range(input_t, output_t):
        # Pick timestep t of the decoder input; the encoder output counts as time 0.
        inputs = Lambda(slice, arguments={"index": t})(decoder_inputs)
        inputs_embedding = decoder_embedding(inputs)
        decoder_outputs_time, state = decoder_gru(inputs_embedding, initial_state=state)
    if input_t == output_t:
        # Empty range: use the incoming state as a length-1 sequence instead.
        decoder_outputs_time = Lambda(lambda x: K.expand_dims(x, axis=1))(state)
    softmax = decoder_dense(decoder_outputs_time)
    decoder_model = Model([decoder_inputs, input_state], [softmax, state])
    return decoder_model
def gru_keras(max_features, maxlen, bidirectional, dropout_rate, embed_dim,
              rec_units, mtype='GRU', reduction=None, classes=4, lr=0.001):
    if K.backend() == 'tensorflow':  # K.backend is a function; the original compared it unbound
        K.clear_session()
    if mtype not in ('GRU', 'LSTM'):
        raise ValueError("mtype must be 'GRU' or 'LSTM'")

    input_layer = Input(shape=(maxlen,))
    embedding_layer = Embedding(max_features, output_dim=embed_dim,
                                trainable=True)(input_layer)
    x = SpatialDropout1D(dropout_rate)(embedding_layer)

    # With a reduction, keep the full sequence and pool it; otherwise take only
    # the last hidden state.
    rnn_cls = CuDNNGRU if mtype == 'GRU' else CuDNNLSTM
    rnn = rnn_cls(units=rec_units, return_sequences=bool(reduction))
    x = Bidirectional(rnn)(x) if bidirectional else rnn(x)
    if reduction == 'average':
        x = GlobalAveragePooling1D()(x)
    elif reduction == 'maximum':
        x = GlobalMaxPool1D()(x)

    # softmax rather than the original sigmoid, to match categorical_crossentropy
    # over `classes` mutually exclusive classes.
    output_layer = Dense(classes, activation="softmax")(x)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(learning_rate=lr, clipvalue=1, clipnorm=1),
                  metrics=['acc'])
    return model
def NN_huaweiv1(maxlen, embedding_matrix=None, class_num1=17, class_num2=12):
    # embedding_matrix is required despite the None default: its shape defines
    # the vocabulary size and embedding dimension of the frozen embedding layer.
    emb_layer = Embedding(
        embedding_matrix.shape[0],
        embedding_matrix.shape[1],
        input_length=maxlen,
        weights=[embedding_matrix],
        trainable=False,
    )
    seq1 = Input(shape=(maxlen,))
    x1 = emb_layer(seq1)
    sdrop = SpatialDropout1D(rate=0.2)
    lstm_layer = Bidirectional(CuDNNGRU(128, return_sequences=True))
    gru_layer = Bidirectional(CuDNNGRU(128, return_sequences=True))
    cnn1d_layer = Conv1D(64, kernel_size=3, padding="same",
                         kernel_initializer="he_uniform")

    x1 = sdrop(x1)
    lstm1 = lstm_layer(x1)
    gru1 = gru_layer(lstm1)
    att_1 = Attention(maxlen)(lstm1)  # Attention is a project-specific layer
    att_2 = Attention(maxlen)(gru1)
    cnn1 = cnn1d_layer(lstm1)
    avg_pool = GlobalAveragePooling1D()
    max_pool = GlobalMaxPooling1D()
    x1 = concatenate([att_1, att_2, Attention(maxlen)(cnn1),
                      avg_pool(cnn1), max_pool(cnn1)])

    x = Dropout(0.2)(Activation(activation="relu")(BatchNormalization()(Dense(128)(x1))))
    x = Activation(activation="relu")(BatchNormalization()(Dense(64)(x)))
    pred1_d = Dense(class_num1)(x)
    pred1 = Activation(activation='sigmoid', name='pred1')(pred1_d)

    y = concatenate([x1, x])
    # The original applied Dense(64) to x here, leaving the concatenation unused;
    # feeding y matches the apparent intent.
    y = Activation(activation="relu")(BatchNormalization()(Dense(64)(y)))
    pred2_d = Dense(class_num2)(y)
    pred2 = Activation(activation='sigmoid', name='pred2')(pred2_d)

    z = Dropout(0.2)(Activation(activation="relu")(BatchNormalization()(Dense(128)(x1))))
    z = concatenate([pred1_d, pred2_d, z])
    pred3 = Dense(class_num1 + class_num2, activation='sigmoid', name='pred3')(z)

    model = Model(inputs=seq1, outputs=[pred1, pred2, pred3])
    return model
def NN_huaweiv1(maxlen, embedding_matrix=None, class_num1=17, class_num2=12):
    # A second variant with the same name as the function above; rename one of
    # them before importing both. This version splits each output head across
    # two Dense blocks fed by separate attention heads.
    emb_layer = Embedding(
        embedding_matrix.shape[0],
        embedding_matrix.shape[1],
        input_length=maxlen,
        weights=[embedding_matrix],
        trainable=False,
    )
    seq1 = Input(shape=(maxlen,))
    emb = emb_layer(seq1)
    sdrop = SpatialDropout1D(rate=0.2)
    lstm_layer = Bidirectional(CuDNNGRU(128, return_sequences=True))
    gru_layer = Bidirectional(CuDNNGRU(128, return_sequences=True))
    cnn1d_layer = Conv1D(64, kernel_size=3, padding="same",
                         kernel_initializer="he_uniform")

    sd = sdrop(emb)
    lstm1 = lstm_layer(sd)
    gru1 = gru_layer(lstm1)
    cnn1 = cnn1d_layer(gru1)
    gru1 = concatenate([lstm1, gru1, cnn1])

    # Four independent attention heads over the same concatenated features.
    att_1 = Attention(maxlen)(gru1)
    att_2 = Attention(maxlen)(gru1)
    att_3 = Attention(maxlen)(gru1)
    att_4 = Attention(maxlen)(gru1)
    x1 = Activation(activation="relu")(BatchNormalization()(Dense(128)(att_1)))
    x2 = Activation(activation="relu")(BatchNormalization()(Dense(128)(att_2)))
    x3 = Activation(activation="relu")(BatchNormalization()(Dense(128)(att_3)))
    x4 = Activation(activation="relu")(BatchNormalization()(Dense(128)(att_4)))

    pred1_1 = Dense(class_num1 - 10, activation='sigmoid')(x1)
    pred1_2 = Dense(10, activation='sigmoid')(x2)
    pred1 = concatenate([pred1_1, pred1_2], axis=-1, name='pred1')
    pred2_1 = Dense(class_num2 - 9, activation='sigmoid')(x3)
    pred2_2 = Dense(9, activation='sigmoid')(x4)
    pred2 = concatenate([pred2_1, pred2_2], axis=-1, name='pred2')

    model = Model(inputs=seq1, outputs=[pred1, pred2])
    return model
def spatial_block(name, space, cfg):
    inpt = Input(space.shape, name=name + '_input')
    block = tf.split(inpt, space.shape[0], axis=1)

    # Loop variable renamed from `name`, which shadowed the function argument.
    for i, (feat_name, dim) in enumerate(zip(space.spatial_feats, space.spatial_dims)):
        if dim > 1:
            block[i] = tf.squeeze(block[i], axis=1)
            # Embedding dim 10, following https://arxiv.org/pdf/1806.01830.pdf
            block[i] = Embedding(input_dim=dim, output_dim=10)(block[i])
            # [N, H, W, C] -> [N, C, H, W]
            block[i] = tf.transpose(block[i], perm=[0, 3, 1, 2])
        else:
            block[i] = tf.log(block[i] + 1e-5)  # tf.math.log in TF 2.x

    block = tf.concat(block, axis=1)
    block = Conv2D(16, 5, **cfg)(block)
    block = Conv2D(32, 3, **cfg)(block)
    return block, inpt
def build_lstm_lm(input_shape, output_size):
    # LM datasets report the vocab size as output_size.
    vocab_size = output_size
    model = Sequential([
        Embedding(vocab_size + 1, 64, mask_zero=True, input_length=input_shape[0]),
        LSTM(256, unroll=True, return_sequences=True),
        LSTM(256, unroll=True),
        Dense(output_size),
        Activation("softmax"),
    ])
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    return model
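# A minimal smoke test for build_lstm_lm; the sequence length and vocabulary
# size below are illustrative.
def _demo_build_lstm_lm():
    model = build_lstm_lm(input_shape=(50,), output_size=10000)
    model.summary()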
def cnn_keras(max_features, maxlen, dropout_rate, embed_dim, num_filters=300,
              classes=4, lr=0.001):
    if K.backend() == 'tensorflow':  # K.backend is a function; the original compared it unbound
        K.clear_session()
    input_layer = Input(shape=(maxlen,))
    embedding_layer = Embedding(max_features, output_dim=embed_dim,
                                trainable=True)(input_layer)
    x = SpatialDropout1D(dropout_rate)(embedding_layer)
    x = Conv1D(num_filters, 7, activation='relu', padding='same')(x)
    x = GlobalMaxPooling1D()(x)
    # softmax rather than the original sigmoid, to match categorical_crossentropy.
    output_layer = Dense(classes, activation="softmax")(x)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(learning_rate=lr, clipvalue=1, clipnorm=1),
                  metrics=['acc'])
    return model
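# A quick shape check for cnn_keras on random data; all values are illustrative
# and assume the TF/Keras version these snippets target.
def _demo_cnn_keras():
    model = cnn_keras(max_features=20000, maxlen=100, dropout_rate=0.2, embed_dim=64)
    x = np.random.randint(0, 20000, size=(8, 100))
    y = to_categorical(np.random.randint(0, 4, size=(8,)), num_classes=4)
    model.train_on_batch(x, y)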
def create_lstm():
    # Create the model; relies on module-level `top_words` and `max_review_length`.
    embedding_vector_length = 32
    model = Sequential(name="lstm")
    model.add(Embedding(top_words, embedding_vector_length,
                        input_length=max_review_length))
    model.add(Conv1D(filters=32, kernel_size=3, padding="same", activation="relu"))
    model.add(MaxPooling1D(pool_size=2))
    model.add(LSTM(10, name="lstm1", return_sequences=True))
    model.add(LSTM(32, name="lstm2", return_sequences=True))
    model.add(LSTM(64, name="lstm3", return_sequences=True))
    model.add(LSTM(128, name="lstm4", return_sequences=True))
    model.add(LSTM(48, name="lstm5"))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(loss="binary_crossentropy", optimizer="adam",
                  metrics=["accuracy"])
    return model
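# create_lstm reads `top_words` and `max_review_length` from module scope, so a
# caller must define them first; the values below are illustrative (the classic
# IMDB tutorial settings).
def _demo_create_lstm():
    global top_words, max_review_length
    top_words, max_review_length = 5000, 500
    model = create_lstm()
    model.summary()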
def seq2seq(src_max_len, tgt_max_len, src_token_size, tgt_token_size,
            latent_dim=128, teacher_forcing_ratio=0.5):
    rd = RandomUniform(minval=-0.08, maxval=0.08, seed=None)

    # Encoder.
    encoder_inputs = Input(shape=(None,), name='encoder_inputs')
    print('(Build model) encoder_inputs =', encoder_inputs.shape)
    encoder_embedding = Embedding(src_token_size, latent_dim,
                                  embeddings_initializer=rd, mask_zero=True,
                                  name='encoder_emb')(encoder_inputs)
    print('(Build model) encoder_embedding =', encoder_embedding.shape)
    encoder_time, encoder_state_h = GRU(latent_dim, kernel_initializer=rd,
                                        bias_initializer=rd, return_state=True,
                                        return_sequences=True,
                                        name='forward')(encoder_embedding)
    print("(Build model) encoder_state_h =", encoder_state_h.shape)
    encoder_model = Model(encoder_inputs, [encoder_state_h, encoder_time])

    # Decoder layers.
    decoder_inputs = Input(shape=(None,), name='decoder_inputs')
    print('(Build model) decoder_inputs =', decoder_inputs.shape)
    decoder_embedding = Embedding(tgt_token_size, latent_dim,
                                  embeddings_initializer=rd, name='decoder_emb')
    decoder_gru = GRU(latent_dim, kernel_initializer=rd, bias_initializer=rd,
                      return_sequences=True, return_state=True, name='decoder_gru')
    decoder_dense = Dense(tgt_token_size, kernel_initializer=rd,
                          bias_initializer=rd, activation='softmax',
                          name='output_dense')

    # Unrolled training decoder with scheduled teacher forcing; `slice` and
    # `where` are project-specific helpers.
    inputs = Lambda(slice, arguments={'h1': 0})(decoder_inputs)
    softmax_state = []
    teacher_forcing = Input(shape=(None,))
    decoder_state_h = encoder_state_h
    # Run the decoder on each timestep.
    for i in range(tgt_max_len):
        inputs_embed = decoder_embedding(inputs)
        decoder_outputs_time, state_h = decoder_gru(inputs_embed,
                                                    initial_state=decoder_state_h)
        softmax = decoder_dense(decoder_outputs_time)
        outputs = Lambda(lambda x: K.argmax(x))(softmax)
        # Cast the Lambda input; the original closed over `outputs` instead of using x.
        outputs = Lambda(lambda x: K.cast(x, 'float32'))(outputs)
        decoder_inputs_time = Lambda(slice, arguments={'h1': i + 1})(decoder_inputs)
        # Choose the ground-truth token or the model's own prediction.
        inputs = Lambda(where, arguments={'ratio': teacher_forcing_ratio})(
            [teacher_forcing, decoder_inputs_time, outputs])
        decoder_state_h = state_h
        softmax_state += [softmax]
    decoder_outputs = Lambda(lambda x: K.concatenate(x, axis=1))(softmax_state)

    # Training model: turns "encoder_input_data" & "decoder_input_data" into
    # "decoder_target_data".
    model = Model([encoder_inputs, decoder_inputs, teacher_forcing],
                  decoder_outputs)

    # Single-step inference decoder.
    decoder_state_input_h = Input(shape=(latent_dim,), name='decoder_input_h')
    decoder_outputs, state_h = decoder_gru(decoder_embedding(decoder_inputs),
                                           initial_state=decoder_state_input_h)
    print('(Build model) decoder_outputs =', decoder_outputs)
    decoder_outputs = decoder_dense(decoder_outputs)
    print('(Build model) decoder_outputs =', decoder_outputs)
    decoder_model = Model([decoder_inputs, decoder_state_input_h],
                          [decoder_outputs, state_h])

    encoder_model.summary()
    decoder_model.summary()
    return model, encoder_model, decoder_model
def __get_model__(self):
    # Encoder.
    self.enc_inp = Input(shape=(self.cfg.input_seq_len(),), name="Encoder-Input")
    embd = Embedding(self.cfg.num_input_tokens(), self.cfg.latent_dim(),
                     name='Encoder-Embedding', mask_zero=False)
    embd_outp = embd(self.enc_inp)
    x = BatchNormalization(name='Encoder-Batchnorm-1')(embd_outp)
    _, state_h = GRU(self.cfg.latent_dim(), return_state=True,
                     name='Encoder-Last-GRU')(x)
    self.enc_model = Model(inputs=self.enc_inp, outputs=state_h,
                           name='Encoder-Model')
    self.enc_outp = self.enc_model(self.enc_inp)
    self.cfg.logger.info("********** Encoder Model summary **************")
    # summary() prints to stdout and returns None; route it through the logger.
    self.enc_model.summary(print_fn=self.cfg.logger.info)

    # Decoder.
    self.dec_inp = Input(shape=(None,), name='Decoder-Input')
    dec_emb = Embedding(self.cfg.num_output_tokens(), self.cfg.latent_dim(),
                        name='Decoder-Embedding', mask_zero=False)(self.dec_inp)
    dec_bn = BatchNormalization(name='Decoder-Batchnorm-1')(dec_emb)
    decoder_gru = GRU(self.cfg.latent_dim(), return_state=True,
                      return_sequences=True, name='Decoder-GRU')
    decoder_gru_output, _ = decoder_gru(dec_bn, initial_state=self.enc_outp)
    x = BatchNormalization(name='Decoder-Batchnorm-2')(decoder_gru_output)
    dec_dense = Dense(self.cfg.num_output_tokens(), activation='softmax',
                      name='Final-Output-Dense')
    self.dec_outp = dec_dense(x)

    model_inp = [self.enc_inp, self.dec_inp]
    self.model = Model(model_inp, self.dec_outp)
    self.cfg.logger.info("********** Full Model summary **************")
    self.model.summary(print_fn=self.cfg.logger.info)
    plot_model(self.model,
               to_file=self.cfg.scratch_dir() + os.sep + "seq2seq.png")
def tokens_to_string(tokens):
    # Map from tokens back to words, skipping padding (token 0); relies on a
    # module-level `inverse_map`.
    words = [inverse_map[token] for token in tokens if token != 0]
    # Concatenate all words.
    text = " ".join(words)
    return text


# Create the RNN. Relies on module-level `num_words`, `max_tokens`,
# `x_train_pad`, and `y_train` prepared earlier.
model = Sequential()
embedding_size = 8
model.add(Embedding(input_dim=num_words,
                    output_dim=embedding_size,
                    input_length=max_tokens,
                    name='layer_embedding'))
model.add(GRU(units=16, return_sequences=True))
model.add(GRU(units=8, return_sequences=True))
model.add(GRU(units=4))
model.add(Dense(1, activation='sigmoid'))

optimizer = Adam(learning_rate=1e-3)  # `lr` is the deprecated alias
model.compile(loss='binary_crossentropy', optimizer=optimizer,
              metrics=['accuracy'])

x = np.array(x_train_pad)
y = np.array(y_train)
model.fit(x, y, validation_split=0.06, epochs=3, batch_size=64)