def build_model():
    opt = optimizers.RMSprop(learning_rate=LEARNING_RATE)

    feat_input = Input(shape=(OBSERVE_LENGTH, FEAT_DIM))
    img_inputs = [Input(shape=(OBSERVE_LENGTH, IMG_DIM)) for _ in range(5)]

    # Encode the feature sequence and each of the five image sequences with
    # independent GRUs, keeping only the final hidden state of each.
    encoder_feat = layers.GRU(N_HIDDEN, return_sequences=False,
                              stateful=False, dropout=0.2)(feat_input)
    encoder_imgs = [layers.GRU(N_HIDDEN, return_sequences=False,
                               stateful=False, dropout=0.2)(img_input)
                    for img_input in img_inputs]

    concated = layers.concatenate(encoder_imgs + [encoder_feat])
    rv = layers.RepeatVector(PREDICT_LENGTH)(concated)

    # GRU decoder
    decoder = layers.GRU(N_HIDDEN, return_sequences=True,
                         stateful=False, dropout=0.2)(rv)
    dense = layers.TimeDistributed(layers.Dense(3))(decoder)
    out = layers.Activation('linear')(dense)

    model = Model(inputs=img_inputs + [feat_input], outputs=[out])
    model.compile(loss='mse', optimizer=opt)
    model.summary()
    return model
""" X = list() Y = list() X = [x for x in range(5, 301, 5)] Y = [y for y in range(20, 316, 5)] X = np.array(X).reshape(20, 3, 1) Y = np.array(Y).reshape(20, 3, 1) model = Sequential() # encoder layer model.add(layers.LSTM(100, activation='relu', input_shape=(3, 1))) # repeat vector model.add(layers.RepeatVector(3)) # decoder layer model.add(layers.LSTM(100, activation='relu', return_sequences=True)) model.add(layers.TimeDistributed(layers.Dense(1))) model.compile(optimizer='adam', loss='mse') print(model.summary()) history = model.fit(X, Y, epochs=30, validation_split=0.2, verbose=0, batch_size=3) plt.plot(history.history['val_loss'])
    # (The start of this call was truncated in the source; tf.ragged.boolean_mask
    # is inferred from the .to_tensor() conversion below.)
    masked = tf.ragged.boolean_mask(mult_o, mask)  # [num_vars, batch_size, timestep, num_variable_inputs]
    mean = tf.reduce_mean(masked, -2)              # [num_vars, batch_size, num_variable_inputs]
    mean = mean.to_tensor()
    var1 = tf.math.reduce_variance(mean, 0)        # [batch_size, num_variable_inputs]
    mean = tf.expand_dims(mean, -2)                # [num_vars, batch_size, 1, num_variable_inputs]
    var2 = tf.reduce_mean(tf.square(masked - mean), -2)  # [num_vars, batch_size, num_variable_inputs]
    loss = (tf.reduce_sum(var2)
            + tf.reduce_sum(tf.square(operators_diff))
            - tf.reduce_sum(var1))
    return loss


encoder_input = layers.Input(shape=(None, num_inputs))
encoder = layers.LSTM(latent_dim, activation="relu")(encoder_input)
# NOTE: the repeat count here is a dynamic tensor; RepeatVector normally
# expects a static int, so this relies on the count being resolvable at
# graph-construction time.
decoder_input = layers.RepeatVector(tf.shape(encoder_input)[1])(encoder)
decoder = layers.LSTM(latent_dim, activation="relu", return_sequences=True)(decoder_input)
decoder_output = layers.TimeDistributed(layers.Dense(num_inputs))(decoder)
autoencoder = tf.keras.models.Model(encoder_input, decoder_output)
autoencoder.compile(optimizer="adam", loss=expression_loss)

# Embedding
def repeat_vector(args):
    # Repeat `layer_to_repeat` once per timestep of `sequence_layer`, reading
    # the (possibly dynamic) time dimension at graph-construction time.
    layer_to_repeat = args[0]
    sequence_layer = args[1]
    return layers.RepeatVector(K.shape(sequence_layer)[1])(layer_to_repeat)
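# A minimal usage sketch (hypothetical shapes, assuming the same `layers` and
# `K` imports as above): because RepeatVector normally needs a static
# repetition count, the helper is wrapped in a Lambda layer so the count can
# track the dynamic time dimension of another tensor.
sequence_input = layers.Input(shape=(None, 8))   # variable-length sequence
encoded = layers.LSTM(16)(sequence_input)        # (batch, 16)
repeated = layers.Lambda(repeat_vector, output_shape=(None, 16))(
    [encoded, sequence_input])                   # (batch, timesteps, 16)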
    def call(self, inputs):  # header inferred; this is the Combiner layer's call body
        outputs, labels = inputs
        # Build per-channel indices and compare them against each sample's
        # channel count to zero out the padded channels.
        idxs_for_mask = tf.ones(tf.shape(outputs), dtype=tf.int32) * np.arange(outputs.shape[-1])
        lims_for_mask = tf.repeat(tf.expand_dims(labels, 2), outputs.shape[-1], 2)
        return outputs * tf.cast(idxs_for_mask < lims_for_mask, tf.float32)


if architecture == 1:
    input_layer = layers.Input((t_in, n_c_max + 1))
    signals, labels = Splitter()(input_layer)
    encoder = layers.Bidirectional(layers.LSTM(64))(signals)
    repeater = layers.RepeatVector(t_out)(encoder)
    decoder1 = layers.LSTM(128, return_sequences=True)(repeater)
    decoder2 = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(decoder1)
    decoder3 = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(decoder2)
    regressor = layers.Dense(n_c_max)(decoder3)
    cleaner = Combiner()([regressor, labels])
    model = keras.Model(input_layer, cleaner)
    model.compile(loss='mse', optimizer='adam')
""" ## Build the model """ print("Build model...") num_layers = 1 # Try to add more LSTM layers! model = keras.Sequential() # "Encode" the input sequence using a LSTM, producing an output of size 128. # Note: In a situation where your input sequences have a variable length, # use input_shape=(None, num_feature). model.add(layers.LSTM(128, input_shape=(MAXLEN, len(chars)))) # As the decoder RNN's input, repeatedly provide with the last output of # RNN for each time step. Repeat 'DIGITS + 1' times as that's the maximum # length of output, e.g., when DIGITS=3, max output is 999+999=1998. model.add(layers.RepeatVector(DIGITS + 1)) # The decoder RNN could be multiple layers stacked or a single layer. for _ in range(num_layers): # By setting return_sequences to True, return not only the last output but # all the outputs so far in the form of (num_samples, timesteps, # output_dim). This is necessary as TimeDistributed in the below expects # the first dimension to be the timesteps. model.add(layers.LSTM(128, return_sequences=True)) # Apply a dense layer to the every temporal slice of an input. For each of step # of the output sequence, decide which character should be chosen. model.add(layers.Dense(len(chars), activation="softmax")) model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) model.summary()
def model_constructor(self, input_data):
    # Encoder: flatten the input and add a channel axis so the identity
    # blocks and the bidirectional LSTM see (timesteps, channels).
    encoder_input = layers.Input(shape=self.data_shape[1:])
    X = layers.Flatten()(encoder_input)
    X = layers.RepeatVector(1)(X)
    X = layers.Permute((2, 1))(X)

    for i in range(self.num_ident_blocks):
        X = self.identity_block(X, 'encoder', string.ascii_uppercase[i + 1])

    # This is in preparation for the embedding layer
    X = layers.Bidirectional(LSTM(self.layer_size,
                                  return_sequences=False,
                                  dropout=self.drop_frac,
                                  activity_regularizer=l1(self.l1_norm)))(X)
    X = layers.Activation('relu')(X)

    X = layers.Dense(self.embedding, name="embedding_pre")(X)
    X = layers.Activation('relu')(X)
    Embedding_out = layers.ActivityRegularization(
        l1=self.l1_norm_embedding * 10**(self.coef))(X)

    z_mean = layers.Dense(self.embedding, name="z_mean")(Embedding_out)
    z_log_var = layers.Dense(self.embedding, name="z_log_var")(Embedding_out)

    self.encoder_model = Model(inputs=encoder_input,
                               outputs=[Embedding_out, z_mean, z_log_var],
                               name='LSTM_encoder')

    # Decoder: sample from (z_mean, z_log_var) and decode back to a sequence.
    decoder_mean = layers.Input(shape=(self.embedding,), name="z_mean")
    decoder_log = layers.Input(shape=(self.embedding,), name="z_log")
    sampling = Sampling()((decoder_mean, decoder_log))

    z = layers.Dense(self.embedding, name="embedding")(sampling)
    z = layers.Activation('relu')(z)
    z = layers.ActivityRegularization(l1=self.l1_norm_embedding * 10**(self.coef))(z)

    X = layers.RepeatVector(self.data_shape[1])(z)
    X = layers.Bidirectional(LSTM(self.layer_size,
                                  return_sequences=True,
                                  dropout=self.drop_frac,
                                  activity_regularizer=l1(self.l1_norm)))(X)
    X = layers.Activation('relu')(X)

    for i in range(self.num_ident_blocks):
        X = self.identity_block(X, 'decoder', string.ascii_uppercase[i + 1])

    X = layers.TimeDistributed(Dense(2, activation='linear'))(X)

    self.decoder_model = Model(inputs=[decoder_mean, decoder_log],
                               outputs=X, name='LSTM_decoder')

    outputs = self.decoder_model([z_mean, z_log_var])
    self.vae = tf.keras.Model(inputs=encoder_input, outputs=outputs, name="vae")

    # Add KL divergence regularization loss.
    kl_loss = -0.5 * tf.reduce_mean(
        z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
    self.vae.add_loss(self.coef * kl_loss)
def lstmAutoencoder(
    trainingData,
    validationData,
    sequenceLength,
    featureCount,
    encodingDimension,
    hiddenDimension=None,
    batchSize=256,
    epochs=200,
    learningRate=0.0002,
    lossFunction='mse',
    metrics=['mae', 'mse'],
    validationSteps=3,
):
    inputData = layers.Input(shape=(sequenceLength, featureCount))

    # Encoder: an optional hidden LSTM followed by the bottleneck LSTM.
    encoded = inputData
    if hiddenDimension is not None:
        encoded = layers.LSTM(
            hiddenDimension,
            activation='tanh',
            recurrent_activation='sigmoid',
            recurrent_dropout=0,
            return_sequences=True,
            unroll=False,
            use_bias=True,
        )(encoded)
    encoded = layers.LSTM(
        encodingDimension,
        activation='tanh',
        recurrent_activation='sigmoid',
        recurrent_dropout=0,
        return_sequences=False,
        unroll=False,
        use_bias=True,
    )(encoded)

    # Decoder: repeat the encoding once per timestep and mirror the encoder.
    x = layers.RepeatVector(sequenceLength, name='repeat-layer')(encoded)
    x = layers.LSTM(
        encodingDimension,
        activation='tanh',
        recurrent_activation='sigmoid',
        recurrent_dropout=0,
        return_sequences=True,
        unroll=False,
        use_bias=True,
    )(x)
    if hiddenDimension is not None:
        x = layers.LSTM(
            hiddenDimension,
            activation='tanh',
            recurrent_activation='sigmoid',
            recurrent_dropout=0,
            return_sequences=True,
            unroll=False,
            use_bias=True,
        )(x)
    decoded = layers.TimeDistributed(layers.Dense(featureCount))(x)

    autoencoder = Model(inputData, decoded)

    # Standalone decoder: rebuild the decoding path on a latent-space input.
    encodedInput = layers.Input(shape=(encodingDimension,))
    repeatLayer = autoencoder.get_layer('repeat-layer')
    if hiddenDimension is not None:
        decoderLayer = autoencoder.layers[-1](autoencoder.layers[-2](
            autoencoder.layers[-3](repeatLayer(encodedInput))))
    else:
        decoderLayer = autoencoder.layers[-1](autoencoder.layers[-2](
            repeatLayer(encodedInput)))

    encoder = Model(inputData, encoded)
    decoder = Model(encodedInput, decoderLayer)

    autoencoder.compile(
        optimizer=tf.keras.optimizers.Nadam(learningRate),
        loss=lossFunction,
        metrics=metrics,
    )
    autoencoder.fit(
        trainingData,
        trainingData,
        batch_size=batchSize,
        epochs=epochs,
        validation_data=(validationData, validationData),
        validation_steps=validationSteps,
        shuffle=True,
    )
    return autoencoder, encoder, decoder
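# Usage sketch (hypothetical data): train on windows of 50 timesteps by 4
# features. trainWindows and valWindows are assumed float arrays of shape
# (n, 50, 4).
autoencoder, encoder, decoder = lstmAutoencoder(
    trainWindows, valWindows,
    sequenceLength=50, featureCount=4,
    encodingDimension=8, hiddenDimension=32, epochs=20)
reconstruction = autoencoder.predict(valWindows)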
x = np.array(x).reshape((-1, 15, 5))
y = np.array(y).reshape((-1, 5, 1))

# Hold out the last 150 windows for testing and the 150 before them for
# validation; note the 20-window gaps between the splits.
x_test = x[-150:]
y_test = y[-150:]
x_val = x[-320:-170]
y_val = y[-320:-170]
x_train = x[:-340]
y_train = y[:-340]
x_train.shape

# %%
# build LSTM model
inputs = layers.Input(shape=(15, 5))
x = layers.LSTM(16, dropout=0.3, recurrent_dropout=0.0, return_sequences=False)(inputs)
x = layers.RepeatVector(5)(x)
x = layers.LSTM(16, dropout=0.3, recurrent_dropout=0.0, return_sequences=True)(x)
outputs = layers.Dense(1)(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.summary()
plot_model(model, show_shapes=True)

# %%
# fit model
model.compile(loss='mse', optimizer='Adam')
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='auto',
                                            patience=5, verbose=1)
# The remaining arguments of this call were truncated in the source;
# y_train, the validation data, and the EarlyStopping callback are inferred
# from the context above.
model.fit(x_train, y_train,
          epochs=100,  # illustrative value; the original setting was truncated
          validation_data=(x_val, y_val),
          callbacks=[callback])
else:  # (the matching `if` branch was truncated in the source)
    timesteps = data.shape[1]
    input_size = data.shape[2]

# Create encoder
inputs = keras.Input(shape=(timesteps, input_size))
x = layers.LSTM(ltsm_encode, activation='relu')(inputs)

# Sampling layers
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
encoder = keras.Model(inputs, [z_mean, z_log_var, z], name="encoder")

# Create decoder
input_latent = keras.Input(shape=(latent_dim,))
decoder1 = layers.RepeatVector(timesteps)(input_latent)
decoder1 = layers.LSTM(ltsm_decode, activation='sigmoid', return_sequences=True)(decoder1)
decoder1 = layers.TimeDistributed(layers.Dense(input_size))(decoder1)
decoder = keras.Model(input_latent, decoder1)

vae = VAE(encoder, decoder)
vae.compile(keras.optimizers.Adam(learning_rate=0.001))
es = EarlyStopping(monitor='loss', mode='min', verbose=0, patience=5)

train_data = x_train.reshape(x_train.shape[0], data.shape[1], data.shape[2])
test_data = x_test.reshape(x_test.shape[0], data.shape[1], data.shape[2])
print(type(train_data))
def build_1d_model(args):
    l2r = 1e-9

    # Token sequence and object table inputs.
    T, X = tfkl.Input((N_TOKS,)), tfkl.Input((MAX_OBJS, 3 + N_OBJS))

    ti = tfkl.Embedding(N_VOCAB, N_EMBED, input_length=N_TOKS)(T)

    # BiLSTM stack followed by 1x1 convolutions producing six per-token
    # attention distributions.
    th = tfkm.Sequential([
        tfkl.Bidirectional(tfkl.LSTM(128, return_sequences=True)),
        tfkl.Bidirectional(tfkl.LSTM(128, return_sequences=True)),
        tfkl.Conv1D(256, (1,), activation='elu', kernel_regularizer=tfkr.l2(l2r)),
        tfkl.Conv1D(6, (1,), activation=None, kernel_regularizer=tfkr.l2(l2r)),
        tfkl.Softmax(axis=-2, name='lstm_attn'),
    ], name='lstm_layers')(ti)

    tia = tfkb.sum(tfkl.Reshape((N_TOKS, 1, -1))(th) * tfkl.Reshape((N_TOKS, N_EMBED, 1))(ti), axis=-3)

    Xi = tfkb.sum(X[:, :, 3:], axis=-1, keepdims=True)

    s1 = tfkl.Dense(N_OBJS, activation='softmax')(tia[:, :, 0])
    s1b = tfkm.Sequential([tfkl.RepeatVector(MAX_OBJS), tfkl.Reshape((MAX_OBJS, N_OBJS))])(s1)
    Xs1 = tfkb.sum(X[:, :, 3:] * s1b, axis=-1, keepdims=True)

    s2 = tfkl.Dense(3)(tia[:, :, 1])
    s2b = tfkm.Sequential([tfkl.RepeatVector(MAX_OBJS), tfkl.Reshape((MAX_OBJS, 3))])(s2)
    s2c = tfkb.sum(s2b * X[:, :, 2:3] - (1 - Xi) * 20, axis=-1, keepdims=True)
    Xs2 = tfkm.Sequential([tfkl.Reshape((-1, 1)), tfkl.Softmax(axis=-2), tfkl.Reshape((MAX_OBJS, 1))])(s2c)
    Xs2 = Xs2 - tfkb.max(Xs2, axis=[1, 2], keepdims=True)

    s3 = tfkl.Dense(N_OBJS, activation='softmax')(tia[:, :, 2])
    s3b = tfkm.Sequential([tfkl.RepeatVector(MAX_OBJS), tfkl.Reshape((MAX_OBJS, N_OBJS))])(s3)
    Xs3 = tfkb.sum(X[:, :, 3:] * s3b, axis=-1, keepdims=True)

    s4 = tfkl.Dense(16, activation='softmax')(tia[:, :, 3])
    s4b = tfkm.Sequential([tfkl.RepeatVector(MAX_OBJS), tfkl.Reshape((MAX_OBJS, 16))])(s4)
    Xs4 = s4b * Xi

    s5 = tfkl.Dense(16, activation='softmax')(tia[:, :, 4])
    s5b = tfkm.Sequential([tfkl.RepeatVector(MAX_OBJS), tfkl.Reshape((MAX_OBJS, 16))])(s5)
    Xs5 = s5b * Xi

    s6 = tfkl.Dense(16, activation='softmax')(tia[:, :, 5])
    s6b = tfkm.Sequential([tfkl.RepeatVector(MAX_OBJS), tfkl.Reshape((MAX_OBJS, 16))])(s6)
    Xs6 = s6b * Xi

    xt = tfkl.concatenate([Xi, Xs1, Xs2, Xs3, Xs4, Xs5, Xs6], axis=-1)

    attn = fcnet(xt)

    Y = tfkb.sum(attn * X[:, :, :2], axis=[1])

    model = tfkm.Model(inputs=[T, X], outputs=[Y])

    def acc(y_true, y_pred):
        return tfkb.mean(tfkb.min(tfkb.cast((tfkb.abs(y_true - y_pred) < args.tol), 'float32'), axis=1))

    model.compile(tfk.optimizers.Adam(args.lr), 'mse', metrics=[acc])

    return model
def build_repeat_layer(self, dec_input):
    """Repeat the decoder input once per input timestep to generate a sequence."""
    return layers.RepeatVector(self.input_shape[0], name='repeat_layer')(dec_input)
model.add(layers.Bidirectional(
    layers.LSTM(width_pre, return_sequences=True, dropout=dropout)
))
model.add(layers.Bidirectional(
    layers.LSTM(width_pre, dropout=dropout)
))

# Mid
model.add(layers.RepeatVector(t_out))
model.add(layers.LSTM(width_mid, return_sequences=True, dropout=dropout))

# Post
for _ in range(depth_post - 1):
    model.add(layers.Bidirectional(
        layers.LSTM(width_pre, return_sequences=True, dropout=dropout)
    ))
    return X, np.array(Y), Xoh, Yoh

#------------------------------------utils end------------------------------------#

m = 10000
dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(m)
# dataset: [(original date string, formatted date string), ...]

Tx, Ty = 30, 10
X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)
# X.shape:   (10000, 30)
# Y.shape:   (10000, 10)
# Xoh.shape: (10000, 30, 37)
# Yoh.shape: (10000, 10, 11)

# Shared layers for the attention mechanism
repeator = layers.RepeatVector(Tx)
concatenator = layers.Concatenate(axis=-1)
densor1 = layers.Dense(10, activation='tanh')
densor2 = layers.Dense(1, activation='relu')
activator = layers.Activation('softmax')
dotor = layers.Dot(axes=1)


def one_step_attention(a, s_prev):
    """
    Performs one attention step and outputs a context vector.

    Args:
        a: output hidden states of the pre-attention BiLSTM, shape (m, Tx, 2*n_a)
        s_prev: previous hidden state of the post-attention LSTM, shape (m, n_s)

    Returns:
        context: context vector, the input to the next post-attention LSTM cell
    """
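    # Body sketch reconstructed from the shared layers defined above
    # (repeator, concatenator, densor1, densor2, activator, dotor); this is
    # the standard additive-attention step, not necessarily the original code.
    s_prev = repeator(s_prev)            # (m, Tx, n_s)
    concat = concatenator([a, s_prev])   # (m, Tx, 2*n_a + n_s)
    e = densor1(concat)                  # (m, Tx, 10)
    energies = densor2(e)                # (m, Tx, 1)
    alphas = activator(energies)         # attention weights, (m, Tx, 1)
    context = dotor([alphas, a])         # (m, 1, 2*n_a)
    return context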
EPOCHS = 10
HIDDEN_SIZE = 256
RNN = layers.LSTM

callbacks = [
    ModelCheckpoint(filepath='checkpoint.h5', verbose=1, save_best_only=True),
    EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=2, mode='auto'),
    TensorBoard(log_dir='logs', histogram_freq=0, batch_size=BATCH_SIZE,
                write_grads=True, write_images=True)
]

model = Sequential([
    layers.InputLayer((7, len(CHARS))),
    RNN(HIDDEN_SIZE),
    layers.RepeatVector(3),
    RNN(128, return_sequences=True),
    layers.TimeDistributed(layers.Dense(len(CHARS), activation='softmax'))
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

train_generator = encode_generator(training_generator, BATCH_SIZE)
# The tail of this call was truncated in the source; callbacks= is inferred
# from the callback list defined above.
hist = model.fit_generator(train_generator,
                           steps_per_epoch=STEPS_PER_EPOCH,
                           epochs=EPOCHS,
                           verbose=1,
                           callbacks=callbacks)
def build(self) -> Model:
    """Builds and returns the Keras classification model.

    The model is a CNN LSTM with the attention mechanism. It can integrate
    pre-trained word embeddings. Loss is categorical cross-entropy and the
    Adam algorithm is used to minimize it. The accuracy on the training data
    is recorded at each epoch.

    :return: Keras model
    """
    inputs = layers.Input(shape=(self.sequence_length,))

    # Pre-trained embeddings
    if self.embeddings is not None:
        vector_dim = self.embeddings.shape[1]
        embedded = layers.Embedding(
            input_dim=self.input_dim,
            output_dim=vector_dim,
            weights=[self.embeddings],
            input_length=self.sequence_length,
            trainable=False,
        )(inputs)  # The embedding weights remain fixed as there isn't much data per class
    else:
        assert self.vector_dim is not None, ("If not using pretrained embeddings, "
                                             "an embedding dimension has to be provided.")
        embedded = layers.Embedding(
            input_dim=self.input_dim,
            output_dim=self.vector_dim,
            input_length=self.sequence_length,
            trainable=True,
        )(inputs)  # No pre-trained embeddings in this case, so training is required

    conv = layers.Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')(embedded)
    pool = layers.MaxPooling1D(pool_size=2)(conv)
    pool = layers.BatchNormalization()(pool)
    recurrent = layers.LSTM(units=100, return_sequences=True)(pool)

    # Compute importance for each step (attention mechanism)
    attention = layers.Dense(1, activation='tanh')(recurrent)
    attention = layers.Flatten()(attention)
    attention = layers.Activation('softmax')(attention)
    attention = layers.RepeatVector(100)(attention)
    attention = layers.Permute([2, 1])(attention)

    # Complete text representation
    representation = layers.Multiply()([recurrent, attention])
    embedded = layers.Flatten()(representation)

    # Classify
    classification = layers.Dense(10, activation="relu")(embedded)
    classification = layers.Dense(self.label_dim, activation="softmax")(classification)

    # Create the model
    model = Model([inputs], classification)

    # Compile
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["acc"])
    return model
def one_hot(x):
    # Collapse a distribution over 90 classes to a hard one-hot vector,
    # then add back a time axis of length 1 with RepeatVector.
    x = backend.argmax(x)
    x = tf.one_hot(x, 90)
    x = layers.RepeatVector(1)(x)
    return x
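# Usage sketch (hypothetical context): applied through a Lambda layer, the
# helper turns a softmax distribution over 90 classes into a hard one-hot
# tensor of shape (batch, 1, 90), e.g. for feeding a prediction back into a
# decoder.
probs = layers.Input(shape=(90,))
hard_one_hot = layers.Lambda(one_hot)(probs)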
def build(sequence_length: int, input_dim: int, label_dim: int,
          embeddings: np.array = None, vector_dim: int = None) -> Model:
    """Builds and returns the Keras classification model.

    The model is a CNN LSTM with the attention mechanism. It can integrate
    pre-trained word embeddings. Loss is categorical cross-entropy and the
    Adam algorithm is used to minimize it. The accuracy on the training data
    is recorded at each epoch.

    :param sequence_length: int, the maximum number of words in the input.
        If x was the training data, this would be x.shape[1].
    :param input_dim: int, number of words in the embeddings, i.e. the highest
        word id after tokenizing words.
    :param label_dim: int, number of classes.
    :param embeddings: np.array, pre-trained word embeddings (e.g. GloVe).
        If set to None, the embeddings will be trained; otherwise they remain fixed.
    :param vector_dim: int, dimension of the word embeddings. If embeddings are
        provided, this is automatically set to embeddings.shape[1].
    :return: Keras model
    """
    inputs = layers.Input(shape=(sequence_length,))

    # Pre-trained embeddings
    if embeddings is not None:
        vector_dim = embeddings.shape[1]
        embedded = layers.Embedding(
            input_dim=input_dim,
            output_dim=vector_dim,
            weights=[embeddings],
            input_length=sequence_length,
            trainable=False,
        )(inputs)  # The embedding weights remain fixed as there isn't much data per class
    else:
        assert vector_dim is not None, ("If not using pretrained embeddings, "
                                        "an embedding dimension has to be provided.")
        embedded = layers.Embedding(
            input_dim=input_dim,
            output_dim=vector_dim,
            input_length=sequence_length,
            trainable=True,
        )(inputs)  # No pre-trained embeddings in this case, so training is required

    conv = layers.Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')(embedded)
    pool = layers.MaxPooling1D(pool_size=2)(conv)
    pool = layers.BatchNormalization()(pool)
    recurrent = layers.LSTM(units=100, return_sequences=True)(pool)

    # Compute importance for each step (attention mechanism)
    attention = layers.Dense(1, activation='tanh')(recurrent)
    attention = layers.Flatten()(attention)
    attention = layers.Activation('softmax')(attention)
    attention = layers.RepeatVector(100)(attention)
    attention = layers.Permute([2, 1])(attention)

    # Complete text representation
    representation = layers.Multiply()([recurrent, attention])
    embedded = layers.Flatten()(representation)

    # Classify
    classification = layers.Dense(500, activation="relu")(embedded)
    classification = layers.Dropout(0.4)(classification)
    classification = layers.BatchNormalization()(classification)
    classification = layers.Dense(200, activation="relu")(classification)
    classification = layers.Dropout(0.4)(classification)
    classification = layers.BatchNormalization()(classification)
    classification = layers.Dense(100, activation="relu")(classification)
    classification = layers.Dropout(0.4)(classification)
    classification = layers.Dense(10, activation="relu")(classification)
    classification = layers.Dense(label_dim, activation="softmax")(classification)

    # Create the model
    model = Model([inputs], classification)

    # Compile
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["acc"])
    return model
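# A usage sketch (hypothetical values): a classifier for 200-token sequences
# over a 20,000-word vocabulary with 5 target classes and trainable
# 100-dimensional embeddings.
clf = build(sequence_length=200, input_dim=20000, label_dim=5, vector_dim=100)
clf.summary()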
original_dim = 96
intermediate_dim = 1024
latent_dim = 16

# Define encoder model.
original_inputs = tf.keras.Input(shape=(original_dim, 1), name='encoder_input')
input_err = Input(shape=(original_dim, 1))
x = layers.CuDNNLSTM(intermediate_dim, return_sequences=False)(original_inputs)
z_mean = layers.Dense(latent_dim, name='z_mean')(x)
z_log_var = layers.Dense(latent_dim, name='z_log_var')(x)
z = Sampling()((z_mean, z_log_var))
encoder = tf.keras.Model(inputs=original_inputs, outputs=z, name='encoder')

# Define decoder model.
latent_inputs = tf.keras.Input(shape=(latent_dim,), name='z_sampling')
x = layers.RepeatVector(original_dim)(latent_inputs)
x = layers.CuDNNLSTM(intermediate_dim, return_sequences=True)(x)
outputs = layers.TimeDistributed(layers.Dense(1))(x)
decoder = tf.keras.Model(inputs=latent_inputs, outputs=outputs, name='decoder')

# Define VAE model.
outputs = decoder(z)
vae = tf.keras.Model(inputs=[original_inputs, input_err], outputs=outputs, name='vae')

# Add KL divergence regularization loss.
kl_loss = -0.5 * tf.reduce_mean(
    z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
vae.add_loss(kl_loss)

optimizer = tf.keras.optimizers.SGD(lr=7.5e-5, clipvalue=0.5)  # Adam(clipvalue=0.5)
def build_multi_track_vae(
    optimizer,
    lstm_units,
    latent_dim,
    embedding_dim,
    n_timesteps,
    n_tracks,
    n_notes,
    dropout_rate=0.2,
    gru=False,
    bidirectional=False,
):
    """Build a multi-track LSTM-VAE Keras model for autoencoding polyphonic music."""
    # Define encoder model.
    inputs = layers.Input(shape=(n_timesteps, n_tracks))
    rnn = layers.GRU if gru else layers.LSTM

    if embedding_dim > 0:
        encoder = layers.Embedding(n_notes, embedding_dim, input_length=n_timesteps)(inputs)
        encoder = layers.Reshape((n_timesteps, embedding_dim * n_tracks))(encoder)
        if bidirectional:
            encoder = layers.Bidirectional(rnn(lstm_units, return_sequences=True))(encoder)
        else:
            encoder = rnn(lstm_units, return_sequences=True)(encoder)
    else:
        if bidirectional:
            encoder = layers.Bidirectional(rnn(lstm_units, return_sequences=True))(inputs)
        else:
            encoder = rnn(lstm_units, return_sequences=True)(inputs)
    encoder = layers.Dropout(dropout_rate)(encoder)
    if bidirectional:
        encoder = layers.Bidirectional(rnn(lstm_units, return_sequences=False))(encoder)
    else:
        encoder = rnn(lstm_units, return_sequences=False)(encoder)

    mu = layers.Dense(latent_dim, name="mu")(encoder)
    sigma = layers.Dense(latent_dim, name="sigma")(encoder)

    # Latent-space sampling
    z = layers.Lambda(sample_normal, output_shape=(latent_dim,))([mu, sigma])
    encoder_model = keras.Model(inputs, [mu, sigma, z])

    # Define decoder model: one softmax head per track.
    decoder_input = layers.Input(shape=(latent_dim,))
    decoder = layers.RepeatVector(n_timesteps)(decoder_input)
    decoder = rnn(lstm_units, return_sequences=True)(decoder)
    decoder = layers.Dropout(dropout_rate)(decoder)
    decoder = rnn(lstm_units, return_sequences=True)(decoder)
    outputs = [
        layers.TimeDistributed(
            layers.Dense(n_notes, activation="softmax", name=f"track_{i}"))(decoder)
        for i in range(n_tracks)
    ]
    decoder_model = keras.Model(decoder_input, outputs)

    # Connect encoder and decoder together.
    decoder_outputs = decoder_model(z)
    vae_model = keras.Model(inputs=inputs, outputs=decoder_outputs)

    kl_loss = -0.5 * tf.reduce_mean(sigma - tf.square(mu) - tf.exp(sigma) + 1)
    vae_model.add_loss(kl_loss)
    vae_model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=["sparse_categorical_accuracy"],
    )
    return vae_model, encoder_model, decoder_model
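# A usage sketch (hypothetical hyperparameters): a 4-track model over 32-step
# phrases with a 128-note vocabulary and bidirectional GRU encoders.
vae, enc, dec = build_multi_track_vae(
    optimizer="adam", lstm_units=256, latent_dim=32, embedding_dim=8,
    n_timesteps=32, n_tracks=4, n_notes=128, gru=True, bidirectional=True)
vae.summary()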