def build_lstm_lstm():
    """Build and compile a two-input model with one LSTM branch per input.

    Reads the module-level names ``n_past``, ``n_future``, ``n_features``,
    ``n_features_ori`` and ``my_optimizer``.  Input "cA" has
    ``n_features_ori`` features per step, input "cD" has ``n_features``.
    The two branch outputs are concatenated and mapped to ``n_future``
    values by a linear Dense layer.

    Returns:
        The compiled ``keras.Model`` (MSE loss).  The model summary is
        printed as a side effect.
    """
    approx_in = keras.Input(shape=(n_past, n_features_ori), name="cA")
    detail_in = keras.Input(shape=(n_past, n_features), name="cD")

    # Branch for the "cA" input: LSTM followed by two Dropout/Dense(100) pairs.
    approx = layers.CuDNNLSTM(200, return_sequences=False)(approx_in)
    for _ in range(2):
        approx = layers.Dropout(0.2)(approx)
        approx = layers.Dense(100, activation='relu')(approx)

    # Branch for the "cD" input: same shape, but ends in a 10-unit sigmoid.
    detail = layers.CuDNNLSTM(200, return_sequences=False)(detail_in)
    detail = layers.Dropout(0.2)(detail)
    detail = layers.Dense(100, activation='relu')(detail)
    detail = layers.Dropout(0.2)(detail)
    detail = layers.Dense(10, activation='sigmoid')(detail)

    # Merge both branches and regress the forecast horizon.
    merged = layers.concatenate([approx, detail])
    forecast = layers.Dense(n_future)(merged)

    model = keras.Model(inputs=[approx_in, detail_in], outputs=forecast)
    model.compile(loss='mse', optimizer=my_optimizer)
    model.summary()
    return model
def build_cnn_autolstm():
    """Build and compile a two-input model: Conv1D branch + encoder/decoder LSTM branch.

    Reads the module-level names ``n_past``, ``n_future``, ``n_features`` and
    ``my_optimizer``.  Both inputs ("cA" and "cD") have shape
    ``(n_past, int(n_features))``.

    Returns:
        The compiled ``keras.Model`` (MSE loss) producing ``n_future``
        values.  The model summary is printed as a side effect.
    """
    feature_count = int(n_features)
    approx_in = keras.Input(shape=(n_past, feature_count), name="cA")
    detail_in = keras.Input(shape=(n_past, feature_count), name="cD")

    # Convolutional branch over the "cA" input.
    conv_stack = [
        layers.Conv1D(filters=64, kernel_size=2, activation='relu'),
        layers.Conv1D(filters=64, kernel_size=2, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        layers.Flatten(),
        layers.Dropout(0.3),
        layers.Dense(100, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(50, activation='relu'),
    ]
    approx = approx_in
    for layer in conv_stack:
        approx = layer(approx)

    # Recurrent branch over the "cD" input: encode to a vector, repeat it
    # n_future times, decode as a sequence, then collapse with a final LSTM.
    lstm_stack = [
        layers.CuDNNLSTM(200, return_sequences=False),
        layers.RepeatVector(n_future),
        layers.CuDNNLSTM(200, return_sequences=True),
        layers.TimeDistributed(layers.Dense(100, activation='relu')),
        layers.TimeDistributed(layers.Dense(50)),
        layers.CuDNNLSTM(50),
    ]
    detail = detail_in
    for layer in lstm_stack:
        detail = layer(detail)

    # Merge both branches and regress the forecast horizon.
    merged = layers.concatenate([approx, detail])
    forecast = layers.Dense(n_future)(merged)

    model = keras.Model(inputs=[approx_in, detail_in], outputs=forecast)
    model.compile(loss='mse', optimizer=my_optimizer)
    model.summary()
    return model
def build_2d_model(args):
    """Build and compile the text-conditioned 2-D attention model.

    Inputs are ``T`` with shape ``(N_TOKS,)`` (fed to an Embedding) and ``X``
    with shape ``(H, W, 3 + N_OBJS)``.  A stacked BiLSTM produces six
    attention distributions over the tokens; each one pools the token
    embeddings into a vector that drives one "head" (s1..s6).  The head
    outputs are broadcast over the image, combined with ``X`` and passed
    through ``unet``.  The output ``Y`` is the sum over all pixels of
    ``attn * X[..., :2]``.

    Args:
        args: object exposing ``tol`` (tolerance used by the ``acc`` metric)
            and ``lr`` (Adam learning rate).

    Returns:
        The compiled model (Adam, MSE loss, custom ``acc`` metric).
    """
    l2r = 1e-9

    def tile_over_image(vec, channels):
        # Repeat a per-sample vector at every pixel -> (H, W, channels).
        return tfkm.Sequential([tfkl.RepeatVector(W * H),
                                tfkl.Reshape((H, W, channels))])(vec)

    T, X = tfkl.Input((N_TOKS,)), tfkl.Input((H, W, 3 + N_OBJS))
    ti = tfkl.Embedding(N_VOCAB, N_EMBED, input_length=N_TOKS)(T)
    print(ti.shape)

    # th: six softmax distributions over the token axis (axis=-2).
    th = tfkm.Sequential([
        tfkl.Bidirectional(tfkl.CuDNNLSTM(128, return_sequences=True)),
        tfkl.Bidirectional(tfkl.CuDNNLSTM(128, return_sequences=True)),
        tfkl.Conv1D(256, (1,), activation='elu', kernel_regularizer=tfkr.l2(l2r)),
        tfkl.Conv1D(6, (1,), activation=None, kernel_regularizer=tfkr.l2(l2r)),
        tfkl.Softmax(axis=-2, name='lstm_attn'),
    ], name='lstm_layers')(ti)

    # Attention-weighted sum of embeddings over tokens: tia[:, :, k] is the
    # pooled embedding for head k.
    tia = tfkb.sum(tfkl.Reshape((N_TOKS, 1, -1))(th)
                   * tfkl.Reshape((N_TOKS, N_EMBED, 1))(ti), axis=-3)

    # Sum of all channels from index 3 onward, kept as a single channel.
    Xi = tfkb.sum(X[:, :, :, 3:], axis=-1, keepdims=True)

    s1 = tfkl.Dense(N_OBJS, activation='softmax')(tia[:, :, 0])
    s1b = tile_over_image(s1, N_OBJS)
    Xs1 = tfkb.sum(X[:, :, :, 3:] * s1b, axis=-1, keepdims=True)

    s2 = tfkl.Dense(3)(tia[:, :, 1])
    s2b = tile_over_image(s2, 3)
    # The -20 term pushes down the score wherever Xi is 0.
    s2c = tfkb.sum(s2b * X[:, :, :, 2:3] - (1 - Xi) * 20, axis=-1, keepdims=True)
    # Softmax over all H*W positions.
    Xs2 = tfkm.Sequential([tfkl.Reshape((-1, 1)), tfkl.Softmax(axis=-2),
                           tfkl.Reshape((H, W, 1))])(s2c)
    # NOTE(review): subtracting the spatial max leaves every value <= 0;
    # confirm this is intended (rather than e.g. dividing by the max).
    Xs2 = Xs2 - tfkb.max(Xs2, axis=[1, 2], keepdims=True)

    s3 = tfkl.Dense(N_OBJS, activation='softmax')(tia[:, :, 2])
    s3b = tile_over_image(s3, N_OBJS)
    Xs3 = tfkb.sum(X[:, :, :, 3:] * s3b, axis=-1, keepdims=True)

    # Heads 4..6 share one shape: a 16-way softmax broadcast over the image
    # and gated by Xi.
    gated = []
    for head in (3, 4, 5):
        weights = tfkl.Dense(16, activation='softmax')(tia[:, :, head])
        gated.append(tile_over_image(weights, 16) * Xi)
    Xs4, Xs5, Xs6 = gated

    xt = tfkl.concatenate([Xi, Xs1, Xs2, Xs3, Xs4, Xs5, Xs6], axis=-1)
    attn = unet(xt)
    Y = tfkb.sum(attn * X[:, :, :, :2], axis=[1, 2])
    model = tfkm.Model(inputs=[T, X], outputs=[Y])

    def acc(y_true, y_pred):
        # Keras calls metrics as (y_true, y_pred).  A sample counts as
        # correct only if every output is within args.tol of the target.
        within_tol = tfkb.cast((tfkb.abs(y_true - y_pred) < args.tol), 'float32')
        return tfkb.mean(tfkb.min(within_tol, axis=1))

    model.compile(tfk.optimizers.Adam(args.lr), 'mse', metrics=[acc])
    return model
def __init__(self, n_feat=22, n_lstm=1, lstm_sizes="[5]", fc_sizes="[80]",
             lstm_dropout=0.2, dropout=0.1, activation='sigmoid'):
    """Build a model that runs one independent LSTM per input feature.

    The input has shape ``(None, n_feat)``.  Each feature column is sliced
    out, fed to its own CuDNNLSTM, and the final LSTM outputs are
    concatenated and passed through a Dense/Dropout stack ending in a
    single-unit output.  The result is stored in ``self.model``.

    Args:
        n_feat: number of input features (and therefore of LSTMs).
        n_lstm: accepted for interface compatibility; not used here.
        lstm_sizes: list of LSTM sizes, or its string literal
            (e.g. ``"[5]"``).  Only the first entry is used.
        fc_sizes: list of Dense layer widths, or its string literal.
        lstm_dropout: accepted for interface compatibility; not used here.
        dropout: dropout rate applied after each Dense layer.
        activation: activation for the Dense layers and the output unit.
    """
    super(LSTM_one_to_one, self).__init__()
    # Accept both the original string form and already-parsed sequences.
    if isinstance(lstm_sizes, str):
        lstm_sizes = ast.literal_eval(lstm_sizes)
    if isinstance(fc_sizes, str):
        fc_sizes = ast.literal_eval(fc_sizes)

    inputs = keras.Input((None, n_feat))
    # NOTE(review): CuDNNLSTM does not consume Keras masks and Lambda does
    # not forward them by default; confirm this Masking layer has the
    # intended effect.
    masked = layers.Masking(mask_value=0, name="masking")(inputs)

    n_hidden = lstm_sizes[0]
    lstms = [
        layers.CuDNNLSTM(n_hidden, return_sequences=False,
                         name="lstm1_feature_%d" % idx)
        for idx in range(n_feat)
    ]

    per_feature = []
    for idx, lstm in enumerate(lstms):
        # One Lambda per feature, each with its own fixed index.  A single
        # shared Lambda with a mutated ``arguments`` dict only remembers the
        # last index, so any re-trace or reload would slice the same column
        # for every LSTM.
        slicer = layers.Lambda(lambda x, i: x[:, :, i: i + 1],
                               arguments={'i': idx},
                               name="slicer_lambda_%d" % idx)
        per_feature.append(lstm(slicer(masked)))

    y = layers.concatenate(per_feature, axis=-1, name="merge")
    for idx, width in enumerate(fc_sizes):
        y = layers.Dense(width, activation=activation, name="fc_%d" % idx)(y)
        y = layers.Dropout(dropout, name="dropout_%i" % idx)(y)
    y = layers.Dense(1, activation=activation)(y)
    self.model = keras.Model(inputs, y)
def build_ende_lstm():
    """Build and compile an encoder/decoder LSTM forecaster.

    Reads the module-level names ``n_past``, ``n_future``, ``n_features`` and
    ``my_optimizer``.  The encoder LSTM summarises the input window into a
    vector, which is repeated ``n_future`` times and decoded by a second
    LSTM; a time-distributed Dense(1) gives one value per step and the
    result is reshaped to ``(-1, n_future)``.

    Returns:
        The compiled ``keras.Model`` (MSE loss).  The model summary is
        printed as a side effect.
    """
    window = keras.Input(shape=(n_past, int(n_features)))

    encoder = layers.CuDNNLSTM(200, input_shape=(n_past, n_features),
                               return_sequences=False)
    encoded = encoder(window)
    repeated = layers.RepeatVector(n_future)(encoded)

    decoded = layers.CuDNNLSTM(200, return_sequences=True)(repeated)
    decoded = layers.TimeDistributed(layers.Dense(100, activation='relu'))(decoded)
    decoded = layers.TimeDistributed(layers.Dense(1))(decoded)
    forecast = layers.Reshape((-1, n_future))(decoded)

    model = keras.Model(inputs=[window], outputs=forecast)
    model.compile(loss='mse', optimizer=my_optimizer)
    model.summary()
    return model
def __init__(self, config):
    """Create the pooling layer and three bidirectional CuDNNLSTM layers.

    Args:
        config: object exposing ``hidden_dim``, used as the unit count of
            every recurrent layer.  Also stored as ``self.config``.
    """
    super(HBMP, self).__init__()
    self.config = config
    self.max_pool = layers.GlobalMaxPool1D()
    self.hidden_dim = config.hidden_dim

    # All three recurrent layers are created first, then wrapped, so the
    # attribute creation order is rnn1..rnn3 followed by
    # bidirectional_1..bidirectional_3.
    levels = (1, 2, 3)
    for level in levels:
        recurrent = layers.CuDNNLSTM(units=config.hidden_dim,
                                     return_sequences=True)
        setattr(self, "rnn%d" % level, recurrent)
    for level in levels:
        wrapped = layers.Bidirectional(getattr(self, "rnn%d" % level))
        setattr(self, "bidirectional_%d" % level, wrapped)
def build_cnnlstm3():
    """Build and compile a four-input model: one Conv1D branch, three LSTM branches.

    Reads the module-level names ``n_past``, ``n_future``, ``n_features`` and
    ``my_optimizer``.  Inputs are named "cA3", "cD3", "cD2" and "cD1", each
    of shape ``(n_past, n_features)``.  Every branch ends in ``n_future``
    units; the four outputs are concatenated and mapped to ``n_future``
    values by a final linear Dense layer.

    Returns:
        The compiled ``keras.Model`` (MSE loss).  The model summary is
        printed as a side effect.
    """
    def detail_branch(source):
        # LSTM -> Dropout -> Dense(100) -> Dropout -> tanh Dense(n_future).
        out = layers.CuDNNLSTM(200, return_sequences=False)(source)
        out = layers.Dropout(0.2)(out)
        out = layers.Dense(100)(out)
        out = layers.Dropout(0.2)(out)
        return layers.Dense(n_future, activation='tanh')(out)

    window_shape = (n_past, n_features)
    in_a3 = keras.Input(shape=window_shape, name="cA3")
    in_d3 = keras.Input(shape=window_shape, name="cD3")
    in_d2 = keras.Input(shape=window_shape, name="cD2")
    in_d1 = keras.Input(shape=window_shape, name="cD1")

    # Convolutional branch over the "cA3" input.
    approx = layers.Conv1D(filters=64, kernel_size=2, activation='relu')(in_a3)
    approx = layers.Conv1D(filters=64, kernel_size=2, activation='relu')(approx)
    approx = layers.MaxPooling1D(pool_size=2)(approx)
    approx = layers.Flatten()(approx)
    approx = layers.Dense(200, activation='relu')(approx)
    approx = layers.Dropout(0.2)(approx)
    approx = layers.Dense(144, activation='relu')(approx)
    approx = layers.Dense(n_future)(approx)

    # One recurrent branch per detail input, built in the order D3, D2, D1.
    detail_3 = detail_branch(in_d3)
    detail_2 = detail_branch(in_d2)
    detail_1 = detail_branch(in_d1)

    merged = layers.concatenate([approx, detail_3, detail_2, detail_1])
    forecast = layers.Dense(n_future)(merged)

    model = keras.Model(inputs=[in_a3, in_d3, in_d2, in_d1], outputs=forecast)
    model.compile(loss='mse', optimizer=my_optimizer)
    model.summary()
    return model
def build_lstm_v2():
    """Build and compile a two-layer stacked LSTM forecaster, and plot it.

    Reads the module-level names ``n_past``, ``n_future``, ``n_features``,
    ``save_path`` and ``syn``.  After compiling (MSE loss, 'adam'), the
    architecture diagram is written to
    ``save_path + 'model_<syn>.png'`` via ``plot_model``.

    Returns:
        The compiled ``keras.Model``.  The model summary is printed as a
        side effect.
    """
    window = keras.Input(shape=(n_past, int(n_features)))

    stack = [
        layers.CuDNNLSTM(300, return_sequences=True),
        layers.CuDNNLSTM(300, return_sequences=False),
        layers.BatchNormalization(),
        layers.Dropout(0.2),
        layers.Dense(100, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(n_future),
    ]
    forecast = window
    for layer in stack:
        forecast = layer(forecast)

    model = keras.Model(inputs=[window], outputs=forecast)
    model.compile(loss='mse', optimizer='adam')
    model.summary()

    diagram_path = save_path + 'model_{}.png'.format(syn)
    plot_model(model, to_file=diagram_path, show_shapes=True)
    return model
def __init__(self, config):
    """Create the bidirectional LSTM, dropout and max-pooling layers.

    Args:
        config: object exposing ``hidden_dim`` (LSTM units) and ``dropout``
            (dropout rate).  Also stored as ``self.config``.
    """
    super(BiLSTMMaxPoolEncoder, self).__init__()
    self.config = config

    units = config.hidden_dim
    self.rnn = layers.CuDNNLSTM(units=units, return_sequences=True)
    self.bidirectional = layers.Bidirectional(self.rnn)

    self.dropout = layers.Dropout(rate=config.dropout)
    self.max_pool = layers.GlobalMaxPool1D()
def RNNSpeechModel(nCategories, samplingrate=16000, inputLength=16000):
    """Build a simple bidirectional-LSTM classifier over mel spectrograms.

    The raw waveform is turned into an 80-band mel spectrogram, normalised,
    passed through two (5, 1) convolutions and two bidirectional LSTMs, and
    classified with a softmax layer.  The model is returned uncompiled.

    Args:
        nCategories: number of output classes.
        samplingrate: sampling rate passed to ``Melspectrogram`` (also sets
            ``fmax`` to half of it).
        inputLength: number of samples per input waveform.

    Returns:
        The uncompiled Keras ``Model``.
    """
    sr = samplingrate
    iLen = inputLength

    # Fixed: the original read ``L.L.Input``; every other layer in this
    # function is reached directly as ``L.<Layer>``.
    inputs = L.Input((iLen,))

    x = L.Reshape((1, -1))(inputs)
    x = Melspectrogram(n_dft=1024, n_hop=128, input_shape=(1, iLen),
                       padding='same', sr=sr, n_mels=80,
                       fmin=40.0, fmax=sr / 2, power_melgram=1.0,
                       return_decibel_melgram=True, trainable_fb=False,
                       trainable_kernel=False, name='mel_stft')(x)
    x = Normalization2D(int_axis=0)(x)

    # Melspectrogram puts the sequence in shape
    # (batch_size, melDim, timeSteps, 1); swap the first two so time comes
    # first for the LSTMs.
    x = L.Permute((2, 1, 3))(x)

    x = L.Conv2D(10, (5, 1), activation='relu', padding='same')(x)
    x = L.BatchNormalization()(x)
    x = L.Conv2D(1, (5, 1), activation='relu', padding='same')(x)
    x = L.BatchNormalization()(x)

    # Drop the trailing single-channel axis left by the last convolution.
    x = L.Lambda(lambda q: K.squeeze(q, -1), name='squeeze_last_dim')(x)

    x = L.Bidirectional(L.CuDNNLSTM(64, return_sequences=True))(x)  # [b_s, seq_len, vec_dim]
    x = L.Bidirectional(L.CuDNNLSTM(64))(x)

    x = L.Dense(64, activation='relu')(x)
    x = L.Dense(32, activation='relu')(x)
    output = L.Dense(nCategories, activation='softmax')(x)

    model = Model(inputs=[inputs], outputs=[output])
    return model
def __init__(self, original_dim, intermediate_dim=1024, name='decoder', **kwargs):
    """Create the decoder layers: repeat, recurrent projection, per-step output.

    Args:
        original_dim: number of times the input vector is repeated.
        intermediate_dim: unit count of the CuDNNLSTM.
        name: layer name forwarded to the parent constructor.
        **kwargs: forwarded to the parent constructor.
    """
    super(Decoder, self).__init__(name=name, **kwargs)
    self.repeat = layers.RepeatVector(n=original_dim)
    # Named "dense_proj" but implemented as a sequence-returning LSTM.
    self.dense_proj = layers.CuDNNLSTM(units=intermediate_dim,
                                       return_sequences=True)
    per_step = layers.Dense(units=1)
    self.dense_output = layers.TimeDistributed(per_step)
def __init__(self, latent_dim=16, intermediate_dim=1024, name='encoder', **kwargs):
    """Create the encoder layers: recurrent projection, mean/log-var heads, sampler.

    Args:
        latent_dim: width of the ``dense_mean`` and ``dense_log_var`` heads.
        intermediate_dim: unit count of the CuDNNLSTM.
        name: layer name forwarded to the parent constructor.
        **kwargs: forwarded to the parent constructor.
    """
    super(Encoder, self).__init__(name=name, **kwargs)
    # Named "dense_proj" but implemented as an LSTM returning its last output.
    self.dense_proj = layers.CuDNNLSTM(units=intermediate_dim,
                                       return_sequences=False)
    self.dense_mean = layers.Dense(units=latent_dim)
    self.dense_log_var = layers.Dense(units=latent_dim)
    self.sampling = Sampling()
def build_lstm():
    """Build and compile a single-layer LSTM forecaster.

    Reads the module-level names ``n_past``, ``n_future``, ``n_features`` and
    ``my_optimizer``.  The LSTM output goes through Dropout(0.5),
    Dense(100, relu), Dropout(0.5) and a linear Dense(n_future).

    Returns:
        The compiled ``keras.Model`` (MSE loss).  The model summary is
        printed as a side effect.
    """
    window = keras.Input(shape=(n_past, int(n_features)))

    recurrent = layers.CuDNNLSTM(200, input_shape=(n_past, n_features),
                                 return_sequences=False)
    hidden = recurrent(window)
    hidden = layers.Dropout(0.5)(hidden)
    hidden = layers.Dense(100, activation='relu')(hidden)
    hidden = layers.Dropout(0.5)(hidden)
    forecast = layers.Dense(n_future)(hidden)

    model = keras.Model(inputs=[window], outputs=forecast)
    model.compile(loss='mse', optimizer=my_optimizer)
    model.summary()
    return model
def decoder_smate(encoder, timesteps, data_dim, pool_step):
    """Build the decoder model on top of ``encoder``.

    The encoder output is upsampled along the time axis by ``pool_step``,
    passed through a stack of recurrent layers and projected to
    ``data_dim`` values per step with a sigmoid Dense layer.

    Reads the module-level names ``module_name`` ('gru' or 'lstm'),
    ``hidden_dim`` and ``num_layers``.  The CuDNN variant of the recurrent
    layer is used when ``tf.test.is_gpu_available()`` is true.

    Args:
        encoder: model whose ``output`` feeds the decoder and whose
            ``input`` becomes the input of the returned model.
        timesteps: accepted for interface compatibility; not used here.
        data_dim: width of the reconstructed output per step.
        pool_step: upsampling factor along the time axis.

    Returns:
        A ``Model`` mapping ``encoder.input`` to the reconstruction.

    Raises:
        ValueError: if ``module_name`` is neither 'gru' nor 'lstm'
            (the original code failed later with an unbound ``out_t``).
    """
    if module_name not in ('gru', 'lstm'):
        raise ValueError(
            "module_name must be 'gru' or 'lstm', got %r" % (module_name,))

    input_ = encoder.output  # input: (batch_size, timesteps, latent_dim)
    out = ll.UpSampling1D(size=pool_step)(input_)

    # Pick the recurrent layer class once; only the selected attribute is
    # looked up, so the CuDNN classes are not needed on CPU-only installs.
    use_cudnn = tf.test.is_gpu_available()
    if module_name == 'gru':
        rnn_cls = ll.CuDNNGRU if use_cudnn else ll.GRU
    else:
        rnn_cls = ll.CuDNNLSTM if use_cudnn else ll.LSTM

    # Temporal axis: one layer is always created, plus num_layers - 1 more.
    out_t = rnn_cls(hidden_dim, return_sequences=True)(out)
    for _ in range(num_layers - 1):
        # output: (batch_size, timesteps, hidden_dim)
        out_t = rnn_cls(hidden_dim, return_sequences=True)(out_t)

    out = ll.Dense(data_dim, activation='sigmoid')(out_t)
    model = Model(encoder.input, out)
    return model
def build_tanh_lstm():
    """Build and compile a wide single-layer LSTM forecaster with tanh output.

    Reads the module-level names ``n_past``, ``n_future`` and ``n_features``.
    The 1000-unit LSTM output goes through Dropout(0.2), Dense(500, relu),
    Dropout(0.2) and a tanh Dense(n_future).

    Returns:
        The compiled ``keras.Model`` (MSE loss, 'adam').  The model summary
        is printed as a side effect.
    """
    window = keras.Input(shape=(n_past, int(n_features)))

    stack = [
        layers.CuDNNLSTM(1000),
        layers.Dropout(0.2),
        layers.Dense(500, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(n_future, activation='tanh'),
    ]
    forecast = window
    for layer in stack:
        forecast = layer(forecast)

    model = keras.Model(inputs=[window], outputs=forecast)
    model.compile(loss='mse', optimizer='adam')
    model.summary()
    return model
def pLSTM(input, width, bn=False, cudnn=tf_cuda_available(), **kwargs):
    """Apply one sequence-returning LSTM layer to ``input``.

    Args:
        input: tensor fed to the layer.
        width: number of LSTM units.
        bn: if true, only prints a warning; no batch normalisation is added.
        cudnn: use ``CuDNNLSTM`` when true, otherwise a plain ``LSTM``.  The
            default is computed once, when the function is defined.
        **kwargs: forwarded to the layer constructor.

    Returns:
        The layer's output tensor (full sequence).
    """
    if bn:
        # TODO Test batch normalisation: Does not always work
        print('WARNING: Batch normalisation can be unstable with LSTM layers')

    if not cudnn:
        # tanh / sigmoid activations are set explicitly for compatibility
        # with CuDNNLSTM.
        fallback = kl.LSTM(width, activation='tanh',
                           recurrent_activation='sigmoid',
                           return_sequences=True, **kwargs)
        return fallback(input)

    return kl.CuDNNLSTM(width, return_sequences=True, **kwargs)(input)
def PretrainedLSTM(save_path, input_layer=None, return_sequences=False, load_weights=True):
    """Rebuild the pretrained embedding + LSTM model stored in ``save_path``.

    The folder must contain ``config.json``, ``weights.pkl`` and
    ``vocab.txt``.

    :params:
        - save_path : Folder where pretrained files have been saved
        - input_layer (optional): Should be keras Input layer with tf.string
          input; a new one of shape ``(None,)`` is created when omitted
        - return_sequences: forwarded to every LSTM layer
        - load_weights: when false, layers keep their fresh initialisation
    :output:
        - keras model
    :raises:
        - AssertionError if ``save_path`` does not exist
        - ValueError if the stored config has ``num_layers < 1``

    NOTE: you have to call tf.tables_initializer().run() somewhere before
    fitting the data.
    """
    assert os.path.exists(save_path), "{} doesn't exist".format(save_path)

    # Context managers so the file handles are closed deterministically.
    with open(os.path.join(save_path, "config.json")) as config_file:
        config = json.load(config_file)
    # SECURITY: pickle executes arbitrary code on load; only point
    # save_path at trusted checkpoints.
    with open(os.path.join(save_path, "weights.pkl"), "rb") as weights_file:
        weights = pickle.load(weights_file)
    vocab_file = os.path.join(save_path, "vocab.txt")

    # config
    max_vocab_size = config["max_vocab_size"]
    embed_size = config["embed_size"]
    hidden_size = config["hidden_size"]
    num_layers = config["num_layers"]
    if num_layers < 1:
        # Without at least one layer there is no output tensor to return.
        raise ValueError(
            "config.json must specify num_layers >= 1, got {}".format(num_layers))

    # ``is None`` rather than ``== None``: a tensor-like input_layer may
    # overload equality.
    if input_layer is None:
        input_layer = layers.Input(shape=(None, ), dtype="string")

    lookup_vocab = layers.Lambda(lambda x: lookup_layer(x, vocab_file))
    input_layer_idx = lookup_vocab(input_layer)

    embedding = layers.Embedding(
        max_vocab_size, embed_size,
        weights=weights["embedding"] if load_weights else None)
    embeded_input = embedding(input_layer_idx)
    embeded_input = layers.Dropout(0.3)(embeded_input)

    # NOTE(review): rnn_input is never advanced, so every LSTM layer reads
    # the embedded input and only the last layer's output reaches the model.
    # Kept as-is because the stored weight shapes may depend on this wiring;
    # confirm against the graph that produced weights.pkl before stacking.
    rnn_input = embeded_input
    for i in range(num_layers):
        rnn_output = layers.CuDNNLSTM(
            units=hidden_size,
            return_sequences=return_sequences,
            weights=weights["lstm"][i] if load_weights else None)(rnn_input)

    model = Model(inputs=[input_layer], outputs=[rnn_output])
    return model
def keras_model_fn(model_config, vocab_size, embedding_size, embeddings):
    """GPU version of Stacked Bi-LSTM and Bi-GRU with two embedding tables.

    Args:
        model_config: dict with keys 'model_name', 'num_class', 'lstm_hs',
            'gru_hs' and 'learning_rate'.
        vocab_size: vocabulary size of both Embedding layers.
        embedding_size: output width of each Embedding layer.
        embeddings: accepted for interface compatibility; not used here
            (both Embedding layers are trained from their default
            initialisation).

    Returns:
        The compiled model (categorical cross-entropy, Adam with gradient
        norm clipping, categorical accuracy and top-3 accuracy metrics).
    """
    # hyperparams
    model_name = model_config['model_name']
    num_class = model_config['num_class']
    lstm_hs = model_config['lstm_hs']
    gru_hs = model_config['gru_hs']
    learning_rate = model_config['learning_rate']

    # build model
    inputs = ks.Input(shape=(None, ), dtype='int32', name='inputs')
    embedded_first = layers.Embedding(vocab_size, embedding_size,
                                      trainable=True, mask_zero=False)(inputs)
    embedded_second = layers.Embedding(vocab_size, embedding_size,
                                       trainable=True, mask_zero=False)(inputs)
    embedded = layers.concatenate([embedded_first, embedded_second])
    embedded = layers.SpatialDropout1D(0.5)(embedded)

    bi_lstm = layers.Bidirectional(
        layers.CuDNNLSTM(lstm_hs, return_sequences=True))
    sequence = bi_lstm(embedded)

    bi_gru = layers.Bidirectional(
        layers.CuDNNGRU(gru_hs, return_sequences=True, return_state=True))
    # NOTE(review): with return_state=True a Bidirectional GRU presumably
    # yields (sequence, forward state, backward state); only the first
    # state is used below - confirm that dropping the second is intended.
    sequence, first_state, _second_state = bi_gru(sequence)

    pooled_max = layers.GlobalMaxPool1D()(sequence)
    pooled_avg = layers.GlobalAvgPool1D()(sequence)
    features = layers.concatenate([pooled_max, pooled_avg, first_state])
    features = layers.BatchNormalization()(features)
    outputs = layers.Dense(num_class, activation='softmax',
                           name='outputs')(features)

    model = ks.Model(inputs, outputs, name=model_name)

    # compile
    optimizer = ks.optimizers.Adam(lr=learning_rate, clipnorm=.25,
                                   beta_1=0.7, beta_2=0.99)
    metrics = ['categorical_accuracy', ks.metrics.TopKCategoricalAccuracy(k=3)]
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=metrics)
    return model
def load(input_shape, output_shape, cfg):
    """Build an LSTM regressor that outputs a (std, mu) pair per sample.

    Args:
        input_shape: shape passed to the Keras ``Input`` layer.
        output_shape: accepted for interface compatibility; not used here.
        cfg: mapping with 'nb_lstm_states' (LSTM units, converted with
            ``int``) and 'activation_function' (name resolved through
            ``get_activation_function_by_name`` and applied to the std head).

    Returns:
        The uncompiled model whose output concatenates ``[std, mu]`` on the
        last axis.
    """
    lstm_units = int(cfg['nb_lstm_states'])

    inputs = KL.Input(shape=input_shape)
    hidden = KL.CuDNNLSTM(units=lstm_units, unit_forget_bias=True)(inputs)

    # Two Dense -> ReLU -> Dropout stages.
    for width, drop_rate in ((512, 0.2), (256, 0.3)):
        hidden = KL.Dense(width)(hidden)
        hidden = KL.Activation('relu')(hidden)
        hidden = KL.Dropout(drop_rate)(hidden)

    # Two single-unit heads on the shared trunk.
    mu = KL.Dense(1)(hidden)
    std = KL.Dense(1)(hidden)

    std_activation = get_activation_function_by_name(cfg['activation_function'])
    std = KL.Activation(std_activation, name="exponential_activation")(std)

    output = KL.Concatenate(axis=-1)([std, mu])
    return KM.Model(inputs=[inputs], outputs=[output])
def __init__(self, dim_z, seq_len, dim_y=1, dim_u=0, rnn_units=32, no_use_cudnn_rnn=True, **kwargs):
    """Create the recurrent layer and the Dense heads of the model.

    Args:
        dim_z: state dimension; sizes the A, C, Q, mu and sigma heads and
            the identity matrix ``_I``.
        seq_len: stored as ``self.seq_len``.
        dim_y: observation dimension; sizes the R head.
        dim_u: accepted for interface compatibility; not used here.
        rnn_units: unit count of the recurrent layer.
        no_use_cudnn_rnn: when true a plain ``LSTM`` is used, otherwise
            ``CuDNNLSTM``.
        **kwargs: only ``state`` is read (default ``None``); nothing is
            forwarded to the parent constructor.
    """
    super(DeepState, self).__init__()
    self.seq_len = seq_len
    self.dim_z = dim_z
    self.dim_y = dim_y

    # Create model.  The conditional expression looks up only the selected
    # class, so CuDNNLSTM need not exist when it is not requested.
    rnn_cls = layers.LSTM if no_use_cudnn_rnn else layers.CuDNNLSTM
    self.rnn = rnn_cls(rnn_units, return_sequences=True)

    state_sq = dim_z * dim_z
    self.A = layers.Dense(state_sq)
    self.C = layers.Dense(dim_z)
    self.Q = layers.Dense(state_sq)
    self.R = layers.Dense(dim_y * dim_y)
    self.mu = layers.Dense(dim_z)
    self.sigma = layers.Dense(state_sq)

    self._alpha_sq = tf.constant(1., dtype=tf.float32)  # fading memory control
    self.M = 0  # process-measurement cross correlation

    # identity matrix
    self._I = tf.eye(dim_z, name='I')

    self.state = kwargs.pop('state', None)
    self.log_likelihood = None
def language_model_graph(input_tokens, output_tokens, initial_state, num_layers, max_vocab_size, vocab_freqs, batch_size, embed_size, hidden_size, dropout, optimizer, num_candidate_samples, maxlen, clip, type_="rnn"):
    """
    Create the language model TensorFlow graph.

    It takes placeholders for input tokens, output tokens (target) and the
    initial state of the LSTM layers. The language model graph has an
    Embedding layer followed by LSTM layers. The training loss is computed
    with TensorFlow's sampled softmax; the output projection reuses the
    transposed embedding matrix.

    :params:
        - input_tokens: Placeholder for input tokens [shape:(batch_size, None)]
        - output_tokens: Placeholder for output tokens (used as target) [shape:(batch_size, None)]
        - initial_state: Initial states placeholder for feeding state in LSTM Layers
          [shape:(num_layers, batch_size, hidden_size)]; indexed as
          initial_state[0] and initial_state[1] for the two state tensors
        - num_layers: Number of LSTM Layers
        - max_vocab_size: Maximum Vocabulary size
        - vocab_freqs: Frequency of words
        - batch_size: Batch Size (should not be none)
        - embed_size: Embedding Dimensions
        - hidden_size: Hidden size of LSTM layers
        - dropout: Dropout rate, applied after the Embedding as well as after each LSTM layer
        - optimizer: "adam", "sgd", or anything else for Momentum (0.9)
        - num_candidate_samples: Candidate Samples to consider for Sampled softmax;
          -1 to calculate complete softmax
        - maxlen: Sequence length of examples (bptt); scales the loss before the gradient
        - clip: clip gradients by global norm `clip`
        - type_: "rnn" for the LSTM stack, "gcnn" for `_gcnn_block`
    :returns:
        - train_op: Training Op Tensorflow
        - training_flag: Var for training flag
        - sampled_loss: Sampled Loss Variable
        - loss: Complete Loss Variable
        - final_state: Output State of LSTMs
        - weights: Dictionary containing weights of Embedding and LSTM layers
        - learning_rate: Learning Rate Variable
    """
    bptt = tf.shape(input_tokens)[1]  # not used below
    training_flag = tf.Variable(True)
    learning_rate = tf.Variable(20.0)
    embedding_layer = layers.Embedding(max_vocab_size, embed_size)
    rnn_layers = []
    for i in range(num_layers):
        rnn_layers.append(layers.CuDNNLSTM(units=hidden_size, return_sequences=True, return_state=True))
    embedded_input = embedding_layer(input_tokens)
    embedded_input = tf.layers.dropout(
        embedded_input ,
        rate=dropout,
        training=training_flag,
    )
    states = []  # not used below
    rnn_input = embedded_input
    # NOTE(review): Keras LSTM layers order their states as [h, c]; the
    # "c"/"h" names here may be swapped relative to that convention. The
    # code is self-consistent (same order in and out) - confirm with callers.
    input_state_cs = initial_state[0]
    input_state_hs = initial_state[1]
    if type_ == "rnn":
        final_state_cs = []
        final_state_hs = []
        for i in range(num_layers):
            state_c, state_h = input_state_cs[i], input_state_hs[i]
            # NOTE(review): rnn_input is never reassigned, so every layer
            # reads the embedded input and only the last layer's rnn_output
            # survives the loop - confirm whether stacking was intended.
            rnn_outputs = rnn_layers[i](rnn_input, initial_state=(state_c, state_h))
            rnn_output, final_state_c, final_state_h = rnn_outputs
            # noise_shape has a 1 on the time axis, so the same dropout mask
            # is shared by every time step.
            rnn_output = tf.layers.dropout(
                rnn_output ,
                rate=dropout,
                training=training_flag,
                noise_shape=[batch_size, 1, hidden_size]
            )
            final_state_cs.append(final_state_c)
            final_state_hs.append(final_state_h)
        final_state_c = tf.stack(final_state_cs, 0)
        final_state_h = tf.stack(final_state_hs, 0)
        final_state = (final_state_c, final_state_h)
    elif type_ == "gcnn":
        rnn_output = _gcnn_block(rnn_input)
        # The gcnn path has no recurrent state; the inputs are passed through.
        final_state = (input_state_cs, input_state_hs)
    # NOTE(review): any other type_ leaves rnn_output / final_state unbound.
    # NOTE(review): the placement of this projection (after the if/elif,
    # applied to both paths) is an assumption - confirm it is not meant to
    # sit inside the "gcnn" branch.
    rnn_output = layers.Dense(hidden_size, activation='relu')(rnn_output)
    # rnn_output = tf.layers.dropout(
    #     rnn_output ,
    #     rate=dropout,
    #     training=training_flag,
    #     noise_shape=[batch_size, 1, embed_size]
    # )
    # Tie the output projection to the embedding matrix (transposed).
    weight = embedding_layer.weights[0]
    weight = tf.transpose(weight, [1, 0])
    # weight = None
    with tf.variable_scope("loss"):
        sampled_loss = _sampled_lm_loss(rnn_output, output_tokens,
                                        max_vocab_size,
                                        vocab_freqs=vocab_freqs,
                                        num_candidate_samples=num_candidate_samples,
                                        weight=weight)
    # Same scope with reuse=True; num_candidate_samples=-1 requests the
    # complete softmax.
    with tf.variable_scope("loss", reuse=True):
        loss = _sampled_lm_loss(rnn_output, output_tokens,
                                max_vocab_size,
                                vocab_freqs=vocab_freqs,
                                num_candidate_samples=-1,
                                weight=weight)
    # softmax = AdaptiveSoftmax(hidden_size, cutoff=[2800, 20000, 76000])
    # loss, _ = sampled_loss, _ = softmax.loss(rnn_output, output_tokens)
    with tf.variable_scope("optimizer"):
        # sampled_loss = loss
        t_vars = tf.trainable_variables()
        # Gradients of the loss scaled by maxlen, clipped by global norm.
        grads, _ = tf.clip_by_global_norm(tf.gradients(sampled_loss*maxlen, t_vars), clip)
        if optimizer == "adam":
            train_op = tf.compat.v1.train.AdamOptimizer(learning_rate).apply_gradients(zip(grads, t_vars))
        elif optimizer == "sgd":
            train_op = tf.train.GradientDescentOptimizer(learning_rate).apply_gradients(zip(grads, t_vars))
        else:
            train_op = tf.compat.v1.train.MomentumOptimizer(learning_rate, momentum=0.9).apply_gradients(zip(grads, t_vars))

    # Extract Weights
    weights = {}
    weights["embedding"] = embedding_layer.weights
    weights["lstm"] = [rnn_layer.weights for rnn_layer in rnn_layers]
    return train_op, training_flag, sampled_loss, loss, final_state, weights,\
        learning_rate
def _one_hot_labels(targets, labels):
    """Return a (len(targets), len(labels)) 0/1 matrix marking each target's label."""
    encoded = np.zeros((targets.shape[0], len(labels)))
    for row, target in enumerate(targets):
        encoded[row, labels.index(target)] = 1
    return encoded


def _build_bidirectional_lstm(vocab_size, embedding_dim, max_length, n_neuron,
                              n_rec_layer, num_label, lrate):
    """Build and compile Embedding -> n_rec_layer x (BiLSTM, Dropout) -> softmax."""
    model = tf.keras.Sequential()
    # Embedding is the first layer; input_length is how many tokens of each
    # padded sequence are embedded.
    model.add(
        layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
    for depth in range(n_rec_layer):
        # Every recurrent layer except the last must return the full
        # sequence so the next recurrent layer can consume it.
        is_last = depth == n_rec_layer - 1
        model.add(
            layers.Bidirectional(
                layers.CuDNNLSTM(n_neuron, return_sequences=not is_last)))
        model.add(layers.Dropout(0.5))
    model.add(layers.Dense(num_label, activation='softmax'))
    model.compile(optimizer=tf.keras.optimizers.Adam(lrate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


def main(embedding_dim, n_neuron, lrate, remove_stop_words, n_rec_layer):
    """Train and evaluate a bidirectional LSTM poem classifier.

    Loads ``poems_reordered_further_cleaned.csv`` (label in column 0, text
    in column 1), cleans and hash-encodes the text, trains with early
    stopping, then reports test accuracy, the confusion matrix,
    recall/precision and accuracy/loss plots.

    Args:
        embedding_dim: output width of the Embedding layer.
        n_neuron: units of each CuDNNLSTM.
        lrate: Adam learning rate.
        remove_stop_words: "F" keeps stop words; any other value removes
            the NLTK English stop words.
        n_rec_layer: number of stacked bidirectional LSTM layers (1, 2 or 3).

    Returns:
        Test-set accuracy as a float.

    Raises:
        ValueError: if ``n_rec_layer`` is not 1, 2 or 3 (the original code
            failed late with an unbound ``model``).
    """
    # Fail fast, before any download or preprocessing work.
    if n_rec_layer not in (1, 2, 3):
        raise ValueError(
            "n_rec_layer must be 1, 2 or 3, got %r" % (n_rec_layer,))
    n_rec_layer = int(n_rec_layer)

    nltk.download('stopwords')
    nltk.download('punkt')
    start = time.time()

    seed = 30
    random.seed(seed)
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)

    train_percent = 0.7
    val_percent = 0.15
    stemming = False

    # "\\s" is the same two-character token the original spelled as "\s",
    # without the invalid-escape warning.  A set gives O(1) membership tests.
    words_to_remove = {"\n", "\\s"}
    if remove_stop_words != "F":
        words_to_remove.update(stopwords.words("english"))

    path = "poems_reordered_further_cleaned.csv"
    df = pd.read_csv(path)
    # shuffle dataset
    df = df.sample(frac=1, random_state=seed).reset_index(drop=True)

    # Clean every poem in place and collect the vocabulary.  Using a set
    # avoids the quadratic cost of repeatedly concatenating lists.
    vocabulary = set()
    for i in range(df.shape[0]):
        words = StemmingUtil.parseTokens(df.iloc[i, 1])
        words = [w for w in words if w not in words_to_remove]
        if stemming:
            words = StemmingUtil.createStems(words)
        df.iloc[i, 1] = " ".join(words)
        vocabulary.update(words)
    vocab_size = len(vocabulary)
    max_length = 1000

    train_set, val_set, test_set = split_data(df, train_percent, val_percent,
                                              seed)
    print("train_set shape:", train_set.shape)
    print("test_set shape:", test_set.shape)
    print("val_set shape:", val_set.shape)

    train_set = train_set.to_numpy()
    test_set = test_set.to_numpy()
    val_set = val_set.to_numpy()
    dataset = df.to_numpy()

    # Labels come from the full dataset so every split shares one indexing.
    labels = list(np.unique(dataset[:, 0]))
    num_label = len(labels)

    dummy_Y_train = _one_hot_labels(train_set[:, 0], labels)
    dummy_Y_test = _one_hot_labels(test_set[:, 0], labels)
    dummy_Y_val = _one_hot_labels(val_set[:, 0], labels)

    # Hash-encode the words, then pad/truncate to max_length.
    def encode(split):
        hashed = [one_hot(line[0], vocab_size) for line in split[:, 1:]]
        return pad_sequences(hashed, maxlen=max_length, padding='post')

    X_train = encode(train_set)
    X_test = encode(test_set)
    X_val = encode(val_set)

    model = _build_bidirectional_lstm(vocab_size, embedding_dim, max_length,
                                      n_neuron, n_rec_layer, num_label, lrate)

    callbacks = [
        EarlyStopping(monitor='val_loss', patience=25),
        ModelCheckpoint(filepath='best_bidirect_LSTM_keras.h5',
                        monitor='val_loss',
                        save_best_only=True)
    ]
    history = model.fit(X_train,
                        dummy_Y_train,
                        epochs=500,
                        callbacks=callbacks,
                        batch_size=128,
                        validation_data=(X_val, dummy_Y_val))

    # Run the test set: rows are actual classes, columns are predictions.
    confusion_matrix = [[0] * num_label for _ in range(num_label)]
    for instance in range(test_set.shape[0]):
        inputs = X_test[instance, :]
        target = dummy_Y_test[instance, :]
        prediction = model.predict_classes(inputs.reshape(1, max_length),
                                           batch_size=None,
                                           verbose=0)
        iactual = np.where(target == 1)[0][0]
        ipredict = prediction[0]
        confusion_matrix[iactual][ipredict] += 1

    n_accurate_test = sum(
        confusion_matrix[idx][idx] for idx in range(len(confusion_matrix)))
    test_accuracy = n_accurate_test / test_set.shape[0]

    print("setting:", "embed =", embedding_dim, "n_neuron =", n_neuron,
          "lrate =", lrate, "remove =", remove_stop_words, "n_rec_layer =",
          n_rec_layer)
    print("test accuracy", test_accuracy)

    setting = "embed_" + str(embedding_dim) + "_neuron_" + str(
        n_neuron) + "_lrate_" + str(lrate) + "_remove_" + str(
            remove_stop_words) + "_layer_" + str(n_rec_layer)
    print_matrix_CI(path, labels, confusion_matrix, test_accuracy, test_set,
                    setting)
    recall(confusion_matrix, labels)
    precision(confusion_matrix, labels)

    # Plot training and validation accuracy over time.  Keras versions
    # differ on the history key ('acc' vs 'accuracy'); prefer the original.
    acc_key = 'acc' if 'acc' in history.history else 'accuracy'
    plt.plot(history.history[acc_key])
    plt.plot(history.history['val_' + acc_key])
    plt.title('AccuracyPlot_' + setting)
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()

    # Plot training and validation loss over time.
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title("ErrorPlot" + setting)
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()

    end = time.time()
    print("run time", (end - start) / 60, "min")
    return test_accuracy
def encoder_smate_rdp(in_shape, pool_step):
    """Build the encoder variant whose CNN branch works on three channel groups.

    The model has two branches that are concatenated on the feature axis:

    * a temporal branch: a stack of recurrent layers (GRU or LSTM, the
      CuDNN implementation when a GPU is available) applied to the full input;
    * a spatial branch: three slices of the input channels, each passed
      through its own first Conv1D stage and then through a second and third
      Conv1D stage whose layers are shared by the three groups.

    Both branches are average-pooled along the time axis with ``pool_step``
    before being merged and projected by two Dense(128) layers.

    Reads ``module_name``, ``hidden_dim`` and ``num_layers`` from the
    enclosing (module) scope.

    # Parameters
        in_shape (sequence) -- per-sample input shape ``(L, D)``
        pool_step (int) -- pool size of the temporal average pooling

    # Returns
        model -- an uncompiled Keras model mapping the input to the encoding

    # Raises
        ValueError -- if ``module_name`` is neither ``'gru'`` nor ``'lstm'``
    """
    input_ = Input(shape=in_shape)  # input: (samples, L, input_dims)
    D = in_shape[1]

    # temporal axis
    # Pick the recurrent layer class once instead of repeating the stack for
    # every (module, device) combination.
    if module_name == 'gru':
        rnn_layer = ll.CuDNNGRU if tf.test.is_gpu_available() else ll.GRU
    elif module_name == 'lstm':
        rnn_layer = ll.CuDNNLSTM if tf.test.is_gpu_available() else ll.LSTM
    else:
        # Previously this fell through and failed much later with an
        # unbound ``out_t``; fail early with an explicit message instead.
        raise ValueError(
            "Unsupported module_name %r (expected 'gru' or 'lstm')"
            % (module_name,))

    out_t = input_
    # One layer is always built, plus ``num_layers - 1`` additional ones.
    for _ in range(max(num_layers, 1)):
        # output: (batch_size, timesteps, hidden_dim)
        out_t = rnn_layer(hidden_dim, return_sequences=True)(out_t)

    # 1D-CNN
    group_size = int(D * 1.5 / 3)
    in_s = []
    for i in range(3):
        # NOTE(review): the randomly sampled channel indices are NOT used to
        # build the groups -- each group is the contiguous slice
        # [i, i + group_size). The draw is kept only so that the global
        # random state advances exactly as before. Confirm whether a random
        # gather of channels was intended here.
        rd.sample(range(0, D), group_size)
        in_s.append(input_[:, :, i:i + group_size])

    # First stage: every group gets its own Conv1D / BatchNorm / ReLU.
    out_s_1 = []
    for group in in_s:
        branch = Conv1D(128, 8, padding='same',
                        kernel_initializer='he_uniform')(group)
        branch = BatchNormalization()(branch)
        branch = Activation('relu')(branch)
        out_s_1.append(branch)

    # Second and third stages: layers are instantiated once and shared by
    # the three groups.
    conv1D_2 = Conv1D(256, 5, padding='same', kernel_initializer='he_uniform')
    conv1D_3 = Conv1D(128, 3, padding='same', kernel_initializer='he_uniform')
    batch_norm1 = BatchNormalization()
    batch_norm2 = BatchNormalization()
    activ_relu1 = Activation('relu')
    activ_relu2 = Activation('relu')

    s_outs = []
    for s in out_s_1:
        s_out = conv1D_2(s)
        s_out = batch_norm1(s_out)
        s_out = activ_relu1(s_out)
        s_out = conv1D_3(s_out)
        s_out = batch_norm2(s_out)
        s_out = activ_relu2(s_out)
        s_out = AveragePooling1D(pool_size=pool_step, strides=None,
                                 padding='same')(s_out)  # L * D
        s_outs.append(s_out)
    out_s = ll.Concatenate(axis=-1)(s_outs)  # L * 3D

    # reduce latent space dimension (t & s axis)
    out_t = AveragePooling1D(pool_size=pool_step, strides=None,
                             padding='same')(out_t)

    out = ll.Concatenate(axis=-1)([out_t, out_s])  # 1 * 4D
    out = Dense(128)(out)
    out = BatchNormalization()(out)
    out = ll.LeakyReLU()(out)
    out = Dense(128)(out)
    out = BatchNormalization()(out)  # (samples, L', 128)

    model = Model(inputs=input_, outputs=out)
    return model
def encoder_smate(in_shape, pool_step, d_prime, kernels=(8, 5, 3)):
    """Build the two-branch (temporal + spatial) encoder model.

    * Temporal branch: a stack of recurrent layers (GRU or LSTM, the CuDNN
      implementation when a GPU is available) applied to the input.
    * Spatial branch: three stages, each a ``spatial_dynamic_block``
      followed by Conv1D / BatchNorm / ReLU.

    Both branches are average-pooled along the time axis with ``pool_step``,
    concatenated on the feature axis and projected by two Dense(128) layers.

    Reads ``module_name``, ``hidden_dim`` and ``num_layers`` from the
    enclosing (module) scope.

    # Parameters
        in_shape (sequence) -- per-sample input shape, time axis first
        pool_step (int) -- pool size of the temporal average pooling
        d_prime -- third argument of the first ``spatial_dynamic_block``
        kernels (sequence of 3 ints) -- kernel sizes of the three stages

    # Returns
        model -- an uncompiled Keras model mapping the input to the encoding

    # Raises
        ValueError -- if ``module_name`` is neither ``'gru'`` nor ``'lstm'``
    """
    input_ = Input(shape=in_shape)  # input: (samples, L, input_dims)

    # temporal axis
    # Pick the recurrent layer class once instead of repeating the stack for
    # every (module, device) combination.
    if module_name == 'gru':
        rnn_layer = ll.CuDNNGRU if tf.test.is_gpu_available() else ll.GRU
    elif module_name == 'lstm':
        rnn_layer = ll.CuDNNLSTM if tf.test.is_gpu_available() else ll.LSTM
    else:
        # Previously this fell through and failed much later with an
        # unbound ``out_t``; fail early with an explicit message instead.
        raise ValueError(
            "Unsupported module_name %r (expected 'gru' or 'lstm')"
            % (module_name,))

    out_t = input_
    # One layer is always built, plus ``num_layers - 1`` additional ones.
    for _ in range(max(num_layers, 1)):
        # output: (batch_size, timesteps, hidden_dim)
        out_t = rnn_layer(hidden_dim, return_sequences=True)(out_t)

    # 1D-CNN
    out_s = spatial_dynamic_block(input_, kernels[0], d_prime)
    # Fix: this convolution used to read ``input_``, which silently threw
    # away the block above (and made ``d_prime`` a no-op). It now consumes
    # ``out_s`` like the two later stages do.
    out_s = Conv1D(128, kernels[0], padding='same',
                   kernel_initializer='he_uniform')(out_s)
    out_s = BatchNormalization()(out_s)
    out_s = Activation('relu')(out_s)

    out_s = spatial_dynamic_block(out_s, kernels[1], 8)
    out_s = Conv1D(256, kernels[1], padding='same',
                   kernel_initializer='he_uniform')(out_s)
    out_s = BatchNormalization()(out_s)
    out_s = Activation('relu')(out_s)

    out_s = spatial_dynamic_block(out_s, kernels[2], 16)
    out_s = Conv1D(128, kernels[2], padding='same',
                   kernel_initializer='he_uniform')(out_s)
    out_s = BatchNormalization()(out_s)
    out_s = Activation('relu')(out_s)  # L * D

    # reduce latent space dimension (t & s axis)
    out_t = AveragePooling1D(pool_size=pool_step, strides=None,
                             padding='same')(out_t)
    out_s = AveragePooling1D(pool_size=pool_step, strides=None,
                             padding='same')(out_s)  # L' * D

    out = ll.Concatenate(axis=-1)([out_t, out_s])  # (samples, L', 128*4 + 128)
    out = Dense(128)(out)
    #out = Dense(128)(out_s)
    out = BatchNormalization()(out)
    out = ll.LeakyReLU()(out)
    out = Dense(128)(out)
    out = BatchNormalization()(out)  # (samples, L', 128)

    model = Model(inputs=input_, outputs=out)
    return model
def build_model(
        input_shape,
        num_classes,
        activation_function,
        dropout_rate,
        use_batchnorm,
        l2_regularization,
        cnn_layers,
        lstm_units,
        combine_mode,
        fcn_layers):
    '''
    Builds a CNN-RNN-FCN classification model

    # Parameters
        input_shape (tuple or list) -- expected input shape. A 3-element
            shape is treated as a single image and gets a leading sequence
            axis of length 1; shapes with fewer than 3 elements are rejected
        num_classes (int) -- number of classes
        activation_function (str) -- non linearity to apply between layers
        dropout_rate (float) -- must be between 0 and 1
        use_batchnorm (bool) -- if True, batchnorm layers are added between
            convolutions
        l2_regularization (float)
        cnn_layers (list) -- list specifying CNN layers.
            Each element must be of the form
            {filters: 32, kernel_size: 3, use_maxpool: true}
        lstm_units (int) -- number of hidden units of the lstm
            if lstm_units is None or 0 the LSTM layer is skipped
        combine_mode (str) -- specifies how the encoding of each image in
            the sequence is to be combined. Supports:
                concat : outputs are stacked on top of one another
                last : only last hidden state is returned
                attention : an attention mechanism is used to combine the
                    hidden states
        fcn_layers (list) -- list specifying Dense layers
            example element: {units: 1024}

    # Returns
        model -- an uncompiled Keras model

    # Raises
        ValueError -- if input_shape has fewer than 3 elements or
            combine_mode is not one of the supported values
    '''
    # Regularizer
    l2_reg = l2(l2_regularization)

    # Build a model with the functional API
    inputs = ll.Input(input_shape)
    x = inputs

    # Reshape entry if needed
    if len(input_shape) == 3:
        # list(...) so that tuple shapes (the documented type) work too;
        # ``[1] + <tuple>`` raises TypeError.
        x = ll.Reshape([1] + list(input_shape))(x)
    elif len(input_shape) < 3:
        raise ValueError(f"Input shape {input_shape} not supported")

    # CNN feature extractor
    for i, cnn_layer in enumerate(cnn_layers):
        # Extract layer params
        filters = cnn_layer['filters']
        kernel_size = cnn_layer['kernel_size']
        use_maxpool = cnn_layer['use_maxpool']

        # build cnn_layer
        x = ll.TimeDistributed(ll.Conv2D(
            filters,
            kernel_size,
            strides=(1, 1),
            padding='same',
            data_format=None,
            dilation_rate=(1, 1),
            activation=activation_function,
            use_bias=True,
            kernel_initializer='glorot_uniform',
            bias_initializer='zeros',
            kernel_regularizer=l2_reg,
            bias_regularizer=l2_reg,
            activity_regularizer=None,
            kernel_constraint=None,
            bias_constraint=None
        ), name=f'conv2D_{i}')(x)

        # add maxpool if needed
        if use_maxpool:
            x = ll.TimeDistributed(ll.MaxPooling2D(
                pool_size=(2, 2),
                strides=None,
                padding='valid',
                data_format=None
            ), name=f'maxpool_{i}')(x)

        if use_batchnorm:
            x = ll.TimeDistributed(ll.BatchNormalization(
                axis=-1,
                momentum=0.99,
                epsilon=0.001,
                center=True,
                scale=True,
                beta_initializer='zeros',
                gamma_initializer='ones',
                moving_mean_initializer='zeros',
                moving_variance_initializer='ones',
                beta_regularizer=None,
                gamma_regularizer=None,
                beta_constraint=None,
                gamma_constraint=None
            ), name=f'batchnorm_{i}')(x)

    x = ll.TimeDistributed(ll.Flatten(), name='flatten')(x)
    x = ll.TimeDistributed(ll.Dropout(dropout_rate), name='dropout')(x)

    # LSTM feature combinator
    if lstm_units is not None and lstm_units > 0:
        x = ll.CuDNNLSTM(
            lstm_units,
            kernel_initializer='glorot_uniform',
            recurrent_initializer='orthogonal',
            bias_initializer='zeros',
            unit_forget_bias=True,
            kernel_regularizer=l2_reg,
            recurrent_regularizer=l2_reg,
            bias_regularizer=l2_reg,
            activity_regularizer=None,
            kernel_constraint=None,
            recurrent_constraint=None,
            bias_constraint=None,
            # 'last' only needs the final state; the other modes combine
            # the per-step outputs themselves.
            return_sequences=(combine_mode != 'last'),
            return_state=False,
            go_backwards=False,
            stateful=False
        )(x)

    # Combine output of each sequence
    if combine_mode == 'concat':
        x = ll.Flatten()(x)
    elif combine_mode == 'last':
        if lstm_units is None or lstm_units == 0:  # if no LSTM was used
            # we extract the last element
            x = ll.Lambda(lambda seq: seq[:, -1, ...])(x)
    elif combine_mode == 'attention':
        # One score per time step, softmax-normalised over the sequence,
        # then used to weight-sum the per-step features.
        attention = ll.TimeDistributed(ll.Dense(1), name='attention_score')(x)
        attention = ll.Flatten()(attention)
        attention = ll.Softmax()(attention)
        x = ll.dot([x, attention], axes=[-2, -1])
    else:
        raise ValueError(f"Combine mode {combine_mode} not supported")

    # FCN classifier
    for fcn_layer in fcn_layers:
        # extract layer params
        units = fcn_layer['units']

        # build layer
        x = ll.Dense(
            units,
            activation=activation_function,
            use_bias=True,
            kernel_initializer='glorot_uniform',
            bias_initializer='zeros',
            kernel_regularizer=l2_reg,
            bias_regularizer=l2_reg,
            activity_regularizer=None,
            kernel_constraint=None,
            bias_constraint=None
        )(x)
        x = ll.Dropout(dropout_rate)(x)

    prediction = ll.Dense(num_classes, activation='softmax')(x)

    # Build model
    model = Model(inputs=inputs, outputs=prediction)
    return model
def __init__(self, config):
    """Create the encoder's sub-layers from ``config``.

    ``config.hidden_dim`` sets the number of units of the CuDNN LSTM and
    ``config.dropout`` is passed to the Dropout layer. The config object
    itself is kept on the instance as ``self.config``.
    """
    super(LSTMEncoder, self).__init__()
    self.config = config

    # Recurrent layer
    hidden_units = config.hidden_dim
    self.rnn = layers.CuDNNLSTM(units=hidden_units)

    # Regularisation / normalisation layers
    drop_arg = config.dropout
    self.dropout = layers.Dropout(drop_arg)
    self.batch_norm = layers.BatchNormalization()
ne_labels = mat['testIsNamedEntity'][indices] if single_track: x_features = x_features[:, :, :5] print(str(num_hidden) + ' hidden units') from tensorflow.keras import layers from tensorflow.keras import regularizers if train_mode: model = tensorflow.keras.Sequential() model.add( layers.Bidirectional( layers.CuDNNLSTM(16, kernel_regularizer=regularizers.l2(beta), recurrent_regularizer=regularizers.l2(beta), bias_regularizer=regularizers.l2(beta), return_sequences=True), input_shape=(timesteps, num_input))) #+ num samples model.add( layers.Bidirectional( layers.CuDNNLSTM(8, kernel_regularizer=regularizers.l2(beta), recurrent_regularizer=regularizers.l2(beta), bias_regularizer=regularizers.l2(beta), return_sequences=True))) model.add( layers.Bidirectional( layers.CuDNNLSTM(8, kernel_regularizer=regularizers.l2(beta), recurrent_regularizer=regularizers.l2(beta),
def retain(ARGS):
    """Assemble the attention model described by ``ARGS`` and return it.

    The network embeds the codes of every visit, optionally appends numeric
    and time inputs, computes a visit-level attention (alpha) and a
    feature-level attention (beta) with bidirectional LSTMs, and predicts a
    sigmoid output from the attention-weighted sum of the embeddings.

    The returned Keras model is not compiled. Its inputs are, in order:
    the codes input, the numeric input (only if ``ARGS.numeric_size > 0``)
    and the time input (only if ``ARGS.use_time``).
    """
    # Width of a single context vector (embedding + numeric features)
    reshape_size = ARGS.emb_size + ARGS.numeric_size

    # Constraints and beta activation depend on whether negative weights
    # are permitted.
    if not ARGS.allow_negative:
        embeddings_constraint = FreezePadding_Non_Negative()
        beta_activation = 'sigmoid'
        output_constraint = non_neg()
    else:
        embeddings_constraint = FreezePadding()
        beta_activation = 'tanh'
        output_constraint = None

    def reshape(data):
        """Reshape the context vectors to 3D vector"""
        batch = K.shape(data)[0]
        return K.reshape(x=data, shape=(batch, 1, reshape_size))

    def bidirectional_lstm(layer_name):
        """Return a named bidirectional LSTM; CuDNN-backed if `glist` is truthy."""
        if glist:
            recurrent = L.CuDNNLSTM(ARGS.recurrent_size,
                                    return_sequences=True)
        else:
            recurrent = L.LSTM(ARGS.recurrent_size,
                               return_sequences=True,
                               implementation=2)
        return L.Bidirectional(recurrent, name=layer_name)

    # ---- Inputs -----------------------------------------------------------
    # Code input
    code_input = L.Input((None, None), name='codes_input')
    inputs_list = [code_input]

    # Embed every code, then sum the code embeddings of each visit
    embedding_layer = L.Embedding(
        ARGS.num_codes + 1,
        ARGS.emb_size,
        name='embedding'
        # BUG: embeddings_constraint not supported
        # https://github.com/tensorflow/tensorflow/issues/33755
        # ,embeddings_constraint=embeddings_constraint
    )
    per_code_embs = embedding_layer(code_input)
    visit_embs = L.Lambda(lambda x: K.sum(x, axis=2))(per_code_embs)

    # Optional numeric input, concatenated to the visit embeddings
    if ARGS.numeric_size > 0:
        numeric_input = L.Input((None, ARGS.numeric_size),
                                name='numeric_input')
        inputs_list.append(numeric_input)
        full_embs = L.concatenate([visit_embs, numeric_input], name='catInp')
    else:
        full_embs = visit_embs

    # Dropout on the inputs
    full_embs = L.Dropout(ARGS.dropout_input)(full_embs)

    # Optional time input, only seen by the attention LSTMs
    if ARGS.use_time:
        time_input = L.Input((None, 1), name='time_input')
        inputs_list.append(time_input)
        time_embs = L.concatenate([full_embs, time_input], name='catInp2')
    else:
        time_embs = full_embs

    # ---- Attention layers -------------------------------------------------
    # This implementation uses Bidirectional LSTM instead of reverse order
    # (see https://github.com/mp2893/retain/issues/3 for more details)
    # If training on GPU and Tensorflow use CuDNNLSTM for much faster training
    alpha = bidirectional_lstm('alpha')
    beta = bidirectional_lstm('beta')

    alpha_dense = L.Dense(1, kernel_regularizer=l2(ARGS.l2))
    beta_dense = L.Dense(ARGS.emb_size + ARGS.numeric_size,
                         activation=beta_activation,
                         kernel_regularizer=l2(ARGS.l2))

    # alpha: attention over visits (softmax along the visit axis)
    visit_attention = alpha(time_embs)
    visit_attention = L.TimeDistributed(
        alpha_dense, name='alpha_dense_0')(visit_attention)
    visit_attention = L.Softmax(axis=1)(visit_attention)

    # beta: attention over the embedding dimensions
    feature_attention = beta(time_embs)
    feature_attention = L.TimeDistributed(
        beta_dense, name='beta_dense_0')(feature_attention)

    # ---- Context vector and prediction ------------------------------------
    # Weight the embeddings by both attentions and sum over the visits
    weighted = L.Multiply()([visit_attention, feature_attention, full_embs])
    context = L.Lambda(lambda x: K.sum(x, axis=1))(weighted)

    # Reshape to 3d vector for consistency between Many to Many and
    # Many to One implementations
    contexts = L.Lambda(reshape)(context)
    contexts = L.Dropout(ARGS.dropout_context)(contexts)

    output_layer = L.Dense(1,
                           activation='sigmoid',
                           name='dOut',
                           kernel_regularizer=l2(ARGS.l2),
                           kernel_constraint=output_constraint)

    # TimeDistributed is used for consistency
    # between Many to Many and Many to One implementations
    output = L.TimeDistributed(output_layer,
                               name='time_distributed_out')(contexts)

    # Define the model with appropriate inputs
    return Model(inputs=inputs_list, outputs=[output])
def call(self, inputs):
    """Draw one latent sample from ``(z_mean, z_log_var)``.

    ``inputs`` is unpacked into ``z_mean`` and ``z_log_var``. The result is
    ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon`` is drawn
    from ``random_normal`` with the same (batch, dim) shape as ``z_mean``.
    """
    z_mean, z_log_var = inputs
    # Dynamic shape, so the batch size does not need to be known statically.
    batch = tf.shape(z_mean)[0]
    dim = tf.shape(z_mean)[1]
    epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
    # exp(0.5 * log_var) turns the log-variance into a standard deviation.
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon


# Sequence length of the input, LSTM width and latent size.
original_dim = 256
intermediate_dim = 512
latent_dim = 8

# Define encoder model.
# Input is a sequence of original_dim steps with one feature per step.
original_inputs = tf.keras.Input(shape=(original_dim,1), name='encoder_input')
# NOTE(review): input_err is listed as a VAE input below but is not consumed
# by the encoder or decoder in this chunk -- presumably used by a loss
# defined elsewhere; confirm. Its length 256 is hard-coded and equals
# original_dim.
input_err = Input(shape=(256,1))
x = layers.CuDNNLSTM(intermediate_dim, return_sequences=False)(original_inputs)
z_mean = layers.Dense(latent_dim, name='z_mean')(x)
z_log_var = layers.Dense(latent_dim, name='z_log_var')(x)
z = Sampling()((z_mean, z_log_var))
encoder = tf.keras.Model(inputs=original_inputs, outputs=z, name='encoder')

# Define decoder model.
latent_inputs = tf.keras.Input(shape=(latent_dim,), name='z_sampling')
# Repeat the latent vector once per output time step before the LSTM.
x = layers.RepeatVector(original_dim)(latent_inputs)
x = layers.CuDNNLSTM(intermediate_dim, return_sequences=True)(x)
# One output value per time step.
outputs = layers.TimeDistributed(layers.Dense(1))(x)
decoder = tf.keras.Model(inputs=latent_inputs, outputs=outputs, name='decoder')

# Define VAE model.
# The decoder is applied to the encoder's sampled z, chaining both models.
outputs = decoder(z)
vae = tf.keras.Model(inputs=[original_inputs, input_err], outputs=outputs, name='vae')
#print X_test y_train = load_y(y_train_path) y_test = load_y(y_test_path) ### model = tf.keras.Sequential([ # relu activation layers.Dense(n_hidden, activation='relu', kernel_initializer='random_normal', bias_initializer='random_normal', batch_input_shape=(batch_size, n_steps, n_input) ), # cuDNN layers.CuDNNLSTM(n_hidden, return_sequences=True, unit_forget_bias=1.0), layers.CuDNNLSTM(n_hidden, unit_forget_bias=1.0), # layers.LSTM(n_hidden, return_sequences=True, unit_forget_bias=1.0), # layers.LSTM(n_hidden, unit_forget_bias=1.0), layers.Dense(n_classes, kernel_initializer='random_normal', bias_initializer='random_normal', kernel_regularizer=tf.keras.regularizers.l2(lambda_loss_amount), bias_regularizer=tf.keras.regularizers.l2(lambda_loss_amount), activation='softmax' ) ]) model.compile( optimizer=tf.keras.optimizers.Adam(lr=learning_rate, decay=decay_rate),