def build_model(img_width, img_height, characters):
    # Inputs to the model
    input_img = layers.Input(shape=(img_width, img_height, 1),
                             name="image", dtype="float32")
    labels = layers.Input(name="label", shape=(None,), dtype="float32")

    # First conv block
    x = layers.Conv2D(32, (3, 3), activation="relu",
                      kernel_initializer="he_normal",
                      padding="same", name="Conv1")(input_img)
    x = layers.MaxPooling2D((2, 2), name="pool1")(x)

    # Second conv block
    x = layers.Conv2D(64, (3, 3), activation="relu",
                      kernel_initializer="he_normal",
                      padding="same", name="Conv2")(x)
    x = layers.MaxPooling2D((2, 2), name="pool2")(x)

    # We have used two max pools with pool size and strides of 2, so the
    # downsampled feature maps are 4x smaller. The number of filters in the
    # last conv layer is 64. Reshape accordingly before passing the output
    # to the RNN part of the model.
    new_shape = ((img_width // 4), (img_height // 4) * 64)
    x = layers.Reshape(target_shape=new_shape, name="reshape")(x)
    x = layers.Dense(64, activation="relu", name="dense1")(x)
    x = layers.Dropout(0.2)(x)

    # RNNs
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x)
    x = layers.Bidirectional(layers.LSTM(64, return_sequences=True, dropout=0.25))(x)

    # Output layer
    x = layers.Dense(len(characters) + 1, activation="softmax", name="dense2")(x)

    # Add CTC layer for calculating CTC loss at each step
    output = CTCLayer(name="ctc_loss")(labels, x)

    # Define the model
    model = keras.models.Model(inputs=[input_img, labels],
                               outputs=output, name="ocr_model_v1")

    # Optimizer
    opt = keras.optimizers.Adam()
    # Compile the model and return (the loss is added inside CTCLayer)
    model.compile(optimizer=opt)
    return model
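# A minimal sketch of the `CTCLayer` referenced above, following the standard
# Keras pattern of registering the CTC loss via `self.add_loss`. This is an
# assumption about the project's class, not the original code.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class CTCLayer(layers.Layer):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.loss_fn = keras.backend.ctc_batch_cost

    def call(self, y_true, y_pred):
        # Compute the per-batch CTC loss and add it to the layer's losses.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")
        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        self.add_loss(self.loss_fn(y_true, y_pred, input_length, label_length))
        # At test time, just return the computed predictions.
        return y_pred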
def __init__(self, z1_dim=32, z2_dim=32, z1_rhus=[256, 256], z2_rhus=[256, 256],
             tr_shape=(20, 80), mu_nl=None, logvar_nl=None, num_flow_steps=0,
             name="encoder", **kwargs):
    super(Encoder, self).__init__(name=name, **kwargs)

    # latent dims
    self.z1_dim = z1_dim
    self.z2_dim = z2_dim

    # RNN specs for z2_pre_encoder
    self.z2_rhus = z2_rhus
    ## Bidirectional LSTMs
    self.lstm_layer1_z2 = layers.Bidirectional(
        layers.LSTM(self.z2_rhus[0], return_sequences=True,
                    return_state=True, time_major=False),
        merge_mode='concat')
    self.lstm_layer2_z2 = layers.Bidirectional(
        layers.LSTM(self.z2_rhus[1], return_state=True, time_major=False),
        merge_mode='concat')

    # RNN specs for z1_pre_encoder
    self.z1_rhus = z1_rhus
    ## Bidirectional LSTMs
    self.lstm_layer1_z1 = layers.Bidirectional(
        layers.LSTM(self.z1_rhus[0], return_sequences=True,
                    return_state=True, time_major=False),
        merge_mode='concat')
    self.lstm_layer2_z1 = layers.Bidirectional(
        layers.LSTM(self.z1_rhus[1], return_state=True, time_major=False),
        merge_mode='concat')

    # fully connected layers for computation of mu and sigma
    self.z1mu_fclayer = layers.Dense(
        z1_dim, activation=mu_nl, use_bias=True,
        kernel_initializer='glorot_uniform', bias_initializer='zeros')
    self.z1logvar_fclayer = layers.Dense(
        z1_dim, activation=logvar_nl, use_bias=True,
        kernel_initializer='glorot_uniform', bias_initializer='zeros')
    self.z2mu_fclayer = layers.Dense(
        z2_dim, activation=mu_nl, use_bias=True,
        kernel_initializer='glorot_uniform', bias_initializer='zeros')
    self.z2logvar_fclayer = layers.Dense(
        z2_dim, activation=logvar_nl, use_bias=True,
        kernel_initializer='glorot_uniform', bias_initializer='zeros')

    # householder flow
    self.num_flow_steps = num_flow_steps
    self.flowlayers = {'z1': {}, 'z2': {}}
    for i in range(self.num_flow_steps):
        self.flowlayers['z1'][str(i)] = layers.Dense(
            z1_dim, activation=mu_nl, use_bias=True,
            kernel_initializer='glorot_uniform', bias_initializer='zeros')
        self.flowlayers['z2'][str(i)] = layers.Dense(
            z2_dim, activation=mu_nl, use_bias=True,
            kernel_initializer='glorot_uniform', bias_initializer='zeros')
def get_crnn(input_shape, num_classes):
    img_input = keras.Input(shape=input_shape)
    x = vgg_style(img_input)
    # Here x.shape == (batch_size, 1, N, 512), where N is the number of
    # time steps consumed by the RNN module below.
    x = layers.Reshape((-1, 512))(x)

    x = layers.Bidirectional(layers.LSTM(units=256, return_sequences=True))(x)
    x = layers.Bidirectional(layers.LSTM(units=256, return_sequences=True))(x)
    x = layers.Dense(units=num_classes)(x)  # logits output

    return keras.Model(inputs=img_input, outputs=x, name='CRNN')
def build_model(num_classes, img_shape=(32, None, 3)):
    img_input = keras.Input(shape=img_shape)
    x = preprocessing.Rescaling(1.0 / 255)(img_input)
    x = vgg_style(x)
    x = layers.Bidirectional(layers.LSTM(units=256, return_sequences=True),
                             name='bi_lstm1')(x)
    x = layers.Bidirectional(layers.LSTM(units=256, return_sequences=True),
                             name='bi_lstm2')(x)
    logits = layers.Dense(units=num_classes, name='logits')(x)
    return keras.Model(inputs=img_input, outputs=logits, name='CRNN')
def blstm_ref47(input_shape):
    # This model uses 40 log-mel features.
    # epochs = 20, minibatch = 200
    model = tf.keras.models.Sequential()
    model.add(ly.Bidirectional(ly.LSTM(100, return_sequences=True, dropout=0.25),
                               input_shape=input_shape))
    model.add(ly.Bidirectional(ly.LSTM(100, activation='tanh', return_sequences=True)))
    model.add(ly.Bidirectional(ly.LSTM(100, activation='tanh', return_sequences=True)))
    model.add(ly.Bidirectional(ly.LSTM(100, activation='tanh', return_sequences=True)))
    model.add(ly.BatchNormalization())
    model.add(ly.Flatten())
    return end_model(model, optimizer='RMSprop')
def keras_model_fn_cpu(model_config, vocab_size, embedding_size, embeddings):
    """CPU version of stacked Bi-LSTM and Bi-GRU with two fastText embeddings."""
    ## hyperparams
    model_name = model_config['model_name']
    num_class = model_config['num_class']
    lstm_hs = model_config['lstm_hs']
    gru_hs = model_config['gru_hs']
    learning_rate = model_config['learning_rate']

    with tf.device('/cpu:0'):
        ## build model
        inputs = ks.Input(shape=(None,), dtype='int32', name='inputs')
        embedded_sequences_ft1 = layers.Embedding(vocab_size, embedding_size,
                                                  trainable=False, mask_zero=False)(inputs)
        embedded_sequences_ft2 = layers.Embedding(vocab_size, embedding_size,
                                                  trainable=False, mask_zero=False)(inputs)
        concat_embed = layers.concatenate(
            [embedded_sequences_ft1, embedded_sequences_ft2])
        concat_embed = layers.SpatialDropout1D(0.5)(concat_embed)
        x = layers.Bidirectional(
            layers.LSTM(lstm_hs, recurrent_activation='sigmoid',
                        return_sequences=True))(concat_embed)
        # A bidirectional GRU with return_state=True yields
        # (sequence, fwd_state, bwd_state); x_c here is the backward
        # final state, not a cell state (GRUs have none).
        x, x_h, x_c = layers.Bidirectional(
            layers.GRU(gru_hs, reset_after=True, recurrent_activation='sigmoid',
                       return_sequences=True, return_state=True))(x)
        x_1 = layers.GlobalMaxPool1D()(x)
        x_2 = layers.GlobalAvgPool1D()(x)
        x_out = layers.concatenate([x_1, x_2, x_h])
        x_out = layers.BatchNormalization()(x_out)
        outputs = layers.Dense(num_class, activation='softmax', name='outputs')(x_out)

        model = ks.Model(inputs, outputs, name=model_name)

        ## compile
        model.compile(loss='categorical_crossentropy',
                      optimizer=ks.optimizers.Adam(learning_rate=learning_rate,
                                                   clipnorm=.25,
                                                   beta_1=0.7, beta_2=0.99),
                      metrics=['categorical_accuracy',
                               ks.metrics.TopKCategoricalAccuracy(k=3)])
    return model
def build_model(num_classes, img_width=None, img_channels=1, img_height=32):
    """Build the CNN-RNN model."""
    img_input = keras.Input(shape=(img_height, img_width, img_channels))
    x = vgg_style(img_input)
    x = layers.Bidirectional(layers.LSTM(units=256, return_sequences=True),
                             name='bi_lstm1')(x)
    x = layers.Bidirectional(layers.LSTM(units=256, return_sequences=True),
                             name='bi_lstm2')(x)
    x = layers.Dense(units=num_classes, name='fc1')(x)
    return keras.Model(inputs=img_input, outputs=x, name='CRNN')
def _add_multiple_lstms(input_layer, num_lstms: int, size: int):
    lstm_layers = []
    # initial layer
    lstm_layers.append(
        layers.Bidirectional(layers.LSTM(size, return_sequences=True))(input_layer))
    # middle layers (num_lstms - 2 of them)
    for i in range(1, num_lstms - 1):
        lstm_layers.append(
            layers.Bidirectional(layers.LSTM(size, return_sequences=True))(
                lstm_layers[-1]))
    # last layer
    output_layer = layers.Bidirectional(layers.LSTM(size))(lstm_layers[-1])
    return output_layer
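# A usage sketch for `_add_multiple_lstms` (the shapes and sizes here are
# hypothetical, not from the original project): a stack of three BiLSTMs
# feeding a binary classifier head.
import tensorflow as tf
from tensorflow.keras import layers

inputs = layers.Input(shape=(100, 16))  # (timesteps, features)
stacked = _add_multiple_lstms(inputs, num_lstms=3, size=32)
outputs = layers.Dense(1, activation='sigmoid')(stacked)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.summary()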
def create_model(input_shape):
    model = tf.keras.Sequential([
        layers.Input(shape=input_shape),
        layers.Bidirectional(layers.LSTM(64, return_sequences=True)),
        layers.Bidirectional(layers.LSTM(64)),
        layers.Dense(1)
    ])
    opt_fn = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    model.compile(optimizer=opt_fn, loss=loss_fn, metrics=['accuracy'])
    return model
def get_model():
    # `X_train` and `prediction_range` are expected to be defined in the
    # enclosing scope.
    regressor = tf.keras.Sequential()
    regressor.add(
        layers.Bidirectional(layers.LSTM(units=180, recurrent_dropout=0.175,
                                         return_sequences=True),
                             input_shape=(X_train.shape[1], X_train.shape[2])))
    regressor.add(layers.Bidirectional(layers.LSTM(units=128)))
    regressor.add(layers.Dense(prediction_range))
    return regressor
def RNNSpeechModel(nCategories, samplingrate=16000, inputLength=16000):
    # simple LSTM
    sr = samplingrate
    iLen = inputLength

    inputs = L.Input((iLen,))
    x = L.Reshape((1, -1))(inputs)

    x = Melspectrogram(n_dft=1024, n_hop=128, input_shape=(1, iLen),
                       padding='same', sr=sr, n_mels=80,
                       fmin=40.0, fmax=sr / 2, power_melgram=1.0,
                       return_decibel_melgram=True, trainable_fb=False,
                       trainable_kernel=False, name='mel_stft')(x)
    x = Normalization2D(int_axis=0)(x)

    # Melspectrogram puts the sequence in shape (batch_size, melDim, timeSteps, 1);
    # we would rather have it the other way around for LSTMs
    x = L.Permute((2, 1, 3))(x)

    x = L.Conv2D(10, (5, 1), activation='relu', padding='same')(x)
    x = L.BatchNormalization()(x)
    x = L.Conv2D(1, (5, 1), activation='relu', padding='same')(x)
    x = L.BatchNormalization()(x)

    # x = Reshape((125, 80))(x)
    x = L.Lambda(lambda q: K.squeeze(q, -1), name='squeeze_last_dim')(x)

    x = L.Bidirectional(L.LSTM(64, return_sequences=True))(x)  # [b_s, seq_len, vec_dim]
    x = L.Bidirectional(L.LSTM(64))(x)

    x = L.Dense(64, activation='relu')(x)
    x = L.Dense(32, activation='relu')(x)

    output = L.Dense(nCategories, activation='softmax')(x)

    model = Model(inputs=[inputs], outputs=[output])
    return model
def get_model_LSTM():
    max_features = 20000  # Only consider the top 20k words

    inputs = tf.keras.Input(shape=(None,), dtype="int32")
    # Embed each integer in a 128-dimensional vector
    x = layers.Embedding(max_features, 128)(inputs)
    # Add 2 bidirectional LSTMs
    x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x)
    x = layers.Bidirectional(layers.LSTM(64))(x)
    # Add a classifier
    outputs = layers.Dense(256, activation=None)(x)
    model = tf.keras.Model(inputs, outputs)
    return model
def createLSTMModel(self, sequenceLength, numClasses):
    inputs = layers.Input(shape=(sequenceLength, numClasses))

    # Eight parallel bidirectional LSTM branches over the same input.
    blstm_branches = [
        layers.Bidirectional(layers.LSTM(numClasses, return_sequences=False))(inputs)
        for _ in range(8)
    ]
    d1 = layers.Concatenate(axis=1)(blstm_branches)

    r1 = layers.Dense(numClasses, activation='relu')(d1)
    r2 = layers.Dense(numClasses, activation='relu')(r1)
    out = layers.Dense(numClasses, activation='softmax')(r2)

    model = tensorflow.keras.models.Model(inputs=inputs, outputs=out)
    adam = tensorflow.keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=adam, loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def LSTM(rnn_units1, rnn_units2, dense_units, dropout_rate):
    inputs = layers.Input(shape=(250, 6))
    x = layers.Bidirectional(layers.LSTM(rnn_units1, return_sequences=True))(inputs)
    x = layers.Bidirectional(layers.LSTM(rnn_units2))(x)
    x = layers.Dense(dense_units)(x)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU()(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(13, activation="softmax")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model
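# A usage sketch (the hyperparameter values and loss choice are hypothetical).
# Note that this builder's name shadows `layers.LSTM` if both are imported
# into one scope.
model = LSTM(rnn_units1=128, rnn_units2=64, dense_units=64, dropout_rate=0.3)
model.compile(optimizer="adam",
              loss="categorical_crossentropy",  # assumes one-hot labels for the 13 classes
              metrics=["accuracy"])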
def rnn_model():
    i = layers.Input(x_train.shape[1:])
    x = layers.Masking(mask_value=0)(i)
    x = layers.Bidirectional(
        layers.LSTM(64, kernel_regularizer=l2(0.001), return_sequences=True))(x)
    x = layers.Bidirectional(layers.LSTM(32, kernel_regularizer=l2(0.001)))(x)
    x = layers.Dense(64, activation='relu', kernel_regularizer=l2(0.001))(x)
    x = layers.Dropout(0.4)(x)
    x = layers.Dense(64, activation='relu', kernel_regularizer=l2(0.001))(x)
    x = layers.Dropout(0.4)(x)
    out = layers.Dense(1, activation='sigmoid')(x)
    return Model(inputs=i, outputs=out)
def main():
    global FLAGS
    global logger

    ''' STEP 1: LOAD HYPERPARAMETERS '''
    logger = create_logger()
    FLAGS = load_config()
    # vars() returns the __dict__ attribute
    logger.info("FLAGS: \n{}".format(pp.pformat(vars(FLAGS))))

    ''' STEP 2: LOAD AND SPLIT DATASET '''
    train_dataset, validate_dataset, test_dataset = load_datasets()

    ''' STEP 3: LOAD GLOVE EMBEDDINGS '''
    prepare_glove_embeddings()
    load_glove_embeddings()

    ''' STEP 4: DEFINE MODEL '''
    max_hypothesis_length = FLAGS.max_hypothesis_length  # max no. of words in a question
    embed_size = FLAGS.embedding_size  # size of each word vector
    max_features = FLAGS.max_features  # how many unique words to use (rows in the embedding matrix)
    max_premise_length = FLAGS.max_premise_length
    batch_size = FLAGS.batch_size
    hidden_length = FLAGS.hidden_length
    num_epochs = FLAGS.num_epochs

    import tensorflow.keras.layers as tfl
    model = tf.keras.Sequential()
    model.add(tfl.Bidirectional(tfl.LSTM(hidden_length, return_sequences=True),
                                input_shape=(batch_size, max_hypothesis_length, embed_size)))
    model.add(tfl.Bidirectional(tfl.LSTM(hidden_length, return_sequences=False)))
    model.add(tfl.Dense(3, activation="softmax"))
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=['accuracy'])
    model.summary()
    model.fit(x=train_dataset, epochs=num_epochs,
              validation_data=validate_dataset, validation_steps=5)
def caleb(train_X, test_X, train_Y, test_Y):
    train_X = np.reshape(train_X, (train_X.shape[0], 1, train_X.shape[1]))
    test_X = np.reshape(test_X, (test_X.shape[0], 1, test_X.shape[1]))

    model = keras.Sequential()
    model.add(
        layers.Bidirectional(layers.LSTM(32, return_sequences=True,
                                         activation="tanh",
                                         recurrent_activation="sigmoid"),
                             input_shape=(1, 34)))
    model.add(
        layers.Bidirectional(layers.LSTM(128, return_sequences=True,
                                         activation="tanh",
                                         recurrent_activation="sigmoid",
                                         recurrent_dropout=0.1)))
    model.add(layers.Dense(units=18))
    # model.summary()
    model.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer="adam", metrics=["accuracy"])
    model.fit(train_X, train_Y, validation_data=(test_X, test_Y),
              epochs=30, batch_size=50, verbose=0)
    results = model.evaluate(test_X, test_Y, verbose=0)

    vectors = model.predict(test_X)
    vectors = np.reshape(vectors, (vectors.shape[0], vectors.shape[2]))

    clf = make_pipeline(
        StandardScaler(),
        MLPClassifier(max_iter=1000, solver='adam', activation='logistic',
                      hidden_layer_sizes=(32, 64, 17),
                      batch_size=100)).fit(vectors, test_Y)
    pred = clf.predict(vectors)
    print("Accuracy of mlp", accuracy_score(pred, test_Y))
    print("MSE", mean_squared_error(pred, test_Y))
def build_model(pn):
    inp = keras.Input(shape=(102,), dtype=tf.int32)
    emb = layers.Embedding(41, 64, mask_zero=True,
                           embeddings_regularizer=keras.regularizers.l2(1e-5),
                           embeddings_constraint=keras.constraints.max_norm(3))(inp)
    mask = tf.equal(inp, tf.constant(0, dtype='int32'))
    emb = layers.Masking(mask_value=0.0)(emb)
    emb = layers.LayerNormalization(-1,
                                    beta_regularizer=keras.regularizers.l2(1e-5),
                                    gamma_regularizer=keras.regularizers.l2(1e-5))(emb)
    emb = layers.Dropout(0.5)(emb)

    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(emb)
    x = layers.LayerNormalization(-1,
                                  beta_regularizer=keras.regularizers.l2(1e-5),
                                  gamma_regularizer=keras.regularizers.l2(1e-5))(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x)
    x = layers.LayerNormalization(-1,
                                  beta_regularizer=keras.regularizers.l2(1e-5),
                                  gamma_regularizer=keras.regularizers.l2(1e-5))(x)
    x = RemoveMask()(x)
    x = AttentionWithContext(x, mask)

    x = layers.Dense(256,
                     kernel_regularizer=keras.regularizers.l2(1e-3),
                     bias_regularizer=keras.regularizers.l2(1e-3),
                     activation=activation)(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(64,
                     kernel_regularizer=keras.regularizers.l2(1e-3),
                     bias_regularizer=keras.regularizers.l2(1e-3),
                     activation=activation)(x)
    x = layers.Dropout(0.5)(x)
    y = layers.Dense(1, activation='sigmoid',
                     kernel_regularizer=keras.regularizers.l2(1e-3),
                     bias_regularizer=keras.regularizers.l2(1e-3))(x)

    model = keras.Model(inputs=inp, outputs=y)
    optimizer = keras.optimizers.Adam(learning_rate=0.005)

    def crosser(y_true, y_pred):
        # Class-weighted binary cross-entropy; `pn` weights the negative class.
        y_pred = tf.clip_by_value(y_pred, 1e-6, 1 - 1e-6)
        loss = -tf.reduce_mean(y_true * tf.math.log(y_pred)
                               + pn * (1 - y_true) * tf.math.log(1 - y_pred))
        return loss

    model.compile(loss=crosser, optimizer=optimizer, metrics=['AUC'])
    return model
def __init__(self, num_of_layers):
    super(bilstm_layers, self).__init__()
    layers_list = list()
    init_cells = INIT_LSTM_CELL
    layers_list.append(layers.BatchNormalization())
    # Stacked bidirectional LSTMs that return sequences...
    for i in range(num_of_layers):
        layers_list.append(
            layers.Bidirectional(layers.LSTM(init_cells, return_sequences=True)))
    # ...followed by a final bidirectional LSTM that returns only the last output.
    layers_list.append(layers.Bidirectional(layers.LSTM(init_cells)))
    self.bilslayers = tf.keras.Sequential(layers_list)
def crnn(num_classes, backbone='original'):
    img_input = Input(shape=(32, None, 1))

    if backbone.lower() == 'original':
        x = original(img_input)
    elif backbone.lower() == 'resnet':
        x = resnet(img_input)
    else:
        raise ValueError('unknown backbone: {}'.format(backbone))

    x = layers.Reshape((-1, 512))(x)
    x = layers.Bidirectional(layers.LSTM(units=256, return_sequences=True))(x)
    x = layers.Bidirectional(layers.LSTM(units=256, return_sequences=True))(x)
    x = layers.Dense(units=num_classes)(x)

    return Model(inputs=img_input, outputs=x, name='CRNN')
def create_model(inp_shape=(5, 19), gru_units=32, dropout=0.8):
    q_inputs = tf.keras.Input(name='q_inputs', shape=inp_shape)
    x = layers.Bidirectional(layers.GRU(gru_units, return_sequences=True))(q_inputs)
    x = layers.Dropout(dropout)(x)
    x = layers.Bidirectional(layers.GRU(gru_units, return_sequences=False))(x)
    x = layers.Dropout(dropout)(x)
    close = layers.Dense(1, activation='linear', name='close')(x)
    model = tf.keras.Model(inputs=q_inputs, outputs=close)
    model.summary()
    return model
def build_model(self, data):
    """Build the vocab, GloVe embedding layer, and stacked BiLSTM model.

    Parameters
    ----------
    data : the corpus used to build the vocab for the vectorizer.

    Returns
    -------
    None. Prints a summary of the model created; `self.model` is
    instantiated for use by the train method.
    """
    # Build vocab
    self.vocab, self.vectorizer = gen_vocab(data)

    # Build embedding layer from GloVe
    int_sequences_input = keras.Input(shape=(None,), dtype="int64")
    embedding_layer = embed_matrix(embedding_model=self.glovemodel,
                                   vocab=self.vocab, embedding_dim=100)
    embedded_sequences = embedding_layer(int_sequences_input)

    # For stacked
    x = layers.Bidirectional(layers.LSTM(100, return_sequences=True))(embedded_sequences)
    x = layers.LSTM(100)(x)
    # For single-layer bidirectional
    # x = layers.Bidirectional(layers.LSTM(100))(embedded_sequences)
    # For the basic model
    # x = layers.LSTM(100)(embedded_sequences)

    preds = layers.Dense(len(self.vocab), activation='softmax')(x)
    self.model = keras.Model(inputs=int_sequences_input, outputs=preds)
    self.model.compile(loss="sparse_categorical_crossentropy",
                       optimizer="rmsprop", metrics=["acc"])
    self.model.summary()
def __init__(self, vocab_size: int, embed_dim: int, hidden_size: int = 128,
             training: bool = False):
    super(MyBasicAttentiveBiGRU, self).__init__()

    self.num_classes = len(ID_TO_CLASS)
    self.decoder = layers.Dense(units=self.num_classes)
    self.omegas = tf.Variable(tf.random.normal((hidden_size * 2, 1)))
    self.embeddings = tf.Variable(tf.random.normal((vocab_size, embed_dim)))

    ### TODO(Students) START
    forward_layer = layers.GRU(hidden_size, activation='tanh',
                               recurrent_activation='sigmoid',
                               return_sequences=True, use_bias=True)
    backward_layer = layers.GRU(hidden_size, activation='tanh',
                                recurrent_activation='sigmoid',
                                return_sequences=True, go_backwards=True,
                                use_bias=True)
    self.biDirectional = layers.Bidirectional(forward_layer,
                                              backward_layer=backward_layer,
                                              input_shape=(embed_dim, hidden_size))
def build(self, hp, inputs=None):
    inputs = nest.flatten(inputs)
    utils.validate_num_inputs(inputs, 1)
    input_node = inputs[0]
    shape = input_node.shape.as_list()
    if len(shape) != 3:
        raise ValueError('Expect the input tensor to have '
                         'at least 3 dimensions for rnn models, '
                         'but got {shape}'.format(shape=input_node.shape))

    feature_size = shape[-1]
    output_node = input_node

    bidirectional = self.bidirectional
    if bidirectional is None:
        bidirectional = hp.Boolean('bidirectional', default=True)
    layer_type = self.layer_type or hp.Choice('layer_type', ['gru', 'lstm'],
                                              default='lstm')
    num_layers = self.num_layers or hp.Choice('num_layers', [1, 2, 3],
                                              default=2)
    rnn_layers = {'gru': layers.GRU, 'lstm': layers.LSTM}
    in_layer = rnn_layers[layer_type]
    for i in range(num_layers):
        return_sequences = True
        if i == num_layers - 1:
            return_sequences = self.return_sequences
        if bidirectional:
            output_node = layers.Bidirectional(
                in_layer(feature_size,
                         return_sequences=return_sequences))(output_node)
        else:
            output_node = in_layer(
                feature_size, return_sequences=return_sequences)(output_node)
    return output_node
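# A minimal sketch of the hyperparameter calls used in `build` above, assuming
# the KerasTuner `HyperParameters` API (`hp.Boolean` and `hp.Choice` are real
# keras_tuner methods; this standalone usage is illustrative only, since a
# tuner normally supplies `hp` and overrides the defaults during a search).
import keras_tuner as kt

hp = kt.HyperParameters()
bidirectional = hp.Boolean('bidirectional', default=True)  # search over {True, False}
layer_type = hp.Choice('layer_type', ['gru', 'lstm'], default='lstm')
num_layers = hp.Choice('num_layers', [1, 2, 3], default=2)
print(bidirectional, layer_type, num_layers)  # defaults until a tuner overrides them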
def CNN_LSTM(in_shape=(200, 4), num_filters=32, batch_norm=True,
             activation='relu', lstm_units=128, dense_units=512, num_out=12):
    inputs = Input(shape=in_shape)
    nn = layers.Conv1D(filters=num_filters, kernel_size=19,
                       use_bias=False, padding='same')(inputs)
    if batch_norm:
        nn = layers.BatchNormalization()(nn)
    nn = layers.Activation(activation, name='conv_activation')(nn)
    nn = layers.MaxPool1D(pool_size=24)(nn)
    nn = layers.Dropout(0.1)(nn)

    forward = layers.LSTM(lstm_units // 2, return_sequences=True)
    backward = layers.LSTM(lstm_units // 2, activation='relu',
                           return_sequences=True, go_backwards=True)
    nn = layers.Bidirectional(forward, backward_layer=backward)(nn)
    nn = layers.Dropout(0.1)(nn)

    nn = layers.Flatten()(nn)
    nn = layers.Dense(dense_units, use_bias=False)(nn)
    nn = layers.BatchNormalization()(nn)
    nn = layers.Activation('relu')(nn)
    nn = layers.Dropout(0.5)(nn)
    outputs = layers.Dense(num_out, activation='sigmoid')(nn)

    return Model(inputs=inputs, outputs=outputs)
def __init__(self, vocab_size: int, embed_dim: int, hidden_size: int = 128,
             training: bool = False):
    super(MyBasicAttentiveBiGRU, self).__init__()

    self.num_classes = len(ID_TO_CLASS)
    self.decoder = layers.Dense(units=self.num_classes)
    self.omegas = tf.Variable(tf.random.normal((hidden_size * 2, 1)))
    self.embeddings = tf.Variable(tf.random.normal((vocab_size, embed_dim)))

    ### TODO(Students) START
    self.vocab_size = vocab_size
    self.embed_dim = embed_dim
    self.hidden_size = hidden_size
    self.fw_layer = layers.GRU(self.hidden_size, return_sequences=True)
    # bw_layer = layers.GRUCell(self.hidden_size, return_sequences=True, go_backwards=True)
    self.bi_layer = layers.Bidirectional(self.fw_layer,
                                         input_shape=(self.vocab_size, 2 * self.embed_dim),
                                         dtype=tf.float32)
    self.training = training
def get_model(bidirectional=False, seqModelType="SimpleRNN", RNNunits=32):
    model = keras.Sequential()
    model.add(layers.InputLayer(input_shape=(None, s)))

    if seqModelType == "HMM":
        seqLayer = HMMLayer(5, 15)  # (10, 15) is better than (5, 11)
    elif seqModelType == "LSTM":
        seqLayer = layers.LSTM(RNNunits)
    elif seqModelType == "GRU":
        seqLayer = layers.GRU(RNNunits)
    elif seqModelType == "SimpleRNN":
        seqLayer = layers.SimpleRNN(RNNunits)
    else:
        sys.exit("unknown sequence model type " + seqModelType)

    if bidirectional:
        seqLayer = layers.Bidirectional(seqLayer)

    model.add(seqLayer)
    model.add(layers.Dense(1, activation='sigmoid'))

    lr = 1e-3
    # if seqModelType == "HMM":
    #     lr = 1e-2
    print(f"lr={lr}")
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss=tf.keras.losses.BinaryCrossentropy(),
                  metrics=["accuracy"])
    return model
def configure_model(model_info, lstm_type='',
                    optimizer=tf.compat.v1.train.AdamOptimizer(0.001)):
    '''
    :param model_info: holds feat_size, layers, and n_classes
    :param lstm_type: 'b' selects a bidirectional LSTM for the first layer
    :param optimizer: optimizer used to compile the model
    :return: compiled Keras model
    '''
    model = tf.keras.Sequential()
    model.add(layers.Masking(mask_value=1.,
                             input_shape=(None, model_info.feat_size)))
    for l, layer in enumerate(model_info.layers):
        if l == 0:
            if lstm_type == 'b':
                logging.info('Using bidirectional LSTM')
                model.add(layers.Bidirectional(
                    layers.LSTM(layer, input_shape=(None, model_info.feat_size),
                                dropout=0.1, return_sequences=True,
                                recurrent_dropout=0.1)))
            else:
                model.add(layers.LSTM(layer,
                                      input_shape=(None, model_info.feat_size),
                                      dropout=0.1, recurrent_dropout=0.1,
                                      return_sequences=True))
        else:
            model.add(layers.TimeDistributed(layers.Dense(layer, activation='relu')))
            model.add(layers.Dropout(0.1))
    model.add(layers.TimeDistributed(layers.Dense(model_info.n_classes,
                                                  activation='softmax')))
    model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])
    return model
def __init__(self, vocab_size: int, embed_dim: int, hidden_size: int = 128,
             training: bool = False):
    super(MyBasicAttentiveBiGRU, self).__init__()

    self.num_classes = len(ID_TO_CLASS)
    self.decoder = layers.Dense(units=self.num_classes)
    self.omegas = tf.Variable(tf.random.normal((hidden_size * 2, 1)))

    ### TODO(Students) START
    self.embeddings = tf.Variable(tf.random.normal((vocab_size, embed_dim)),
                                  trainable=training)
    self._forward_layer = layers.GRU(hidden_size, return_sequences=True)
    self._backward_layer = layers.GRU(hidden_size, return_sequences=True,
                                      go_backwards=True)
    self._bidirectional_layer = layers.Bidirectional(
        self._forward_layer, backward_layer=self._backward_layer,
        merge_mode='concat')
def make_gru_network(self):
    x0 = tf.keras.Input(shape=[None, self.num_channels])
    x = layers.Masking(mask_value=-1.0)(x0)
    x = tf.keras.layers.GaussianNoise(0.1)(x)
    x = layers.BatchNormalization()(x)

    x_e, x_h_fwd, x_h_bwd = layers.Bidirectional(
        layers.GRU(units=512, activation='tanh', use_bias=False,
                   kernel_initializer="glorot_normal",
                   return_sequences=True, return_state=True),
        name="bi_gru")(x)
    x_e = layers.Dropout(self.drop_prob)(x_e)
    x_h_fwd = layers.Dropout(self.drop_prob)(x_h_fwd)
    x_h_bwd = layers.Dropout(self.drop_prob)(x_h_bwd)

    x_a_fwd, w_a_fwd = BahdanauAttention(1024)(x_h_fwd, x_e)
    x_a_bwd, w_a_bwd = BahdanauAttention(1024)(x_h_bwd, x_e)

    x = tf.concat([x_h_fwd, x_a_fwd, x_h_bwd, x_a_bwd], axis=-1)
    x = layers.Dense(1, activation='sigmoid', use_bias=False,
                     name='prediction')(x)
    # Rescale the sigmoid output from [0, 1] to the range [190, 280].
    x = tf.math.add(tf.math.multiply(x, 90.0), 190.0)
    return tf.keras.Model(inputs=x0, outputs=x)