def _build_model(self, x, y):
    """Assemble and compile the recurrent predictor network.

    Args:
      - x: temporal feature array indexed as x[sample, time, feature]; only
           its sequence length and feature count are read here
      - y: labels; a 2-D y gives one prediction per sequence, a 3-D y gives
           one prediction per time step

    Returns:
      - model: predictor model (returned without an output layer and
               uncompiled when self.task is neither 'classification' nor
               'regression')

    Raises:
      - ValueError: if y is neither 2- nor 3-dimensional
    """
    n_features = len(x[0, 0, :])
    n_steps = len(x[0, :, 0])

    model = tf.keras.Sequential()
    # Time steps filled with -1 are treated as padding.
    model.add(
        layers.Masking(mask_value=-1., input_shape=(n_steps, n_features)))

    # Every recurrent layer except the last one emits the full sequence.
    for _ in range(self.n_layer - 1):
        model = rnn_sequential(model,
                               self.model_type,
                               self.h_dim,
                               return_seq=True)

    dim_y = len(y.shape)
    if dim_y not in (2, 3):
        raise ValueError('Dimension of y {} is not 2 or 3.'.format(
            str(dim_y)))
    per_step_output = dim_y == 3

    model = rnn_sequential(model,
                           self.model_type,
                           self.h_dim,
                           per_step_output,
                           name='intermediate_state')

    self.adam = tf.keras.optimizers.Adam(learning_rate=self.learning_rate,
                                         beta_1=0.9,
                                         beta_2=0.999,
                                         amsgrad=False)

    def output_head(activation):
        # One dense unit per label column, applied per step for 3-D labels.
        dense = layers.Dense(y.shape[-1], activation=activation)
        if per_step_output:
            return layers.TimeDistributed(dense)
        return dense

    if self.task == 'classification':
        model.add(output_head('sigmoid'))
        model.compile(loss=binary_cross_entropy_loss, optimizer=self.adam)
    elif self.task == 'regression':
        model.add(output_head('linear'))
        model.compile(loss=mse_loss, optimizer=self.adam, metrics=['mse'])

    return model
def classifier(base_layers, input_rois, num_rois, nb_classes=2):
    """
    Build the per-ROI head: class scores and bounding-box regression.

    :param base_layers: feature map handed to the ROI pooling layer
    :param input_rois: ROI tensor handed to the ROI pooling layer
    :param num_rois: number of ROIs handled per call
    :param nb_classes: number of units of the softmax class output
    :return: [out_class, out_regr] - the softmax class output and a linear
        regression output with 4 * (nb_classes - 1) units, both applied
        per ROI through TimeDistributed
    """
    pooling_regions = 14
    input_shape = (num_rois, 14, 14, 1024)

    roi_pooling = RoiPoolingConv(pooling_regions, num_rois)
    pooled_rois = roi_pooling([base_layers, input_rois])

    features = classifier_layers(pooled_rois,
                                 input_shape=input_shape,
                                 trainable=True)
    features = layers.TimeDistributed(layers.Flatten())(features)

    # Both heads start from all-zero kernels.
    class_dense = layers.Dense(nb_classes,
                               activation='softmax',
                               kernel_initializer='zero')
    out_class = layers.TimeDistributed(
        class_dense, name='dense_class_{}'.format(nb_classes))(features)

    regr_dense = layers.Dense(4 * (nb_classes - 1),
                              activation='linear',
                              kernel_initializer='zero')
    out_regr = layers.TimeDistributed(
        regr_dense, name='dense_regress_{}'.format(nb_classes))(features)

    return [out_class, out_regr]
def build_binary_model(self):
    """Construct and compile the question/abstract sentence classifier.

    The question tokens are embedded and encoded by a BiLSTM into a single
    vector. Every sentence of the abstract is embedded and encoded by a
    shared BiLSTM, and a document-level BiLSTM then runs over the sentence
    vectors. The question vector is repeated once per sentence,
    concatenated with the document-level states and classified into two
    classes per sentence.

    Reads self.max_tok_q, self.max_sentences, self.max_tok_sent,
    self.vocab_size, self.hidden_dim and self.dropout. Prints the model
    summary as a side effect.

    Returns:
        The compiled tf.keras.Model with inputs [q_input, abs_input] and a
        per-sentence 2-class output named 'sent_output'.
    """
    question_input = tf.keras.Input(shape=(self.max_tok_q, ),
                                    name='q_input')
    abstract_input = tf.keras.Input(shape=(
        self.max_sentences,
        self.max_tok_sent,
    ),
                                    name='abs_input')

    # NOT USING MASKING DUE TO CUDNN ERROR:
    # https://github.com/tensorflow/tensorflow/issues/33148
    x1 = layers.Embedding(input_dim=self.vocab_size,
                          output_dim=self.hidden_dim,
                          mask_zero=False)(question_input)
    x1 = layers.Bidirectional(
        layers.LSTM(self.hidden_dim,
                    dropout=self.dropout,
                    kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        input_shape=(self.max_tok_q, self.hidden_dim),
        name='q_bilstm')(x1)

    # Apply the embedding to every sentence.
    x2 = layers.TimeDistributed(
        layers.Embedding(input_dim=self.vocab_size,
                         output_dim=self.hidden_dim,
                         input_length=self.max_tok_sent,
                         mask_zero=False),
        input_shape=(self.max_sentences,
                     self.max_tok_sent))(abstract_input)

    # Apply a BiLSTM to every sentence embedding.
    x2 = layers.TimeDistributed(
        layers.Bidirectional(
            layers.LSTM(self.hidden_dim,
                        dropout=self.dropout,
                        kernel_regularizer=tf.keras.regularizers.l2(0.01))),
        input_shape=(self.max_sentences, self.max_tok_sent,
                     self.hidden_dim),
        name='sentence_distributed_bilstms')(x2)

    # Document-level BiLSTM over the sentence representations. The pooled
    # document vector is deliberately not concatenated to each sentence.
    x2 = layers.Bidirectional(
        layers.LSTM(self.hidden_dim,
                    return_sequences=True,
                    dropout=self.dropout,
                    kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        input_shape=(self.max_sentences, self.hidden_dim * 2),
        name='document_bilstm')(x2)

    # Combine question and document: one copy of the question per sentence.
    x3 = layers.RepeatVector(self.max_sentences)(x1)
    x4 = layers.concatenate([x2, x3])

    # Integer class labels (not one-hot) are expected; the number of classes
    # is the number of units here. The sparse categorical cross-entropy loss
    # expects a probability distribution over those classes, so the output
    # uses softmax rather than independent sigmoids.
    sent_output = layers.Dense(
        2,
        activation='softmax',
        name='sent_output',
        kernel_regularizer=tf.keras.regularizers.l2(0.01))(x4)

    model = tf.keras.Model(inputs=[question_input, abstract_input],
                           outputs=sent_output)
    model.summary()
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(1e-4))
    return model
def shortcut_convolution(high_res_img, low_res_target, nb_channels_out):
    """Downsample `high_res_img` to the size of `low_res_target`.

    A single spectrally-normalised, time-distributed convolution is used.
    When the target size is 1 the kernel covers the whole input; otherwise
    stride, zero padding and kernel size are derived from the two sizes
    reported by `img_size`. The result is layer-normalised.

    Args:
        high_res_img: tensor to downsample.
        low_res_target: tensor whose `img_size` gives the target size.
        nb_channels_out: number of filters of the shortcut convolution.

    Returns:
        The downsampled, layer-normalised tensor.
    """
    target_size = img_size(low_res_target)
    source_size = img_size(high_res_img)

    if target_size == 1:
        # One kernel spanning the full input.
        conv = kl.Conv2D(nb_channels_out,
                         source_size,
                         activation=LeakyReLU(0.2))
        shortcut = kl.TimeDistributed(SpectralNormalization(conv),
                                      name='shortcut_conv_1')(high_res_img)
    else:
        steps = target_size - 1
        strides = int(tf.math.ceil((2 + source_size) / steps))
        margin = 2
        overshoot = strides * steps - source_size
        padding = int(tf.math.ceil(overshoot / 2) + 1 + margin)
        kernel_size = int(strides * (1 - target_size) + source_size +
                          2 * padding)

        padded = kl.TimeDistributed(
            kl.ZeroPadding2D(padding=padding))(high_res_img)
        conv = kl.Conv2D(nb_channels_out,
                         kernel_size,
                         strides=strides,
                         activation=LeakyReLU(0.2))
        shortcut = kl.TimeDistributed(SpectralNormalization(conv),
                                      name='shortcut_conv')(padded)

    return kl.LayerNormalization()(shortcut)
def build_cnn_autolstm():
    """Build and compile the two-branch CNN / CuDNN-LSTM forecaster.

    Reads the module-level `n_past`, `n_future`, `n_features` and
    `my_optimizer`. The input named "cA" goes through a Conv1D/Dense
    branch, the input named "cD" through an encoder-decoder CuDNNLSTM
    branch; both are concatenated and mapped to `n_future` outputs.
    Prints the model summary as a side effect.

    Returns:
        The compiled keras.Model (MSE loss).
    """
    window_shape = (n_past, int(n_features))
    inputA = keras.Input(shape=window_shape, name="cA")
    inputD = keras.Input(shape=(n_past, int(n_features)), name="cD")

    # CNN branch for the approximation input.
    cnn = layers.Conv1D(filters=64, kernel_size=2,
                        activation='relu')(inputA)
    cnn = layers.Conv1D(filters=64, kernel_size=2, activation='relu')(cnn)
    cnn = layers.MaxPooling1D(pool_size=2)(cnn)
    cnn = layers.Flatten()(cnn)
    for width in (100, 50):
        cnn = layers.Dropout(0.3)(cnn)
        cnn = layers.Dense(width, activation='relu')(cnn)

    # LSTM encoder-decoder branch for the detail input.
    rnn = layers.CuDNNLSTM(200, return_sequences=False)(inputD)
    rnn = layers.RepeatVector(n_future)(rnn)
    rnn = layers.CuDNNLSTM(200, return_sequences=True)(rnn)
    rnn = layers.TimeDistributed(layers.Dense(100, activation='relu'))(rnn)
    rnn = layers.TimeDistributed(layers.Dense(50))(rnn)
    rnn = layers.CuDNNLSTM(50)(rnn)

    # Merge both branches and regress the forecast horizon.
    merged = layers.concatenate([cnn, rnn])
    forecast = layers.Dense(n_future)(merged)

    model = keras.Model(inputs=[inputA, inputD], outputs=forecast)
    model.compile(loss='mse', optimizer=my_optimizer)
    model.summary()
    return model
def __init__(self):
    """Create the layers of the MobileNetV2-based event classifier.

    The backbone is an ImageNet-initialised MobileNetV2 (alpha 0.5, no
    top) applied to every frame through TimeDistributed and frozen.
    The remaining layers (reshape, per-frame dense, batch norm, dropout,
    dense, softmax output sized by `class_map`) are only instantiated
    here; how they are wired is decided where the model is called.
    """
    super().__init__()

    clip_shape = config.VC_INPUT_SHAPE

    # Frozen per-frame feature extractor.
    self.mobile_net = tf.keras.applications.MobileNetV2(
        input_shape=clip_shape[-3:],
        alpha=0.50,
        include_top=False,
        weights='imagenet')
    self.td_mobile_net = L.TimeDistributed(
        self.mobile_net, name='time_distributed_mobile_net')
    self.td_mobile_net.trainable = False

    # Collapse each frame's feature map into one vector per frame.
    self.flatten1 = L.Reshape((clip_shape[0], -1))
    self.key_feature_extractor = L.TimeDistributed(
        L.Dense(64, activation='relu'))
    self.batch_norm1 = L.BatchNormalization()
    self.dropout1 = L.Dropout(0.20)

    self.dense = L.Dense(32, activation='relu')
    self.batch_norm2 = L.BatchNormalization()
    self.dropout2 = L.Dropout(0.20)

    # Flatten everything before the classification layer.
    self.flatten2 = L.Reshape((-1, ))
    self.event_class_output = L.Dense(len(class_map),
                                      activation='softmax',
                                      name="output_event")
def build_model(self):
    """Build the (uncompiled) 'DeepChannel' Conv1D + stacked-LSTM model.

    The input has shape (time, 1, 1) with a variable number of time steps.
    Each step passes through a time-distributed Conv1D / MaxPooling1D /
    Flatten stage, followed by three LSTM blocks (LSTM, batch norm,
    dropout); only the last LSTM collapses the sequence. The output is a
    softmax over `self.num_states`.

    Returns:
        The uncompiled keras Model.
    """
    inputs = layers.Input(shape=(None, 1, 1))

    conv = layers.Conv1D(filters=64, kernel_size=1, activation='relu')
    x = layers.TimeDistributed(conv)(inputs)
    x = layers.TimeDistributed(layers.MaxPooling1D(pool_size=1))(x)
    x = layers.TimeDistributed(layers.Flatten())(x)

    # The first two recurrent blocks keep the sequence, the last drops it.
    for keep_sequence in (True, True, False):
        x = layers.LSTM(256,
                        activation='relu',
                        return_sequences=keep_sequence)(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(0.2)(x)

    state_probs = layers.Dense(self.num_states, activation='softmax')(x)
    return Model(inputs, state_probs, name='DeepChannel')
def block(block, filters, kernel_size, x):
    """Apply two temporal Conv1D + batch-norm stages and a LeakyReLU.

    Args:
        block: prefix used to name every layer created here.
        filters: number of filters of both convolutions.
        kernel_size: kernel size of both convolutions.
        x: input tensor.

    Returns:
        The output tensor of the final LeakyReLU.
    """
    stage_names = (
        ('{}_temporal_conv_1'.format(block), '{}_bn_1'.format(block)),
        ('{}_temporal_conv_2'.format(block), '{}_bn_2'.format(block)),
    )
    for conv_name, bn_name in stage_names:
        # Unpadded convolution, so each stage shortens the time axis.
        x = layers.Conv1D(filters=filters,
                          kernel_size=kernel_size,
                          padding='VALID',
                          name=conv_name)(x)
        x = layers.TimeDistributed(layers.BatchNormalization(),
                                   name=bn_name)(x)

    activation = layers.TimeDistributed(layers.LeakyReLU(alpha=0.01),
                                        name='{}_leaky_relu'.format(block))
    return activation(x)
def encoder(X, l2=0.001, dropout=1e-6, lr=0.006, seed=42):
    """Build and compile a masked three-layer GRU sequence classifier.

    The network masks all-zero time steps, runs three GRU layers of 16
    units, projects every step to 3 dimensions with a linear layer and
    classifies every step into 15 classes with a softmax layer.

    Parameters
    ----------
    X: array or tensor; only X.shape[-1] (the feature size) is read
    l2: float, L2 factor of the GRU kernel regulariser
    dropout: float, input and recurrent dropout rate of every GRU
    lr: float, Adam learning rate
    seed: int, passed to tf.random.set_seed

    Returns
    -------
    model: compiled keras Sequential model
    """
    tf.random.set_seed(seed)
    regularizer = keras.regularizers.l2(l2)
    CustomGRU = partial(keras.layers.GRU,
                        kernel_regularizer=regularizer,
                        dropout=dropout,
                        recurrent_dropout=dropout)

    # For masking, refer:
    #   https://www.tensorflow.org/guide/keras/masking_and_padding
    #   https://gist.github.com/ragulpr/601486471549cfa26fe4af36a1fade21
    model = keras.models.Sequential([
        layers.Masking(mask_value=0.0, input_shape=[None, X.shape[-1]]),
        CustomGRU(16, return_sequences=True),
        CustomGRU(16, return_sequences=True),
        CustomGRU(16, return_sequences=True),
        layers.TimeDistributed(layers.Dense(3, activation='linear')),
        layers.TimeDistributed(layers.Dense(15, activation='softmax'))
    ])

    # `lr` is a deprecated alias that current Keras optimizers reject;
    # `learning_rate` is the supported argument name.
    optimizer = keras.optimizers.Adam(learning_rate=lr)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['sparse_categorical_accuracy'])
    return model
def configure_model(model_info, lstm_type='', optimizer=None):
    '''
    Build and compile a masked LSTM + time-distributed dense classifier.

    The first entry of model_info.layers sizes the recurrent layer; every
    further entry adds a time-distributed ReLU dense layer followed by
    dropout. The output is a per-step softmax over model_info.n_classes.

    :param model_info: object providing feat_size (input feature size),
        layers (iterable of layer widths) and n_classes
    :param lstm_type: 'b' selects a bidirectional LSTM, anything else a
        unidirectional one
    :param optimizer: optimizer passed to compile; when None a new
        tf.compat.v1.train.AdamOptimizer(0.001) is created for this model
    :return: the compiled tf.keras.Sequential model
    '''
    # Create the default optimizer per call: a default-argument instance is
    # built once at import time and would be shared by every model.
    if optimizer is None:
        optimizer = tf.compat.v1.train.AdamOptimizer(0.001)

    model = tf.keras.Sequential()
    model.add(
        layers.Masking(mask_value=1.,
                       input_shape=(None, model_info.feat_size)))

    for l, layer in enumerate(model_info.layers):
        if l == 0:
            if lstm_type == 'b':
                logging.info('Using bidirectional LSTM')
                model.add(
                    layers.Bidirectional(
                        layers.LSTM(layer,
                                    input_shape=(None,
                                                 model_info.feat_size),
                                    dropout=0.1,
                                    return_sequences=True,
                                    recurrent_dropout=0.1)))
            else:
                model.add(
                    layers.LSTM(layer,
                                input_shape=(None, model_info.feat_size),
                                dropout=0.1,
                                recurrent_dropout=0.1,
                                return_sequences=True))
        else:
            model.add(
                layers.TimeDistributed(
                    layers.Dense(layer, activation='relu')))
            model.add(layers.Dropout(0.1))

    model.add(
        layers.TimeDistributed(
            layers.Dense(model_info.n_classes, activation='softmax')))
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model
def rnn_ref1(input_shape):
    """Build the LSTM / dense / bidirectional-GRU reference network.

    Original author's note: this is the model that works well with MFCCs;
    the input is a table of 20 MFCCs plus their first- and second-order
    derivatives, which makes 60 features.

    Args:
        input_shape: input shape given to the first LSTM layer.

    Returns:
        Whatever `end_model` returns for the assembled Sequential stack.
    """
    model = tf.keras.models.Sequential()
    model.add(ly.LSTM(128, return_sequences=True, input_shape=input_shape))

    # Four identical per-step dense blocks.
    for _ in range(4):
        dense = ly.Dense(128,
                         activation='relu',
                         kernel_regularizer=tf.keras.regularizers.l2(0.1))
        model.add(ly.TimeDistributed(dense))
        model.add(ly.Dropout(0.25))
        model.add(ly.BatchNormalization())

    # Two bidirectional GRU layers, both returning full sequences.
    for _ in range(2):
        gru = ly.GRU(256,
                     activation='relu',
                     return_sequences=True,
                     dropout=0.25,
                     kernel_regularizer=tf.keras.regularizers.l2(0.1))
        model.add(ly.Bidirectional(gru))

    model.add(ly.BatchNormalization())
    model.add(ly.Flatten())
    return end_model(model)
def build_cnn_auto_colab():
    """Build and compile the two-branch CNN / LSTM forecaster.

    Reads the module-level `n_past`, `n_future`, `n_features` and
    `my_optimizer`. The input named "cA" goes through a Conv1D/Dense
    branch, the input named "cD" through an encoder-decoder LSTM branch;
    both are concatenated and mapped to `n_future` outputs.

    Returns:
        The compiled keras.Model (MSE loss).
    """
    inputA = keras.Input(shape=(n_past, int(n_features)), name="cA")
    inputD = keras.Input(shape=(n_past, int(n_features)), name="cD")

    # Convolutional branch for the approximation input.
    approx = layers.Conv1D(filters=128,
                           kernel_size=2,
                           activation='relu',
                           name="cA_CNN")(inputA)
    approx = layers.MaxPooling1D(pool_size=2)(approx)
    approx = layers.Flatten()(approx)
    approx = layers.Dense(100, activation='relu')(approx)
    approx = layers.BatchNormalization()(approx)
    approx = layers.Dropout(0.2)(approx)
    approx = layers.Dense(100, activation='relu')(approx)

    # Encoder-decoder LSTM branch for the detail input.
    detail = layers.LSTM(200, activation="relu", name="cD_LSTM")(inputD)
    detail = layers.RepeatVector(n_future)(detail)
    detail = layers.LSTM(200, activation="relu",
                         return_sequences=True)(detail)
    detail = layers.TimeDistributed(layers.Dense(100,
                                                 activation='relu'))(detail)
    detail = layers.TimeDistributed(layers.Dense(50))(detail)
    detail = layers.LSTM(100, activation="sigmoid")(detail)

    # Merge both branches and regress the forecast horizon.
    merged = layers.concatenate([approx, detail])
    forecast = layers.Dense(n_future)(merged)

    model = keras.Model(inputs=[inputA, inputD], outputs=forecast)
    model.compile(loss='mse', optimizer=my_optimizer)
    return model
def time_distributed_graph(input, nodes, last_activation, last_bn,
                           activation, name_prefix, kernel_initialzer,
                           weight_decay, bn):
    """Stack time-distributed dense layers, one per entry of `nodes`.

    Args:
        input: input tensor.
        nodes: sequence of layer widths; must not be empty.
        last_activation: when falsy the final layer has no activation.
        last_bn: not used by this function.
        activation: activation of the hidden (and optionally last) layers.
        name_prefix: prefix of the inner Dense layer names
            (`<prefix>_Dense<k>`, k starting at 1).
        kernel_initialzer: kernel initializer of every Dense layer.
        weight_decay: L2 factor of every Dense kernel regulariser.
        bn: not used by this function.

    Returns:
        The output tensor of the last time-distributed Dense layer.
    """

    def dense_over_time(tensor, width, act, position):
        dense = KL.Dense(
            width,
            activation=act,
            kernel_initializer=kernel_initialzer,
            kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
            name=name_prefix + '_Dense{}'.format(position))
        return KL.TimeDistributed(dense)(tensor)

    nodes_count = len(nodes)
    x = input
    for index in range(nodes_count - 1):
        x = dense_over_time(x, nodes[index], activation, index + 1)

    # The final layer is linear unless an activation was requested for it.
    final_activation = activation if last_activation else None
    return dense_over_time(x, nodes[-1], final_activation, nodes_count)
def _define_model(self, ):
    """Create the LSTM autoencoder and its encoder sub-model.

    Stores the compiled autoencoder in `self.model` and the encoder (input
    to the 100-unit bottleneck) in `self.encoder`, then prints the
    autoencoder summary. Reads self.sequence_length, self.num_features,
    self.activation, self.learning_rate and self.loss.
    """
    steps = self.sequence_length
    act = self.activation

    input_series = keras.Input(shape=(steps, self.num_features))

    # Encoder: sequence -> single 100-dimensional code.
    hidden = layers.LSTM(200, activation=act,
                         return_sequences=True)(input_series)
    encoded = layers.LSTM(100,
                          activation=act,
                          dropout=0.2,
                          return_sequences=False)(hidden)
    encoder = keras.Model(input_series, encoded)

    # Decoder: repeat the code for every step and unroll it again.
    hidden = layers.RepeatVector(self.sequence_length)(encoded)
    for units in (100, 200):
        hidden = layers.LSTM(units, activation=act,
                             return_sequences=True)(hidden)
    hidden = layers.TimeDistributed(layers.Dense(16))(hidden)
    decoded = layers.TimeDistributed(layers.Dense(
        self.num_features))(hidden)
    autoencoder = keras.Model(input_series, decoded)

    # NOTE(review): recent Keras optimizers may reject setting both
    # clipnorm and clipvalue - confirm against the targeted version.
    optimizer = keras.optimizers.Adam(learning_rate=self.learning_rate,
                                      clipnorm=1.0,
                                      clipvalue=0.5)
    autoencoder.compile(optimizer=optimizer, loss=self.loss)

    self.model = autoencoder
    self.encoder = encoder
    self.model.summary()
def genmodel():
    """Build and compile the time-distributed CNN + LSTM classifier.

    Returns:
        The compiled Sequential model with a 3-way softmax output.
    """
    per_step = layers.TimeDistributed
    kernel = (2, 2)
    unit_stride = (1, 1)

    cnn = tf.keras.Sequential()

    # (5,9,2,672) is the exact shape that data.mat has when loaded with
    # loadmat. TODO: derive these values from the data instead.
    cnn.add(
        per_step(layers.Conv2D(96,
                               kernel,
                               strides=unit_stride,
                               activation='relu'),
                 input_shape=(672, 9, 5, 2)))
    cnn.add(per_step(layers.MaxPool2D(pool_size=kernel,
                                      strides=unit_stride)))
    cnn.add(per_step(layers.Conv2D(96, kernel, strides=unit_stride)))
    cnn.add(per_step(layers.MaxPool2D(pool_size=kernel,
                                      strides=unit_stride)))
    cnn.add(per_step(layers.Flatten()))

    # Sequence model and classification head.
    cnn.add(layers.LSTM(units=512, input_shape=(10, 512)))
    cnn.add(layers.Dense(units=64))
    cnn.add(layers.Dropout(rate=0.33))
    cnn.add(layers.Dense(units=3))
    cnn.add(layers.Softmax())

    cnn.build()
    cnn.compile(optimizer='Adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return cnn
def Ladder_Net_With_Y_Labels(shape_, shape_2, output_bins_speed,
                             output_bins_av):
    """Build and compile the LSTM ladder network with an auxiliary output.

    Three layer-normalised LSTM encoder stages are followed by three
    decoder stages; decoder stages 2 and 1 receive the previous decoder
    output concatenated with the matching encoder output (skip
    connections). The model has three per-step outputs: binned speed and
    binned angular velocity (softmax, from the last decoder stage) and a
    scalar tanh label (from the deepest encoder stage).

    Args:
        shape_: shape of the input sequence (without batch dimension).
        shape_2: not used by this function.
        output_bins_speed: number of speed bins.
        output_bins_av: number of angular-velocity bins.

    Returns:
        The compiled model with outputs
        [speed_binned, angular_velocity_binned, y_label_output].
    """
    input_ = layers.Input(shape=shape_)

    # Encoder.
    enc1 = layers.LSTM(200, return_sequences=True)(input_)
    norm_enc1 = layers.LayerNormalization()(enc1)
    enc2 = layers.LSTM(200, return_sequences=True)(norm_enc1)
    norm_enc2 = layers.LayerNormalization()(enc2)
    enc3 = layers.LSTM(200, return_sequences=True)(norm_enc2)
    norm_enc3 = layers.LayerNormalization()(enc3)

    # Decoder with skip connections from the encoder.
    dec3 = layers.LSTM(200, return_sequences=True)(norm_enc3)
    norm_dec3 = layers.LayerNormalization()(dec3)
    comb_nenc2_ndec3 = layers.concatenate([norm_enc2, norm_dec3])
    dec2 = layers.LSTM(200, return_sequences=True)(comb_nenc2_ndec3)
    norm_dec2 = layers.LayerNormalization()(dec2)
    comb_nenc1_ndec2 = layers.concatenate([norm_enc1, norm_dec2])
    # The last decoder stage consumes the enc1/dec2 combination. It used to
    # read comb_nenc2_ndec3 again, which left dec2 disconnected.
    dec1 = layers.LSTM(200, return_sequences=True)(comb_nenc1_ndec2)
    norm_dec1 = layers.LayerNormalization()(dec1)

    y_label_output = layers.TimeDistributed(
        layers.Dense(1, activation='tanh', name='y_label'))(norm_enc3)
    speed_binned = layers.TimeDistributed(
        layers.Dense(output_bins_speed,
                     activation='softmax',
                     name='speed_binned'))(norm_dec1)
    angular_velocity_binned = layers.TimeDistributed(
        layers.Dense(output_bins_av,
                     activation='softmax',
                     name='angular_velocity_binned'))(norm_dec1)

    # `learning_rate` is the supported argument name; `lr` is deprecated.
    opt = optimizers.Adam(learning_rate=0.001)
    model = models.Model(
        inputs=input_,
        outputs=[speed_binned, angular_velocity_binned, y_label_output],
        name='ladder_net_with_y_labels')
    model.compile(loss=[
        losses.categorical_crossentropy, losses.categorical_crossentropy,
        losses.MSE
    ],
                  optimizer=opt,
                  metrics=['categorical_accuracy', 'accuracy'])
    return model
def buildModel(batchSize, windowSize):
    """Build and compile the stateful LSTM model for vorticity prediction.

    Reads the module-level `lowFreqRes`, `sourceSize`, `outputSize`,
    `outputRes`, `outputFormat`, `Format`, `VORTICITY` and `INFLOW`.

    Args:
        batchSize: fixed batch size of both inputs (the LSTMs are
            stateful).
        windowSize: number of time steps per input window.

    Returns:
        The compiled keras.Model (MSE loss, RMSprop optimizer).
    """

    def window_input(frame_shape, name):
        return keras.Input(shape=(windowSize, ) + frame_shape,
                           batch_size=batchSize,
                           name=name)

    def flatten_and_project(tensor, width):
        # Flatten each time step to `width` values, then a per-step Dense.
        flat = layers.Reshape((windowSize, width))(tensor)
        return layers.TimeDistributed(layers.Dense(width))(flat)

    vorticityInput = window_input(lowFreqRes, VORTICITY.name)
    inflowInput = window_input(sourceSize, INFLOW.name)

    vorticity_seq = flatten_and_project(vorticityInput, 128)
    inflow_seq = flatten_and_project(inflowInput, 12 * 7)
    sequence = layers.Concatenate(axis=2)([vorticity_seq, inflow_seq])

    # Two stacked LSTMs with a residual connection, then a summarising one.
    first = layers.LSTM(80,
                        activation='tanh',
                        stateful=True,
                        return_sequences=True)(sequence)
    second = layers.LSTM(80, stateful=True, return_sequences=True)(first)
    residual = layers.Add()([first, second])
    summary = layers.LSTM(80, stateful=True,
                          return_sequences=False)(residual)

    # Dense decoder reshaped to the output resolution.
    x = layers.Reshape((1, 1, 80))(summary)
    x = layers.Dense(256, activation='tanh')(x)
    x = layers.Dense(1024, activation='tanh')(x)
    x = layers.Dense(outputSize)(x)
    output = layers.Reshape(outputRes, name=VORTICITY.asOut())(x)

    model = keras.Model(inputs=[vorticityInput, inflowInput],
                        outputs=output)

    if outputFormat == Format.SPATIAL:
        optimizer = keras.optimizers.RMSprop(learning_rate=0.001)
    else:
        optimizer = keras.optimizers.RMSprop()
    model.compile(loss=keras.losses.mse, optimizer=optimizer)
    return model
def enc_block(inp, features_nb):
    """Encoder stage: per-frame max pooling followed by two 3x3 convs.

    Args:
        inp: input tensor.
        features_nb: number of filters of both convolutions.

    Returns:
        The output tensor of the second convolution.
    """
    out = layers.TimeDistributed(layers.MaxPool2D())(inp)
    for _ in range(2):
        conv = layers.Conv2D(features_nb,
                             3,
                             activation='relu',
                             padding='same')
        out = layers.TimeDistributed(conv)(out)
    return out
def dec_block(inp, shortcut, features_nb):
    """Decoder stage: upsample, merge the shortcut, apply two 3x3 convs.

    Args:
        inp: input tensor to upsample.
        shortcut: tensor concatenated with the upsampled input along the
            last axis.
        features_nb: number of filters of both convolutions.

    Returns:
        The output tensor of the second convolution.
    """
    upsampled = layers.TimeDistributed(layers.UpSampling2D())(inp)
    out = layers.concatenate([upsampled, shortcut], axis=-1)
    for _ in range(2):
        conv = layers.Conv2D(features_nb,
                             3,
                             activation='relu',
                             padding='same')
        out = layers.TimeDistributed(conv)(out)
    return out
def catNetwork(trackShape, trackCategories):
    '''
    Track category classifier taking input with the same shape as the tag
    network, using recurrent layers. Outputs are returned per event as
    shape (nBatch, nTracks, nCategories).

    Layer stack, as summarised by the original author for an input of
    shape (100, 18) and 4 categories:

        Layer (type)                  Output Shape       Param #
        input_1 (InputLayer)          (None, 100, 18)    0
        mask (Masking)                (None, 100, 18)    0
        td_dense1 (TimeDistributed)   (None, 100, 32)    608
        track_gru (GRU)               (None, 100, 32)    6240
        noseq_gru (GRU)               (None, 100, 32)    6240
        time_distributed_1            (None, 100, 32)    1056
        time_distributed_2            (None, 100, 32)    1056
        outputCat (Dense)             (None, 100, 4)     132

        Total params: 15,332 (all trainable)

    :param trackShape: shape of the track input (without batch dimension)
    :param trackCategories: number of units of the softmax output
    :return: the uncompiled Model
    '''
    trackInput = Klayers.Input(trackShape)

    # Entries equal to -999 mark padded tracks.
    tracks = Klayers.Masking(mask_value=-999, name='mask')(trackInput)

    embed = Klayers.Dense(32, activation='relu')
    tracks = Klayers.TimeDistributed(embed, name='td_dense1')(tracks)

    tracks = Klayers.GRU(32,
                         activation='relu',
                         return_sequences=True,
                         name='track_gru')(tracks)
    tracks = Klayers.GRU(32,
                         activation='relu',
                         return_sequences=True,
                         recurrent_dropout=0.5,
                         name='noseq_gru')(tracks)

    # Two per-track dense layers before the classification layer.
    for dense_name in ('out_dense_1', 'out_dense_4'):
        dense = Klayers.Dense(32, activation='relu', name=dense_name)
        tracks = Klayers.TimeDistributed(dense)(tracks)

    outputCat = Klayers.Dense(trackCategories,
                              activation='softmax',
                              name='outputCat')(tracks)
    return Model(inputs=trackInput, outputs=outputCat)
def build_function(max_seq_length, dropout):
    """Build the BERT-based span extraction model (uncompiled).

    Three integer inputs (input_ids, input_mask, segment_ids) feed a
    BertLayer. From its output the model derives per-token start and end
    scores (sigmoid) and a per-span score computed from `span_matrix_func`
    followed by a dense/dropout/dense stack. All three are flattened.

    Args:
        max_seq_length: length of every input sequence.
        dropout: dropout rate applied inside the span scoring stack.

    Returns:
        The uncompiled Model with outputs [flat_start, flat_end, flat_span].
    """
    # The shape must be a tuple: `(max_seq_length)` without the trailing
    # comma is just an int.
    seq_shape = (max_seq_length, )
    input_ids = layers.Input(shape=seq_shape, name="input_ids")
    input_mask = layers.Input(shape=seq_shape, name="input_mask")
    segment_ids = layers.Input(shape=seq_shape, name="segment_ids")
    bert_input = [input_ids, input_mask, segment_ids]

    gs_folder_bert = "bert_en_uncased_L-12_H-768_A-12_3"
    bert_output = BertLayer(path_to_bert=gs_folder_bert)(bert_input)

    # Per-token start / end scores.
    start_predictions_layer = layers.Dense(1,
                                           activation='sigmoid',
                                           name="start_prediction_layer")
    end_predictions_layer = layers.Dense(1,
                                         activation='sigmoid',
                                         name="end_prediction_layer")
    start_logits = layers.TimeDistributed(start_predictions_layer,
                                          name="start_logits")(bert_output)
    end_logits = layers.TimeDistributed(end_predictions_layer,
                                        name="end_logits")(bert_output)

    # Per-span scores from the pairwise span matrix.
    span_matrix = layers.Lambda(span_matrix_func,
                                name="span_matrix")(bert_output)
    span_layer_1 = layers.Dense(768 * 2,
                                input_shape=(max_seq_length,
                                             max_seq_length, 768 * 2),
                                activation='relu',
                                name="span_dense_1")(span_matrix)
    span_drop_layer_1 = layers.Dropout(dropout,
                                       input_shape=(max_seq_length,
                                                    max_seq_length,
                                                    768 * 2),
                                       name="span_drop_1")(span_layer_1)
    span_logits = layers.Dense(1,
                               input_shape=(max_seq_length, max_seq_length,
                                            768 * 2),
                               name="span_dense_3",
                               activation='sigmoid')(span_drop_layer_1)

    flat_span = layers.Flatten(name="span_flat")(span_logits)
    flat_start = layers.Flatten(name="start_flat")(start_logits)
    flat_end = layers.Flatten(name="end_flat")(end_logits)

    outputs = [flat_start, flat_end, flat_span]
    model = models.Model(inputs=bert_input, outputs=outputs)
    return model
def create_feature_output(name, input_layer):
    """Attach a per-step dense head producing one ReLU value per step.

    Args:
        name: name given to the final TimeDistributed layer.
        input_layer: tensor the head is attached to.

    Returns:
        The output tensor of the named final layer.
    """
    hidden = layers.Dense(units=64, activation="relu")
    feature = layers.TimeDistributed(hidden)(input_layer)

    feature = layers.TimeDistributed(layers.Dropout(rate=0.2))(feature)

    scalar_out = layers.Dense(units=1, activation="relu")
    return layers.TimeDistributed(scalar_out, name=name)(feature)
def __init__(self, units, dropout=0.1, l2=None, **kwargs):
    """Create the sub-layers of the time self-attention layer.

    Args:
        units: number of units of the value projection.
        dropout: rate of the dropout sub-layer.
        l2: L2 factor for the kernels of both dense sub-layers; None
            disables kernel regularisation.
        **kwargs: forwarded to the parent layer constructor.
    """
    super(TimeSelfAttention, self).__init__(**kwargs)

    def kernel_regularizer():
        # regularizers.l2(None) does not mean "no penalty", so None has to
        # be mapped to "no regularizer" explicitly.
        return regularizers.l2(l2) if l2 is not None else None

    self.dropout = layers.Dropout(dropout)
    # One attention score per time step.
    self.attention = layers.TimeDistributed(
        layers.Dense(units=1, kernel_regularizer=kernel_regularizer()))
    # Per-step value projection.
    self.value_layer = layers.TimeDistributed(
        layers.Dense(units=units,
                     activation='relu',
                     kernel_regularizer=kernel_regularizer()))
def dense_unit(x, units, dropout=0.45):
    """Apply a per-step ReLU dense layer, optionally followed by dropout.

    Args:
        x: input tensor.
        units: number of units of the dense layer.
        dropout: dropout rate; dropout is only added when it is > 0.

    Returns:
        The output tensor.
    """
    # NOTE(review): `regularizers` is reached through the `KL` alias here;
    # confirm that the imported layers module actually exposes it.
    weight_penalty = KL.regularizers.l2(0.001)
    dense = KL.Dense(units,
                     activation='relu',
                     kernel_regularizer=weight_penalty)
    out = KL.TimeDistributed(dense)(x)

    if not dropout > 0:
        return out
    return KL.TimeDistributed(KL.Dropout(dropout))(out)
def GRUEncoder(X,
               gru_model_path,
               k_layers=1,
               k_hidden=32,
               k_dim=3,
               k_class=15,
               l2=0.001,
               dropout=1e-6,
               lr=0.006,
               seed=42):
    '''
    GRU Encoder: classification after supervised dim reduction

    Transfer learning: a pretrained GRU model is loaded from
    gru_model_path and frozen, and its layer at index 1 is reused as the
    hidden layer. Only the dimensionality-reduction layer and the output
    layer created here are trained.

    Parameters
    ----------
    X: tensor (batch x time x feat); only X.shape[-1] is read
    gru_model_path: path of the saved pretrained GRU model
    k_layers: int, number of hidden layers (not used by this function)
    k_hidden: int, number of units (not used by this function)
    k_dim: int, reduce to k_dim
    k_class: int, number of classes
    l2: float (not used by this function)
    dropout: float (not used by this function)
    lr: float, Adam learning rate
    seed: int, passed to tf.random.set_seed

    Returns
    -------
    model: compiled model
    '''
    tf.random.set_seed(seed)

    # Pretrained GRU model, frozen so that only the new layers are tuned.
    gru_model = keras.models.load_model(gru_model_path)
    gru_model.trainable = False

    # For masking, refer:
    #   https://www.tensorflow.org/guide/keras/masking_and_padding
    #   https://gist.github.com/ragulpr/601486471549cfa26fe4af36a1fade21
    input_layers = [
        layers.Masking(mask_value=0.0, input_shape=[None, X.shape[-1]])
    ]
    hidden_layers = [gru_model.layers[1]]
    DR_layer = [
        layers.TimeDistributed(layers.Dense(k_dim, activation='linear'))
    ]
    output_layer = [
        layers.TimeDistributed(layers.Dense(k_class, activation='softmax'))
    ]

    # `lr` is a deprecated alias that current Keras optimizers reject;
    # `learning_rate` is the supported argument name.
    optimizer = keras.optimizers.Adam(learning_rate=lr)
    model = keras.models.Sequential(input_layers + hidden_layers +
                                    DR_layer + output_layer)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['sparse_categorical_accuracy'])
    return model
def _build_model(self, x, y):
    """Assemble and compile the transfer learning model.

    Args:
      - x: temporal feature (not read by this method)
      - y: labels; a 2-D y selects a dense (MLP) body for one-shot
           prediction, a 3-D y a recurrent body for online prediction

    Returns:
      - model: transfer learning model (uncompiled when self.task is
               neither 'classification' nor 'regression')
    """
    dim_y = len(y.shape)

    model = tf.keras.Sequential()
    adam = tf.keras.optimizers.Adam(learning_rate=self.learning_rate,
                                    beta_1=0.9,
                                    beta_2=0.999,
                                    amsgrad=False)

    # Hidden body: n_layer - 1 layers of the kind matching the label rank.
    if dim_y == 2:
        for _ in range(self.n_layer - 1):
            model.add(layers.Dense(self.h_dim, activation='sigmoid'))
    elif dim_y == 3:
        for _ in range(self.n_layer - 1):
            model = rnn_sequential(model,
                                   self.model_type,
                                   self.h_dim,
                                   return_seq=True)

    def add_output(net, activation):
        # One unit per label column; applied per step for 3-D labels.
        if dim_y == 3:
            net.add(
                layers.TimeDistributed(
                    layers.Dense(y.shape[-1], activation=activation)))
        elif dim_y == 2:
            net.add(layers.Dense(y.shape[-1], activation=activation))

    if self.task == 'classification':
        add_output(model, 'sigmoid')
        model.compile(loss=binary_cross_entropy_loss, optimizer=adam)
    elif self.task == 'regression':
        add_output(model, 'linear')
        model.compile(loss=mse_loss, optimizer=adam, metrics=['mse'])

    return model
def get_deepspeech(input_dim,
                   output_dim,
                   context=7,
                   units=1024,
                   dropouts=(0.1, 0.1, 0),
                   random_state=1) -> keras.Model:
    """
    Return the graph definition of the DeepSpeech model.

    The architecture is kept simple so that it can be serialized easily;
    default parameters are overridden only where needed. Seeds both NumPy
    and TensorFlow with `random_state` as a side effect.

    Args:
        input_dim: number of features per time step.
        output_dim: number of output units per time step.
        context: number of frames of context on each side of a frame.
        units: width of the convolutional, dense and recurrent layers.
        dropouts: three dropout rates, one per feed-forward stage.
        random_state: seed for np.random and tf.random.

    Returns:
        The uncompiled keras.Model named 'DeepSpeech', emitting logits at
        every time step.

    Reference:
        "Deep Speech: Scaling up end-to-end speech recognition."
        (https://arxiv.org/abs/1412.5567)
    """
    np.random.seed(random_state)
    tf.random.set_seed(random_state)

    def clipped_relu_dropout(tensor, rate):
        # ReLU clipped at 20 followed by dropout as regularization.
        tensor = layers.ReLU(max_value=20)(tensor)
        return layers.Dropout(rate=rate)(tensor)

    # Create the model under CPU scope to avoid OOM errors while
    # concatenating a large distributed model.
    with tf.device('/cpu:0'):
        # Input tensor [batch, time, features].
        input_tensor = layers.Input([None, input_dim], name='X')

        # Add a 4th dimension: [batch, time, frequency, channel].
        x = layers.Lambda(keras.backend.expand_dims,
                          arguments=dict(axis=-1))(input_tensor)
        # Zero-pad the time dimension, then convolve over a window of
        # 2 * context + 1 frames spanning all features.
        x = layers.ZeroPadding2D(padding=(context, 0))(x)
        receptive_field = (2 * context + 1, input_dim)
        x = layers.Conv2D(filters=units, kernel_size=receptive_field)(x)
        # Squeeze back to a 3-D tensor.
        x = layers.Lambda(keras.backend.squeeze, arguments=dict(axis=2))(x)
        x = clipped_relu_dropout(x, dropouts[0])

        # The 2nd and 3rd fully connected layers extract features from
        # the narrow context of the convolutional layer.
        for stage in (1, 2):
            x = layers.TimeDistributed(layers.Dense(units))(x)
            x = clipped_relu_dropout(x, dropouts[stage])

        # The recurrent layer provides a broader context.
        x = layers.Bidirectional(layers.LSTM(units, return_sequences=True),
                                 merge_mode='sum')(x)

        # Return logits over the characters at each time step; the CTC
        # computation is then more stable than with a softmax.
        output_tensor = layers.TimeDistributed(layers.Dense(output_dim))(x)

        model = keras.Model(input_tensor, output_tensor, name='DeepSpeech')
    return model
def build_fpn_mask_graph(
        rois,  # detection boxes, normalized [batch, num_rois, (y1, x1, y2, x2)]
        feature_maps,  # FPN features after the backbone: [P2, P3, P4, P5]
        image_meta,
        pool_size,
        num_classes,
        batch_size,
        train_bn=True):
    """Build the mask head on top of the feature pyramid.

    Args:
        rois: detection boxes in normalized coordinates.
        feature_maps: list of FPN feature maps.
        image_meta: image meta data passed to the ROI align layer.
        pool_size: height and width of the pooled ROI features.
        num_classes: number of mask channels (one per class).
        batch_size: batch size passed to the ROI align layer.
        train_bn: `training` argument given to the batch-norm layers.

    Returns:
        Masks [batch, roi_count, height, width, num_classes]
    """
    # ROIAlign pools every ROI to the same size.
    # Shape: [batch, boxes, pool_height, pool_width, channels]
    roi_align = PyramidROIAlign(batch_size, [pool_size, pool_size],
                                name="roi_align_mask")
    x = roi_align([rois, image_meta] + feature_maps)

    # Four identical conv / batch-norm / ReLU stages.
    stage_names = (
        ("mrcnn_mask_conv1", 'mrcnn_mask_bn1'),
        ("mrcnn_mask_conv2", 'mrcnn_mask_bn2'),
        ("mrcnn_mask_conv3", 'mrcnn_mask_bn3'),
        ("mrcnn_mask_conv4", 'mrcnn_mask_bn4'),
    )
    for conv_name, bn_name in stage_names:
        x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
                               name=conv_name)(x)
        x = KL.TimeDistributed(KL.BatchNormalization(),
                               name=bn_name)(x, training=train_bn)
        x = KL.Activation('relu')(x)

    # Upsample with a transposed convolution (stride 2).
    upsample = KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu")
    x = KL.TimeDistributed(upsample, name="mrcnn_mask_deconv")(x)

    # A 1x1 convolution replaces a fully connected layer.
    mask_conv = KL.Conv2D(num_classes, (1, 1),
                          strides=1,
                          activation="sigmoid")
    return KL.TimeDistributed(mask_conv, name="mrcnn_mask")(x)
def Moritz_Couzin_Tourus(shape_, output_bins_speed, output_bins_av):
    """Build and compile a single-LSTM model with two binned outputs.

    Args:
        shape_: shape of the input sequence (without batch dimension).
        output_bins_speed: number of speed bins.
        output_bins_av: number of angular-velocity bins.

    Returns:
        The compiled model with per-step softmax outputs
        [speed_binned, angular_velocity_binned].
    """
    input_ = layers.Input(shape=shape_)

    hidden = layers.LSTM(10, return_sequences=True)(input_)
    normed = layers.LayerNormalization()(hidden)

    speed_dense = layers.Dense(output_bins_speed,
                               activation='softmax',
                               name='speed_binned')
    speed_binned = layers.TimeDistributed(speed_dense)(normed)

    av_dense = layers.Dense(output_bins_av,
                            activation='softmax',
                            name='angular_velocity_binned')
    angular_velocity_binned = layers.TimeDistributed(av_dense)(normed)

    model = models.Model(inputs=input_,
                         outputs=[speed_binned, angular_velocity_binned],
                         name='moritz_couzin_tourus')

    # Both heads are trained with categorical cross-entropy.
    opt = optimizers.Adam(learning_rate=0.0001)
    head_losses = [
        losses.categorical_crossentropy, losses.categorical_crossentropy
    ]
    model.compile(loss=head_losses,
                  optimizer=opt,
                  metrics=['categorical_accuracy'])
    return model
def __init__(self,
             num_students,
             num_skills,
             max_sequence_length,
             embed_dim=200,
             hidden_units=100,
             dropout_rate=0.2):
    """Define the graph of the knowledge-tracing model ("DKTModel").

    Input 'x' (max_sequence_length, num_skills * 2) is projected by a
    frozen random Dense layer, masked, fed to an LSTM and turned into
    per-skill sigmoid predictions. These are multiplied element-wise with
    input 'q' (max_sequence_length, num_skills) and reduced over the skill
    axis by a frozen Dense layer with an all-ones kernel.

    Args:
        num_students: not used by this constructor.
        num_skills: number of skills.
        max_sequence_length: number of time steps of both inputs.
        embed_dim: size of the frozen input projection.
        hidden_units: number of LSTM units.
        dropout_rate: dropout applied to the LSTM outputs.
    """
    x = tf.keras.Input(shape=(max_sequence_length, num_skills * 2),
                       name='x')
    q = tf.keras.Input(shape=(max_sequence_length, num_skills), name='q')

    # --- layers -----------------------------------------------------------
    # Frozen random projection used in place of a trainable embedding.
    embedding = layers.Dense(
        embed_dim,
        trainable=False,
        kernel_initializer=tf.keras.initializers.RandomNormal(seed=777),
        input_shape=(None, max_sequence_length, num_skills * 2))
    input_mask = layers.Masking(mask_value=0,
                                input_shape=(max_sequence_length,
                                             embed_dim))
    recurrent = layers.LSTM(hidden_units, return_sequences=True)
    step_dropout = layers.TimeDistributed(layers.Dropout(dropout_rate))
    skill_probs = layers.TimeDistributed(
        layers.Dense(num_skills, activation='sigmoid'))
    # HACK: the shape of q does not fit a TimeDistributed operation, so a
    # plain element-wise Multiply is used.
    select_skill = layers.Multiply()
    # Frozen Dense with an all-ones kernel used to sum over the skill axis.
    sum_skills = layers.Dense(
        1,
        trainable=False,
        kernel_initializer=tf.keras.initializers.constant(value=1),
        input_shape=(None, max_sequence_length, num_skills))
    # A Reshape layer did not work in the graph, hence this final mask.
    output_mask = layers.TimeDistributed(layers.Masking(
        mask_value=0, input_shape=(None, max_sequence_length, 1)),
                                         name='outputs')

    # --- graph ------------------------------------------------------------
    projected = embedding(x)
    masked = input_mask(projected)
    hidden = recurrent(masked)
    dropped = step_dropout(hidden)
    y_pred = skill_probs(dropped)
    y_pred = select_skill([y_pred, q])
    # HACK: reducing without a layer (tf.reduce_*) might be faster.
    y_pred = sum_skills(y_pred)
    outputs = output_mask(y_pred)

    # KEEP: another approach for the final mask would be to propagate the
    # initial mask (masked._keras_mask), e.g. through tf.boolean_mask.
    super().__init__(inputs=[x, q], outputs=outputs, name="DKTModel")