from keras.layers import (Activation, BatchNormalization, Dense, Dropout,
                          Flatten, Input, MaxPooling2D, TimeDistributed)
from keras.models import Model


def build(size, seq_len, learning_rate, optimizer_class, initial_weights,
          cnn_class, pre_weights, lstm_conf, cnn_train_type,
          classes=1, dropout=0.0):
    """Builds a CNN + ConvLSTM video classifier from the given components."""
    input_layer = Input(shape=(seq_len, size, size, 3))

    if cnn_train_type != 'train':
        if cnn_class.__name__ == "ResNet50":
            cnn = cnn_class(weights=pre_weights, include_top=False,
                            input_shape=(size, size, 3))
        else:
            cnn = cnn_class(weights=pre_weights, include_top=False)
    else:
        cnn = cnn_class(include_top=False)

    # Control trainability of the CNN backbone.
    if cnn_train_type == 'static':
        for layer in cnn.layers:
            layer.trainable = False
    if cnn_train_type == 'retrain':
        for layer in cnn.layers:
            layer.trainable = True

    cnn = TimeDistributed(cnn)(input_layer)

    # The ResNet output shape is (1, 1, 2048) and needs to be reshaped
    # for the ConvLSTM filters:
    # if cnn_class.__name__ == "ResNet50":
    #     cnn = Reshape((seq_len, 4, 4, 128),
    #                   input_shape=(seq_len, 1, 1, 2048))(cnn)

    lstm = lstm_conf[0](**lstm_conf[1])(cnn)
    lstm = MaxPooling2D(pool_size=(2, 2))(lstm)

    flat = Flatten()(lstm)
    flat = BatchNormalization()(flat)
    flat = Dropout(dropout)(flat)

    linear = Dense(1000)(flat)
    relu = Activation('relu')(linear)
    linear = Dense(256)(relu)
    linear = Dropout(dropout)(linear)
    relu = Activation('relu')(linear)
    linear = Dense(10)(relu)
    linear = Dropout(dropout)(linear)
    relu = Activation('relu')(linear)

    # Binary head by default; switch to softmax for multi-class.
    activation = 'sigmoid'
    loss_func = 'binary_crossentropy'
    if classes > 1:
        activation = 'softmax'
        loss_func = 'categorical_crossentropy'
    predictions = Dense(classes, activation=activation)(relu)

    model = Model(inputs=input_layer, outputs=predictions)
    optimizer = optimizer_class[0](lr=learning_rate, **optimizer_class[1])
    model.compile(optimizer=optimizer, loss=loss_func, metrics=['acc'])
    model.summary()
    return model
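# Hedged usage sketch (not from the original source; all values illustrative):
# a frozen ResNet50 backbone feeding a ConvLSTM2D head for binary video
# classification. Loading 'imagenet' weights downloads them on first use, and
# older Keras versions return (1, 1, 2048) from ResNet50 with include_top=False,
# which would need the Reshape commented out above.
from keras.applications.resnet50 import ResNet50
from keras.layers import ConvLSTM2D
from keras.optimizers import Adam

model = build(size=224, seq_len=10, learning_rate=1e-4,
              optimizer_class=(Adam, {}), initial_weights=None,
              cnn_class=ResNet50, pre_weights='imagenet',
              lstm_conf=(ConvLSTM2D, dict(filters=256, kernel_size=(3, 3),
                                          padding='same',
                                          return_sequences=False)),
              cnn_train_type='static', classes=1, dropout=0.1)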
from keras.layers import Conv2D, MaxPooling2D, TimeDistributed


def extract_layer(input_tensor=None):
    """Applies the first two VGG-style conv blocks frame-wise over a sequence."""
    # Block 1
    x = TimeDistributed(
        Conv2D(64, (3, 3), activation='relu', padding='same',
               name='block1_conv1', trainable=True))(input_tensor)
    x = TimeDistributed(
        Conv2D(64, (3, 3), activation='relu', padding='same',
               name='block1_conv2', trainable=True))(x)
    x = TimeDistributed(
        MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool'))(x)
    # Block 2
    x = TimeDistributed(
        Conv2D(128, (3, 3), activation='relu', padding='same',
               name='block2_conv1', trainable=True))(x)
    x = TimeDistributed(
        Conv2D(128, (3, 3), activation='relu', padding='same',
               name='block2_conv2', trainable=True))(x)
    x = TimeDistributed(
        MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool'))(x)
    return x
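# Hedged usage sketch (shapes illustrative): each 224x224 RGB frame of a
# 10-step sequence is halved twice spatially, ending with 128 channels.
from keras.layers import Input

frames = Input(shape=(10, 224, 224, 3))
features = extract_layer(frames)  # -> (batch, 10, 56, 56, 128)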
from keras.layers import (Activation, BatchNormalization, Conv2D, Dense,
                          Flatten, Permute, TimeDistributed)
from keras.regularizers import l2


def dense_cnn(input, nclass):
    """DenseNet-style feature extractor with a per-column softmax head.

    dense_block and transition_block are project-local helpers that each
    return (tensor, updated_filter_count).
    """
    _dropout_rate = 0.2
    _weight_decay = 1e-4
    _nb_filter = 64

    # conv 64, 5x5, stride 2
    x = Conv2D(_nb_filter, (5, 5), strides=(2, 2),
               kernel_initializer='he_normal', padding='same',
               use_bias=False, kernel_regularizer=l2(_weight_decay))(input)

    # 64 + 8 * 8 = 128
    x, _nb_filter = dense_block(x, 8, _nb_filter, 8, None, _weight_decay)
    # 128
    x, _nb_filter = transition_block(x, 128, True, _dropout_rate, 2,
                                     _weight_decay)
    # 128 + 8 * 8 = 192
    x, _nb_filter = dense_block(x, 8, _nb_filter, 8, None, _weight_decay)
    # 192 -> 128
    x, _nb_filter = transition_block(x, 128, False, _dropout_rate, 1,
                                     _weight_decay)
    # 128 + 8 * 8 = 192
    x, _nb_filter = dense_block(x, 8, _nb_filter, 8, None, _weight_decay)

    x = BatchNormalization(axis=-1, epsilon=1.1e-5)(x)
    x = Activation('relu')(x)

    # Treat the width axis as time steps and classify each column.
    x = Permute((2, 1, 3), name='permute')(x)
    x = TimeDistributed(Flatten(), name='flatten')(x)
    y_pred = Dense(nclass, name='out1', activation='softmax')(x)

    # basemodel = Model(inputs=input, outputs=y_pred)
    # basemodel.summary()
    return y_pred
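# Hedged usage sketch (input size and class count illustrative; requires the
# project-local dense_block/transition_block helpers to be in scope):
from keras.layers import Input
from keras.models import Model

line_image = Input(shape=(32, 280, 1))  # grayscale text-line image
ocr_model = Model(inputs=line_image,
                  outputs=dense_cnn(line_image, nclass=5000))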
import numpy as np
from keras import backend as K
from keras.layers import (Activation, Bidirectional, Dense, Embedding, LSTM,
                          Lambda, TimeDistributed)
from keras.models import Sequential

# word2vec, parameters and simple_context are module-level names in this
# project.


def bidirectional_model():
    """Headline model: stacked bidirectional LSTMs over shared word2vec
    embeddings, followed by a simple_context attention layer."""
    length_vocab, embedding_size = word2vec.shape
    model = Sequential()
    model.add(
        Embedding(length_vocab, embedding_size,
                  input_length=parameters.max_length,
                  weights=[word2vec], mask_zero=True,
                  name='embedding_layer'))

    for i in range(parameters.rnn_layers):
        bilstm = Bidirectional(
            LSTM(parameters.rnn_size, return_sequences=True,
                 name='bilstm_layer_%d' % (i + 1)))
        model.add(bilstm)

    model.add(
        Lambda(simple_context,
               mask=lambda inputs, mask: mask[:, parameters.max_len_desc:],
               output_shape=lambda input_shape: (
                   input_shape[0], parameters.max_len_head,
                   2 * (parameters.rnn_size - parameters.activation_rnn_size)),
               name='simple_context_layer'))

    vocab_size = word2vec.shape[0]
    model.add(TimeDistributed(Dense(vocab_size,
                                    name='time_distributed_layer')))
    model.add(Activation('softmax', name='activation_layer'))

    model.compile(loss='categorical_crossentropy', optimizer='adam')
    K.set_value(model.optimizer.lr, np.float32(parameters.learning_rate))
    model.summary()
    return model
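# Hedged usage sketch: 'word2vec', 'parameters' and 'simple_context' come
# from the original project, so the call is shown commented. Typically
# parameters.max_length == parameters.max_len_desc + parameters.max_len_head.
# summarizer = bidirectional_model()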
from keras.layers import GRU


def create_model():
    """Like bidirectional_model(), but with stacked unidirectional GRUs."""
    length_vocab, embedding_size = word2vec.shape
    print("shape of word2vec matrix ", word2vec.shape)

    model = Sequential()
    model.add(
        Embedding(length_vocab, embedding_size,
                  input_length=parameters.max_length,
                  weights=[word2vec], mask_zero=True,
                  name='embedding_layer'))

    for i in range(parameters.rnn_layers):
        gru = GRU(parameters.rnn_size, return_sequences=True,
                  name='gru_layer_%d' % (i + 1))
        model.add(gru)

    model.add(
        Lambda(simple_context,
               mask=lambda inputs, mask: mask[:, parameters.max_len_desc:],
               output_shape=output_shape_simple_context_layer,
               name='simple_context_layer'))

    vocab_size = word2vec.shape[0]
    model.add(TimeDistributed(Dense(vocab_size,
                                    name='time_distributed_layer')))
    model.add(Activation('softmax', name='activation_layer'))

    model.compile(loss='categorical_crossentropy', optimizer='adam')
    K.set_value(model.optimizer.lr, np.float32(parameters.learning_rate))
    model.summary()
    return model
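# Hedged usage sketch: same project-level dependencies as above; training
# arrays are illustrative (X: padded desc+head token ids of length
# parameters.max_length, Y: one-hot next-word targets for the head):
# model = create_model()
# model.fit(X, Y, batch_size=64, epochs=10)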
import tensorflow as tf
from tensorflow.keras import regularizers
from tensorflow.keras.layers import TimeDistributed

# embed_regularize, on_lstm_layer and tied_weight are project-local modules.
import embed_regularize
import on_lstm_layer
import tied_weight


class RNNModel(tf.keras.Model):
    # Enclosing class assumed; base class inferred from the tf.keras usage
    # in the original snippet.

    def __init__(self, model, layer_num, batch_size, chunk_size, training,
                 vocab_size, embedding_dim, rnn_units, embed_dropout,
                 input_dropout, dropout, rnn_dropout, w_dropout, w_decay,
                 tied):
        """Initializes the parameters of the model.

        Args:
          model: Specify model of RNN layer ('on_lstm', 'lstm' or 'gru').
          layer_num: Number of RNN layers.
          batch_size: Batch size used by the stateful RNN layers.
          chunk_size: Number of units per chunk in the RNN layers.
          training: Whether or not in the training mode (whether or not to
            apply the dropout).
          vocab_size: Size of vocabulary.
          embedding_dim: Size of word embedding.
          rnn_units: Number of hidden units per RNN layer.
          embed_dropout: Dropout to remove words from embedding layer.
          input_dropout: Dropout for input embedding layers.
          dropout: Dropout between RNN layers.
          rnn_dropout: Recurrent dropout.
          w_dropout: Amount of weight dropout to apply to the RNN
            hidden-to-hidden matrix.
          w_decay: Weight decay applied to all weights.
          tied: Whether or not to tie the weights between the embedding
            layer and the decoder layer.
        """
        super(RNNModel, self).__init__()
        self.model = model
        self.layer_num = layer_num
        self.batch_size = batch_size
        self.chunk_size = chunk_size
        self.training = training
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.rnn_units = rnn_units
        self.dropout = dropout
        self.rnn_dropout = rnn_dropout
        self.w_dropout = w_dropout
        self.input_dropout = input_dropout
        self.regularizer = regularizers.l2(w_decay)
        self.dropout_embedding = embed_regularize.DropoutEmbedding(
            training=self.training,
            input_dim=vocab_size,
            output_dim=embedding_dim,
            embeddings_regularizer=self.regularizer,
            dropout=embed_dropout)
        self.tied = tied

        self.rnn_layer = [None] * layer_num
        if self.tied:
            # With tied weights the last RNN layer must emit embedding_dim
            # units so its output can share the embedding matrix.
            self.rnn_layer_sizes = ([rnn_units] * (layer_num - 1)
                                    + [embedding_dim])
            self.tied_to = self.dropout_embedding
        else:
            self.rnn_layer_sizes = [rnn_units] * layer_num
            self.tied_to = None

        rnn_model_args = dict(
            return_sequences=True,
            return_state=True,
            stateful=True,
            kernel_regularizer=self.regularizer,
            recurrent_regularizer=self.regularizer,
            dropout=w_dropout,
            recurrent_dropout=rnn_dropout,
            batch_input_shape=(self.batch_size, None, None))

        if self.model == 'on_lstm':
            for i in range(layer_num):
                self.rnn_layer[i] = on_lstm_layer.OrderedNeuronLSTM(
                    units=self.rnn_layer_sizes[i], chunk_size=chunk_size,
                    **rnn_model_args)
        if self.model == 'lstm':
            for i in range(layer_num):
                self.rnn_layer[i] = tf.keras.layers.LSTM(
                    units=self.rnn_layer_sizes[i], **rnn_model_args)
        if self.model == 'gru':
            for i in range(layer_num):
                self.rnn_layer[i] = tf.keras.layers.GRU(
                    units=self.rnn_layer_sizes[i], **rnn_model_args)

        self.output_layer = TimeDistributed(
            tied_weight.TiedDense(units=vocab_size, tied_to=self.tied_to,
                                  kernel_regularizer=self.regularizer))
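# Hedged usage sketch (hyperparameter values illustrative; requires the
# project-local modules imported above):
# lm = RNNModel(model='on_lstm', layer_num=3, batch_size=20, chunk_size=8,
#               training=True, vocab_size=10000, embedding_dim=400,
#               rnn_units=1150, embed_dropout=0.1, input_dropout=0.4,
#               dropout=0.25, rnn_dropout=0.25, w_dropout=0.5,
#               w_decay=1.2e-6, tied=True)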
import tensorflow as tf
from tensorflow.keras import optimizers
from tensorflow.keras.layers import (Activation, BatchNormalization,
                                     Convolution2D, Dense, Dropout, Flatten,
                                     MaxPooling2D, TimeDistributed)
from tensorflow.keras.models import Sequential


def build_model():
    """ Function that builds the CNN + LSTM network """
    with tf.name_scope('CNN_LSTM'):
        model = Sequential()
        with tf.name_scope('Conv1'):
            model.add(
                TimeDistributed(Convolution2D(16, (5, 5), padding='same',
                                              strides=(2, 2)),
                                input_shape=(15, 16, 3200, 1), name='Conv1'))
            model.add(BatchNormalization())
            model.add(Activation('relu'))
        with tf.name_scope('Conv2'):
            model.add(
                TimeDistributed(Convolution2D(32, (5, 5), padding='same',
                                              strides=(1, 1), name='Conv2')))
            model.add(Activation('relu'))
        with tf.name_scope('Pooling1'):
            model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
        with tf.name_scope('Conv3'):
            model.add(
                TimeDistributed(Convolution2D(32, (5, 5), padding='same',
                                              strides=(1, 1), name='Conv3')))
            model.add(Activation('relu'))
        with tf.name_scope('Conv4'):
            model.add(
                TimeDistributed(Convolution2D(32, (5, 5), padding='same',
                                              strides=(1, 1), name='Conv4')))
            model.add(Activation('relu'))
        with tf.name_scope('Pooling2'):
            model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
        with tf.name_scope('FC1'):
            model.add(TimeDistributed(Flatten(), name='FC1'))
            model.add(Activation('relu'))
            model.add(TimeDistributed(Dropout(0.25)))
        with tf.name_scope('FC2'):
            model.add(TimeDistributed(Dense(256), name='FC2'))
            model.add(Activation('relu'))
            model.add(TimeDistributed(Dropout(0.25)))
        with tf.name_scope('LSTM'):
            # CuDNNLSTM is GPU-only and available in TF 1.x.
            model.add(tf.keras.layers.CuDNNLSTM(64, return_sequences=False))
            model.add(Dropout(0.5))
        with tf.name_scope('OutputLayer'):
            model.add(Dense(2, activation='softmax'))
    with tf.name_scope('Optimizer'):
        optimizer = optimizers.Adam(lr=1e-4, decay=1e-5)
    with tf.name_scope('Loss'):
        model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                      metrics=['accuracy'])
    return model
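# Hedged usage sketch: inputs are 15-step sequences of 16x3200 single-channel
# frames per the input_shape above; labels are one-hot over two classes
# (array names illustrative):
model = build_model()
# model.fit(x, y, batch_size=8, epochs=10)  # x: (N, 15, 16, 3200, 1), y: (N, 2)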
import tensorflow as tf
from tensorflow.keras.layers import Embedding, GRUCell, RNN, TimeDistributed

# HierarchicalRNNCell, RankingLoss, custom_loss and custom_acc are
# project-local.


def _build_model(self):
    with tf.name_scope("inputs"):
        user_input = tf.keras.Input(shape=(self.history_length, 3))
        label_input = tf.keras.Input(shape=(self.history_length, 1))
        mask_input = tf.keras.Input(shape=(self.history_length, 1))

    with tf.name_scope("layers"):
        embedding = Embedding(input_dim=self.vocab_size,
                              output_dim=self.embedding_dim,
                              weights=[self.embedding_mx],
                              trainable=False)
        session_cells = [
            GRUCell(units=self.num_units, name="session_rnn_01"),
            GRUCell(units=self.num_units, name="session_rnn_02")
            # GRUCell(units=self.num_units, name="session_rnn_03")
        ]
        user_cells = [
            GRUCell(units=self.num_units, name="user_rnn_01"),
            GRUCell(units=self.num_units, name="user_rnn_02")
            # GRUCell(units=self.num_units, name="user_rnn_03")
        ]
        cell = HierarchicalRNNCell(user_cells=user_cells,
                                   session_cells=session_cells,
                                   embedding_layer=embedding)
        recurrent = RNN(cell=cell, return_sequences=True, return_state=True)

    with tf.name_scope("loss"):
        loss = RankingLoss(num_units=self.num_units,
                           num_sampled=self.num_negatives,
                           num_classes=self.vocab_size - 1,
                           num_true=1,
                           history_length=self.history_length,
                           remove_accidental_hits=True)
        time_distributed = TimeDistributed(
            loss, input_shape=(self.history_length, self.num_units + 1))

    with tf.name_scope("model"):
        tensor = recurrent(inputs=user_input)
        outputs = tensor[0]
        outputs = tf.concat([outputs, label_input], axis=2)
        tensor = time_distributed(outputs)

        # loss
        loss = tf.gather(tensor, [0], axis=2)
        loss = tf.multiply(loss, mask_input, name="loss")
        # prediction
        prediction = tf.gather(tensor, [1], axis=2)
        prediction = tf.multiply(prediction, mask_input, name="prediction")

        # build the model
        model = tf.keras.Model(inputs=[user_input, label_input, mask_input],
                               outputs=[loss, prediction])
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            loss={
                'tf_op_layer_loss': custom_loss,
                'tf_op_layer_prediction': 'binary_crossentropy'
            },
            loss_weights={
                'tf_op_layer_loss': 1.0,
                'tf_op_layer_prediction': 0.0
            },
            metrics={'tf_op_layer_prediction': custom_acc})
    return model
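# Hedged usage sketch: _build_model is a method of a recommender class whose
# __init__ sets history_length, vocab_size, embedding_dim, embedding_mx,
# num_units and num_negatives; training arrays are illustrative:
# model = self._build_model()
# model.fit([user_hist, labels, mask], [loss_target, labels], batch_size=32)
# user_hist: (N, history_length, 3); labels, mask: (N, history_length, 1)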