def build_model_1(embedding_matrix, one_hot_shape):
    words = Input(shape=(MAX_LEN, ))
    x = Embedding(*embedding_matrix.shape, weights=[embedding_matrix], trainable=False)(words)
    x = Bidirectional(LSTM(LSTM_UNITS, return_sequences=True), merge_mode='concat')(x)
    x = SpatialDropout1D(rate=0.3)(x)
    # x = Bidirectional(LSTM(LSTM_UNITS, return_sequences=True), merge_mode='ave')(x)
    # x = SpatialDropout1D(rate=0.3)(x)
    # x = GlobalAveragePooling1D()(x)  # this layer averages each output from the Bidirectional layer
    x = concatenate([
        GlobalMaxPooling1D()(x),
        GlobalAveragePooling1D()(x),
    ])

    summary = Input(shape=(MAX_LEN, ))
    x_aux = Embedding(*embedding_matrix.shape, weights=[embedding_matrix], trainable=False)(summary)
    x_aux = Bidirectional(LSTM(LSTM_UNITS, return_sequences=True), merge_mode='concat')(x_aux)
    x_aux = SpatialDropout1D(rate=0.3)(x_aux)
    # x_aux = Bidirectional(LSTM(LSTM_UNITS, return_sequences=True), merge_mode='ave')(x_aux)
    # x_aux = SpatialDropout1D(rate=0.3)(x_aux)
    # x_aux = GlobalAveragePooling1D()(x_aux)
    x_aux = concatenate([
        GlobalMaxPooling1D()(x_aux),
        GlobalAveragePooling1D()(x_aux),
    ])

    one_hot = Input(shape=(one_hot_shape, ))
    hidden = concatenate([x, x_aux, one_hot])
    hidden = Dense(400, activation='relu')(hidden)
    hidden = Dropout(0.4)(hidden)
    hidden = Dense(400, activation='relu')(hidden)
    hidden = Dropout(0.4)(hidden)
    hidden = Dense(300, activation='relu')(hidden)
    hidden = Dropout(0.4)(hidden)
    hidden = Dense(300, activation='relu')(hidden)
    hidden = Dropout(0.4)(hidden)
    hidden = Dense(100, activation='relu')(hidden)
    result = Dense(1, activation='linear')(hidden)

    model = Model(inputs=[words, summary, one_hot], outputs=[result])
    # adam = keras.optimizers.Adam(lr=0.0001, clipnorm=1.0, clipvalue=0.5)
    model.compile(loss='mse', optimizer='adam')
    return model
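# Usage sketch (an assumption, not part of the original source): MAX_LEN and
# LSTM_UNITS are module-level constants the builder relies on; the shapes and
# toy data below are illustrative only.
import numpy as np

MAX_LEN, LSTM_UNITS = 100, 64
vocab_size, emb_dim, one_hot_dim = 5000, 300, 12
embedding_matrix = np.random.normal(size=(vocab_size, emb_dim)).astype('float32')

model = build_model_1(embedding_matrix, one_hot_dim)
words = np.random.randint(0, vocab_size, size=(8, MAX_LEN))
summaries = np.random.randint(0, vocab_size, size=(8, MAX_LEN))
one_hot = np.random.randint(0, 2, size=(8, one_hot_dim)).astype('float32')
targets = np.random.normal(size=(8, 1))
model.fit([words, summaries, one_hot], targets, epochs=1, batch_size=4)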
def Create_CNN(self, inp, name_suffix):
    """Build an RCNN branch: parallel conv blocks and bidirectional GRU blocks
    over the embedded input, concatenated into a single feature vector."""
    x = self.embedding(inp)
    if self.emb_dropout > 0:
        x = SpatialDropout1D(self.emb_dropout)(x)
    # if self.char_split:
    #     # First conv layer
    #     x = Conv1D(filters=128, kernel_size=3, strides=2, padding="same")(x)
    cnn_list = []
    rnn_list = []
    for filter_size in self.filter_size:
        if filter_size > 0:
            conc = self.ConvBlock(x, filter_size)
            cnn_list.append(conc)
    for rnn_unit in self.rnn_units:
        if rnn_unit > 0:
            rnn_maps = Bidirectional(
                GRU(rnn_unit, return_sequences=True,
                    dropout=self.rnn_input_dropout,
                    recurrent_dropout=self.rnn_state_dropout))(x)
            conc = self.pooling_blend(rnn_maps)
            rnn_list.append(conc)
    conc_list = cnn_list + rnn_list
    if len(conc_list) == 1:
        # identity Lambda only attaches a name; unwrap the single tensor
        conc = Lambda(lambda t: t, name='RCNN_CONC' + name_suffix)(conc_list[0])
    else:
        conc = Concatenate(name='RCNN_CONC' + name_suffix)(conc_list)
    return conc
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    # bnorm_1 = BatchNormalization(axis=-1)(inputs)
    x = Bidirectional(CuDNNLSTM(96, name='blstm1', return_sequences=True),
                      merge_mode='concat')(inputs)
    # activation_1 = Activation('tanh')(lstm_1)
    x = SpatialDropout1D(0.1)(x)
    x = Attention(8, 16)([x, x, x])
    x1 = GlobalMaxPool1D()(x)
    x2 = GlobalAvgPool1D()(x)
    x = Concatenate(axis=-1)([x1, x2])
    x = Dense(units=128, activation='elu')(x)
    x = Dense(units=64, activation='elu')(x)
    x = Dropout(rate=0.4)(x)
    outputs = Dense(units=num_classes, activation='softmax')(x)

    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    # bnorm_1 = BatchNormalization(axis=2)(inputs)
    lstm_1 = Bidirectional(CuDNNLSTM(64, name='blstm_1',
                                     return_sequences=True),
                           merge_mode='concat')(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    dropout1 = SpatialDropout1D(0.5)(activation_1)
    attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
    pool_1 = GlobalMaxPool1D()(attention_1)
    dropout2 = Dropout(rate=0.5)(pool_1)
    dense_1 = Dense(units=256, activation='relu')(dropout2)
    outputs = Dense(units=num_classes, activation='softmax')(dense_1)

    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def create_model_N_emb(df):
    inps = []
    outs = []
    for c in df:
        nunique = np.max(df[c])  # max integer code in this categorical column
        emb_size = int(min(50, nunique // 2))
        inp = Input(shape=(1, ))
        out = Embedding(nunique + 1, emb_size, input_length=1)(inp)
        out = SpatialDropout1D(0.3)(out)
        inps.append(inp)
        outs.append(out)
    x = Concatenate()(outs)
    x = Flatten()(x)
    x = BatchNormalization()(x)
    x = Dense(2**8, activation="relu")(x)
    x = Dropout(0.3)(x)
    x = BatchNormalization()(x)
    x = Dense(2**8, activation="relu")(x)
    x = Dropout(0.3)(x)
    x = BatchNormalization()(x)
    y = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=inps, outputs=y)
    return model
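# Usage sketch (assumed, not from the source): each categorical column must be
# integer-encoded; the model expects one input array per column.
import numpy as np
import pandas as pd

df = pd.DataFrame({
    'cat_a': np.random.randint(0, 10, size=256),
    'cat_b': np.random.randint(0, 40, size=256),
})
y = np.random.randint(0, 2, size=256)

model = create_model_N_emb(df)
model.compile(loss='binary_crossentropy', optimizer='adam')
model.fit([df[c].values for c in df], y, epochs=1, batch_size=32)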
def generate_cnn_model(self) -> Model:
    sequence_input = Input(shape=(None, 3000, 1))

    # convolutional layer and dropout [1]
    sequence = TimeDistributed(self.__generate_base_model())(sequence_input)
    sequence = Convolution1D(filters=128,
                             kernel_size=self.__kernel_size,
                             padding=self.__padding_same,
                             activation=activations.relu)(sequence)
    sequence = SpatialDropout1D(rate=self.__dropout_rate)(sequence)

    # convolutional layer and dropout [2]
    sequence = Convolution1D(filters=128,
                             kernel_size=self.__kernel_size,
                             padding=self.__padding_same,
                             activation=activations.relu)(sequence)
    sequence = Dropout(rate=self.__dropout_rate)(sequence)

    # last convolution and model generation
    model = models.Model(inputs=sequence_input,
                         outputs=Convolution1D(
                             filters=self.__classes_number,
                             kernel_size=self.__kernel_size,
                             padding=self.__padding_same,
                             activation=activations.softmax)(sequence))

    # compile model
    model.compile(optimizer=optimizers.Adam(),
                  loss=losses.sparse_categorical_crossentropy,
                  metrics=self.__metrics)
    return model
def Create_CNN(self):
    """Build the mixed CNN/RNN ("RCNN") classifier over the embedded input."""
    inp = Input(shape=(self.max_len, ))
    embedding = Embedding(self.max_token,
                          self.embedding_dim,
                          weights=[self.embedding_weight],
                          trainable=not self.fix_wv_model)
    x = embedding(inp)
    if self.emb_dropout > 0:
        x = SpatialDropout1D(self.emb_dropout)(x)
    # if self.char_split:
    #     # First conv layer
    #     x = Conv1D(filters=128, kernel_size=3, strides=2, padding="same")(x)
    cnn_list = []
    rnn_list = []
    for filter_size in self.filter_size:
        if filter_size > 0:
            conc = self.ConvBlock(x, filter_size)
            cnn_list.append(conc)
    for rnn_unit in self.context_vector_dim:
        if rnn_unit > 0:
            rnn_maps = Bidirectional(
                GRU(rnn_unit, return_sequences=True,
                    dropout=self.rnn_input_dropout,
                    recurrent_dropout=self.rnn_state_dropout))(x)
            conc = self.pooling_blend(rnn_maps)
            rnn_list.append(conc)
    conc_list = cnn_list + rnn_list
    if len(conc_list) == 1:
        # identity Lambda only attaches a name; unwrap the single tensor
        conc = Lambda(lambda t: t, name='RCNN_CONC')(conc_list[0])
    else:
        conc = Concatenate(name='RCNN_CONC')(conc_list)
    # conc = self.pooling_blend(x)

    if self.separate_label_layer:
        for i in range(self.num_classes):
            full_connect = self.full_connect_layer(conc)
            proba = Dense(1, activation="sigmoid")(full_connect)
            if i == 0:
                outp = proba
            else:
                outp = concatenate([outp, proba], axis=1)
    else:
        if self.hidden_dim[0] > 0:
            full_connect = self.full_connect_layer(conc)
        else:
            full_connect = conc
        # full_conv_0 = self.act_blend(full_conv_pre_act_0)
        # full_conv_pre_act_1 = Dense(self.hidden_dim[1])(full_conv_0)
        # full_conv_1 = self.act_blend(full_conv_pre_act_1)
        # flat = Flatten()(conc)
        outp = Dense(6, activation="sigmoid")(full_connect)

    model = Model(inputs=inp, outputs=outp)
    # print(model.summary())
    model.compile(optimizer="adam",
                  loss="binary_crossentropy",
                  metrics=["accuracy"])
    return model
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    sequence_len = input_shape[0]
    lstm_units_array = np.array([32, 64, 128, 256, 512])
    lstm_units = lstm_units_array[np.argmin(
        np.abs(lstm_units_array - sequence_len))]
    lstm_1 = CuDNNLSTM(lstm_units, return_sequences=True)(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    if num_classes >= 20:
        if num_classes < 30:
            dropout1 = SpatialDropout1D(0.5)(activation_1)
            attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
        else:
            attention_1 = Attention(
                8, 16)([activation_1, activation_1, activation_1])
        k_num = 10
        kmaxpool_l = Lambda(lambda x: tf.reshape(
            tf.nn.top_k(tf.transpose(x, [0, 2, 1]), k=k_num, sorted=True)[0],
            shape=[-1, k_num, 128]))(attention_1)
        flatten = Flatten()(kmaxpool_l)
        dropout2 = Dropout(rate=0.5)(flatten)
    else:
        dropout1 = SpatialDropout1D(0.5)(activation_1)
        attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
        pool_l = GlobalMaxPool1D()(attention_1)
        dropout2 = Dropout(rate=0.5)(pool_l)
    dense_1 = Dense(units=256, activation='relu')(dropout2)
    # dense_1 = Dense(units=256, activation='softplus',
    #                 kernel_regularizer=regularizers.l2(0.01),
    #                 activity_regularizer=regularizers.l1(0.01))(dropout2)
    # dense_1 = DropConnect(Dense(units=256, activation='softplus'), prob=0.5)(dropout2)
    outputs = Dense(units=num_classes, activation='softmax')(dense_1)

    loss_fun = CategoricalCrossentropy(label_smoothing=0.2)
    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Nadam(lr=0.002,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 epsilon=None,
                                 schedule_decay=0.004)
    model.compile(
        optimizer=optimizer,
        loss=loss_fun,
        # loss="sparse_categorical_crossentropy",
        metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def keras_LSTM_estimator(model_dir,
                         config,
                         learning_rate,
                         dropout_rate,
                         embedding_dim,
                         embedding_path=None,
                         word_index=None):
    model = models.Sequential()
    num_features = min(len(word_index) + 1, TOP_K)

    # If a pre-trained embedding is used, add its weights to the embedding
    # layer and set `trainable` from the is_embedding_trainable flag.
    if embedding_path is not None:
        embedding_matrix = get_embedding_matrix(word_index, embedding_path,
                                                embedding_dim)
        is_embedding_trainable = True  # set to False to freeze embedding weights

        model.add(
            Embedding(input_dim=num_features,
                      output_dim=embedding_dim,
                      input_length=MAX_SEQUENCE_LENGTH,
                      weights=[embedding_matrix],
                      trainable=is_embedding_trainable))
    else:
        model.add(
            Embedding(input_dim=num_features,
                      output_dim=embedding_dim,
                      input_length=MAX_SEQUENCE_LENGTH))

    model.add(SpatialDropout1D(dropout_rate))
    model.add(
        LSTM(embedding_dim * 2,
             dropout=dropout_rate,
             recurrent_dropout=0.2,
             return_sequences=True))
    model.add(
        LSTM(int(embedding_dim / 2),
             dropout=dropout_rate,
             recurrent_dropout=0.2))
    model.add(
        Dense(len(CLASSES),
              kernel_regularizer=tf.keras.regularizers.l1(0.01),
              activation='softmax'))

    # Compile model with learning parameters. The final layer already applies
    # softmax, so the loss must consume probabilities, not logits.
    optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

    estimator = tf.keras.estimator.model_to_estimator(keras_model=model,
                                                      model_dir=model_dir,
                                                      config=config)
    return estimator
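# Usage sketch (assumed, not from the source): TOP_K, MAX_SEQUENCE_LENGTH,
# CLASSES, and get_embedding_matrix are module-level names the builder expects;
# the tiny word_index here is a stand-in for a real tokenizer's vocabulary.
word_index = {'the': 1, 'cat': 2, 'sat': 3}
run_config = tf.estimator.RunConfig(save_checkpoints_steps=500)
estimator = keras_LSTM_estimator(model_dir='/tmp/lstm_model',
                                 config=run_config,
                                 learning_rate=1e-3,
                                 dropout_rate=0.3,
                                 embedding_dim=128,
                                 word_index=word_index)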
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    lstm_1 = CuDNNLSTM(128, return_sequences=True)(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    if num_classes >= 20:
        if num_classes < 30:
            dropout1 = SpatialDropout1D(0.5)(activation_1)
            attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
        else:
            # no dropout, to retain more information when classifying a
            # large number of classes
            attention_1 = Attention(8, 16)(
                [activation_1, activation_1, activation_1])
        k_num = 10
        kmaxpool_l = Lambda(lambda x: tf.reshape(
            tf.nn.top_k(tf.transpose(x, [0, 2, 1]), k=k_num, sorted=True)[0],
            shape=[-1, k_num, 128]))(attention_1)
        flatten = Flatten()(kmaxpool_l)
        dropout2 = Dropout(rate=0.5)(flatten)
    else:
        dropout1 = SpatialDropout1D(0.5)(activation_1)
        attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
        pool_l = GlobalMaxPool1D()(attention_1)
        dropout2 = Dropout(rate=0.5)(pool_l)
    dense_1 = Dense(units=256, activation='softplus')(dropout2)
    outputs = Dense(units=num_classes, activation='softmax')(dense_1)

    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Nadam(lr=0.002,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 epsilon=None,
                                 schedule_decay=0.004)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def textgenrnn_model(num_classes,
                     cfg,
                     context_size=None,
                     weights_path=None,
                     dropout=0.0,
                     optimizer=RMSprop(lr=4e-3, rho=0.99)):
    '''
    Builds the model architecture for textgenrnn_tf and
    loads the specified weights for the model.
    '''
    input = Input(shape=(cfg['max_length'], ), name='input')
    embedded = Embedding(num_classes,
                         cfg['dim_embeddings'],
                         input_length=cfg['max_length'],
                         name='embedding')(input)

    if dropout > 0.0:
        embedded = SpatialDropout1D(dropout, name='dropout')(embedded)

    rnn_layer_list = []
    for i in range(cfg['rnn_layers']):
        prev_layer = embedded if i == 0 else rnn_layer_list[-1]
        rnn_layer_list.append(new_rnn(cfg, i + 1)(prev_layer))

    seq_concat = concatenate([embedded] + rnn_layer_list, name='rnn_concat')
    attention = AttentionWeightedAverage(name='attention')(seq_concat)
    output = Dense(num_classes, name='output', activation='softmax')(attention)

    if context_size is None:
        model = Model(inputs=[input], outputs=[output])
        if weights_path is not None:
            model.load_weights(weights_path, by_name=True)
        model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    else:
        context_input = Input(shape=(context_size, ), name='context_input')
        context_reshape = Reshape((context_size, ),
                                  name='context_reshape')(context_input)
        merged = concatenate([attention, context_reshape], name='concat')
        main_output = Dense(num_classes,
                            name='context_output',
                            activation='softmax')(merged)

        model = Model(inputs=[input, context_input],
                      outputs=[main_output, output])
        if weights_path is not None:
            model.load_weights(weights_path, by_name=True)
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizer,
                      loss_weights=[0.8, 0.2])

    return model
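# Usage sketch (assumed, not from the source): the cfg keys match those read
# above; new_rnn and AttentionWeightedAverage are textgenrnn helpers defined
# elsewhere, and num_classes is the size of the character/word vocabulary.
cfg = {'max_length': 40, 'dim_embeddings': 100, 'rnn_layers': 2}
model = textgenrnn_model(num_classes=400, cfg=cfg, dropout=0.1)
model.summary()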
def _train_LSTM(self, X_train, y_train, epochs=5, batch_size=64,
                learning_rate=0.001, reg=0.01):
    """
    Trains LSTM
    - X_train: Input sequence
    - y_train: Target sequence
    - epochs
    - batch_size
    - learning_rate: Adam optimizer's learning rate
    - reg: Regularization
    Returns:
    - history: Scalar loss
    """
    flatten_y = [category for sublist in y_train for category in sublist]
    class_weights = class_weight.compute_class_weight(
        'balanced', np.unique(flatten_y), flatten_y)
    class_weights = dict(enumerate(class_weights))  # fit() expects a dict

    optim = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model = models.Sequential()
    model.add(
        Embedding(input_dim=self.max_word_count,
                  output_dim=self.embedding_dim))
    model.add(SpatialDropout1D(0.2))
    model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(8, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer=optim,
                  metrics=[BinaryAccuracy()])

    history = model.fit(X_train,
                        y_train,
                        class_weight=class_weights,
                        epochs=epochs,
                        batch_size=batch_size,
                        validation_split=0.25,
                        verbose=self.verbose,
                        callbacks=[
                            EarlyStopping(monitor='val_loss',
                                          patience=3,
                                          min_delta=0.0001)
                        ])
    self.model = model
    self.history = history.history
def make_model(batch_size=None):
    source = Input(shape=(maxlen, ),
                   batch_size=batch_size,
                   dtype=tf.int32,
                   name='Input')
    embedding = Embedding(input_dim=max_features,
                          output_dim=embedding_size,
                          input_length=X.shape[1],
                          name='Embedding')(source)
    drop = SpatialDropout1D(0.5)(embedding)
    # rnn = Bidirectional(LSTM(lstm_out, name='LSTM', dropout=0.50,
    #                          recurrent_dropout=0.50))(drop)
    rnn = LSTM(lstm_out, name='LSTM', dropout=0.40,
               recurrent_dropout=0.40)(drop)
    # note: softmax is the conventional output activation when paired with
    # categorical_crossentropy
    predicted_var = Dense(2, activation='sigmoid', name='Output')(rnn)
    model = tf.keras.Model(inputs=[source], outputs=[predicted_var])
    model.compile(
        # optimizer='rmsprop',
        optimizer=tf.keras.optimizers.RMSprop(decay=1e-3),
        loss='categorical_crossentropy',
        metrics=['acc'])
    return model
def residual_block(x, dilation_rate, nb_filters, kernel_size, padding,
                   dropout_rate=0):
    # type: (Layer, int, int, int, str, float) -> Tuple[Layer, Layer]
    """Defines the residual block for the WaveNet TCN

    Args:
        x: The previous layer in the model
        dilation_rate: The dilation power of 2 we are using for this residual block
        nb_filters: The number of convolutional filters to use in this block
        kernel_size: The size of the convolutional kernel
        padding: The padding used in the convolutional layers, 'same' or 'causal'.
        dropout_rate: Float between 0 and 1. Fraction of the input units to drop.

    Returns:
        A tuple where the first element is the residual model layer, and the
        second is the skip connection.
    """
    prev_x = x
    for k in range(2):
        # left-pad the time axis so the 'valid' convolution stays causal
        zero_padding = (kernel_size - 1) * dilation_rate
        x = tf.keras.layers.Lambda(lambda inputs: tf.pad(
            inputs, tf.constant([(0, 0), (1, 0), (0, 0)]) * zero_padding))(x)
        x = Conv1D(filters=nb_filters,
                   kernel_size=kernel_size,
                   dilation_rate=dilation_rate,
                   padding='valid')(x)
        # x = BatchNormalization()(x)  # TODO should be WeightNorm here.
        x = Activation('relu')(x)
        x = SpatialDropout1D(rate=dropout_rate)(x)

    # 1x1 conv to match the shapes (channel dimension).
    prev_x = Conv1D(nb_filters, 1, padding='valid')(prev_x)
    res_x = keras.layers.add([prev_x, x])
    return res_x, x
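# Sketch (an assumption, not from the original source): stacking residual
# blocks with exponentially increasing dilation rates and collecting the skip
# connections, as in the WaveNet-style TCN the docstring describes. Imports
# mirror the snippet above; the shapes are illustrative.
x = Input(shape=(128, 16))
skips = []
out = x
for dilation in [1, 2, 4, 8]:
    out, skip = residual_block(out, dilation_rate=dilation, nb_filters=16,
                               kernel_size=3, padding='causal',
                               dropout_rate=0.1)
    skips.append(skip)
out = tf.keras.layers.add(skips)  # aggregate skip connections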
def build_model_7(self):
    input_ = Input(shape=(8, ))
    emb = Embedding(len(self.words) + 1, 300,
                    embeddings_initializer='uniform')(input_)
    emb = SpatialDropout1D(0.01)(emb)
    lstm_1 = LSTM(128, return_sequences=True)(emb)
    lstm_1 = Dropout(0.01)(lstm_1)
    lstm_2 = LSTM(128)(lstm_1)
    lstm_2 = Dropout(0.01)(lstm_2)
    out = Dense(len(self.words), activation='softmax')(lstm_2)
    model = Model(input_, out)
    opt = Adam(lr=0.002)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    return model
def sep_cnn_model(input_shape,
                  num_classes,
                  num_features,
                  blocks=1,
                  filters=64,
                  kernel_size=4,
                  dropout_rate=0.25):
    # Note: despite the name, this variant stacks an LSTM; the conv-related
    # parameters (blocks, filters, kernel_size) are currently unused.
    op_units, op_activation = _get_last_layer_units_and_activation(num_classes)

    model = models.Sequential()
    model.add(
        Embedding(input_dim=num_features,
                  output_dim=100,
                  input_length=input_shape))
    model.add(SpatialDropout1D(0.2))
    model.add(LSTM(50, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(op_units, activation=op_activation))
    return model
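# A plausible sketch of the helper used above (an assumption; its original
# definition is not shown here): binary tasks get a single sigmoid unit,
# multi-class tasks get a softmax over num_classes.
def _get_last_layer_units_and_activation(num_classes):
    if num_classes == 2:
        return 1, 'sigmoid'
    return num_classes, 'softmax'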
def __generate_base_model(self) -> Model:
    sequence_input = Input(shape=(3000, 1))

    # twice convolutional layer
    sequence = Convolution1D(filters=32,
                             kernel_size=self.__kernel_size,
                             padding=self.__padding_valid,
                             activation=activations.relu)(sequence_input)
    sequence = Convolution1D(filters=32,
                             kernel_size=self.__kernel_size,
                             padding=self.__padding_valid,
                             activation=activations.relu)(sequence)

    for filters in [32, 32, 256]:
        # max pool and dropout
        sequence = MaxPool1D(pool_size=self.__pool_size,
                             padding=self.__padding_valid)(sequence)
        sequence = SpatialDropout1D(rate=self.__dropout_rate)(sequence)

        # twice convolutional layer again
        sequence = Convolution1D(filters=filters,
                                 kernel_size=self.__kernel_size,
                                 padding=self.__padding_valid,
                                 activation=activations.relu)(sequence)
        sequence = Convolution1D(filters=filters,
                                 kernel_size=self.__kernel_size,
                                 padding=self.__padding_valid,
                                 activation=activations.relu)(sequence)

    # final block
    sequence = GlobalMaxPool1D()(sequence)
    sequence = Dropout(rate=self.__dropout_rate)(sequence)
    sequence = Dense(units=64, activation=activations.relu)(sequence)

    # last dropout and model generation
    model = models.Model(inputs=sequence_input,
                         outputs=Dropout(rate=self.__dropout_rate)(sequence))

    # compile model
    model.compile(optimizer=optimizers.Adam(),
                  loss=losses.sparse_categorical_crossentropy,
                  metrics=self.__metrics)
    return model
def residual_block(x, dilation_rate, nb_filters, kernel_size, padding,
                   activation='relu', dropout_rate=0,
                   kernel_initializer='he_normal', use_batch_norm=False):
    # type: (Layer, int, int, int, str, str, float, str, bool) -> Tuple[Layer, Layer]
    """Defines the residual block for the WaveNet TCN

    Args:
        x: The previous layer in the model
        dilation_rate: The dilation power of 2 we are using for this residual block
        nb_filters: The number of convolutional filters to use in this block
        kernel_size: The size of the convolutional kernel
        padding: The padding used in the convolutional layers, 'same' or 'causal'.
        activation: The final activation used in o = Activation(x + F(x))
        dropout_rate: Float between 0 and 1. Fraction of the input units to drop.
        kernel_initializer: Initializer for the kernel weights matrix (Conv1D).
        use_batch_norm: Whether to use batch normalization in the residual layers or not.

    Returns:
        A tuple where the first element is the residual model layer, and the
        second is the skip connection.
    """
    prev_x = x
    for k in range(2):
        x = Conv1D(filters=nb_filters,
                   kernel_size=kernel_size,
                   dilation_rate=dilation_rate,
                   kernel_initializer=kernel_initializer,
                   padding=padding)(x)
        if use_batch_norm:
            # TODO should be WeightNorm here, but using batch norm instead
            x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = SpatialDropout1D(rate=dropout_rate)(x)

    # 1x1 conv to match the shapes (channel dimension).
    prev_x = Conv1D(nb_filters, 1, padding='same')(prev_x)
    res_x = tf.keras.layers.add([prev_x, x])
    res_x = Activation(activation)(res_x)
    return res_x, x
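# Sketch (assumed, not from the source): with two dilated convolutions per
# residual block, as above, the receptive field of a stack of such blocks is
#   1 + 2 * (kernel_size - 1) * sum(dilations)
kernel_size = 3
dilations = [1, 2, 4, 8]
receptive_field = 1 + 2 * (kernel_size - 1) * sum(dilations)
print(receptive_field)  # 61 time steps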
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    # bnorm_1 = BatchNormalization(axis=2)(inputs)
    sequence_len = input_shape[0]
    lstm_units_array = np.array([32, 64, 128, 256, 512])
    lstm_units = lstm_units_array[np.argmin(
        np.abs(lstm_units_array - sequence_len))]
    lstm_1 = Bidirectional(CuDNNLSTM(lstm_units,
                                     name='blstm_1',
                                     return_sequences=True),
                           merge_mode='concat')(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    dropout1 = SpatialDropout1D(0.5)(activation_1)
    attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
    pool_1 = GlobalMaxPool1D()(attention_1)
    dropout2 = Dropout(rate=0.5)(pool_1)
    dense_1 = Dense(units=256, activation='relu')(dropout2)
    # dense_1 = Dense(units=256, activation='relu',
    #                 kernel_regularizer=regularizers.l2(0.01),
    #                 activity_regularizer=regularizers.l1(0.01))(dropout2)
    # dense_1 = DropConnect(Dense(units=256, activation='relu'), prob=0.5)(dropout2)
    outputs = Dense(units=num_classes, activation='softmax')(dense_1)

    model = TFModel(inputs=inputs, outputs=outputs)
    loss_fun = CategoricalCrossentropy(label_smoothing=0.2)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(
        optimizer=optimizer,
        loss=loss_fun,
        # loss="sparse_categorical_crossentropy",
        metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def get_rnn_model(MAX_NB_WORDS, embedding_matrix_2):
    """
    Creating the RNN model
    :param MAX_NB_WORDS: size of the vocabulary (embedding input dimension)
    :param embedding_matrix_2: embedding matrix
    """
    # defining input shape of the data
    inp = Input(shape=(50, ))
    # input for the metadata: the predictions from the first network
    meta_input = Input(shape=(1, ))

    # layers:
    # ------------------------------------------------
    x = Embedding(MAX_NB_WORDS,
                  300,
                  input_length=50,
                  weights=[embedding_matrix_2],
                  trainable=False)(inp)
    x = SpatialDropout1D(0.2)(x)
    x = Bidirectional(GRU(100, return_sequences=True))(x)
    x = Bidirectional(GRU(100, return_sequences=True))(x)
    x = Conv1D(512,
               kernel_size=1,
               padding="valid",
               kernel_initializer="he_uniform")(x)
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)
    conc = concatenate([avg_pool, max_pool])
    conc = BatchNormalization()(conc)

    # at this layer, the sequence features and the metadata from the first
    # model are concatenated
    conc = concatenate([conc, meta_input])
    conc = Dense(512)(conc)
    conc = BatchNormalization()(conc)
    conc = LeakyReLU()(conc)
    outp = Dense(90, activation='softmax')(conc)

    # the input here is a list (main input and meta-input)
    model = Model(inputs=[inp, meta_input], outputs=outp)

    # compiling the model
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
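# Usage sketch (assumed, not from the source): the second input carries the
# upstream model's prediction, one scalar per sample; the embedding matrix
# below is a random stand-in for the real pre-trained one.
import numpy as np

embedding_matrix_2 = np.random.rand(50000, 300)
model = get_rnn_model(MAX_NB_WORDS=50000, embedding_matrix_2=embedding_matrix_2)

seqs = np.random.randint(0, 50000, size=(32, 50))
meta = np.random.rand(32, 1)      # upstream prediction per sample
labels = np.random.rand(32, 90)   # one-hot-style targets over 90 classes
model.fit([seqs, meta], labels, epochs=1, batch_size=16)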
def getModel(self, nwords, nchars, ntags, max_len, max_len_char,
             embedding_matrix):
    # input and embeddings for words
    word_in = Input(shape=(max_len, ))
    emb_word = Embedding(nwords + 1,
                         len(embedding_matrix[0]),
                         weights=[embedding_matrix],
                         input_length=max_len,
                         trainable=False)(word_in)

    # input and embeddings for characters
    char_in = Input(shape=(max_len, max_len_char, ))
    emb_char = TimeDistributed(
        Embedding(input_dim=nchars + 2,
                  output_dim=10,
                  input_length=max_len_char,
                  mask_zero=True))(char_in)

    # character LSTM to get word encodings by characters
    char_enc = TimeDistributed(
        LSTM(units=20, return_sequences=False,
             recurrent_dropout=0.68))(emb_char)

    x = concatenate([emb_word, char_enc])
    x = SpatialDropout1D(0.3)(x)
    main_lstm = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.68))(x)
    out = TimeDistributed(Dense(ntags + 1, activation="softmax"))(main_lstm)

    model = Model([word_in, char_in], out)
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["acc"])
    model.summary()
    return model
def residual_block(x, s, i, activation, nb_filters, kernel_size):
    original_x = x
    conv = Conv1D(filters=nb_filters,
                  kernel_size=kernel_size,
                  dilation_rate=2**i,
                  padding='causal',
                  name='dilated_conv_%d_tanh_s%d' % (2**i, s))(x)
    if activation == 'norm_relu':
        x = Activation('relu')(conv)
        x = Lambda(channel_normalization)(x)
    elif activation == 'wavenet':
        x = wave_net_activation(conv)
    else:
        x = Activation(activation)(conv)

    x = SpatialDropout1D(0.05)(x)

    # 1x1 conv.
    x = Convolution1D(nb_filters, 1, padding='same')(x)
    res_x = keras.layers.add([original_x, x])
    return res_x, x
def train(self, epochs=100, batch_size=None, iterations=100,
          dropout_percent=0.2):
    total_feature_count = self._big_data_file.get_feature_count()
    unique_labels_count = self._big_data_file.get_unique_labels_count()

    if batch_size is None:
        batch_size = total_feature_count
        iterations = 1

    self._model = Sequential()

    # Add the layers
    self._model.add(
        Embedding(self._big_data_file.get_feature_dict_size(), 50))
    self._model.add(SpatialDropout1D(rate=dropout_percent))
    self._model.add(BatchNormalization())
    self._model.add(Dropout(dropout_percent))
    self._model.add(LSTM(units=30))
    self._model.add(BatchNormalization())
    self._model.add(Dropout(dropout_percent, name="location"))
    self._model.add(Dense(unique_labels_count, activation='softmax'))
    self._model.compile(loss='sparse_categorical_crossentropy',
                        optimizer='adam',
                        metrics=['accuracy'])

    self._log("Training started...")
    self._training_start_time = time.time()
    self._model.fit_generator(
        generator=self._feature_batch_generator(batch_size=batch_size),
        steps_per_epoch=iterations,
        epochs=epochs,
        verbose=True)
    self._training_end_time = time.time()
    self._log("Training complete. Elapsed Time: " +
              str(self.get_training_time()))
def add(self, rate, **kwargs):
    return self._add_layer(SpatialDropout1D(rate, **kwargs))
def deepSimDEF_network(args,
                       model_ind,
                       max_ann_len=None,
                       go_term_embedding_file_path=None,
                       sub_ontology_interested=None,
                       go_term_indeces=None,
                       model_summary=False):
    embedding_dim = args.embedding_dim
    activation_hidden = args.activation_hidden
    activation_highway = args.activation_highway
    activation_output = args.activation_output
    dropout = args.dropout
    embedding_dropout = args.embedding_dropout
    annotation_dropout = args.annotation_dropout
    pretrained_embedding = args.pretrained_embedding
    updatable_embedding = args.updatable_embedding
    loss = args.loss
    optimizer = args.optimizer
    learning_rate = args.learning_rate
    checkpoint = args.checkpoint
    verbose = args.verbose
    highway_layer = args.highway_layer
    cosine_similarity = args.cosine_similarity
    deepsimdef_mode = args.deepsimdef_mode

    _inputs = []        # input tensors of the network (one per channel)
    _embeddings = {}    # used for weight-sharing of the embeddings
    _denses = []        # used for weight-sharing of dense layers whenever needed
    _Gene_channel = []  # for the middle part, up until the highway

    if checkpoint:
        with open('{}/model_{}.json'.format(checkpoint, model_ind + 1),
                  'r') as json_file:
            model = model_from_json(json_file.read())  # load the json model
        model.load_weights('{}/model_{}.h5'.format(
            checkpoint, model_ind + 1))  # load weights into the new model
        if deepsimdef_mode == 'training':
            model.compile(loss=loss, optimizer=optimizer)
        if verbose:
            print("Loaded model {} from disk".format(model_ind + 1))
        return model

    for i in range(2):  # bottom half of the network; 2 for the 2 channels
        _GO_term_channel = []  # holds the flattened, max-pooled embeddings
        for sbo in sub_ontology_interested:
            _inputs.append(Input(shape=(max_ann_len[sbo], ), dtype='int32'))
            if sbo in _embeddings:
                # reuse the layer for the second channel (weight-sharing)
                embedding_layer = _embeddings[sbo]
            else:
                if pretrained_embedding:
                    embedding_matrix = load_embedding(
                        go_term_embedding_file_path[sbo], embedding_dim,
                        go_term_indeces[sbo])
                    if verbose:
                        print("Loaded {} word vectors for {} (Model {})".format(
                            len(embedding_matrix), sbo, model_ind + 1))
                    embedding_layer = Embedding(
                        input_dim=len(go_term_indeces[sbo]) + 1,
                        output_dim=embedding_dim,
                        weights=[embedding_matrix],
                        input_length=max_ann_len[sbo],
                        trainable=updatable_embedding,
                        name="embedding_{}_{}".format(sbo, model_ind))
                else:  # without pre-trained word embeddings
                    embedding_layer = Embedding(
                        input_dim=len(go_term_indeces[sbo]) + 1,
                        output_dim=embedding_dim,
                        input_length=max_ann_len[sbo],
                        name="embedding_{}_{}".format(sbo, model_ind))
                _embeddings[sbo] = embedding_layer
            GO_term_emb = embedding_layer(_inputs[-1])
            if 0 < annotation_dropout:
                GO_term_emb = DropAnnotation(annotation_dropout)(GO_term_emb)
            if 0 < embedding_dropout:
                GO_term_emb = SpatialDropout1D(embedding_dropout)(GO_term_emb)
            GO_term_emb = MaxPooling1D(pool_size=max_ann_len[sbo])(GO_term_emb)
            GO_term_emb = Flatten()(GO_term_emb)
            _GO_term_channel.append(GO_term_emb)

        Gene_emb = (Concatenate(axis=-1)(_GO_term_channel)
                    if 1 < len(sub_ontology_interested)
                    else _GO_term_channel[0])
        Dns = _denses[0] if len(_denses) == 1 else Dense(
            units=embedding_dim * len(sub_ontology_interested),
            activation=activation_hidden)
        _denses.append(Dns)
        Gene_emb = Dns(Gene_emb)
        Gene_emb = Dropout(dropout)(Gene_emb)
        _Gene_channel.append(Gene_emb)

    if cosine_similarity:
        preds = Dot(axes=1, normalize=True)(_Gene_channel)
    else:
        merge = Concatenate(axis=-1)(_Gene_channel)
        if highway_layer:
            merge = highway(merge, activation=activation_highway)
        merge = Dropout(dropout)(merge)
        merge = Dense(units=embedding_dim * len(sub_ontology_interested),
                      activation=activation_hidden)(merge)
        merge = Dropout(dropout)(merge)
        preds = Dense(units=1, activation=activation_output)(merge)

    model = Model(inputs=_inputs, outputs=preds)
    model.compile(loss=loss, optimizer=optimizer)
    model.optimizer.lr = learning_rate  # set the optimizer's learning rate

    if model_summary:
        print(model.summary())
    if verbose:
        print("Model for fold number {} instantiated!!\n".format(model_ind + 1))
    return model
def get_rnn_model(MAX_NB_WORDS, embedding_matrix_2):
    """
    Creating the RNN model
    :param MAX_NB_WORDS: size of the vocabulary (embedding input dimension)
    :param embedding_matrix_2: embedding matrix for the tokens
    """
    # defining input shape of the data
    inp = Input(shape=(50, ))

    # the layers
    # -------------------------------------------
    # embedding layer
    x = Embedding(input_dim=MAX_NB_WORDS,
                  output_dim=300,
                  input_length=50,
                  weights=[embedding_matrix_2],
                  trainable=False)(inp)
    # ----------------------------------------------------
    # dropout layer
    x = SpatialDropout1D(0.2)(x)
    # -------------------------------------------------------------------------
    # the RNN part: two successive bidirectional GRU layers followed by a
    # 1D conv layer improved the performance
    """
    after some trial and error this combination was found to be the best,
    though it is not entirely clear why. GRU was chosen over LSTM since it
    is simpler and faster.
    """
    x = Bidirectional(GRU(100, return_sequences=True))(x)
    x = Bidirectional(GRU(100, return_sequences=True))(x)
    x = Conv1D(512,
               kernel_size=1,
               padding="valid",
               kernel_initializer="he_uniform")(x)
    # --------------------------------------------------------------------------
    # two pooling layers, average and maximum, to reduce the dimensionality
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)
    # concatenating the two pooling layers
    conc = concatenate([avg_pool, max_pool])
    # -------------------------------------------------------------------------
    # batch normalization to speed up weight learning
    # (standardizes the input values: mean = 0, std = 1)
    conc = BatchNormalization()(conc)
    # -------------------------------------------------------------------------
    conc = LeakyReLU()(conc)
    # -------------------------------------------------------------------------
    # the output layer
    outp = Dense(10, activation='softmax')(conc)
    # --------------------------------------------------------------------
    # defining the model
    model = Model(inputs=inp, outputs=outp)

    # to load pre-trained weights:
    # model.load_weights("/weights-improvement-01-0.69.hdf5")

    # compiling the model
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def evaluate_model(trainX, trainy, testX, testy, testy_norm):
    """
    Create, fit and evaluate a model
    :param trainX: (array)
    :param trainy: (array)
    :param testX: (array)
    :param testy: (array)
    :param testy_norm: (array)
    :return: accuracy (float), loss (float)
    """
    verbose, epochs, batch_size = 1, 60, 16
    trainX, testX = scale_data(trainX, testX)
    # trainX, testX = Magnitude(trainX, testX)
    # trainX, testX = AutoCorallation(trainX, testX)
    n_timesteps, n_features, n_outputs = (trainX.shape[1], trainX.shape[2],
                                          trainy.shape[1])
    print(testX.shape)
    print(testy.shape)

    model = Sequential()
    # Small structure
    model.add(
        Conv1D(32,
               5,
               activation='relu',
               padding='same',
               input_shape=(n_timesteps, n_features)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(64, 5, activation='relu', padding='same'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(128, 5, activation='relu', padding='same'))
    model.add(SpatialDropout1D(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(2, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.summary()
    plot_model(model, 'model_info.png', show_shapes=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    # fit network
    tensorboard = TensorBoard(log_dir="logs_3xconv/{}".format(time()),
                              histogram_freq=1,
                              write_images=True)
    history = model.fit(trainX,
                        trainy,
                        epochs=epochs,
                        batch_size=batch_size,
                        verbose=verbose,
                        validation_split=0.15,
                        shuffle=True,
                        callbacks=[tensorboard])

    # evaluate model
    loss, accuracy = model.evaluate(testX, testy, batch_size=batch_size,
                                    verbose=0)
    export_model(model)

    predictions = model.predict_classes(testX)
    print(metrics.classification_report(testy_norm, predictions))
    confusion_matrix = metrics.confusion_matrix(y_true=testy_norm,
                                                y_pred=predictions)
    print(confusion_matrix)
    normalised_confusion_matrix = np.array(
        confusion_matrix, dtype=np.float32) / np.sum(confusion_matrix) * 100
    print("")
    print("Confusion matrix (normalised to % of total test data):")
    print(normalised_confusion_matrix)

    width = 12
    height = 12
    # fig, ax = plt.subplots()
    plt.figure(figsize=(width, height))
    plt.imshow(normalised_confusion_matrix,
               interpolation='nearest',
               cmap=plt.cm.rainbow)
    plt.title("Confusion matrix \n(normalized to the entire test set [%])")
    plt.colorbar()
    tick_marks = np.arange(2)
    LABELS = ["Dynamic", "Static"]
    plt.xticks(tick_marks, LABELS, rotation=90)
    plt.yticks(tick_marks, LABELS)
    plt.tight_layout()
    plt.ylabel('Real value')
    plt.xlabel('Prediction value')

    plt.figure()
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Training', 'Validation'], loc='upper left')

    plt.figure()
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Training', 'Validation'], loc='upper left')
    plt.show()

    return accuracy, loss
def get_rnn_model(MAX_NB_WORDS, embedding_matrix_2):
    """
    Creating the RNN model
    :param MAX_NB_WORDS: size of the vocabulary (embedding input dimension)
    :param embedding_matrix_2: embedding matrix
    """
    # -----------------------------------------------------
    # both previous models are used here: their predictions are fed into the
    # network as metadata

    # defining input shape of the data
    inp = Input(shape=(50, ))
    # input shape of the level_1 predictions
    meta_input_1 = Input(shape=(1, ))
    # input shape of the level_2 predictions
    meta_input_2 = Input(shape=(1, ))
    # -----------------------------------------------------
    # embedding layer
    x = Embedding(MAX_NB_WORDS,
                  300,
                  input_length=50,
                  weights=[embedding_matrix_2],
                  trainable=False)(inp)
    # -----------------------------------------------
    # spatial dropout
    x = SpatialDropout1D(0.2)(x)
    # ----------------------------------------------------------
    # the RNN part
    x = Bidirectional(GRU(100, return_sequences=True))(x)
    x = Bidirectional(GRU(100, return_sequences=True))(x)
    # the convolutional layer
    x = Conv1D(512,
               kernel_size=1,
               padding="valid",
               kernel_initializer="he_uniform")(x)
    # --------------------------------------------------------
    # two pooling layers, average and maximum
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)
    # concatenating the two pooling layers
    conc = concatenate([avg_pool, max_pool])
    # -----------------------------------------------------
    # batch normalization to speed up weight learning
    conc = BatchNormalization()(conc)
    # ----------------------------------------------------
    # both predictions are concatenated with the new input features and fed
    # into a dense layer
    conc = concatenate([conc, meta_input_1, meta_input_2])
    conc = Dense(512)(conc)
    # ---------------------------------------------------
    conc = BatchNormalization()(conc)
    # ---------------------------------
    # applying LeakyReLU
    conc = LeakyReLU()(conc)
    # --------------------------------------------------
    # the output layer
    outp = Dense(477, activation='softmax')(conc)
    # --------------------------------------------------
    # 3 inputs
    model = Model(inputs=[inp, meta_input_1, meta_input_2], outputs=outp)

    # to load pre-trained weights:
    # model.load_weights("weights-improvement-01-0.69.hdf5")

    # compiling the model
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def init_model(self, input_shape, num_classes, **kwargs):
    freq_axis = 2
    channel_axis = 3
    channel_size = 128
    min_size = min(input_shape[:2])
    inputs = Input(shape=input_shape)
    # x = ZeroPadding2D(padding=(0, 37))(melgram_input)
    # x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(x)
    x = Reshape((input_shape[0], input_shape[1], 1))(inputs)

    # Conv block 1
    x = Convolution2D(64, 3, 1, padding='same', name='conv1')(x)
    x = BatchNormalization(axis=channel_axis, name='bn1')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x)
    x = Dropout(0.1, name='dropout1')(x)

    # Conv block 2
    x = Convolution2D(channel_size, 3, 1, padding='same', name='conv2')(x)
    x = BatchNormalization(axis=channel_axis, name='bn2')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(3, 3), name='pool2')(x)
    x = Dropout(0.1, name='dropout2')(x)

    # Conv block 3
    x = Convolution2D(channel_size, 3, 1, padding='same', name='conv3')(x)
    x = BatchNormalization(axis=channel_axis, name='bn3')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool3')(x)
    x = Dropout(0.1, name='dropout3')(x)

    if min_size // 24 >= 4:
        # Conv block 4
        x = Convolution2D(channel_size, 3, 1, padding='same', name='conv4')(x)
        x = BatchNormalization(axis=channel_axis, name='bn4')(x)
        x = ELU()(x)
        x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool4')(x)
        x = Dropout(0.1, name='dropout4')(x)

    x = Reshape((-1, channel_size))(x)
    avg_pool = GlobalAvgPool1D()(x)
    max_pool = GlobalMaxPool1D()(x)
    x = concatenate([avg_pool, max_pool], axis=-1)
    # x = Dense(max(int(num_classes * 1.5), 128), activation='relu', name='dense1')(x)
    x = Dropout(0.3)(x)
    outputs1 = Dense(num_classes, activation='softmax', name='output')(x)

    # bnorm_1 = BatchNormalization(axis=2)(inputs)
    lstm_1 = Bidirectional(CuDNNLSTM(64, name='blstm_1',
                                     return_sequences=True),
                           merge_mode='concat')(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    dropout1 = SpatialDropout1D(0.5)(activation_1)
    attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
    pool_1 = GlobalMaxPool1D()(attention_1)
    dropout2 = Dropout(rate=0.5)(pool_1)
    dense_1 = Dense(units=256, activation='relu')(dropout2)
    outputs2 = Dense(units=num_classes, activation='softmax')(dense_1)

    outputs = Average()([outputs1, outputs2])
    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True