def __call__(self, inputs):
    x = inputs[0]

    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(128, 11,
                  kernel_initializer=self.init,
                  kernel_regularizer=kernel_regularizer)(x)
    x = kl.Activation('relu')(x)
    x = kl.MaxPooling1D(4)(x)

    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(256, 3,
                  kernel_initializer=self.init,
                  kernel_regularizer=kernel_regularizer)(x)
    x = kl.Activation('relu')(x)
    x = kl.MaxPooling1D(2)(x)

    x = kl.Flatten()(x)

    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Dense(self.nb_hidden,
                 kernel_initializer=self.init,
                 kernel_regularizer=kernel_regularizer)(x)
    x = kl.Activation('relu')(x)
    x = kl.Dropout(self.dropout)(x)

    return self._build(inputs, x)
def __call__(self, inputs):
    x = inputs[0]

    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(128, 11,
                  kernel_initializer=self.init,
                  kernel_regularizer=kernel_regularizer)(x)
    x = kl.Activation('relu')(x)
    x = kl.MaxPooling1D(4)(x)

    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(256, 7,
                  kernel_initializer=self.init,
                  kernel_regularizer=kernel_regularizer)(x)
    x = kl.Activation('relu')(x)
    x = kl.MaxPooling1D(4)(x)

    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    gru = kl.recurrent.GRU(256, kernel_regularizer=kernel_regularizer)
    x = kl.Bidirectional(gru)(x)
    x = kl.Dropout(self.dropout)(x)

    return self._build(inputs, x)
def _res_unit(self, inputs, nb_filter, size=3, stride=1, stage=1, block=1):
    name = '%02d-%02d/' % (stage, block)
    id_name = '%sid_' % (name)
    res_name = '%sres_' % (name)

    # Residual branch

    # 1x1 down-sample conv
    x = kl.BatchNormalization(name=res_name + 'bn1')(inputs)
    x = kl.Activation('relu', name=res_name + 'act1')(x)
    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(nb_filter[0], 1,
                  name=res_name + 'conv1',
                  strides=stride,
                  kernel_initializer=self.init,
                  kernel_regularizer=kernel_regularizer)(x)

    # LxL conv
    x = kl.BatchNormalization(name=res_name + 'bn2')(x)
    x = kl.Activation('relu', name=res_name + 'act2')(x)
    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(nb_filter[1], size,
                  name=res_name + 'conv2',
                  padding='same',
                  kernel_initializer=self.init,
                  kernel_regularizer=kernel_regularizer)(x)

    # 1x1 up-sample conv
    x = kl.BatchNormalization(name=res_name + 'bn3')(x)
    x = kl.Activation('relu', name=res_name + 'act3')(x)
    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(nb_filter[2], 1,
                  name=res_name + 'conv3',
                  kernel_initializer=self.init,
                  kernel_regularizer=kernel_regularizer)(x)

    # Identity branch: project the input if the depth or the stride changes
    if nb_filter[-1] != inputs._keras_shape[-1] or stride > 1:
        kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
        identity = kl.Conv1D(nb_filter[2], 1,
                             name=id_name + 'conv1',
                             strides=stride,
                             kernel_initializer=self.init,
                             kernel_regularizer=kernel_regularizer)(inputs)
    else:
        identity = inputs

    x = kl.add([identity, x], name=name + 'merge')
    return x
def conv_block(x, stage, branch, nb_filter, dropout_rate=None, weight_decay=1e-4):
    '''Apply BatchNorm, Relu, bottleneck 1x1 Conv2D, 3x3 Conv2D, and optional dropout

        # Arguments
            x: input tensor
            stage: index for dense block
            branch: layer index within each dense block
            nb_filter: number of filters
            dropout_rate: dropout rate
            weight_decay: weight decay factor
    '''
    eps = 1.1e-5
    conv_name_base = 'conv' + str(stage) + '_' + str(branch)
    relu_name_base = 'relu' + str(stage) + '_' + str(branch)

    # 1x1 Convolution (Bottleneck layer)
    inter_channel = nb_filter * 4
    x = BatchNormalization(epsilon=eps, axis=concat_axis,
                           name=conv_name_base + '_x1_bn')(x)
    x = Scale(axis=concat_axis, name=conv_name_base + '_x1_scale')(x)
    x = Activation('relu', name=relu_name_base + '_x1')(x)
    x = Convolution2D(inter_channel, 1, 1, name=conv_name_base + '_x1',
                      bias=False,
                      kernel_regularizer=regularizers.L1L2(l2=1E-4))(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    # 3x3 Convolution
    x = BatchNormalization(epsilon=eps, axis=concat_axis,
                           name=conv_name_base + '_x2_bn')(x)
    x = Scale(axis=concat_axis, name=conv_name_base + '_x2_scale')(x)
    x = Activation('relu', name=relu_name_base + '_x2')(x)
    x = ZeroPadding2D((1, 1), name=conv_name_base + '_x2_zeropadding')(x)
    x = Convolution2D(nb_filter, 3, 3, name=conv_name_base + '_x2',
                      bias=False,
                      kernel_regularizer=regularizers.L1L2(l2=1E-4))(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    return x
def __init__(self, l1=0.0, l2=0.0, **kwargs):
    super().__init__(
        activity_regularizer=regularizers.L1L2(l1=l1, l2=l2),
        **kwargs
    )
    self.supports_masking = True
    self.l1 = l1
    self.l2 = l2
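# Minimal sketch (not part of the snippets above) of the penalty that regularizers.L1L2
# contributes to the training loss, assuming standalone Keras 2: for a weight tensor W it
# adds l1 * sum(|W|) + l2 * sum(W ** 2).
import numpy as np
from keras import backend as K
from keras import regularizers

reg = regularizers.L1L2(l1=0.01, l2=0.001)
W = K.variable(np.array([[1.0, -2.0], [3.0, -4.0]]))
penalty = K.eval(reg(W))  # 0.01 * 10 + 0.001 * 30 = 0.13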
def test_dense():
    layer_test(layers.Dense, kwargs={'units': 3}, input_shape=(3, 2))

    layer_test(layers.Dense, kwargs={'units': 3}, input_shape=(3, 4, 2))

    layer_test(layers.Dense, kwargs={'units': 3}, input_shape=(None, None, 2))

    layer_test(layers.Dense, kwargs={'units': 3}, input_shape=(3, 4, 5, 2))

    layer_test(layers.Dense,
               kwargs={'units': 3,
                       'kernel_regularizer': regularizers.l2(0.01),
                       'bias_regularizer': regularizers.l1(0.01),
                       'activity_regularizer': regularizers.L1L2(l1=0.01, l2=0.01),
                       'kernel_constraint': constraints.MaxNorm(1),
                       'bias_constraint': constraints.max_norm(1)},
               input_shape=(3, 2))

    layer = layers.Dense(3,
                         kernel_regularizer=regularizers.l1(0.01),
                         bias_regularizer='l1')
    layer.build((None, 4))
    assert len(layer.losses) == 2
def __call__(self, inputs):
    x = inputs[0]

    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(128, 11,
                  name='conv1',
                  kernel_initializer=self.init,
                  kernel_regularizer=kernel_regularizer)(x)
    x = kl.Activation('relu', name='act1')(x)
    x = kl.MaxPooling1D(2, name='pool1')(x)

    # 124
    x = self._res_unit(x, [32, 32, 128], stage=1, block=1, stride=2)
    x = self._res_unit(x, [32, 32, 128], atrous=2, stage=1, block=2)
    x = self._res_unit(x, [32, 32, 128], atrous=4, stage=1, block=3)

    # 64
    x = self._res_unit(x, [64, 64, 256], stage=2, block=1, stride=2)
    x = self._res_unit(x, [64, 64, 256], atrous=2, stage=2, block=2)
    x = self._res_unit(x, [64, 64, 256], atrous=4, stage=2, block=3)

    # 32
    x = self._res_unit(x, [128, 128, 512], stage=3, block=1, stride=2)
    x = self._res_unit(x, [128, 128, 512], atrous=2, stage=3, block=2)
    x = self._res_unit(x, [128, 128, 512], atrous=4, stage=3, block=3)

    # 16
    x = self._res_unit(x, [256, 256, 1024], stage=4, block=1, stride=2)

    x = kl.GlobalAveragePooling1D()(x)
    x = kl.Dropout(self.dropout)(x)

    return self._build(inputs, x)
def __call__(self, inputs):
    x = inputs[0]

    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(128, 11,
                  name='conv1',
                  kernel_initializer=self.init,
                  kernel_regularizer=kernel_regularizer)(x)
    x = kl.BatchNormalization(name='bn1')(x)
    x = kl.Activation('relu', name='act1')(x)
    x = kl.MaxPooling1D(2, name='pool1')(x)

    # 124
    x = self._res_unit(x, 128, stage=1, block=1, stride=2)
    x = self._res_unit(x, 128, stage=1, block=2)

    # 64
    x = self._res_unit(x, 256, stage=2, block=1, stride=2)

    # 32
    x = self._res_unit(x, 256, stage=3, block=1, stride=2)

    # 32
    x = self._res_unit(x, 512, stage=4, block=1, stride=2)

    x = kl.GlobalAveragePooling1D()(x)
    x = kl.Dropout(self.dropout)(x)

    return self._build(inputs, x)
def _replicate_model(self, input):
    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Dense(256,
                 kernel_initializer=self.init,
                 kernel_regularizer=kernel_regularizer)(input)
    x = kl.Activation(self.act_replicate)(x)

    return km.Model(input, x)
def train():
    add_train()
    batch_size = 32

    print('Loading data...')
    (x_train, y_train), (x_test, y_test) = import_data()

    model = Sequential()
    x_train = sequence.pad_sequences(x_train)
    x_test = sequence.pad_sequences(x_test)
    model.add(GaussianDropout(.1))
    model.add(
        Embedding(8, 128,
                  embeddings_initializer='TruncatedNormal',
                  activity_regularizer=kr.L1L2(0.01, 0.01)))
    model.add(
        LSTM(128, dropout=0.1, recurrent_dropout=0.1,
             return_sequences=True, go_backwards=True))
    model.add(MaxPool1D())
    model.add(Dense(64, input_shape=(128, ), activation='sigmoid'))
    model.add(Bidirectional(LSTM(32, dropout=0.2, recurrent_dropout=0.1)))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    print('Training model')
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=1,
              validation_data=(x_test, y_test),
              workers=100,
              use_multiprocessing=True,
              verbose=False,
              shuffle=True)
    score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)

    print(model.summary())
    print('Test score:', score)
    print('Test accuracy:', acc)

    model_json = model.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)

    if float(acc) < 0.9:
        print("Accuracy is less than 0.9")


train()
def build(self, input_shape):
    # W, K and V
    self.kernel = self.add_weight(name='WKV',
                                  shape=(3, input_shape[2], self.output_dim),
                                  trainable=True,
                                  initializer='uniform',
                                  regularizer=regularizers.L1L2(0.0000032))
    super().build(input_shape)
def denseNet(input_dim):
    base_model = densenet.DenseNet(input_shape=(input_dim, input_dim, 3),
                                   classes=17,
                                   dropout_rate=0.2,
                                   weights=None,
                                   include_top=False)
    x = Dense(17, activation='softmax',
              kernel_regularizer=regularizers.L1L2(l2=1E-4),
              bias_regularizer=regularizers.L1L2(l2=1E-4))(base_model.output)
    model = Model(inputs=base_model.input, outputs=x)

    # Load model
    weights_file = "../weights/DenseNet-40-12CIFAR10-tf.h5"
    if os.path.exists(weights_file):
        model.load_weights(weights_file)
        print("Model loaded.")

    return model
def _create_base_network(self):
    a = 'tanh'
    # Regularizer is created with default (zero) penalties and is not attached to any layer below.
    ar = regularizers.L1L2()

    model = Sequential()
    model.add(
        Dense(self.INPUT_DIM, input_shape=(self.INPUT_DIM, ), activation=a))
    model.add(Dense(600, activation=a))
    model.add(Dense(self.INPUT_DIM, activation=a))

    return model
def __call__(self, inputs):
    x = self._merge_inputs(inputs)

    shape = getattr(x, 'shape')
    replicate_model = self._replicate_model(kl.Input(shape=shape[2:]))
    x = kl.TimeDistributed(replicate_model)(x)

    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Bidirectional(kl.GRU(128,
                                kernel_regularizer=kernel_regularizer,
                                return_sequences=True),
                         merge_mode='concat')(x)

    kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
    gru = kl.GRU(256, kernel_regularizer=kernel_regularizer)
    x = kl.Bidirectional(gru)(x)
    x = kl.Dropout(self.dropout)(x)

    return self._build(inputs, x)
def create_model_sequential_bn2(nvariables, lr=0.001, clipnorm=10.,
                                nodes1=64, nodes2=32, nodes3=16,
                                l1_reg=0.0, l2_reg=0.0,
                                use_bn=True, use_dropout=False):
    # Adding 1 BN layer right after the input layer
    regularizer = regularizers.L1L2(l1=l1_reg, l2=l2_reg)

    model = Sequential()

    if use_bn:
        model.add(
            BatchNormalization(input_shape=(nvariables, ),
                               epsilon=1e-4, momentum=0.9))

    model.add(
        Dense(nodes1,
              kernel_initializer='glorot_uniform',
              kernel_regularizer=regularizer,
              use_bias=False))
    if use_bn:
        model.add(BatchNormalization(epsilon=1e-4, momentum=0.9))
    model.add(Activation('tanh'))

    if nodes2:
        model.add(
            Dense(nodes2,
                  kernel_initializer='glorot_uniform',
                  kernel_regularizer=regularizer,
                  use_bias=False))
        if use_bn:
            model.add(BatchNormalization(epsilon=1e-4, momentum=0.9))
        model.add(Activation('tanh'))

    if nodes3:
        model.add(
            Dense(nodes3,
                  kernel_initializer='glorot_uniform',
                  kernel_regularizer=regularizer,
                  use_bias=False))
        if use_bn:
            model.add(BatchNormalization(epsilon=1e-4, momentum=0.9))
        model.add(Activation('tanh'))

    # Output node
    model.add(
        Dense(1, activation='linear', kernel_initializer='glorot_uniform'))

    # Set loss and optimizers
    adam = optimizers.Adam(lr=lr, clipnorm=clipnorm)

    model.compile(optimizer=adam, loss=huber_loss, metrics=['acc'])
    model.summary()
    return model
def __call__(self, models):
    layers = []
    for layer in range(self.nb_layer):
        kernel_regularizer = kr.L1L2(l1=self.l1_decay, l2=self.l2_decay)
        layers.append(
            kl.Dense(self.nb_hidden,
                     kernel_initializer=self.init,
                     kernel_regularizer=kernel_regularizer))
        layers.append(kl.Activation('relu'))
        layers.append(kl.Dropout(self.dropout))

    return self._build(models, layers)
def _create_base_network(self, input_dim):
    a = 'tanh'
    ar = regularizers.L1L2()

    network = Sequential()
    network.add(
        Dense(300, input_shape=(input_dim, ), activation=a,
              activity_regularizer=ar))
    network.add(Dense(150, activation=a))
    network.add(Dense(250, activation=a))
    network.add(Dense(300, activation=a))

    return network
def _generate_model(self):
    ''' Generate model with empty class '''
    input = self.X_train.shape[1]

    model = Sequential()
    reg = regularizers.L1L2(l2=0.01)
    model.add(Dense(input // 2, activation='relu', input_dim=input,
                    kernel_regularizer=reg))
    model.add(Dropout(0.5))
    model.add(Dense(input // 4, activation='relu', kernel_regularizer=reg))
    model.add(BatchNormalization())
    model.add(Dense(2, activation='softmax', kernel_regularizer=reg))

    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])
    return model
def RULmodel_SN_5(input_shape):
    # Create a sequential model
    model = Sequential()

    # Add the layers for the model
    model.add(
        Dense(20,
              input_dim=input_shape,
              activation='relu',
              kernel_initializer='glorot_normal',
              kernel_regularizer=regularizers.L1L2(l1_lambda_regularization,
                                                   l2_lambda_regularization),
              name='fc1'))
    model.add(
        Dense(20,
              input_dim=input_shape,
              activation='relu',
              kernel_initializer='glorot_normal',
              kernel_regularizer=regularizers.L1L2(l1_lambda_regularization,
                                                   l2_lambda_regularization),
              name='fc2'))
    model.add(Dense(1, activation='linear', name='out'))

    return model
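# Hedged usage sketch for RULmodel_SN_5 (not from the original repo). The regularization
# constants are free names inside the function above, so they are assumed to be
# module-level globals; their values and the 14-feature input are purely illustrative.
l1_lambda_regularization = 0.0
l2_lambda_regularization = 0.2

rul_model = RULmodel_SN_5(input_shape=14)
rul_model.compile(optimizer='adam', loss='mean_squared_error')
rul_model.summary()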
def create_model_sequential(nvariables, lr=0.001, clipnorm=10.,
                            nodes1=64, nodes2=32, nodes3=16,
                            l1_reg=0.0, l2_reg=0.0):
    regularizer = regularizers.L1L2(l1=l1_reg, l2=l2_reg)

    model = Sequential()
    model.add(
        Dense(nodes1,
              input_shape=(nvariables, ),
              activation='tanh',
              kernel_initializer='glorot_uniform',
              kernel_regularizer=regularizer))
    #model.add(Dropout(0.2))

    if nodes2:
        model.add(
            Dense(nodes2,
                  activation='tanh',
                  kernel_initializer='glorot_uniform',
                  kernel_regularizer=regularizer))
        #model.add(Dropout(0.2))

    if nodes3:
        model.add(
            Dense(nodes3,
                  activation='tanh',
                  kernel_initializer='glorot_uniform',
                  kernel_regularizer=regularizer))
        #model.add(Dropout(0.2))

    # Output node
    model.add(
        Dense(1, activation='linear', kernel_initializer='glorot_uniform'))

    # Set loss and optimizers
    adam = optimizers.Adam(lr=lr, clipnorm=clipnorm)

    model.compile(optimizer=adam, loss=huber_loss, metrics=['acc'])
    model.summary()
    return model
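# Hedged usage sketch for create_model_sequential (not from the original repo). `huber_loss`
# is referenced but not defined in this snippet, so it is assumed to exist in the surrounding
# module; the argument values below are illustrative.
model = create_model_sequential(nvariables=23, lr=0.001,
                                nodes1=64, nodes2=32, nodes3=16,
                                l1_reg=0.0, l2_reg=1e-5)
# Training would then expect inputs of shape (N, 23) and a single regression target per row.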
def pos_affine_relu(pos_length, ext_n_bases, ext_filters, feat_name, pos_effect_kwargs):
    """Get the affine+relu transformation module

    Returns the input and output node
    """
    pos_in = kl.Input((pos_length, 1), name=feat_name)
    pos_features = kl.Conv1D(filters=pos_effect_kwargs["n_bases"],
                             kernel_size=1,
                             use_bias=True,
                             activation="relu",
                             name=feat_name + "_conv_features")(pos_in)
    pos_out = kl.Conv1D(filters=ext_filters,
                        kernel_size=1,
                        kernel_regularizer=kr.L1L2(l2=pos_effect_kwargs["l2"]),
                        use_bias=pos_effect_kwargs["use_bias"],
                        activation=pos_effect_kwargs.get("activation", None),
                        name=feat_name + "_conv_combine")(pos_features)
    if pos_effect_kwargs["merge"]["type"] == "multiply":
        pos_out = kl.Lambda(lambda x: 1.0 + x)(pos_out)
    return pos_in, pos_out
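# Hedged usage sketch for pos_affine_relu (not from the original repo); the feature name and
# every value in pos_effect_kwargs below are illustrative assumptions.
import keras.models as km

pos_effect_kwargs = {"n_bases": 10, "l2": 1e-4, "use_bias": False,
                     "merge": {"type": "multiply"}}
pos_in, pos_out = pos_affine_relu(pos_length=100, ext_n_bases=10, ext_filters=16,
                                  feat_name="dist_tss",
                                  pos_effect_kwargs=pos_effect_kwargs)
positional_branch = km.Model(pos_in, pos_out)  # standalone model for inspection or plotting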
def transition_block(x, stage, nb_filter, compression=1.0, dropout_rate=None,
                     weight_decay=1E-4):
    ''' Apply BatchNorm, 1x1 Convolution, averagePooling, optional compression, dropout

        # Arguments
            x: input tensor
            stage: index for dense block
            nb_filter: number of filters
            compression: calculated as 1 - reduction. Reduces the number of feature maps in the transition block.
            dropout_rate: dropout rate
            weight_decay: weight decay factor
    '''
    eps = 1.1e-5
    conv_name_base = 'conv' + str(stage) + '_blk'
    relu_name_base = 'relu' + str(stage) + '_blk'
    pool_name_base = 'pool' + str(stage)

    x = BatchNormalization(epsilon=eps, axis=concat_axis,
                           name=conv_name_base + '_bn')(x)
    x = Scale(axis=concat_axis, name=conv_name_base + '_scale')(x)
    x = Activation('relu', name=relu_name_base)(x)
    x = Convolution2D(int(nb_filter * compression), 1, 1, name=conv_name_base,
                      bias=False,
                      kernel_regularizer=regularizers.L1L2(l2=1E-4))(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    x = AveragePooling2D((2, 2), strides=(2, 2), name=pool_name_base)(x)

    return x
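# Illustrative arithmetic (not from the original code) for the compression described in the
# docstring above: with reduction = 0.5, a transition block halves the feature maps.
nb_filter = 256
reduction = 0.5
compression = 1.0 - reduction               # 0.5
out_filters = int(nb_filter * compression)  # 128 feature maps after the 1x1 convolution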
def densenet121_model(img_rows, img_cols, weights_path, color_type=1,
                      nb_dense_block=4, growth_rate=32, nb_filter=64,
                      reduction=0.5, dropout_rate=0.0, weight_decay=1e-4,
                      num_classes=None):
    '''
    DenseNet 121 Model for Keras

    Model Schema is based on
    https://github.com/flyyufelix/DenseNet-Keras

    ImageNet Pretrained Weights
    Theano: https://drive.google.com/open?id=0Byy2AcGyEVxfMlRYb3YzV210VzQ
    TensorFlow: https://drive.google.com/open?id=0Byy2AcGyEVxfSTA4SHJVOHNuTXc

    # Arguments
        nb_dense_block: number of dense blocks to add to end
        growth_rate: number of filters to add per dense block
        nb_filter: initial number of filters
        reduction: reduction factor of transition blocks.
        dropout_rate: dropout rate
        weight_decay: weight decay factor
        classes: optional number of classes to classify images
        weights_path: path to pre-trained weights
    # Returns
        A Keras model instance.
    '''
    eps = 1.1e-5

    # compute compression factor
    compression = 1.0 - reduction

    # Handle Dimension Ordering for different backends
    global concat_axis
    if K.image_dim_ordering() == 'tf':
        concat_axis = 3
        img_input = Input(shape=(img_rows, img_cols, color_type), name='data')
    else:
        concat_axis = 1
        img_input = Input(shape=(color_type, img_rows, img_cols), name='data')

    # From architecture for ImageNet (Table 1 in the paper)
    nb_filter = 64
    nb_layers = [6, 12, 24, 16]  # For DenseNet-121

    # Initial convolution
    x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input)
    x = Convolution2D(nb_filter, 7, 7, subsample=(2, 2), name='conv1',
                      bias=False,
                      kernel_regularizer=regularizers.L1L2(l2=1E-4))(x)
    x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv1_bn')(x)
    x = Scale(axis=concat_axis, name='conv1_scale')(x)
    x = Activation('relu', name='relu1')(x)
    x = ZeroPadding2D((1, 1), name='pool1_zeropadding')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x)

    # Add dense blocks
    for block_idx in range(nb_dense_block - 1):
        stage = block_idx + 2
        x, nb_filter = dense_block(x, stage, nb_layers[block_idx], nb_filter,
                                   growth_rate,
                                   dropout_rate=dropout_rate,
                                   weight_decay=weight_decay)

        # Add transition_block
        x = transition_block(x, stage, nb_filter, compression=compression,
                             dropout_rate=dropout_rate,
                             weight_decay=weight_decay)
        nb_filter = int(nb_filter * compression)

    final_stage = stage + 1
    x, nb_filter = dense_block(x, final_stage, nb_layers[-1], nb_filter,
                               growth_rate,
                               dropout_rate=dropout_rate,
                               weight_decay=weight_decay)

    x = BatchNormalization(epsilon=eps, axis=concat_axis,
                           name='conv' + str(final_stage) + '_blk_bn')(x)
    x = Scale(axis=concat_axis, name='conv' + str(final_stage) + '_blk_scale')(x)
    x = Activation('relu', name='relu' + str(final_stage) + '_blk')(x)

    x_fc = GlobalAveragePooling2D(name='pool' + str(final_stage))(x)
    x_fc = Dense(1000, name='fc6')(x_fc)
    x_fc = Activation('softmax', name='prob')(x_fc)

    model = Model(img_input, x_fc, name='densenet')

    model.load_weights(weights_path, by_name=True)

    # Truncate and replace softmax layer for transfer learning
    # Cannot use model.layers.pop() since model is not of Sequential() type
    # The method below works since pre-trained weights are stored in layers but not in the model
    x_newfc = GlobalAveragePooling2D(name='pool' + str(final_stage))(x)
    x_newfc = Dense(num_classes, name='fc6',
                    kernel_regularizer=regularizers.L1L2(l2=1E-4))(x_newfc)
    x_newfc = Activation('softmax', name='prob')(x_newfc)

    model = Model(img_input, x_newfc)

    # Learning rate is changed to 0.001
    sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model
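# Hedged usage sketch for densenet121_model (not from the original repo). The weights file
# below is a hypothetical local path to the pre-trained DenseNet-121 weights linked in the
# docstring; image size, channel count, and class count are illustrative.
model = densenet121_model(img_rows=224, img_cols=224,
                          weights_path='imagenet_models/densenet121_weights_tf.h5',
                          color_type=3, num_classes=10)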
def test_convolutional_recurrent_statefulness():

    data_format = 'channels_last'
    return_sequences = False
    inputs = np.random.rand(num_samples, sequence_len,
                            input_num_row, input_num_col,
                            input_channel)
    # Tests for statefulness
    model = Sequential()
    kwargs = {'data_format': data_format,
              'return_sequences': return_sequences,
              'filters': filters,
              'kernel_size': (num_row, num_col),
              'stateful': True,
              'batch_input_shape': inputs.shape,
              'padding': 'same'}
    layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')
    out1 = model.predict(np.ones_like(inputs))

    # train once so that the states change
    model.train_on_batch(np.ones_like(inputs),
                         np.random.random(out1.shape))
    out2 = model.predict(np.ones_like(inputs))

    # if the state is not reset, output should be different
    assert (out1.max() != out2.max())

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones_like(inputs))
    assert (out2.max() != out3.max())

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones_like(inputs))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones_like(inputs))
    assert (out4.max() != out5.max())

    # cntk doesn't support eval convolution with static
    # variable, will enable it later
    if K.backend() != 'cntk':
        # check regularizers
        kwargs = {'data_format': data_format,
                  'return_sequences': return_sequences,
                  'kernel_size': (num_row, num_col),
                  'stateful': True,
                  'filters': filters,
                  'batch_input_shape': inputs.shape,
                  'kernel_regularizer': regularizers.L1L2(l1=0.01),
                  'recurrent_regularizer': regularizers.L1L2(l1=0.01),
                  'bias_regularizer': 'l2',
                  'activity_regularizer': 'l2',
                  'kernel_constraint': 'max_norm',
                  'recurrent_constraint': 'max_norm',
                  'bias_constraint': 'max_norm',
                  'padding': 'same'}
        layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
        layer.build(inputs.shape)
        assert len(layer.losses) == 3
        assert layer.activity_regularizer
        output = layer(K.variable(np.ones(inputs.shape)))
        assert len(layer.losses) == 4
        K.eval(output)

    # check dropout
    layer_test(convolutional_recurrent.ConvLSTM2D,
               kwargs={'data_format': data_format,
                       'return_sequences': return_sequences,
                       'filters': filters,
                       'kernel_size': (num_row, num_col),
                       'padding': 'same',
                       'dropout': 0.1,
                       'recurrent_dropout': 0.1},
               input_shape=inputs.shape)

    # check state initialization
    layer = convolutional_recurrent.ConvLSTM2D(
        filters=filters,
        kernel_size=(num_row, num_col),
        data_format=data_format,
        return_sequences=return_sequences)
    layer.build(inputs.shape)
    x = Input(batch_shape=inputs.shape)
    initial_state = layer.get_initial_state(x)
    y = layer(x, initial_state=initial_state)
    model = Model(x, y)
    assert (model.predict(inputs).shape ==
            layer.compute_output_shape(inputs.shape))
import keras.optimizers as optimizers
import tensorflow as tf

rnd = lambda x: np.random.uniform(low=-1.0, high=1.0, size=(x, 300)).astype(np.float32)

N = 500
same_p1 = rnd(N)
same_p2 = rnd(N)
nsame_p1 = rnd(N)
nsame_p2 = rnd(N)

a = 'softsign'
m = Sequential()
m.add(Dense(300, input_shape=(300,), activation=a,
            activity_regularizer=regularizers.L1L2()))
m.add(Dense(150, activation=a))
m.add(Dense(250, activation=a))
m.add(Dense(300, activation='sigmoid'))

w = m.get_weights()
for x in w:
    x.fill(0)

import pdb; pdb.set_trace()


def nmap(vecs):
    for i in range(len(vecs)):
        vecs[i] = m.predict(vecs[i:i+1])


nmap(same_p1)
def create_model_bn2(nvariables, lr=0.001, clipnorm=10.,
                     nodes1=64, nodes2=32, nodes3=16,
                     discr_loss_weight=1.0,
                     l1_reg=0.0, l2_reg=0.0, use_bn=True):
    # Adding 1 BN layer right after the input layer
    regularizer = regularizers.L1L2(l1=l1_reg, l2=l2_reg)

    inputs = Input(shape=(nvariables, ), dtype='float32')

    x = inputs
    x = BatchNormalization(epsilon=1e-4, momentum=0.9)(x)

    x = Dense(nodes1,
              kernel_initializer='glorot_uniform',
              kernel_regularizer=regularizer,
              use_bias=False)(x)
    if use_bn:
        x = BatchNormalization(epsilon=1e-4, momentum=0.9)(x)
    x = Activation('tanh')(x)

    if nodes2:
        x = Dense(nodes2,
                  kernel_initializer='glorot_uniform',
                  kernel_regularizer=regularizer,
                  use_bias=False)(x)
        if use_bn:
            x = BatchNormalization(epsilon=1e-4, momentum=0.9)(x)
        x = Activation('tanh')(x)

    if nodes3:
        x = Dense(nodes3,
                  kernel_initializer='glorot_uniform',
                  kernel_regularizer=regularizer,
                  use_bias=False)(x)
        if use_bn:
            x = BatchNormalization(epsilon=1e-4, momentum=0.9)(x)
        x = Activation('tanh')(x)

    # Output nodes
    regr = Dense(1, activation='linear',
                 kernel_initializer='glorot_uniform', name='regr')(x)
    discr = Dense(1, activation='sigmoid',
                  kernel_initializer='glorot_uniform', name='discr')(x)

    # Create model
    model = Model(inputs=inputs, outputs=[regr, discr])

    # Set loss and optimizers
    adam = optimizers.Adam(lr=lr, clipnorm=clipnorm)

    model.compile(
        optimizer=adam,
        loss={'regr': masked_huber_loss,
              'discr': masked_binary_crossentropy},
        #loss={'regr': unmasked_huber_loss, 'discr': masked_binary_crossentropy},
        loss_weights={'regr': 1.0, 'discr': discr_loss_weight},
        #metrics={'regr': ['acc', 'mse', 'mae'], 'discr': ['acc',]}
    )
    model.summary()
    return model
def train(weights, bias, lr=0.001, l1_reg=0, l2_reg=0.005, batch_size=32,
          epochs=100, till_convergence=False):
    '''
    Function that uses activation maximisation to extract optimal embeddings
    for a set of words.
    '''
    print('L2_REG:', l2_reg)

    # Build model
    opt = Adam(lr=float(lr))
    X_input = Input((1,), name='Input')
    X_tensor = Embedding(weights.shape[0], weights.shape[1],
                         input_length=1,
                         #weights=[weights],
                         trainable=True,
                         embeddings_regularizer=regularizers.L1L2(float(l1_reg),
                                                                  float(l2_reg)))(X_input)
    X_tensor = Flatten(name='flatten')(X_tensor)
    X_output = Dense(weights.shape[0], activation='softmax',
                     name='softmax_out', trainable=False)(X_tensor)

    model = Model(inputs=X_input, outputs=X_output)
    model.compile(loss='categorical_crossentropy', optimizer=opt,
                  metrics=['accuracy'])
    print(model.get_weights()[0].shape)
    print(model.summary())

    # set softmax weights
    model.layers[-1].set_weights([weights.T, bias.T])

    # Train Model
    print('Training Model')
    X = list(range(weights.shape[0]))
    y = list(range(weights.shape[0]))

    if till_convergence == True:
        current_loss = 1000
        previous_loss = 10000
        while True:
            if abs(current_loss - previous_loss) < 0.005:
                break
            model.fit(X, to_categorical(y, weights.shape[0]), epochs=1,
                      batch_size=batch_size, shuffle=True, verbose=0)
            previous_loss = current_loss
            current_loss = model.history.history['loss'][-1]
            sys.stdout.write('\r' +
                             ' Acc: ' + str(round(model.history.history['acc'][-1], 4)) +
                             ' Loss: ' + str(round(model.history.history['loss'][-1], 4)) +
                             ' Diff: ' + str(round(abs(current_loss - previous_loss), 4)))
            if round(model.history.history['acc'][-1], 4) == 1:
                break
    else:
        model.fit(X, to_categorical(y, weights.shape[0]), epochs=epochs,
                  batch_size=batch_size, shuffle=True, verbose=1)

    print('')
    return model.get_weights()[0]
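# Hedged usage sketch for the activation-maximisation train() above (not from the original
# code). `weights` and `bias` stand in for the softmax weights of a pre-trained word model,
# shaped (vocab_size, embedding_dim) and (vocab_size,); the sizes below are illustrative.
import numpy as np

vocab_size, embedding_dim = 1000, 50
softmax_weights = np.random.normal(size=(vocab_size, embedding_dim)).astype('float32')
softmax_bias = np.zeros(vocab_size, dtype='float32')

optimal_embeddings = train(softmax_weights, softmax_bias,
                           lr=0.001, l2_reg=0.005, epochs=10)
# optimal_embeddings has shape (vocab_size, embedding_dim): one learned row per word index.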
def __init__(self, SNN_hparams, fl):
    self.hparams = SNN_hparams
    self.features_c_dim = fl.features_c_dim

    # Second step: Concat features_c and fingerprint vectors ==> form intermediate input vector (IIV)
    # IIV put into single SNN net
    # Defining half_model_model
    half_c = Input(shape=(fl.features_c_dim, ))

    # singlenet for the SNN. Same weights and biases for top and bottom half of SNN
    singlenet = Sequential()
    hidden_layers = self.hparams['hidden_layers']
    numel = len(hidden_layers)
    generator_dropout = self.hparams.get('dropout', 0)
    singlenet.add(
        Dense(hidden_layers[0],
              input_dim=self.features_c_dim,
              activation=self.hparams['activation'],
              kernel_regularizer=regularizers.L1L2(
                  l1=self.hparams['singlenet_l1'],
                  l2=self.hparams['singlenet_l2'])))
    if generator_dropout != 0:
        singlenet.add(Dropout(generator_dropout))
    if numel > 1:
        if hidden_layers[1] != 0:  # Even if hidden layers has 2 elements, 2nd element may be 0
            for i in range(numel - 1):
                singlenet.add(
                    Dense(hidden_layers[i + 1],
                          activation=self.hparams['activation'],
                          kernel_regularizer=regularizers.L1L2(
                              l1=self.hparams['singlenet_l1'],
                              l2=self.hparams['singlenet_l2'])))
    singlenet.add(
        Dense(self.hparams.get('feature_vector_dim', 10), activation='sigmoid'))

    # Output of half_model
    encoded_half = singlenet(half_c)
    # Make half_model callable
    half_model = Model(name='half_model_encoded', inputs=half_c, outputs=encoded_half)

    # All steps together by calling the above models one after another.
    # fp model ==> half_model ==> L1 distance and final node model
    lc = Input(name='left_c', shape=(fl.features_c_dim, ))
    rc = Input(name='right_c', shape=(fl.features_c_dim, ))

    # Apply half_model to left and right side
    encoded_l = half_model(lc)
    encoded_r = half_model(rc)

    # layer to merge two encoded inputs with the l1 distance between them
    L1_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
    # call this layer on list of two input tensors.
    L1_distance = L1_layer([encoded_l, encoded_r])
    prediction = Dense(1, activation='sigmoid')(L1_distance)
    self.siamese_net = Model(name='final_model', inputs=[lc, rc], outputs=prediction)

    if self.hparams.get('learning_rate', None) is None:
        self.siamese_net.compile(loss='binary_crossentropy',
                                 optimizer=self.hparams['optimizer'])
    else:
        sgd = optimizers.Adam(lr=self.hparams['learning_rate'])
        self.siamese_net.compile(loss='binary_crossentropy', optimizer=sgd)
def __init__(self, SNN_hparams, fl):
    self.hparams = SNN_hparams
    self.features_c_dim = fl.features_c_dim
    self.features_d_count = fl.features_d_count
    self.fp_length = [
        SNN_hparams['fp_length'] for _ in range(SNN_hparams['fp_number'])
    ]
    self.conv_width = [
        SNN_hparams['conv_width'] for _ in range(SNN_hparams['conv_number'])
    ]

    # First step: SMILES ==> fingerprint vector
    # Defining fingerprint(fp) model
    half_features_d = []
    half_conv_net = []
    for idx in range(fl.features_d_count):
        # Creating left input tensors for features_d.
        # For each molecule
        # Make one input tensor for atoms, bond, edge tensor
        half_features_d.append([
            Input(name='h_a_inputs_' + str(idx) + 'x',
                  shape=fl.features_d_a[idx][0].shape[1:]),
            Input(name='h_b_inputs_' + str(idx) + 'y',
                  shape=fl.features_d_a[idx][1].shape[1:]),
            Input(name='h_e_inputs_' + str(idx) + 'z',
                  shape=fl.features_d_a[idx][2].shape[1:],
                  dtype='int32')
        ])
        single_molecule = half_features_d[-1]
        # Building the half_conv_net for that particular molecule
        single_molecule_half_conv_net = build_graph_conv_net_fp_only(
            single_molecule,
            conv_layer_sizes=self.conv_width,
            fp_layer_size=self.fp_length,
            conv_activation=self.hparams['conv_activation'],
            conv_l1=self.hparams['conv_l1'],
            conv_l2=self.hparams['conv_l2'],
            fp_activation='softmax')
        single_molecule_half_conv_net_model = Model(
            name='h_fp_' + str(idx),
            inputs=single_molecule,
            outputs=single_molecule_half_conv_net)
        half_conv_net.append(single_molecule_half_conv_net_model)

    # Second step: Concat features_c and fingerprint vectors ==> form intermediate input vector (IIV)
    # IIV put into single SNN net
    # Defining half_model_model
    half_c = Input(shape=(fl.features_c_dim, ))
    half_fp = [
        Input(shape=(self.fp_length[0], ))
        for _ in range(fl.features_d_count)
    ]
    # Concat left side 4 inputs and right side 4 inputs
    half_combined = merge.Concatenate()([half_c] + half_fp)

    # singlenet for the SNN. Same weights and biases for top and bottom half of SNN
    singlenet = Sequential()
    hidden_layers = self.hparams['hidden_layers']
    numel = len(hidden_layers)
    generator_dropout = self.hparams.get('dropout', 0)
    singlenet.add(
        Dense(hidden_layers[0],
              input_dim=self.features_c_dim +
              self.features_d_count * self.hparams['fp_length'],
              activation=self.hparams['activation'],
              kernel_regularizer=regularizers.L1L2(
                  l1=self.hparams['singlenet_l1'],
                  l2=self.hparams['singlenet_l2'])))
    if generator_dropout != 0:
        singlenet.add(Dropout(generator_dropout))
    if numel > 1:
        if hidden_layers[1] != 0:  # Even if hidden layers has 2 elements, 2nd element may be 0
            for i in range(numel - 1):
                singlenet.add(
                    Dense(hidden_layers[i + 1],
                          activation=self.hparams['activation'],
                          kernel_regularizer=regularizers.L1L2(
                              l1=self.hparams['singlenet_l1'],
                              l2=self.hparams['singlenet_l2'])))
    singlenet.add(
        Dense(self.hparams.get('feature_vector_dim', 10), activation='sigmoid'))

    # Output of half_model
    encoded_half = singlenet(half_combined)
    # Make half_model callable
    half_model = Model(name='half_model_encoded',
                       inputs=[half_c] + half_fp,
                       outputs=encoded_half)

    # All steps together by calling the above models one after another.
    # fp model ==> half_model ==> L1 distance and final node model
    lc = Input(name='left_c', shape=(fl.features_c_dim, ))
    left_features_d = []
    left_fp_model = []
    rc = Input(name='right_c', shape=(fl.features_c_dim, ))
    right_features_d = []
    right_fp_model = []
    for idx in range(fl.features_d_count):
        # a = atom tensor, b = bond tensor, e = edge tensor
        left_features_d.append([
            Input(name='l_a_inputs_' + str(idx) + 'x',
                  shape=fl.features_d_a[idx][0].shape[1:]),
            Input(name='l_b_inputs_' + str(idx) + 'y',
                  shape=fl.features_d_a[idx][1].shape[1:]),
            Input(name='l_e_inputs_' + str(idx) + 'z',
                  shape=fl.features_d_a[idx][2].shape[1:],
                  dtype='int32')
        ])
        # Call fp model for each set of left molecules
        left_fp_model.append(half_conv_net[idx](left_features_d[-1]))

        # Same as left side. Just change left to right.
        right_features_d.append([
            Input(name='r_a_inputs_' + str(idx) + 'x',
                  shape=fl.features_d_a[idx][0].shape[1:]),
            Input(name='r_b_inputs_' + str(idx) + 'y',
                  shape=fl.features_d_a[idx][1].shape[1:]),
            Input(name='r_e_inputs_' + str(idx) + 'z',
                  shape=fl.features_d_a[idx][2].shape[1:],
                  dtype='int32')
        ])
        # Call fp model for each set of right molecules
        right_fp_model.append(half_conv_net[idx](right_features_d[-1]))

    # Apply half_model to left and right side
    encoded_l = half_model([lc] + left_fp_model)
    encoded_r = half_model([rc] + right_fp_model)

    # layer to merge two encoded inputs with the l1 distance between them
    L1_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
    # call this layer on list of two input tensors.
    L1_distance = L1_layer([encoded_l, encoded_r])
    prediction = Dense(1, activation='sigmoid')(L1_distance)
    self.siamese_net = Model(
        name='final_model',
        inputs=[lc] + [left_tensor
                       for molecule in left_features_d
                       for left_tensor in molecule] +
               [rc] + [right_tensor
                       for molecule in right_features_d
                       for right_tensor in molecule],
        outputs=prediction)

    if self.hparams.get('learning_rate', None) is None:
        self.siamese_net.compile(loss='binary_crossentropy',
                                 optimizer=self.hparams['optimizer'])
    else:
        sgd = optimizers.Adam(lr=self.hparams['learning_rate'])
        self.siamese_net.compile(loss='binary_crossentropy', optimizer=sgd)
def create_model_bn_charge_cnn(nvariables, lr=0.001, clipnorm=10.,
                               nodes1=64, nodes2=32, nodes3=16,
                               discr_loss_weight=1.0,
                               l1_reg=0.0, l2_reg=0.0,
                               use_bn=True, use_dropout=False):
    regularizer = regularizers.L1L2(l1=l1_reg, l2=l2_reg)

    #inputs = Input(shape=(nvariables,), dtype='float32')
    # MK
    dnn_input = Input(shape=(nvariables, ), dtype='float32', name='inputDNN')
    cnn_input = Input(shape=(5, 2), dtype='float32', name='inputCNN')

    xcnn = layers.Conv1D(10, 3, activation='relu')(cnn_input)
    xcnn = layers.Conv1D(10, 2, activation='relu')(xcnn)
    xcnn = layers.Flatten()(xcnn)

    x = Dense(nodes1,
              kernel_initializer='glorot_uniform',
              kernel_regularizer=regularizer,
              use_bias=False)(dnn_input)
    if use_bn:
        x = BatchNormalization(epsilon=1e-4, momentum=0.9)(x)
    x = Activation('tanh')(x)
    if use_dropout:
        x = Dropout(0.2)(x)

    if nodes2:
        x = Dense(nodes2,
                  kernel_initializer='glorot_uniform',
                  kernel_regularizer=regularizer,
                  use_bias=False)(x)
        if use_bn:
            x = BatchNormalization(epsilon=1e-4, momentum=0.9)(x)
        x = Activation('tanh')(x)
        if use_dropout:
            x = Dropout(0.2)(x)

    if nodes3:
        concatenated = layers.concatenate([x, xcnn], axis=-1)
        x = Dense(nodes3,
                  kernel_initializer='glorot_uniform',
                  kernel_regularizer=regularizer,
                  use_bias=False)(concatenated)
        if use_bn:
            x = BatchNormalization(epsilon=1e-4, momentum=0.9)(x)
        x = Activation('tanh')(x)
        if use_dropout:
            x = Dropout(0.2)(x)

    # Output nodes
    regr = Dense(1, activation='linear',
                 kernel_initializer='glorot_uniform', name='regr')(x)
    discr = Dense(1, activation='sigmoid',
                  kernel_initializer='glorot_uniform', name='discr')(x)
    #MK: adding charge prediction output
    charge_prediction = Dense(1, activation='sigmoid',
                              name='charge_prediction')(x)

    # Create model
    #MK: adding charge prediction output
    model = Model(inputs=[dnn_input, cnn_input],
                  outputs=[regr, discr, charge_prediction])

    # Set loss and optimizers
    adam = optimizers.Adam(lr=lr, clipnorm=clipnorm)

    # MK: loss function specification for charge
    model.compile(
        optimizer=adam,
        loss={'regr': masked_huber_loss,
              'discr': masked_binary_crossentropy,
              'charge_prediction': MKmasked_binary_crossentropy},
        loss_weights={'regr': 1.0 / discr_loss_weight,
                      'discr': 1.0,
                      'charge_prediction': 1.0},
        #metrics={'regr': ['acc', 'mse', 'mae'], 'discr': ['acc',]}
    )
    model.summary()
    return model