def __init__(self,
             input_dim=None,
             dense_layers=[128, 128, 128],
             dropout_layers=[0.1, 0.1],
             embedding_dim=2,
             embedding_l2=0.1,
             name='base',
             **kwargs):
    super(BaseModel, self).__init__(name=name, **kwargs)
    self.input_dim = input_dim
    self.dense_layers = dense_layers
    self.dropout_layers = dropout_layers
    self.embedding_dim = embedding_dim
    self.embedding_l2 = embedding_l2
    self.alphadrop = [None] * len(dropout_layers)
    self.dense_proj = [None] * len(dense_layers)
    for i, v in enumerate(self.dense_layers):
        self.dense_proj[i] = layers.Dense(v,
                                          activation='selu',
                                          kernel_initializer='lecun_normal',
                                          name="Dense_" + str(i))
    for i, v in enumerate(self.dropout_layers):
        self.alphadrop[i] = layers.AlphaDropout(rate=v,
                                                name="AlphaDropout_" + str(i))
    self.embed = layers.Dense(
        self.embedding_dim,
        kernel_regularizer=tf.keras.regularizers.l2(self.embedding_l2),
        name="embedding_layer")
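# A minimal call() sketch for the class above, assuming the Dense/AlphaDropout
# pairs are meant to be chained in order before the embedding layer; this
# method is not part of the original snippet.
def call(self, inputs, training=False):
    x = inputs
    for i, dense in enumerate(self.dense_proj):
        x = dense(x)
        if i < len(self.alphadrop):
            x = self.alphadrop[i](x, training=training)
    return self.embed(x)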
def getDropoutLayers(self, activation, dropout_rate):
    # AlphaDropout preserves the self-normalizing property of selu activations;
    # plain Dropout is used for every other activation.
    if activation == 'selu':
        dropout_layers = [tfl.AlphaDropout(dropout_rate)
                          for i in range(self.num_layers)]
    else:
        dropout_layers = [tfl.Dropout(dropout_rate)
                          for i in range(self.num_layers)]
    return dropout_layers
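# Hedged companion sketch: one way the returned list could be interleaved with
# a matching stack of Dense layers; num_layers and the Dense list are
# assumptions, not part of the original class.
def applyLayers(self, x, dense_layers, dropout_layers, training=False):
    for dense, drop in zip(dense_layers, dropout_layers):
        x = drop(dense(x), training=training)
    return x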
def _build_model(
    hp: HyperParameters,
    input_layer: KerasTensor,
    encoded_layer: KerasTensor,
) -> keras.Model:
    """Build the part of the architecture tunable by keras-tuner.

    Note:
        It is a relatively simple dense network, with self-normalizing layers.

    Args:
        hp: hyperparameters passed by the tuner.
        input_layer: The input layer of the model.
        encoded_layer: The encoding layer of the model.

    Returns:
        A tunable keras functional model.
    """
    x = encoded_layer
    # Stack the tunable Dense/AlphaDropout blocks sequentially.
    for i in range(hp.Int("dense_layers", 1, 3, default=2)):
        x = layers.Dense(
            units=hp.Int(f"units_layer_{i + 1}",
                         min_value=32, max_value=256, step=32, default=64),
            activation="selu",
            kernel_initializer=tf.keras.initializers.LecunNormal(),
        )(x)
        x = layers.AlphaDropout(0.5)(x)
    output_layer = layers.Dense(1, activation="sigmoid")(x)
    model = keras.Model(input_layer, output_layer)
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4], default=1e-3)),
        loss="binary_crossentropy",
        metrics=[
            "accuracy",
            tfa.metrics.F1Score(num_classes=2, average="micro",
                                threshold=0.5, name="f1_score"),
        ],
    )
    return model
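# Hedged usage sketch: wiring _build_model into a keras-tuner search. The input
# width and the single encoding layer here are illustrative assumptions, not
# the original pipeline.
import keras_tuner as kt

def build_tunable(hp: HyperParameters) -> keras.Model:
    inputs = keras.Input(shape=(64,))
    encoded = layers.Dense(64, activation="selu",
                           kernel_initializer="lecun_normal")(inputs)
    return _build_model(hp, inputs, encoded)

tuner = kt.RandomSearch(build_tunable, objective="val_accuracy", max_trials=10)
# tuner.search(x_train, y_train, validation_data=(x_val, y_val), epochs=20)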
def build_model(hc_model, width=1024, depth=2, dropout_rate=0.5, nclasses=4,
                mode='dense', activation='softmax', selu=False,
                mc_dropout=False, l2_reg=1e-4):
    """PixelNet: define an MLP model over a hypercolumn model given as input.

    @article{pixelnet,
      title={Pixel{N}et: {R}epresentation of the pixels, by the pixels, and for the pixels},
      author={Bansal, Aayush and Chen, Xinlei and Russell, Bryan and Gupta, Abhinav and Ramanan, Deva},
      journal={arXiv preprint arXiv:1702.06506},
      year={2017}
    }

    From the paper and their notes on GitHub, it seems like the semantic
    segmentation task should work either with a linear classifier + BatchNorm,
    or with an MLP without BatchNorm.

    activation: activation function for the prediction layer. 'softmax' for
    classification, 'linear' for regression.
    """
    x = hc_model.output
    nchannels = x.shape[-1]
    x = flatten_pixels(nchannels)(x)
    if selu:
        for idx in range(depth):
            x = dense_selu(x, width, name='mlp{}'.format(idx + 1), l2_reg=l2_reg)
            x = layers.AlphaDropout(dropout_rate)(x)
    else:
        for idx in range(depth):
            x = dense_bn(x, width, name='mlp{}'.format(idx + 1), l2_reg=l2_reg)
            # Force dropout at inference time only when MC dropout is requested;
            # otherwise let Keras manage the training flag.
            x = layers.Dropout(dropout_rate)(x, training=True if mc_dropout else None)
    x = layers.Dense(nclasses, activation=activation, name='predictions')(x)
    x = unflatten_pixels(hc_model.inputs, nclasses=nclasses, mode=mode)(x)
    return models.Model(inputs=hc_model.inputs, outputs=x)
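# Hedged usage sketch: build_hypercolumn is a hypothetical stand-in for however
# the hypercolumn feature extractor (hc_model) is constructed in this codebase.
hc_model = build_hypercolumn(input_shape=(256, 256, 3))  # hypothetical helper
pixelnet = build_model(hc_model, width=1024, depth=2,
                       dropout_rate=0.5, nclasses=4, selu=True)
pixelnet.compile(optimizer='adam', loss='categorical_crossentropy',
                 metrics=['accuracy'])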
def __init__(self):
    super(RL, self).__init__()
    model_dir = cfg.DIR
    self.bert = TFBertModel.from_pretrained(model_dir)
    self.bilstm = layers.Bidirectional(
        layers.LSTM(cfg.HIDDENT_SIZE, return_sequences=True), name='bilstm')
    self.fusion_dense = layers.Dense(cfg.HIDDENT_SIZE, name='fusion_dense',
                                     activation=gelu)
    self.p_dense = layers.Dense(14, name='p_dense')
    self.dropout = layers.AlphaDropout(0.3)
    self.cce = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
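# Hypothetical call() sketch (not part of the original snippet): BERT sequence
# output -> BiLSTM -> fusion -> AlphaDropout -> per-token logits over the 14
# classes predicted by p_dense; the input format is an assumption.
def call(self, input_ids, attention_mask=None, training=False):
    sequence_output = self.bert(input_ids, attention_mask=attention_mask)[0]
    x = self.bilstm(sequence_output)
    x = self.fusion_dense(x)
    x = self.dropout(x, training=training)
    return self.p_dense(x)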
def dense(x, dims, activation='relu', batchnorm=True, dropout_rate=0):
    if activation == 'selu':
        # selu pairs with lecun_normal initialization and AlphaDropout to keep
        # the activations self-normalizing.
        x = layers.Dense(dims, activation='selu',
                         kernel_initializer='lecun_normal',
                         bias_initializer='zeros')(x)
        if dropout_rate:
            x = layers.AlphaDropout(dropout_rate)(x)
    elif activation == 'relu':
        x = layers.Dense(dims, activation='relu')(x)
        if batchnorm:
            x = layers.BatchNormalization()(x)
        if dropout_rate:
            x = layers.Dropout(dropout_rate)(x)
    else:
        msg = 'Unknown activation function: %s' % activation
        raise ValueError(msg)
    return x
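# Hedged usage sketch: stacking the dense() helper into a small self-normalizing
# MLP; the input width, layer sizes, and output head are illustrative assumptions.
inputs = tf.keras.Input(shape=(32,))
h = dense(inputs, 128, activation='selu', dropout_rate=0.1)
h = dense(h, 64, activation='selu', dropout_rate=0.1)
outputs = layers.Dense(1, activation='sigmoid')(h)
mlp = tf.keras.Model(inputs, outputs)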
def __init__(self,
             drate=0.1,
             encoder_shape=[32, 32],
             latent_dim=32,
             activation="relu",
             name='encoder',
             dynamic=False,
             **kwargs):
    super(Encoder, self).__init__(name=name, **kwargs)
    self.encoder_shape = encoder_shape
    self.drop0 = layers.Dropout(rate=drate)
    self.alphadrop = layers.AlphaDropout(rate=drate)
    self.dense_proj = [None] * len(encoder_shape)
    for i, v in enumerate(self.encoder_shape):
        self.dense_proj[i] = layers.Dense(v, activation=activation)
    # Separate heads for the latent mean and log-variance.
    self.dense_mean = layers.Dense(latent_dim)
    self.dense_log_var = layers.Dense(latent_dim)
def __init__(self,
             original_dim,
             # encoder,
             activation="relu",
             drate=0.1,
             decoder_shape=[32, 32],
             name='decoder',
             **kwargs):
    super(Decoder, self).__init__(name=name, **kwargs)
    self.decoder_shape = decoder_shape
    self.drop = layers.Dropout(rate=drate)
    self.alphadrop = layers.AlphaDropout(rate=drate)
    self.dense_proj = [None] * len(decoder_shape)
    for i, v in enumerate(self.decoder_shape):
        self.dense_proj[i] = layers.Dense(v, activation=activation)
    self.dense_output = layers.Dense(
        original_dim)  # , kernel_regularizer=l1_l2(l1=0.001, l2=0.001))
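# Hedged sketch of how the encoder's mean/log-variance heads are typically
# combined with the decoder via the reparameterization trick; this Sampling
# layer is an assumption, not part of the original encoder/decoder classes.
class Sampling(layers.Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        epsilon = tf.random.normal(shape=tf.shape(z_mean))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon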
def dense(x, dims, activation="relu", batchnorm=True, dropout_rate=0): if activation == "selu": x = layers.Dense( dims, activation="selu", kernel_initializer="lecun_normal", bias_initializer="zeros", )(x) if dropout_rate: x = layers.AlphaDropout(dropout_rate)(x) elif activation == "relu": x = layers.Dense(dims, activation="relu")(x) if batchnorm: x = layers.BatchNormalization()(x) if dropout_rate: x = layers.Dropout(dropout_rate)(x) else: msg = "Unknown activation function: %s" % activation ValueError(msg) return x
from tensorflow import keras
from tensorflow.keras import layers

# Self-normalizing MLP on CIFAR-10 with selu, lecun_normal, and AlphaDropout.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()
X_train = X_train_full[5000:]
y_train = y_train_full[5000:]
X_valid = X_train_full[:5000]
y_valid = y_train_full[:5000]

model = keras.models.Sequential()
model.add(layers.Flatten(input_shape=[32, 32, 3]))
model.add(layers.BatchNormalization())
for _i in range(20):
    model.add(layers.Dense(100, kernel_initializer="lecun_normal",
                           activation="selu"))
model.add(layers.AlphaDropout(rate=0.1))
model.add(layers.Dense(10, activation="softmax"))

optimizer = keras.optimizers.SGD(learning_rate=1e-2)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
callbacks = [early_stopping_cb]

# Standardize pixels using statistics from the training split only.
X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_valid_scaled = (X_valid - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds
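# Hedged continuation: fit on the standardized data with the early-stopping
# callback defined above; the epoch count and batch size are assumptions.
model.fit(X_train_scaled, y_train, epochs=100, batch_size=32,
          validation_data=(X_valid_scaled, y_valid), callbacks=callbacks)
model.evaluate(X_test_scaled, y_test)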