def bert_module(query, key, value, i):
    # Multi-headed self-attention
    attention_output = layers.MultiHeadAttention(
        num_heads=config.NUM_HEAD,
        key_dim=config.EMBED_DIM // config.NUM_HEAD,
        name="encoder_{}/multiheadattention".format(i),
    )(query, key, value)
    attention_output = layers.Dropout(
        0.1, name="encoder_{}/att_dropout".format(i))(attention_output)
    attention_output = layers.LayerNormalization(
        epsilon=1e-6,
        name="encoder_{}/att_layernormalization".format(i))(query + attention_output)

    # Feed-forward layer
    ffn = keras.Sequential(
        [
            layers.Dense(config.FF_DIM, activation="relu"),
            layers.Dense(config.EMBED_DIM),
        ],
        name="encoder_{}/ffn".format(i),
    )
    ffn_output = ffn(attention_output)
    ffn_output = layers.Dropout(
        0.1, name="encoder_{}/ffn_dropout".format(i))(ffn_output)
    sequence_output = layers.LayerNormalization(
        epsilon=1e-6,
        name="encoder_{}/ffn_layernormalization".format(i))(attention_output + ffn_output)
    return sequence_output
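# Usage sketch (not from the original source): stacking bert_module into a
# small encoder. `config` is assumed to expose NUM_HEAD, EMBED_DIM and FF_DIM
# as above; NUM_LAYERS is a hypothetical attribute added for illustration.
inputs = layers.Input(shape=(None, config.EMBED_DIM))
x = inputs
for i in range(config.NUM_LAYERS):
    x = bert_module(x, x, x, i)
encoder = keras.Model(inputs, x, name="bert_encoder")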
def encoder_layer(hparams, name="encoder_layer"):
    inputs = tf.keras.Input(shape=(None, hparams.d_model), name="inputs")
    padding_mask = tf.keras.Input(shape=(1, 1, None), name="padding_mask")

    attention = MultiHeadAttention(hparams, name="attention")({
        'query': inputs,
        'key': inputs,
        'value': inputs,
        'mask': padding_mask
    })
    attention = layers.Dropout(hparams.dropout)(attention)
    attention += tf.cast(inputs, dtype=tf.float32)
    attention = layers.LayerNormalization(epsilon=1e-6)(attention)

    outputs = layers.Dense(hparams.num_units,
                           activation=hparams.activation)(attention)
    outputs = layers.Dense(hparams.d_model)(outputs)
    outputs = layers.Dropout(hparams.dropout)(outputs)
    outputs += attention
    outputs = layers.LayerNormalization(epsilon=1e-6)(outputs)

    return tf.keras.Model(inputs=[inputs, padding_mask],
                          outputs=outputs,
                          name=name)
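# Usage sketch (assumed, not from the original source): `MultiHeadAttention`
# is the project's custom layer; `hparams` needs d_model, num_units,
# activation and dropout, so a SimpleNamespace stands in for it here.
from types import SimpleNamespace
hparams = SimpleNamespace(d_model=128, num_units=512, activation="relu",
                          dropout=0.1)
layer = encoder_layer(hparams)
out = layer([tf.random.normal((2, 10, 128)), tf.zeros((2, 1, 1, 10))])
print(out.shape)  # (2, 10, 128)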
def __init__(self, **kwargs):
    super(CapsSimilarity, self).__init__(**kwargs)
    self.layer_normal1 = layers.LayerNormalization()
    # self.dot = layers.Dot((2, 2), normalize=True)
    self.dot = layers.Dot((2, 2))
    self.layer_normal2 = layers.LayerNormalization()
    self.activation = layers.ELU()
def get_D_and_C(n_atts, input_shape=(256, 256, 3), dim=64, fc_dim=1024,
                n_downsamplings=5, weight_decay=0.0):
    # n_atts: the number of attributes, which is also the output dimension
    # of the classifier's last layer.
    inputs = layers.Input(shape=input_shape)

    # Convolutional layers shared by the discriminator and the classifier.
    # D/C: 256x256x3 ==> 128x128x64 ==> 64x64x128 ==> 32x32x256 ==> 16x16x512 ==> 8x8x1024 ==> 65536
    h = layers.Conv2D(
        dim, 4, strides=2, padding='same',
        kernel_regularizer=tf.keras.regularizers.l2(weight_decay))(inputs)
    h = layers.LayerNormalization()(h)
    h = layers.LeakyReLU()(h)

    for i in range(n_downsamplings - 1):
        d = min(dim * 2**(i + 1), 1024)
        h = layers.Conv2D(
            d, 4, strides=2, padding='same',
            kernel_regularizer=tf.keras.regularizers.l2(weight_decay))(h)
        h = layers.LayerNormalization()(h)
        h = layers.LeakyReLU()(h)

    h = layers.Flatten()(h)

    # FC layers owned by the discriminator.
    # 65536 ==> 1024 ==> 1
    h_D = layers.Dense(
        fc_dim,
        kernel_regularizer=tf.keras.regularizers.l2(weight_decay))(h)
    h_D = layers.LayerNormalization()(h_D)
    h_D = layers.LeakyReLU()(h_D)
    outputs_D = layers.Dense(1)(h_D)

    # FC layers owned by the classifier.
    # 65536 ==> 1024 ==> n_atts
    h_C = layers.Dense(
        fc_dim,
        kernel_regularizer=tf.keras.regularizers.l2(weight_decay))(h)
    h_C = layers.LayerNormalization()(h_C)
    h_C = layers.LeakyReLU()(h_C)
    h_C = layers.Dense(n_atts)(h_C)
    outputs_C = tf.nn.sigmoid(h_C)

    D = Model(inputs=inputs, outputs=outputs_D, name='Discriminator')
    C = Model(inputs=inputs, outputs=outputs_C, name='Classifier')
    return D, C
def __init__(self, channels, kernel_size, initial_activation=None,
             normalization=None, downsample_rate=2, regularization=None):
    super(DownResBlock, self).__init__()
    self.out_channels = channels
    self.in_act = initial_activation
    self.norm0 = None
    self.norm1 = None
    if normalization == "layer":
        self.norm0 = layers.LayerNormalization(axis=[2])
        self.norm1 = layers.LayerNormalization(axis=[2])
    elif normalization == "batch":
        self.norm0 = layers.BatchNormalization()
        self.norm1 = layers.BatchNormalization()
    self.conv1 = layers.Conv1D(filters=channels, kernel_size=kernel_size,
                               padding="same",
                               kernel_regularizer=regularization)
    self.conv1act = layers.LeakyReLU()
    self.conv2 = layers.Conv1D(filters=channels, kernel_size=kernel_size,
                               padding="same", strides=downsample_rate,
                               kernel_regularizer=regularization)
    # self.pool = layers.AveragePooling1D(pool_size=downsample_rate, strides=downsample_rate, padding="same")
    self.shortcut_conv = layers.Conv1D(filters=channels, kernel_size=1,
                                       padding="same",
                                       kernel_regularizer=regularization)
    self.shortcut_pool = layers.AveragePooling1D(pool_size=downsample_rate,
                                                 strides=downsample_rate,
                                                 padding="same")
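# A minimal call() sketch for DownResBlock (assumed wiring; the original
# snippet only shows __init__). It uses the layers above as a pre-activation
# residual block: strided main path plus a pooled 1x1-conv shortcut.
def call(self, inputs, training=False):
    x = inputs
    if self.norm0 is not None:
        x = self.norm0(x, training=training)
    if self.in_act is not None:
        x = self.in_act(x)
    x = self.conv1(x)
    if self.norm1 is not None:
        x = self.norm1(x, training=training)
    x = self.conv1act(x)
    x = self.conv2(x)  # strided downsampling
    shortcut = self.shortcut_pool(self.shortcut_conv(inputs))
    return x + shortcut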
def __init__(self, embed_dim, ff_dim, num_heads, **kwargs):
    super().__init__(**kwargs)
    self.embed_dim = embed_dim
    self.ff_dim = ff_dim
    self.num_heads = num_heads
    self.attention_1 = layers.MultiHeadAttention(num_heads=num_heads,
                                                 key_dim=embed_dim,
                                                 dropout=0.1)
    self.attention_2 = layers.MultiHeadAttention(num_heads=num_heads,
                                                 key_dim=embed_dim,
                                                 dropout=0.1)
    self.ffn_layer_1 = layers.Dense(ff_dim, activation="relu")
    self.ffn_layer_2 = layers.Dense(embed_dim)
    self.layernorm_1 = layers.LayerNormalization()
    self.layernorm_2 = layers.LayerNormalization()
    self.layernorm_3 = layers.LayerNormalization()
    self.embedding = PositionalEmbedding(embed_dim=EMBED_DIM,
                                         sequence_length=SEQ_LENGTH,
                                         vocab_size=VOCAB_SIZE)
    self.out = layers.Dense(VOCAB_SIZE, activation="softmax")
    self.dropout_1 = layers.Dropout(0.3)
    self.dropout_2 = layers.Dropout(0.5)
    self.supports_masking = True
def build(self, input_shape):
    self.d_model = input_shape[-1]

    # Self multi-head attention
    self.multi_head_attention_1 = MultiHeadAttention(self.nb_proj)
    self.dropout_1 = layers.Dropout(rate=self.dropout_rate)
    self.norm_1 = layers.LayerNormalization(epsilon=1e-6)

    # Multi-head attention combined with the encoder output
    self.multi_head_attention_2 = MultiHeadAttention(self.nb_proj)
    self.dropout_2 = layers.Dropout(rate=self.dropout_rate)
    self.norm_2 = layers.LayerNormalization(epsilon=1e-6)

    # Feed-forward network
    self.dense_1 = layers.Dense(
        units=self.FFN_units,
        activation="relu",
        kernel_initializer=initializers.RandomNormal(stddev=0.01, seed=3),
        bias_initializer=initializers.Zeros())
    self.dense_2 = layers.Dense(
        units=self.d_model,
        kernel_initializer=initializers.RandomNormal(stddev=0.01, seed=3),
        bias_initializer=initializers.Zeros())
    self.dropout_3 = layers.Dropout(rate=self.dropout_rate)
    self.norm_3 = layers.LayerNormalization(epsilon=1e-6)
def build_model():
    model = Sequential([
        layers.Dense(50, input_shape=(test_dataset.shape[-1],)),
        layers.PReLU(alpha_initializer=tf.initializers.constant(0.25)),
        layers.LayerNormalization(),
        layers.Dropout(0.5),
        layers.Dense(50),
        layers.PReLU(alpha_initializer=tf.initializers.constant(0.25)),
        layers.LayerNormalization(),
        layers.Dropout(0.5),
        layers.Dense(1, activation="sigmoid"),
    ])

    metrics = [
        keras.metrics.FalseNegatives(name="fn"),
        keras.metrics.FalsePositives(name="fp"),
        keras.metrics.TrueNegatives(name="tn"),
        keras.metrics.TruePositives(name="tp"),
        keras.metrics.Precision(name="precision"),
        keras.metrics.Recall(name="recall"),
        keras.metrics.AUC(name='auc'),
    ]
    model.compile(optimizer=keras.optimizers.Adam(0.001),
                  loss="binary_crossentropy",
                  metrics=metrics)
    return model
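# Usage sketch (assumed): `test_dataset` above is a module-level array whose
# last axis is the feature dimension; the training-data names below are
# hypothetical placeholders.
model = build_model()
model.fit(train_features, train_labels, epochs=10, batch_size=32,
          validation_data=(val_features, val_labels))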
def create_model(self):
    inputs = layers.Input(shape=(self.num_of_frames, self.frame_size))

    # Attention and normalization
    res = inputs
    x = layers.MultiHeadAttention(key_dim=256, num_heads=32,
                                  dropout=0.5)(inputs, inputs)
    x = layers.Dropout(0.5)(x)
    x = layers.LayerNormalization()(x)
    res += x

    # Feed-forward part
    x = layers.Conv1D(filters=512, kernel_size=1)(res)
    x = layers.PReLU()(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Conv1D(filters=res.shape[-1], kernel_size=1)(x)
    x = layers.Dropout(0.5)(x)
    x = layers.LayerNormalization()(x)
    x += res

    x = layers.GlobalAveragePooling1D()(x)
    outputs = layers.Dense(self.num_of_classes, activation='softmax')(x)
    self.model = tf.keras.Model(inputs, outputs)
def __init__(self, numparticle=6, expscale=False, epsilon=1e-25,
             activation=tf.keras.activations.tanh, **kwargs):
    super().__init__(**kwargs)
    self._numparticle = numparticle
    self.expscale = expscale
    self.epsilon = epsilon
    self.labelmerger = utils.LabelMerger(numparticle=numparticle,
                                         expscale=expscale)
    self.activation = activation

    self.layer_label_flatten = layers.Flatten()
    self.layer_data_flatten = layers.Flatten()
    self.layer_dense1 = layers.Dense(1024, activation=self.activation)
    self.layer_dense1_norm = layers.LayerNormalization(epsilon=1e-6)
    self.layer_dense2 = layers.Dense(512, activation=self.activation)
    self.layer_dense2_norm = layers.LayerNormalization(epsilon=1e-6)
    self.layer_dense3 = layers.Dense(256, activation=self.activation)
    self.layer_dense3_norm = layers.LayerNormalization(epsilon=1e-6)
    self.layer_dense4 = layers.Dense(1, activation=None)
def build(self, hp, inputs=None):
    """
    # Arguments
        hp: HyperParameters. The hyperparameters for building the model.
        inputs: Tensor of Shape [batch_size, seq_len]

    # Returns
        Output Tensor of shape `[batch_size, seq_len, embedding_dim]`.
    """
    inputs = nest.flatten(inputs)
    utils.validate_num_inputs(inputs, 1)
    pretraining = self.pretraining or hp.Choice(
        'pretraining',
        ['random', 'glove', 'fasttext', 'word2vec', 'none'],
        default='none')
    embedding_dim = self.embedding_dim or hp.Choice(
        'embedding_dim', [32, 64, 128, 256, 512], default=128)
    num_heads = self.num_heads or hp.Choice('num_heads', [8, 16, 32],
                                            default=8)
    dense_dim = self.dense_dim or hp.Choice(
        'dense_dim', [128, 256, 512, 1024, 2048], default=2048)
    dropout_rate = self.dropout_rate or hp.Choice(
        'dropout_rate', [0.0, 0.25, 0.5], default=0.0)

    ffn = tf.keras.Sequential([
        layers.Dense(dense_dim, activation="relu"),
        layers.Dense(embedding_dim),
    ])
    layernorm1 = layers.LayerNormalization(epsilon=1e-6)
    layernorm2 = layers.LayerNormalization(epsilon=1e-6)
    dropout1 = layers.Dropout(dropout_rate)
    dropout2 = layers.Dropout(dropout_rate)

    # Token and position embeddings
    input_node = nest.flatten(inputs)[0]
    token_embedding = Embedding(max_features=self.max_features,
                                pretraining=pretraining,
                                embedding_dim=embedding_dim,
                                dropout_rate=dropout_rate).build(
                                    hp, input_node)
    maxlen = input_node.shape[-1]
    batch_size = tf.shape(input_node)[0]
    positions = self.pos_array_funct(maxlen, batch_size)
    position_embedding = Embedding(max_features=maxlen,
                                   pretraining=pretraining,
                                   embedding_dim=embedding_dim,
                                   dropout_rate=dropout_rate).build(
                                       hp, positions)
    output_node = tf.keras.layers.Add()(
        [token_embedding, position_embedding])

    # Transformer block: self-attention + residual, then FFN + residual
    attn_output = MultiHeadSelfAttention(embedding_dim,
                                         num_heads).build(hp, output_node)
    attn_output = dropout1(attn_output)
    add_inputs_1 = tf.keras.layers.Add()([output_node, attn_output])
    out1 = layernorm1(add_inputs_1)
    ffn_output = ffn(out1)
    ffn_output = dropout2(ffn_output)
    add_inputs_2 = tf.keras.layers.Add()([out1, ffn_output])
    output = layernorm2(add_inputs_2)
    return output
def create_decoder(num_layers=DEC_LAYERS, num_heads=DEC_NUM_HEADS,
                   image_size=IMAGE_SIZE):
    inputs = layers.Input((NUM_PATCHES, ENC_PROJECTION_DIM))
    x = layers.Dense(DEC_PROJECTION_DIM)(inputs)

    for _ in range(num_layers):
        # Layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=LAYER_NORM_EPS)(x)

        # Create a multi-head attention layer.
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=DEC_PROJECTION_DIM, dropout=0.1
        )(x1, x1)

        # Skip connection 1.
        x2 = layers.Add()([attention_output, x])

        # Layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=LAYER_NORM_EPS)(x2)

        # MLP.
        x3 = mlp(x3, hidden_units=DEC_TRANSFORMER_UNITS, dropout_rate=0.1)

        # Skip connection 2.
        x = layers.Add()([x3, x2])

    x = layers.LayerNormalization(epsilon=LAYER_NORM_EPS)(x)
    x = layers.Flatten()(x)
    pre_final = layers.Dense(units=image_size * image_size * 3,
                             activation="sigmoid")(x)
    outputs = layers.Reshape((image_size, image_size, 3))(pre_final)

    return keras.Model(inputs, outputs, name="mae_decoder")
def create_encoder(num_heads=ENC_NUM_HEADS, num_layers=ENC_LAYERS):
    inputs = layers.Input((None, ENC_PROJECTION_DIM))
    x = inputs

    for _ in range(num_layers):
        # Layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=LAYER_NORM_EPS)(x)

        # Create a multi-head attention layer.
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=ENC_PROJECTION_DIM, dropout=0.1
        )(x1, x1)

        # Skip connection 1.
        x2 = layers.Add()([attention_output, x])

        # Layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=LAYER_NORM_EPS)(x2)

        # MLP.
        x3 = mlp(x3, hidden_units=ENC_TRANSFORMER_UNITS, dropout_rate=0.1)

        # Skip connection 2.
        x = layers.Add()([x3, x2])

    outputs = layers.LayerNormalization(epsilon=LAYER_NORM_EPS)(x)
    return keras.Model(inputs, outputs, name="mae_encoder")
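# Shape-check sketch (assumed wiring, not from the original source): chaining
# the MAE encoder and decoder. In a full masked-autoencoder pipeline the
# encoder sees only unmasked patches and mask tokens are inserted before
# decoding; here we only verify that the two models compose.
encoder = create_encoder()
decoder = create_decoder()
patch_embeddings = tf.random.normal((2, NUM_PATCHES, ENC_PROJECTION_DIM))
reconstruction = decoder(encoder(patch_embeddings))
print(reconstruction.shape)  # (2, IMAGE_SIZE, IMAGE_SIZE, 3)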
def define_1DCNN(nchan, L, Fs):
    model = tf.keras.Sequential()
    model.add(layers.InputLayer((L, nchan), batch_size=None))

    model.add(layers.Conv1D(filters=30, kernel_size=64, padding="causal"))
    model.add(layers.LayerNormalization())
    model.add(layers.Activation('elu'))
    model.add(layers.AveragePooling1D(pool_size=2))
    model.add(layers.Dropout(0.2))

    model.add(layers.Conv1D(filters=15, kernel_size=32, padding="causal"))
    model.add(layers.LayerNormalization())
    model.add(layers.Activation('elu'))
    model.add(layers.AveragePooling1D(pool_size=2))
    model.add(layers.Dropout(0.3))

    model.add(layers.Conv1D(filters=10, kernel_size=16, padding="causal"))
    model.add(layers.LayerNormalization())
    model.add(layers.Activation('elu'))
    model.add(layers.AveragePooling1D(pool_size=2))
    model.add(layers.Dropout(0.4))

    model.add(layers.Flatten())
    model.add(layers.Dense(15, activation="tanh"))
    model.add(layers.LayerNormalization())
    model.add(layers.Dense(3))
    model.add(layers.Activation('softmax'))

    model.compile(loss=losses.CategoricalCrossentropy(),
                  optimizer=optimizers.Adam(),
                  metrics=['accuracy'],
                  run_eagerly=False)
    return model
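# Usage sketch (assumed): 3-class classification of multichannel time series.
# Note that `Fs` is accepted but unused in the definition above; the shapes
# below are illustrative.
model = define_1DCNN(nchan=8, L=512, Fs=256)
model.summary()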
def build_model(x, y):
    nx = np.empty((len(x), 28, 28, 1))
    for i in range(len(x)):
        nx[i] = x[i].reshape((28, 28, 1))

    sigma_function = 'relu'
    model = keras.Sequential([
        keras.Input(shape=(28, 28, 1)),
        # 28x28x1 -> 28x28x8
        layers.Conv2D(8, 2, (1, 1), padding='same',
                      activation=sigma_function),
        layers.LayerNormalization(),
        # 28x28x8 -> 14x14x16 (stride 2, 'same' padding)
        layers.Conv2D(16, 2, (2, 2), padding='same',
                      activation=sigma_function),
        layers.LayerNormalization(),
        # 14x14x16 -> 14x14x32
        layers.Conv2D(32, 2, (1, 1), padding='same',
                      activation=sigma_function),
        layers.LayerNormalization(),
        layers.GlobalAvgPool2D(),
        layers.Dense(10, bias_initializer='one', activation="softmax"),
    ])
    model.summary()

    adam = tf.keras.optimizers.Adam()
    model.compile(loss='categorical_crossentropy', optimizer=adam,
                  metrics=['accuracy'])
    return model, nx, y
def __init__(self, dim, num_heads, window_size=7, shift_size=0,
             mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0.,
             drop_path=0., name=None):
    super().__init__(name=name)
    self.dim = dim
    self.num_heads = num_heads
    self.window_size = window_size
    self.shift_size = shift_size
    self.mlp_ratio = mlp_ratio
    assert 0 <= self.shift_size < self.window_size, \
        "shift_size must be in [0, window_size)"

    self.norm1 = layers.LayerNormalization(epsilon=1e-6, name="norm1")
    self.attn = WindowAttention(dim,
                                window_size=(window_size, window_size),
                                num_heads=num_heads,
                                qkv_bias=qkv_bias,
                                attn_drop_ratio=attn_drop,
                                proj_drop_ratio=drop,
                                name="attn")
    self.drop_path = (layers.Dropout(rate=drop_path,
                                     noise_shape=(None, 1, 1))
                      if drop_path > 0. else layers.Activation("linear"))
    self.norm2 = layers.LayerNormalization(epsilon=1e-6, name="norm2")
    self.mlp = MLP(dim, drop=drop, name="mlp")
def __init__(self, depthlen=288, gen_features=8, numparticle=6,
             overscale=10.0, expscale=False,
             activation=tf.keras.activations.relu, **kwargs):
    super().__init__(**kwargs)
    self.depthlen = depthlen
    self.gen_features = gen_features
    self.overscale = overscale
    self.expscale = expscale
    self.labelmerger = utils.LabelMerger(numparticle=numparticle,
                                         expscale=expscale)
    self.activation = activation

    if self.expscale:
        last_activation = tf.keras.activations.elu
    else:
        last_activation = tf.keras.activations.sigmoid

    self.layer1 = layers.Dense(512, activation=self.activation)
    self.layer1_norm = layers.LayerNormalization(epsilon=1e-6)
    self.layer2 = layers.Dense(1024, activation=self.activation)
    self.layer2_norm = layers.LayerNormalization(epsilon=1e-6)
    self.layer3 = layers.Dense(self.depthlen * self.gen_features,
                               activation=last_activation)
def create_transformer_module(
    latent_dim,
    projection_dim,
    num_heads,
    num_transformer_blocks,
    ffn_units,
    dropout_rate,
):
    # input shape: [1, latent_dim, projection_dim]
    inputs = layers.Input(shape=(latent_dim, projection_dim))
    x0 = inputs

    # Create multiple layers of the Transformer block.
    for _ in range(num_transformer_blocks):
        # Apply layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=1e-6)(x0)
        # Create a multi-head self-attention layer.
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(x1, x1)
        # Skip connection 1.
        x2 = layers.Add()([attention_output, x0])
        # Apply layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        # Apply the feed-forward network.
        ffn = create_ffn(hidden_units=ffn_units, dropout_rate=dropout_rate)
        x3 = ffn(x3)
        # Skip connection 2.
        x0 = layers.Add()([x3, x2])

    # Create the Keras model.
    model = keras.Model(inputs=inputs, outputs=x0)
    return model
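# Usage sketch (assumed): a Perceiver-style latent transformer operating on a
# fixed-size latent array. `create_ffn` is a helper defined elsewhere in the
# original code and is assumed to accept a list of hidden unit sizes.
transformer = create_transformer_module(
    latent_dim=256, projection_dim=128, num_heads=8,
    num_transformer_blocks=4, ffn_units=[128, 128], dropout_rate=0.1)
latents = tf.random.normal((2, 256, 128))
print(transformer(latents).shape)  # (2, 256, 128)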
def transformer_encoder(
    x,
    embedding_dim,
    mlp_dim,
    num_heads,
    dim_coefficient,
    attention_dropout,
    projection_dropout,
    attention_type="external_attention",
):
    residual_1 = x
    x = layers.LayerNormalization(epsilon=1e-5)(x)
    if attention_type == "external_attention":
        x = external_attention(
            x,
            embedding_dim,
            num_heads,
            dim_coefficient,
            attention_dropout,
            projection_dropout,
        )
    elif attention_type == "self_attention":
        x = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embedding_dim,
            dropout=attention_dropout
        )(x, x)
    x = layers.add([x, residual_1])

    residual_2 = x
    x = layers.LayerNormalization(epsilon=1e-5)(x)
    x = mlp(x, embedding_dim, mlp_dim)
    x = layers.add([x, residual_2])
    return x
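# Usage sketch (assumed): the same pre-norm block can be stacked with either
# attention flavor; `external_attention` and `mlp` are helpers defined
# elsewhere in the original code, and the names/values below are illustrative.
x = patch_embeddings  # (batch, num_patches, 64), hypothetical input
for _ in range(4):
    x = transformer_encoder(x, embedding_dim=64, mlp_dim=64, num_heads=4,
                            dim_coefficient=4, attention_dropout=0.2,
                            projection_dropout=0.2,
                            attention_type="self_attention")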
def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
    super(TransformerBlock, self).__init__()
    self.att = MultiHeadAttention(embed_dim, num_heads)
    self.ffn = self.point_wide_feed_forward_network(embed_dim, ff_dim)
    self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
    self.dropout = layers.Dropout(rate)
def __init__(self, time_dim, m_dim, d_model, num_heads, dff, rate=0.1,
             imputation_mode=False):
    super(EncoderLayer, self).__init__()
    self.imp = imputation_mode
    self.d_dim = d_model
    self.t_dim = time_dim
    self.m_dim = m_dim
    self.dff = dff
    self.num_heads = num_heads
    self.rate = rate
    self.mha_t = MultiHeadAttention(d_model, num_heads, m_dim)
    self.mha_m = MultiHeadAttention(d_model, num_heads, time_dim)
    self.ffn = point_wise_feed_forward_network(d_model, dff)
    self.layernorm1 = tfl.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = tfl.LayerNormalization(epsilon=1e-6)
    self.dropout1 = tfl.Dropout(rate)
    self.dropout2 = tfl.Dropout(rate)
def message_block(original_atom_state, original_bond_state, connectivity):
    """Performs the graph-aware updates."""
    atom_state = layers.LayerNormalization()(original_atom_state)
    bond_state = layers.LayerNormalization()(original_bond_state)

    source_atom = nfp.Gather()([atom_state,
                                nfp.Slice(np.s_[:, :, 1])(connectivity)])
    target_atom = nfp.Gather()([atom_state,
                                nfp.Slice(np.s_[:, :, 0])(connectivity)])

    # Edge update network
    new_bond_state = layers.Concatenate()(
        [source_atom, target_atom, bond_state])
    new_bond_state = layers.Dense(
        2 * atom_features, activation='relu')(new_bond_state)
    new_bond_state = layers.Dense(atom_features)(new_bond_state)
    bond_state = layers.Add()([original_bond_state, new_bond_state])

    # Message function
    source_atom = layers.Dense(atom_features)(source_atom)
    messages = layers.Multiply()([source_atom, bond_state])
    messages = nfp.Reduce(reduction='sum')(
        [messages, nfp.Slice(np.s_[:, :, 0])(connectivity), atom_state])

    # State transition function
    messages = layers.Dense(atom_features, activation='relu')(messages)
    messages = layers.Dense(atom_features)(messages)
    atom_state = layers.Add()([original_atom_state, messages])

    return atom_state, bond_state
def classify_branch(input_shape=(256, 256, 32), roi_pool_size=[10, 10],
                    num_bbox=400, chan_num=3, projection_dim=100,
                    transformer_layers=4, num_heads=4, crypt_class=False):
    Input_bbox = Input(shape=(num_bbox, 4))
    fmap = Input(shape=input_shape)

    # Transformer part =========
    pooled_features = ROIPoolingLayer(roi_pool_size[0],
                                      roi_pool_size[1])([fmap, Input_bbox])
    c_p_f = PatchEncoder_w_position(num_bbox, projection_dim,
                                    128)([pooled_features, Input_bbox])

    # Create multiple layers of the Transformer block.
    for _ in range(transformer_layers):
        # Layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=1e-6)(c_p_f)
        # Create a multi-head attention layer.
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim,
            dropout=0.15)(x1, x1)
        # Skip connection 1.
        x2 = layers.Add()([attention_output, c_p_f])
        # Layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        # MLP.
        x3 = mlp(x3, hidden_units=[projection_dim * 2, projection_dim],
                 dropout_rate=0.15)
        # Skip connection 2.
        c_p_f = layers.Add()([x3, x2])

    c_p_f = layers.LayerNormalization(epsilon=1e-6)(c_p_f)
    c_p_f = layers.Dropout(0.3)(c_p_f)  # increased from 0.2

    clone = layers.Dense(1)(c_p_f)
    partial = layers.Dense(1)(c_p_f)
    fufi = layers.Dense(1)(c_p_f)
    clone = layers.Activation('sigmoid', dtype='float32',
                              name='clone')(clone)
    partial = layers.Activation('sigmoid', dtype='float32',
                                name='partial')(partial)
    fufi = layers.Activation('sigmoid', dtype='float32', name='fufi')(fufi)

    if crypt_class:
        crypt = layers.Dense(1)(c_p_f)
        crypt = layers.Activation('sigmoid', dtype='float32',
                                  name='crclass')(crypt)
        just_trnsf = Model(inputs=[fmap, Input_bbox],
                           outputs=[clone, partial, fufi, crypt],
                           name="cpf")
    else:
        just_trnsf = Model(inputs=[fmap, Input_bbox],
                           outputs=[clone, partial, fufi], name="cpf")
    return just_trnsf
def __init__(self, n_heads: int, hidden_nodes: int,
             device: tf.device = None, **kwargs):
    super().__init__(n_heads, hidden_nodes, device, **kwargs)
    self.norm1 = layers.LayerNormalization()
    self.norm2 = layers.LayerNormalization()
def __init__(self, em_dim, num_heads, dff, rate=0.1):
    super().__init__()
    self.mha = MultiHeadAttention(em_dim, num_heads)
    self.ffn = point_wise_feed_forward_network(em_dim, dff)
    self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
    self.dropout1 = layers.Dropout(rate)
    self.dropout2 = layers.Dropout(rate)
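# A call() sketch for the encoder layer above (assumed; only __init__ is
# shown). This is the standard post-norm wiring from the TensorFlow
# Transformer tutorial that these attribute names follow; the custom
# MultiHeadAttention is assumed to return (output, attention_weights).
def call(self, x, training, mask=None):
    attn_output, _ = self.mha(x, x, x, mask)          # self-attention
    attn_output = self.dropout1(attn_output, training=training)
    out1 = self.layernorm1(x + attn_output)           # residual + norm
    ffn_output = self.ffn(out1)
    ffn_output = self.dropout2(ffn_output, training=training)
    return self.layernorm2(out1 + ffn_output)         # residual + norm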
def build(self, input_shape):
    self.d_model = input_shape[-1]
    self.multi_head_attention = MultiHeadAttention(self.nb_proj)
    self.dropout_1 = layers.Dropout(rate=self.dropout_rate)
    self.norm_1 = layers.LayerNormalization(epsilon=1e-6)
    self.dense_1 = layers.Dense(units=self.FFN_units, activation='relu')
    self.dense_2 = layers.Dense(units=self.d_model, activation='relu')
    self.dropout_2 = layers.Dropout(rate=self.dropout_rate)
    self.norm_2 = layers.LayerNormalization(epsilon=1e-6)
def Discriminator(input_shape=(None, None, 3), norm_type='layernorm'):
    """PatchGAN discriminator model (https://arxiv.org/abs/1611.07004).

    norm_type: Type of normalization. Either 'batchnorm' or 'layernorm'.
    """
    initializer = tf.random_normal_initializer(0., 0.02)

    x = KL.Input(shape=input_shape, name='input_image')
    inputs = x

    # Shape comments below assume a 256x256x3 input.
    x = Downsample(64, 4, norm_type, False)(x)  # (B, 128, 128, 64)
    x = Downsample(128, 4, norm_type)(x)        # (B, 64, 64, 128)
    x = Downsample(192, 4, norm_type)(x)        # (B, 32, 32, 192)

    output0 = KL.Conv2D(1, 4, strides=1,
                        kernel_initializer=initializer)(x)  # (B, 29, 29, 1)
    output0 = KL.GlobalAveragePooling2D()(output0)          # (B, 1)

    x = KL.ZeroPadding2D()(x)  # (B, 34, 34, 192)
    x = KL.Conv2D(256, 4, strides=1, kernel_initializer=initializer,
                  use_bias=False)(x)  # (B, 31, 31, 256)
    if norm_type.lower() == 'batchnorm':
        x = KL.BatchNormalization()(x)
    elif norm_type.lower() == 'layernorm':
        x = KL.LayerNormalization()(x)
    x = KL.LeakyReLU()(x)
    x = KL.ZeroPadding2D()(x)  # (B, 33, 33, 256)

    output1 = KL.Conv2D(1, 4, strides=1,
                        kernel_initializer=initializer)(x)  # (B, 30, 30, 1)
    output1 = KL.GlobalAveragePooling2D()(output1)          # (B, 1)

    x = KL.Conv2D(256, 4, strides=1, kernel_initializer=initializer,
                  use_bias=False)(x)  # (B, 30, 30, 256)
    if norm_type.lower() == 'batchnorm':
        x = KL.BatchNormalization()(x)
    elif norm_type.lower() == 'layernorm':
        x = KL.LayerNormalization()(x)
    x = KL.LeakyReLU()(x)
    x = KL.ZeroPadding2D()(x)  # (B, 32, 32, 256)

    output2 = KL.Conv2D(1, 4, strides=1,
                        kernel_initializer=initializer)(x)  # (B, 29, 29, 1)
    output2 = KL.GlobalAveragePooling2D()(output2)          # (B, 1)

    outputs = output0 + output1 + output2
    return KM.Model(inputs=inputs, outputs=outputs)
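# Usage sketch (assumed): `Downsample` is a conv/norm/LeakyReLU helper defined
# elsewhere in the original code. The output is the sum of three pooled patch
# scores at different receptive-field sizes.
disc = Discriminator(norm_type='layernorm')
score = disc(tf.random.normal((1, 256, 256, 3)))
print(score.shape)  # (1, 1)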
def build(self, hp, inputs=None):
    """
    # Arguments
        hp: HyperParameters. The hyperparameters for building the model.
        inputs: Tensor of Shape [batch_size, seq_len]

    # Returns
        Output Tensor of shape `[batch_size, seq_len, embedding_dim]`.
    """
    inputs = nest.flatten(inputs)
    utils.validate_num_inputs(inputs, 1)
    pretraining = utils.add_to_hp(self.pretraining, hp)
    embedding_dim = utils.add_to_hp(self.embedding_dim, hp)
    num_heads = utils.add_to_hp(self.num_heads, hp)
    dense_dim = utils.add_to_hp(self.dense_dim, hp)
    dropout = utils.add_to_hp(self.dropout, hp)

    ffn = tf.keras.Sequential(
        [
            layers.Dense(dense_dim, activation="relu"),
            layers.Dense(embedding_dim),
        ]
    )
    layernorm1 = layers.LayerNormalization(epsilon=1e-6)
    layernorm2 = layers.LayerNormalization(epsilon=1e-6)
    dropout1 = layers.Dropout(dropout)
    dropout2 = layers.Dropout(dropout)

    # Token and position embeddings
    input_node = nest.flatten(inputs)[0]
    token_embedding = Embedding(
        max_features=self.max_features,
        pretraining=pretraining,
        embedding_dim=embedding_dim,
        dropout=dropout,
    ).build(hp, input_node)
    maxlen = input_node.shape[-1]
    batch_size = tf.shape(input_node)[0]
    positions = self.pos_array_funct(maxlen, batch_size)
    position_embedding = Embedding(
        max_features=maxlen,
        pretraining=pretraining,
        embedding_dim=embedding_dim,
        dropout=dropout,
    ).build(hp, positions)
    output_node = tf.keras.layers.Add()(
        [token_embedding, position_embedding])

    # Transformer block: self-attention + residual, then FFN + residual
    attn_output = MultiHeadSelfAttention(embedding_dim, num_heads).build(
        hp, output_node
    )
    attn_output = dropout1(attn_output)
    add_inputs_1 = tf.keras.layers.Add()([output_node, attn_output])
    out1 = layernorm1(add_inputs_1)
    ffn_output = ffn(out1)
    ffn_output = dropout2(ffn_output)
    add_inputs_2 = tf.keras.layers.Add()([out1, ffn_output])
    return layernorm2(add_inputs_2)
def __init__(self, embed_dim, dense_dim, **kwargs):
    super(FNetEncoder, self).__init__(**kwargs)
    self.embed_dim = embed_dim
    self.dense_dim = dense_dim
    self.dense_proj = keras.Sequential(
        [
            layers.Dense(dense_dim, activation="relu"),
            layers.Dense(embed_dim),
        ]
    )
    self.layernorm_1 = layers.LayerNormalization()
    self.layernorm_2 = layers.LayerNormalization()
def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
    super(TransformerBlock, self).__init__()
    self.att = MultiHeadSelfAttention(embed_dim, num_heads)
    self.ffn = keras.Sequential(
        [
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ]
    )
    self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
    self.dropout1 = layers.Dropout(rate)
    self.dropout2 = layers.Dropout(rate)
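# A call() sketch for TransformerBlock (assumed; only __init__ appears above).
# This is the post-norm wiring used in the Keras "text classification with
# Transformer" example that these attribute names match.
def call(self, inputs, training=False):
    attn_output = self.att(inputs)
    attn_output = self.dropout1(attn_output, training=training)
    out1 = self.layernorm1(inputs + attn_output)   # residual + norm
    ffn_output = self.ffn(out1)
    ffn_output = self.dropout2(ffn_output, training=training)
    return self.layernorm2(out1 + ffn_output)      # residual + norm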