def __init__(self, load_post_trained_bert, post_trained_bert_file,
             dropout_rate, pooling_way, mode):
    """Create the encoder, the pooling layer and the two classifier stages.

    Args:
        load_post_trained_bert: Forwarded to ``PostTrainedBert``.
        post_trained_bert_file: Forwarded to ``PostTrainedBert``.
        dropout_rate: Rate of the dropout layer kept on ``self.dropout``.
        pooling_way: ``"max"`` selects global max pooling; any other value
            selects global average pooling.
        mode: Stored unchanged on ``self.mode``.
    """
    super(PosEmotionXModel, self).__init__()
    self.bert = PostTrainedBert(load_post_trained_bert, post_trained_bert_file)

    pooling_cls = (layers.GlobalMaxPooling1D if pooling_way == "max"
                   else layers.GlobalAveragePooling1D)
    self.pooling = pooling_cls()

    # Stage 1: 768-wide input -> 384 units with SELU.
    first_stage = keras.models.Sequential()
    first_stage.add(keras.Input(shape=(768, )))
    first_stage.add(layers.Dense(384, activation='selu'))
    self.classifier_part1 = first_stage

    self.dropout = layers.Dropout(dropout_rate)

    # Stage 2: 384-wide input -> 5-way softmax.
    second_stage = keras.models.Sequential()
    second_stage.add(keras.Input(shape=(384, )))
    second_stage.add(layers.Dense(5, activation='softmax'))
    self.classifier_part2 = second_stage

    self.mode = mode
def __init__(self, hparams, views):
    """Create the per-view stacks, the fused stack and the output layers.

    Args:
        hparams: Mapping indexed with the ``config.HP_LDEEP_*`` keys.
        views: Number of views; one ``ChannelDownResLayer`` is created per
            view at every per-view depth.
    """
    super(LateFuseCNN, self).__init__()
    self.views = views
    self.view_layers_count = hparams[config.HP_LDEEP_V_LAYERS]
    self.fuse_layers_count = hparams[config.HP_LDEEP_F_LAYERS]

    base_channels = hparams[config.HP_LDEEP_V_CHANNELS]
    view_kernel = hparams[config.HP_LDEEP_KSIZE]
    # Settings shared by every ChannelDownResLayer built here.
    common = dict(
        dropout_rate=hparams[config.HP_LDEEP_DROPOUT],
        w_norm_clip=hparams[config.HP_LDEEP_WEIGHTNORM],
    )
    last_view_depth = self.view_layers_count - 1

    # Per-view stacks: outer index is depth, inner index is the view.
    # Channel count doubles with each depth step.
    view_stacks = []
    for depth in range(last_view_depth):
        row = []
        for _ in range(self.views):
            row.append(
                ChannelDownResLayer(
                    channels_out=base_channels * (2**depth),
                    kernel_size=view_kernel,
                    **common))
        view_stacks.append(row)
    self.channel_down_res_layers = view_stacks

    # Final per-view layer, one per view, flagged as last_layer.
    final_view_layers = []
    for _ in range(self.views):
        final_view_layers.append(
            ChannelDownResLayer(
                channels_out=base_channels * (2**last_view_depth),
                kernel_size=view_kernel,
                last_layer=True,
                **common))
    self.channel_down_res_final_layer = final_view_layers

    self.merged_channel_n = base_channels * (2**last_view_depth) * self.views

    # Fused stack: channel count halves per step; kernel size is fixed at 3
    # here rather than read from hparams.
    last_fuse_depth = self.fuse_layers_count - 1
    fused_layers = []
    for depth in range(last_fuse_depth):
        fused_layers.append(
            ChannelDownResLayer(
                self.merged_channel_n // (2**depth),
                kernel_size=3,
                **common))
    self.down_res_layers = fused_layers

    self.down_res_layer_final = ChannelDownResLayer(
        self.merged_channel_n // (2**last_fuse_depth),
        kernel_size=3,
        last_layer=True,
        **common)

    self.feature_pool = layers.GlobalAveragePooling1D()
    self.lrelu_out = layers.LeakyReLU()
    self.dense_out = layers.Dense(units=1, activation='sigmoid')
def __init__(self, cfg, verbose=False):
    """Create the convolution stack plus pooling, dropout and GAP layers.

    Args:
        cfg: Configuration object; reads ``model_conv_filters``,
            ``model_bn_in`` and ``model_dropout``.
        verbose: Forwarded to the parent constructor.
    """
    super(Conv, self).__init__(cfg, verbose)

    self.convs = []
    for index, n_filters in enumerate(cfg.model_conv_filters):
        # Only the first convolution can have its bias disabled, and only
        # when cfg.model_bn_in is truthy; all later ones keep their bias.
        if index == 0:
            bias_enabled = not cfg.model_bn_in
        else:
            bias_enabled = True
        conv_layer = layers.Conv1D(
            filters=n_filters,
            kernel_size=3,
            padding='same',
            activation='relu',
            use_bias=bias_enabled,
            name=f'conv{index}',
        )
        self.convs.append(conv_layer)
        # Also expose each convolution as an attribute named after the layer.
        setattr(self, conv_layer.name, conv_layer)

    self.pool = layers.MaxPool1D(pool_size=2, name='pool')
    self.dropout = layers.SpatialDropout1D(cfg.model_dropout, name='dropout')
    self.gap = layers.GlobalAveragePooling1D(name='gap')
def tf_model(text_length, vocab_size, embed_dim=32, num_heads=2, ff_dim=32):
    """Build a single-block transformer model with one sigmoid output.

    Args:
        text_length: Length of each input token sequence.
        vocab_size: Vocabulary size passed to the embedding layer.
        embed_dim: Embedding size for each token.
        num_heads: Number of attention heads.
        ff_dim: Hidden layer size of the feed-forward network inside the
            transformer block.

    Returns:
        An uncompiled Keras model.
    """
    token_ids = layers.Input(shape=(text_length,))

    embed = TokenAndPositionEmbedding(text_length, vocab_size, embed_dim)
    hidden = embed(token_ids)

    encoder = TransformerBlock(embed_dim, num_heads, ff_dim)
    hidden = encoder(hidden)

    hidden = layers.GlobalAveragePooling1D()(hidden)
    hidden = layers.Dropout(0.1)(hidden)
    hidden = layers.Dense(20, activation="relu")(hidden)
    hidden = layers.Dropout(0.1)(hidden)
    prediction = layers.Dense(1, activation='sigmoid')(hidden)

    return ks.Model(inputs=token_ids, outputs=prediction)
def __call__(self, x):
    """Apply the squeeze-and-excite gate to ``x`` and return the product.

    The gate is computed from a global average of ``x`` passed through two
    convolutions and ``hard_sigmoid``, then multiplied with ``x``.
    """
    squeezed = layers.GlobalAveragePooling1D(
        name=self.prefix + "squeeze_excite/AvgPool")(x)
    # Restore a length-1 sequence axis so Conv1D can be applied.
    squeezed = layers.Reshape((1, self.filters))(squeezed)

    reduce_conv = layers.Conv1D(
        _depth(self.filters * self.se_ratio),
        kernel_size=3,
        padding='same',
        name=self.prefix + "squeeze_excite/Conv",
    )
    gate = reduce_conv(squeezed)
    gate = layers.ReLU(name=self.prefix + "squeeze_excite/Relu")(gate)

    expand_conv = layers.Conv1D(
        self.filters,
        kernel_size=1,
        padding="same",
        name=self.prefix + 'squeeze_excite/Conv_1',
    )
    gate = hard_sigmoid(expand_conv(gate))

    return layers.Multiply(name=self.prefix + "squeeze_excite/Mul")([x, gate])
def __init__(self, num_classes):
    """Create the sub-blocks and layers used by ResnetBlock.

    :param num_classes: number of units of the final softmax layer
    """
    super(ResnetBlock, self).__init__()

    # Residual sub-blocks.
    self.resnet1 = ResnetSubBlock(n_feature_maps=64)
    self.resnet2 = ResnetSubBlock(n_feature_maps=128)

    # Convolutional pieces, all 128 filters wide.
    self.cnn1 = CNNBlock(filters=128, kernel_size=8)
    self.cnn2 = CNNBlock(filters=128, kernel_size=5)
    self.conv = layers.Conv1D(filters=128, kernel_size=3, padding='same')
    self.bn1 = layers.BatchNormalization()
    self.bn2 = layers.BatchNormalization()

    # Shortcut branch and merge.
    self.shortcut = ShortcutBlock(filters=128, kernel_size=1)
    self.add = layers.Add()
    self.relu = layers.Activation(activation='relu')

    # Classification head.
    self.gap = layers.GlobalAveragePooling1D()
    self.out = layers.Dense(num_classes, activation='softmax')
def transformerNetworkConv(trackShape, numHidden, numHeads):
    """Build a masked transformer model with one sigmoid output.

    Args:
        trackShape: Shape passed to the input layer.
        numHidden: Width of the dense layers; also passed to
            ``TransformerBlock`` as its first and third arguments.
        numHeads: Number of heads passed to ``TransformerBlock``.

    Returns:
        An uncompiled Keras model.
    """
    trackInput = layers.Input(trackShape)
    # Entries equal to -999 are treated as padding.
    maskedTracks = layers.Masking(mask_value=-999,
                                  name="maskFeatures")(trackInput)

    perStepDense = layers.TimeDistributed(
        layers.Dense(numHidden, activation='relu'), name='tdDense')
    projected = perStepDense(maskedTracks)

    # NOTE(review): the Conv1D output is discarded, and the transformer below
    # reads the masked input rather than `projected`, so neither the dense
    # projection nor this convolution reaches the model output. Kept as-is to
    # preserve behaviour; confirm which tensor was meant to be used.
    layers.Conv1D(4, 2, activation="relu",
                  input_shape=maskedTracks.shape[1:])(projected)

    encoded = TransformerBlock(numHidden, numHeads, numHidden)(maskedTracks)
    pooled = layers.GlobalAveragePooling1D()(encoded)
    hidden = layers.Dense(numHidden, activation="relu")(pooled)
    score = layers.Dense(1, activation="sigmoid")(hidden)

    return keras.models.Model(inputs=trackInput, outputs=score)
def create_model(self):
    """Build the two-layer LSTM classifier and store it on ``self.model``.

    Reads ``self.num_of_frames``, ``self.frame_size`` and
    ``self.num_of_classes``. The model is not compiled here.
    """
    frame_sequence_shape = (self.num_of_frames, self.frame_size)
    stack = [
        layers.InputLayer(input_shape=frame_sequence_shape),
        # Both LSTMs return full sequences so they can be averaged over time.
        layers.LSTM(256, recurrent_dropout=.5, dropout=.5,
                    return_sequences=True),
        layers.LSTM(256, recurrent_dropout=.5, dropout=.5,
                    return_sequences=True),
        layers.GlobalAveragePooling1D(),
        layers.Dense(128),
        layers.PReLU(),
        layers.Dropout(.5),
        layers.Dense(self.num_of_classes, activation='softmax'),
    ]
    self.model = tf.keras.Sequential(stack)
def get_remaining_time_model(max_case_length, vocab_size, output_dim=1,
                             embed_dim=36, num_heads=4, ff_dim=64):
    """Build the two-input "remaining_time_transformer" model.

    Args:
        max_case_length: Length of the token-sequence input.
        vocab_size: Vocabulary size passed to the embedding layer.
        output_dim: Number of units of the linear output layer.
        embed_dim: Embedding size passed to the embedding and transformer.
        num_heads: Number of attention heads.
        ff_dim: Feed-forward size passed to the transformer block.

    Returns:
        An uncompiled Keras model taking ``[sequence, time_features]``.
    """
    sequence_in = layers.Input(shape=(max_case_length,))
    # Three time-based features.
    time_in = layers.Input(shape=(3,))

    # Sequence branch: embed, encode, then average over positions.
    seq_branch = TokenAndPositionEmbedding(
        max_case_length, vocab_size, embed_dim)(sequence_in)
    seq_branch = TransformerBlock(embed_dim, num_heads, ff_dim)(seq_branch)
    seq_branch = layers.GlobalAveragePooling1D()(seq_branch)

    # Time branch: a single dense projection.
    time_branch = layers.Dense(32, activation="relu")(time_in)

    merged = layers.Concatenate()([seq_branch, time_branch])
    merged = layers.Dropout(0.1)(merged)
    merged = layers.Dense(128, activation="relu")(merged)
    merged = layers.Dropout(0.1)(merged)
    prediction = layers.Dense(output_dim, activation="linear")(merged)

    return tf.keras.Model(
        inputs=[sequence_in, time_in],
        outputs=prediction,
        name="remaining_time_transformer",
    )
def build_model():
    """
    Build and compile a small 1-D CNN with a single linear output.

    :return: the compiled Keras Sequential model (Adam optimizer, mean
        squared error loss, 'mse' metric); no training is done here
    """
    network = keras.Sequential()
    for layer in (
            layers.Conv1D(128, 3, activation='relu'),
            layers.GlobalAveragePooling1D(),
            layers.Dense(64, activation='relu'),
            layers.Dense(1),
    ):
        network.add(layer)

    network.compile(
        optimizer="adam",
        loss=keras.losses.MeanSquaredError(),
        metrics=['mse'],
    )
    return network
def embedding_cls(self):
    """Build, summarize and compile the embedding-based binary classifier.

    Reads ``self.max_features`` and ``self.embedding_dim``. The final Dense
    layer has no activation, so the model outputs logits; the loss uses
    ``from_logits=True`` and accuracy is thresholded at 0.0 accordingly.

    Returns:
        The compiled Keras Sequential model.
    """
    model = tf.keras.Sequential([
        layers.Embedding(self.max_features + 1, self.embedding_dim),
        layers.Dropout(0.2),
        layers.GlobalAveragePooling1D(),
        layers.Dropout(0.2),
        layers.Dense(1)
    ])

    print("====>>>>Model summary: \n")
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()

    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        optimizer='adam',
        # Passed as a list, the documented form for `metrics`.
        metrics=[tf.metrics.BinaryAccuracy(threshold=0.0)])
    return model
def main(pklname):
    """Train the tweet classifier and save its architecture and weights.

    Loads labelled tweets through ``preprocess.load_data_with_labels``,
    splits them 80/20, trains for 15 epochs with batch size 1024, then writes
    the model JSON to ``MODEL_FILE_PATH`` and the weights to ``PARAMS_PATH``.

    Args:
        pklname: Passed to ``preprocess.load_data_with_labels``.
    """
    # ---------------------------------------------------------------
    # load data
    # ---------------------------------------------------------------
    tweets, labels, vocab_size = preprocess.load_data_with_labels(pklname)
    x_train, x_test, y_train, y_test = train_test_split(
        tweets, labels, train_size=0.8)

    # ---------------------------------------------------------------
    # build model
    # ---------------------------------------------------------------
    embedding_dim = 64
    model = keras.Sequential([
        layers.Embedding(vocab_size, embedding_dim,
                         input_length=MAX_LENGTH_OF_TWEETS),
        layers.Dense(16, activation="relu"),
        layers.GlobalAveragePooling1D(),
        layers.Dense(1, activation="sigmoid")
    ])

    # ---------------------------------------------------------------
    # compile and train model
    # ---------------------------------------------------------------
    model.compile(optimizer="adam",
                  loss="binary_crossentropy",
                  metrics=["accuracy"])
    # summary() prints on its own and returns None; print() around it only
    # emitted an extra "None" line.
    model.summary()

    batch_size = 1024
    epochs = 15
    model.fit(x_train, y_train,
              validation_data=(x_test, y_test),
              batch_size=batch_size,
              epochs=epochs)

    # ---------------------------------------------------------------
    # save model and parameters
    # ---------------------------------------------------------------
    model_json_str = model.to_json()
    # Context manager so the file is flushed and closed deterministically.
    with open(MODEL_FILE_PATH, "w") as model_file:
        model_file.write(model_json_str)
    model.save_weights(PARAMS_PATH)
def NASNetMobile(include_top=True, weights='hasc', input_shape=None,
                 pooling=None, classes=6, classifier_activation='softmax'):
    """Build the NASNet-Mobile variant, optionally loading weights.

    Args:
        include_top: When false, the model is cut at ``model.layers[-3]``.
        weights: ``'hasc'``/``'HASC'`` for the bundled weight file, a path to
            a weight file, or ``None`` to skip loading.
        input_shape: Input shape; defaults to ``(256 * 3, 1)``.
        pooling: Used only when ``include_top`` is false: ``'avg'`` or
            ``'max'`` adds global pooling; ``None`` adds nothing; any other
            value prints a message and adds nothing.
        classes: Number of output classes.
        classifier_activation: Activation passed to ``NASNet``.

    Returns:
        The resulting Keras model.

    Raises:
        ValueError: If HASC weights are requested with ``include_top`` and
            ``classes`` is not 6.
    """
    if input_shape is None:
        input_shape = (256 * 3, 1)

    wants_hasc = weights in ['hasc', 'HASC']
    if wants_hasc and include_top and classes != 6:
        raise ValueError('If using `weights` as `"hasc"` with `include_top`'
                         ' as true, `classes` should be 6')

    model = NASNet(input_shape, 1056, 4, 32, False, 2, classes,
                   classifier_activation)

    if weights is not None:
        if wants_hasc:
            weights = 'weights/nasnetmobile/nasnetmobile_hasc_weights_{}_{}.hdf5'.format(
                int(input_shape[0]), int(input_shape[1]))

        # Initialize from the HASC weights or from the given weights file;
        # a missing file is only reported, not treated as an error.
        if os.path.exists(weights):
            print("Load weights from {}".format(weights))
            model.load_weights(weights)
        else:
            print("Not exist weights: {}".format(weights))

    if include_top:
        return model

    # Without the top: cut the model at the third layer from the end.
    features = model.layers[-3].output
    if pooling == 'avg':
        features = layers.GlobalAveragePooling1D()(features)
    elif pooling == 'max':
        features = layers.GlobalMaxPooling1D()(features)
    elif pooling is not None:
        print("Not exist pooling option: {}".format(pooling))

    return Model(inputs=model.input, outputs=features)
def model_cnn(kwargs):
    """Build and compile a multi-width convolutional text model.

    Args:
        kwargs: Dict read for the keys ``input_layer``, ``input_params``,
            ``emb_layer``, ``emb_params`` (including ``output_dim``),
            ``out_units``, ``out_activation``, ``loss`` and ``optimizer``.

    Returns:
        The compiled Keras model.
    """
    K.clear_session()

    nn_input = input_[kwargs["input_layer"]](**kwargs["input_params"])
    embedded = embeddings[kwargs["emb_layer"]](**kwargs["emb_params"])(nn_input)
    embedded = layers.SpatialDropout1D(0.1)(embedded)

    emb_dim = kwargs["emb_params"]["output_dim"]
    # Add a trailing channel axis so Conv2D can span the embedding width.
    embedded = layers.Reshape((-1, emb_dim, 1),
                              input_shape=K.int_shape(embedded))(embedded)

    # One branch per kernel height; each is globally average-pooled.
    pooled_branches = []
    for height in [1, 2, 3, 5]:
        branch = layers.Conv2D(32,
                               kernel_size=(height, emb_dim),
                               kernel_initializer='he_normal',
                               activation='relu')(embedded)
        branch = layers.Reshape((-1, K.int_shape(branch)[3]))(branch)
        pooled_branches.append(layers.GlobalAveragePooling1D()(branch))

    head = layers.Concatenate(axis=1)(pooled_branches)
    head = layers.Flatten()(head)
    head = layers.Dropout(0.1)(head)
    head = layers.Dense(32)(head)
    head = layers.BatchNormalization()(head)
    head = layers.Activation("relu")(head)
    head = layers.Dropout(0.3)(head)
    nn_pred = layers.Dense(kwargs["out_units"],
                           activation=kwargs["out_activation"])(head)

    model = Model(inputs=nn_input, outputs=nn_pred)
    model.compile(loss=kwargs["loss"],
                  optimizer=kwargs["optimizer"],
                  metrics=["accuracy"])

    # Run the global variable initializer in the backend session.
    session = K.get_session()
    session.run(tf.global_variables_initializer())
    return model
def __init__(self,
             type_embedding_dim=Configuration.type_embedding_dim,
             encoder_attention_heads_count=4,
             encoder_ff_first_layer_dim=Configuration.encoder_ff_first_layer_dim,
             rate=0.1):
    """Create the attention, feed-forward, normalization and pooling layers.

    Args:
        type_embedding_dim: Used as the attention ``key_dim`` and as the
            width of the feed-forward output layer.
        encoder_attention_heads_count: Number of attention heads.
        encoder_ff_first_layer_dim: Width of the first feed-forward layer.
        rate: Dropout rate shared by both dropout layers.
    """
    super(Encoder, self).__init__()

    self.att = layers.MultiHeadAttention(
        num_heads=encoder_attention_heads_count,
        key_dim=type_embedding_dim,
    )

    feed_forward = [
        layers.Dense(encoder_ff_first_layer_dim, activation="relu"),
        layers.Dense(type_embedding_dim),
    ]
    self.ffn = keras.Sequential(feed_forward)

    self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
    self.dropout1 = layers.Dropout(rate)
    self.dropout2 = layers.Dropout(rate)
    self.pool = layers.GlobalAveragePooling1D()
def create_classifier(num_experts, embed_dim, num_tokens_per_batch, ff_dim,
                      num_heads, num_tokens_per_example, vocab_size,
                      dropout_rate):
    """Build a two-class classifier around a ``Switch``-based transformer.

    Args:
        num_experts: Passed to ``Switch``.
        embed_dim: Embedding size; passed to ``Switch`` and the embedding.
        num_tokens_per_batch: Passed to ``Switch``.
        ff_dim: Passed to ``TransformerBlock``; also the width of the hidden
            dense layer.
        num_heads: Passed to ``TransformerBlock``.
        num_tokens_per_example: Length of each input sequence.
        vocab_size: Vocabulary size passed to the embedding layer.
        dropout_rate: Rate of both dropout layers in the head.

    Returns:
        An uncompiled Keras model.
    """
    router = Switch(num_experts, embed_dim, num_tokens_per_batch)
    encoder = TransformerBlock(ff_dim, num_heads, router)

    token_ids = layers.Input(shape=(num_tokens_per_example, ))
    embed = TokenAndPositionEmbedding(num_tokens_per_example, vocab_size,
                                      embed_dim)

    features = encoder(embed(token_ids))
    features = layers.GlobalAveragePooling1D()(features)
    features = layers.Dropout(dropout_rate)(features)
    features = layers.Dense(ff_dim, activation="relu")(features)
    features = layers.Dropout(dropout_rate)(features)
    probabilities = layers.Dense(2, activation="softmax")(features)

    return keras.Model(inputs=token_ids, outputs=probabilities)
def get_bert_imdb_model():
    """Build and compile a two-class classifier on top of ``bert-base-cased``.

    The model takes ``[input_ids, token_type_ids, attention_mask]``, each of
    length 512, averages BERT's last hidden states over the sequence axis and
    feeds the result through a 128-unit ReLU layer and a 2-way softmax.

    Returns:
        The compiled Keras model (Adam at 5e-5, sparse categorical
        cross-entropy on probabilities, accuracy metric).
    """
    from transformers import TFBertModel

    max_len = 512

    input_ids = layers.Input(shape=(max_len, ), dtype=tf.int32)
    token_type_ids = layers.Input(shape=(max_len, ), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len, ), dtype=tf.int32)
    inputs = [input_ids, token_type_ids, attention_mask]

    name = "bert-base-cased"
    bert = TFBertModel.from_pretrained(name)
    # The encoder is fine-tuned together with the head.
    bert.trainable = True
    bert_outputs = bert(input_ids=input_ids,
                        token_type_ids=token_type_ids,
                        attention_mask=attention_mask)
    last_hidden_states = bert_outputs.last_hidden_state

    avg = layers.GlobalAveragePooling1D()(last_hidden_states)
    avg = layers.Dense(128, activation='relu')(avg)
    output = layers.Dense(2, activation="softmax")(avg)
    model = tf.keras.Model(inputs=inputs, outputs=output)

    # The output layer already applies softmax, hence from_logits=False.
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    # `lr` is a deprecated alias: newer Keras optimizers only warn about it
    # (leaving the default rate in place) or reject it. `learning_rate` is
    # the supported argument and guarantees 5e-5 is actually applied.
    optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
    model.compile(optimizer=optimizer, loss=[loss], metrics=['accuracy'])
    return model
def __init__(self, num_classes):
    """Create the sub-blocks and layers used by InceptionBlock.

    :param num_classes: number of units of the final softmax layer
    """
    super(InceptionBlock, self).__init__()

    # Six inception sub-blocks.
    self.inception1 = InceptionSubBlock()
    self.inception2 = InceptionSubBlock()
    self.inception3 = InceptionSubBlock()
    self.inception4 = InceptionSubBlock()
    self.inception5 = InceptionSubBlock()
    self.inception6 = InceptionSubBlock()

    # Two bias-free shortcut branches.
    self.shortcut1 = ShortcutBlock(filters=128, kernel_size=1, bias=False)
    self.shortcut2 = ShortcutBlock(filters=128, kernel_size=1, bias=False)

    self.gap = layers.GlobalAveragePooling1D()

    # One Add/ReLU pair per shortcut.
    self.add1 = layers.Add()
    self.add2 = layers.Add()
    self.relu1 = layers.Activation(activation='relu')
    self.relu2 = layers.Activation(activation='relu')

    self.out = layers.Dense(num_classes, activation='softmax')
def __init__(self, **kwargs):
    """
    Create the layers of the regression head.

    Keyword Arguments:
        Forwarded to the parent class constructor.
    """
    super(RegressionHead, self).__init__(**kwargs)

    self.pooling = layers.GlobalAveragePooling1D()

    # Single linear output unit.
    dense_options = dict(
        units=1,
        use_bias=True,
        activation=None,
        name='Head_dense',
    )
    self.dense = layers.Dense(**dense_options)

    self.flatten = layers.Flatten()
def se_block(x, filters, kernel_size, stride=1, change=False, reduction=16):
    """Apply a residual block with a squeeze-and-excite gate to ``x``.

    Args:
        x: Input tensor.
        filters: Number of filters passed to ``conv_block``; also the width
            of the gate.
        kernel_size: Kernel size passed to the two main ``conv_block`` calls.
        stride: Stride of the second convolution and of the shortcut
            projection.
        change: Forces a 1-wide ``conv_block`` projection on the shortcut
            even when ``stride`` is 1.
        reduction: Divisor applied to ``filters`` for the gate bottleneck.
            Defaults to 16, the value previously hard-coded.

    Returns:
        The output tensor after the residual addition and ``mish``.
    """
    x_short = x

    x = conv_block(x, filters, kernel_size, strides=1, padding='same')
    x = layers.Activation(mish)(x)
    x = layers.Dropout(0.2)(x)
    x = conv_block(x, filters, kernel_size, strides=stride, padding='same')

    # Clamp to one unit: `filters // reduction` is 0 when filters is smaller
    # than the reduction factor, which is not a usable Dense width.
    bottleneck_units = max(filters // reduction, 1)
    se = layers.GlobalAveragePooling1D()(x)
    se = layers.Dense(bottleneck_units, activation=mish)(se)
    se = layers.Dense(filters, activation='sigmoid')(se)
    # Length-1 sequence axis so the gate broadcasts over every time step.
    se = layers.Reshape((1, filters))(se)
    x = layers.Multiply()([se, x])

    if stride > 1 or change:
        x_short = conv_block(x_short, filters, 1, strides=stride,
                             padding='same')

    x = layers.Add()([x, x_short])
    x = layers.Activation(mish)(x)
    return x
def build_model(self):
    """Build, wrap and compile the text classification model.

    The layer stack starts with the vectorization layer from
    ``self.data_loader``. Both the model and the optimizer are passed through
    ``self.context`` wrappers before compiling. The last layer has no
    activation, so loss and metrics use ``from_logits=True``.

    Returns:
        The wrapped, compiled model.
    """
    embedding_dim = self.context.get_hparam("embedding_dim")
    stack = [
        self.data_loader.create_vectorization_layer(),
        layers.Embedding(10000, embedding_dim),
        layers.Dropout(0.2),
        layers.GlobalAveragePooling1D(),
        layers.Dropout(0.2),
        layers.Dense(self.context.get_hparam("dense1")),
    ]
    model = self.context.wrap_model(tf.keras.Sequential(stack))

    optimizer = self.context.wrap_optimizer(tf.keras.optimizers.Adam())

    tracked_metrics = [
        tf.metrics.SparseCategoricalCrossentropy(from_logits=True),
        tf.metrics.SparseCategoricalAccuracy(),
    ]
    model.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=optimizer,
        metrics=tracked_metrics,
    )
    return model
def build_model(hp):
    """Build and compile a tunable embedding classifier with 4 outputs.

    Args:
        hp: Hyperparameter object providing ``Choice`` and ``Float``. It is
            asked for ``'dense_activation'`` (output-layer activation) and
            for ``'dropout'`` (requested once per dropout layer under the
            same name).

    Returns:
        The compiled Keras Sequential model.
    """
    activation = hp.Choice('dense_activation',
                           values=['relu', 'tanh', 'sigmoid', 'softmax'],
                           default='relu')
    model = keras.Sequential([
        layers.Embedding(max_features + 1, embedding_dim),
        layers.Dropout(
            hp.Float('dropout', min_value=0.1, max_value=0.5, step=0.1)),
        layers.GlobalAveragePooling1D(),
        layers.Dropout(
            hp.Float('dropout', min_value=0.1, max_value=0.5, step=0.1)),
        layers.Dense(4, activation=activation),
    ])

    # With a softmax output the model already emits probabilities; treating
    # them as logits would apply softmax a second time inside the loss. Other
    # activations keep the original from_logits=True behaviour.
    outputs_are_logits = activation != 'softmax'
    model.compile(
        loss=losses.SparseCategoricalCrossentropy(
            from_logits=outputs_are_logits),
        optimizer='adam',
        metrics=['accuracy'])
    return model
def model1(config, vectorize_layer, regularizer):
    """Build the embedding classifier with a 7-way softmax output.

    Args:
        config: Object read for ``vocab_size``, ``embedding_dim``,
            ``dropout``, ``tmp`` (hidden dense width) and ``print_model``.
        vectorize_layer: First layer of the model.
        regularizer: Kernel regularizer applied to both dense layers.

    Returns:
        The uncompiled Keras Sequential model.
    """
    rate = config.dropout
    stack = [
        vectorize_layer,
        layers.Embedding(config.vocab_size + 1, config.embedding_dim),
        layers.Dropout(rate),
        layers.GlobalAveragePooling1D(),
        layers.Dropout(rate),
        layers.Dense(config.tmp,
                     activation='relu',
                     kernel_regularizer=regularizer),
        layers.Dropout(rate),
        layers.Dense(7,
                     activation='softmax',
                     kernel_regularizer=regularizer),
    ]
    model = tf.keras.Sequential(stack)

    if config.print_model:
        model.summary()
    return model
def build(self, input_shape):
    """Create the latent weight and every sub-module of the Perceiver.

    Args:
        input_shape: Forwarded to the parent ``build``.
    """
    # Trainable latent array of shape (latent_dim, projection_dim).
    latent_shape = (self.latent_dim, self.projection_dim)
    self.latent_array = self.add_weight(
        shape=latent_shape,
        initializer="random_normal",
        trainable=True,
    )

    # Patching and patch-encoding modules.
    self.patcher = Patches(self.patch_size)
    self.patch_encoder = PatchEncoder(self.data_dim, self.projection_dim)

    # Cross-attention module.
    cross_attention_args = (
        self.latent_dim,
        self.data_dim,
        self.projection_dim,
        self.ffn_units,
        self.dropout_rate,
    )
    self.cross_attention = create_cross_attention_module(*cross_attention_args)

    # Transformer module.
    transformer_args = (
        self.latent_dim,
        self.projection_dim,
        self.num_heads,
        self.num_transformer_blocks,
        self.ffn_units,
        self.dropout_rate,
    )
    self.transformer = create_transformer_module(*transformer_args)

    # Pooling layer followed by the classification head.
    self.global_average_pooling = layers.GlobalAveragePooling1D()
    self.classification_head = create_ffn(
        hidden_units=self.classifier_units,
        dropout_rate=self.dropout_rate,
    )

    super(Perceiver, self).build(input_shape)
def basic_NN(self, num_features, embedding_dim):
    '''
    Build a sequential embedding classifier with one output unit.

    The layers are stacked in this order:

    1. An Embedding layer that looks up a vector for every integer token
       index, adding an embedding dimension to the output.
    2. Dropout with rate 0.2.
    3. GlobalAveragePooling1D, which averages over the sequence dimension
       and returns one fixed-length vector per example.
    4. Dropout with rate 0.2.
    5. A Dense layer with a single output node and no activation.

    The model summary is printed before returning; the model is not
    compiled here.

    Parameters
    ----------
    num_features: int
        The embedding layer is created with ``num_features + 1`` rows.
    embedding_dim : int
        Dimension of the dense embedding.

    Returns
    -------
    Keras Sequential Model
    '''
    vocabulary_rows = num_features + 1
    stack = [
        layers.Embedding(vocabulary_rows, embedding_dim),
        layers.Dropout(0.2),
        layers.GlobalAveragePooling1D(),
        layers.Dropout(0.2),
        layers.Dense(1),
    ]
    model = tf.keras.Sequential(stack)
    model.summary()
    return model
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    """Stack transformer encoders and an MLP head into a softmax classifier.

    Args:
        input_shape: Shape of one input example.
        head_size: Passed to ``transformer_encoder``.
        num_heads: Passed to ``transformer_encoder``.
        ff_dim: Passed to ``transformer_encoder``.
        num_transformer_blocks: Number of encoder blocks applied in sequence.
        mlp_units: Iterable of widths, one ReLU dense layer per entry.
        dropout: Passed to ``transformer_encoder``.
        mlp_dropout: Dropout rate applied after every MLP dense layer.

    Returns:
        An uncompiled Keras model. The output width comes from the
        module-level ``n_classes``.
    """
    model_input = keras.Input(shape=input_shape)

    hidden = model_input
    block_index = 0
    while block_index < num_transformer_blocks:
        hidden = transformer_encoder(hidden, head_size, num_heads, ff_dim,
                                     dropout)
        block_index += 1

    hidden = layers.GlobalAveragePooling1D(data_format="channels_first")(hidden)

    for width in mlp_units:
        hidden = layers.Dense(width, activation="relu")(hidden)
        hidden = layers.Dropout(mlp_dropout)(hidden)

    probabilities = layers.Dense(n_classes, activation="softmax")(hidden)
    return keras.Model(model_input, probabilities)
def build_model(self):
    """Build the KanRes network and store it on ``self.model``.

    Reads ``self.input_shape`` and ``self.output_size``. The model is not
    compiled here.
    """
    ecg_input = layers.Input(shape=self.input_shape, name='ECG')

    features = kanres_init(ecg_input, 64, 32, 8, 3, 1)
    features = layers.AveragePooling1D(
        pool_size=2, data_format='channels_last')(features)

    # Eight identical KanRes modules applied back to back.
    for _ in range(8):
        features = kanres_module(features, 64, 32, 50, 50, 1)

    features = layers.GlobalAveragePooling1D()(features)
    prediction = layers.Dense(self.output_size)(features)

    self.model = Model(inputs=ecg_input, outputs=prediction)
def __init__(self, classes, **kwargs):
    """
    Create the layers of the classification head.

    Arguments:
        classes: Positive integer, number of units of the dense layer.

    Keyword Arguments:
        Forwarded to the parent class constructor.
    """
    super(ClassificationHead, self).__init__(**kwargs)

    self.global_avg = layers.GlobalAveragePooling1D()

    # The dense layer has no activation; softmax is a separate layer.
    dense_options = dict(
        units=classes,
        use_bias=True,
        activation=None,
        name='Head_dense2',
    )
    self.dense = layers.Dense(**dense_options)

    self.softmax = layers.Softmax()
def create_model():
    """Build and compile the embedding-based review rating classifier.

    The output layer has ``ratings.size`` units with softmax activation, so
    the model emits class probabilities.

    Returns:
        The compiled Keras Sequential model.
    """
    new_model = tf.keras.Sequential([
        # Looks up an embedding vector for each integer word index; the
        # vectors are learned as the model trains.
        layers.Embedding(
            # Size of the vocabulary (i.e. maximum integer index + 1).
            input_dim=VOCABULARY_SIZE + 1,
            output_dim=EMBEDDING_DIM,
            # Length of the input sequences, which is constant.
            input_length=MAX_REVIEW_LENGTH),
        layers.Dropout(0.2),
        layers.GlobalAveragePooling1D(),
        layers.Dropout(0.2),
        layers.Dense(ratings.size, activation="softmax")
    ])
    new_model.compile(
        # The last layer already applies softmax, so the loss must treat its
        # input as probabilities; from_logits=True would apply softmax twice.
        loss=losses.SparseCategoricalCrossentropy(from_logits=False),
        optimizer='adam',
        metrics=['accuracy'])
    return new_model
def create_model(self):
    """Build the transformer classifier and store it on ``self.model``.

    Reads ``max_len``, ``vocab_size``, ``embed_dim``, ``num_heads``,
    ``ff_dim``, ``dropout``, ``dim_last_dense``, ``labels`` and ``debug``
    from ``self.config``. The model is not compiled here; when
    ``config.debug`` is truthy its summary is printed.
    """
    inputs = layers.Input(shape=(self.config.max_len, ))

    embedding_layer = TokenAndPositionEmbedding(self.config.max_len,
                                                self.config.vocab_size,
                                                self.config.embed_dim)
    x = embedding_layer(inputs)

    transformer_block = TransformerBlock(self.config.embed_dim,
                                         self.config.num_heads,
                                         self.config.ff_dim)
    x = transformer_block(x)

    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(self.config.dropout)(x)
    x = layers.Dense(self.config.dim_last_dense, activation="relu")(x)
    x = layers.Dropout(self.config.dropout)(x)
    outputs = layers.Dense(self.config.labels, activation="softmax")(x)

    self.model = keras.Model(inputs=inputs, outputs=outputs)

    if self.config.debug:
        # summary() prints the table itself and returns None; wrapping it in
        # print() only added a stray "None" line.
        self.model.summary()