def get_qpair_model():
    embedding_size = 128
    inp1 = layers.Input(shape=(100,))
    inp2 = layers.Input(shape=(100,))
    x1 = layers.Embedding(6000, embedding_size)(inp1)
    x2 = layers.Embedding(6000, embedding_size)(inp2)
    x3 = layers.Bidirectional(layers.LSTM(32, return_sequences=True))(x1)
    x4 = layers.Bidirectional(layers.LSTM(32, return_sequences=True))(x2)
    x5 = layers.GlobalMaxPool1D()(x3)
    x6 = layers.GlobalMaxPool1D()(x4)
    x7 = layers.dot([x5, x6], axes=1)
    x8 = layers.Dense(40, activation='relu')(x7)
    x9 = layers.Dropout(0.05)(x8)
    x10 = layers.Dense(10, activation='relu')(x9)
    output = layers.Dense(2, activation="softmax")(x10)
    model = models.Model(inputs=[inp1, inp2], outputs=output)
    # use the canonical lowercase loss identifier
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # batch_size = 100
    # epochs = 3
    return model
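# A minimal usage sketch (assumed, not from the original source): the model
# takes two integer-encoded, padded question sequences of length 100 and
# one-hot labels matching the 2-unit softmax head. The dummy data below is
# illustrative only.
import numpy as np
import tensorflow as tf

q1 = np.random.randint(0, 6000, size=(32, 100))   # hypothetical first questions
q2 = np.random.randint(0, 6000, size=(32, 100))   # hypothetical second questions
labels = tf.keras.utils.to_categorical(
    np.random.randint(0, 2, size=(32,)), num_classes=2)

model = get_qpair_model()
model.fit([q1, q2], labels, batch_size=100, epochs=3)  # values from the commented hints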
def __init__(self, vocab_size,
             emb_dim=128,
             nb_filters=50,
             FFN_units=512,
             nb_classes=2,
             dropout_rate=0.1,
             training=False,
             name="dcnn"):
    super(DCNN, self).__init__(name=name)
    self.embedding = layers.Embedding(vocab_size, emb_dim)
    # Layer 1
    self.bigram = layers.Conv1D(filters=nb_filters,
                                kernel_size=2,
                                padding="valid",
                                activation="relu")
    self.pool_1 = layers.GlobalMaxPool1D()
    # Layer 2
    self.trigram = layers.Conv1D(filters=nb_filters,
                                 kernel_size=3,
                                 padding="valid",
                                 activation="relu")
    self.pool_2 = layers.GlobalMaxPool1D()
    # Layer 3
    self.fourgram = layers.Conv1D(filters=nb_filters,
                                  kernel_size=4,
                                  padding="valid",
                                  activation="relu")
    self.pool_3 = layers.GlobalMaxPool1D()
    # Dense fully connected layer
    self.dense_1 = layers.Dense(units=FFN_units, activation="relu")
    self.dropout = layers.Dropout(rate=dropout_rate)
    # Output layer
    if nb_classes == 2:
        self.last_dense = layers.Dense(units=1, activation="sigmoid")
    else:
        self.last_dense = layers.Dense(units=nb_classes, activation="softmax")
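# The snippet above shows only __init__; the original forward pass is not
# included. A call method consistent with these layers -- a sketch under that
# assumption, not the source's code -- would run the three n-gram branches in
# parallel, max-pool each, and concatenate before the classifier head:
def call(self, inputs, training):
    x = self.embedding(inputs)                     # (batch, seq_len, emb_dim)
    x_1 = self.pool_1(self.bigram(x))              # (batch, nb_filters)
    x_2 = self.pool_2(self.trigram(x))             # (batch, nb_filters)
    x_3 = self.pool_3(self.fourgram(x))            # (batch, nb_filters)
    merged = tf.concat([x_1, x_2, x_3], axis=-1)   # (batch, 3 * nb_filters)
    merged = self.dense_1(merged)
    merged = self.dropout(merged, training=training)
    return self.last_dense(merged)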
def __init__(self, vocab_size: int, embed_dim: int, training: bool = False):
    super(MyAdvancedModel, self).__init__()
    ### TODO(Students) START
    # ...
    self.num_classes = len(ID_TO_CLASS)
    self.decoder = layers.Dense(units=self.num_classes)
    self.embeddings = tf.Variable(tf.random.normal((vocab_size, embed_dim)))
    self.conv1 = layers.Conv1D(filters=256, kernel_size=2,
                               padding='same', activation='relu')
    self.max_pool1 = layers.GlobalMaxPool1D()
    self.conv2 = layers.Conv1D(filters=256, kernel_size=3,
                               padding='same', activation='relu')
    self.max_pool2 = layers.GlobalMaxPool1D()
    self.conv3 = layers.Conv1D(filters=256, kernel_size=4,
                               padding='same', activation='relu')
    self.max_pool3 = layers.GlobalMaxPool1D()
def __init__(self, vocab_size: int, embed_dim: int, hidden_size: int = 128,
             training: bool = False):
    super(MyAdvancedModel, self).__init__()
    ### TODO(Students) START
    self.num_classes = len(ID_TO_CLASS)
    self.embeddings = tf.Variable(tf.random.normal((vocab_size, embed_dim)),
                                  trainable=training)
    # I tried masking, but Conv1D does not support masking as of TF 2.0, so
    # adding a masking layer did not make any difference --
    # https://github.com/keras-team/keras/issues/411
    # Adding or removing this has no effect on the F1 score/loss.
    self._masking = layers.Masking(mask_value=0, input_shape=(None, 200))
    # Conv1D with 256 filters and a kernel size of 3
    self.conv1 = layers.Conv1D(filters=256, kernel_size=3, padding="valid",
                               activation="tanh", strides=1)
    # Using GlobalMaxPool1D, since MaxPool1D does not support variable sequence lengths
    self.pool1 = layers.GlobalMaxPool1D()
    # Conv1D with 128 filters and a kernel size of 3
    self.conv2 = layers.Conv1D(filters=128, kernel_size=3, padding="valid",
                               activation="tanh", strides=1)
    self.pool2 = layers.GlobalMaxPool1D()
    # Conv1D with 64 filters and a kernel size of 3
    self.conv3 = layers.Conv1D(filters=64, kernel_size=3, padding="valid",
                               activation="tanh", strides=1)
    self.pool3 = layers.GlobalMaxPool1D()
    # Layer to concatenate the outputs of all the CNN layers
    self.concatenate = layers.Concatenate()
    # Dropout for inputs
    self.dropout1 = layers.Dropout(0.5)
    # Dropout for the second-to-last dense layer
    self.dropout2 = layers.Dropout(0.5)
    self.dense1 = layers.Dense(100, activation="tanh")
    self.dense2 = layers.Dense(self.num_classes)
def get_Model_Sequential_Complete():
    # print('|' * 100)
    # print('get_Model_Sequential_Complete')
    # print('|' * 100)
    tf.random.set_seed(data.SeedTF)
    model = Models.Sequential()
    model.add(Layers.Conv1D(128, kernel_size=8, input_shape=(data.nDataPoints, 1)))
    model.add(Layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001))
    model.add(Layers.ReLU())
    # input_shape is only meaningful on the first layer; Keras ignores it elsewhere
    model.add(Layers.Conv1D(256, kernel_size=5))
    model.add(Layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001))
    model.add(Layers.ReLU())
    model.add(Layers.Conv1D(128, kernel_size=3))
    model.add(Layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001))
    model.add(Layers.ReLU())
    model.add(Layers.GlobalMaxPool1D(data_format='channels_last'))
    # The 128 pooled channels are used directly as class scores
    model.add(Layers.Softmax(axis=-1))
    # 'lr' is deprecated in tf.keras optimizers; use learning_rate
    model.compile(optimizer=Optimizer.Adam(learning_rate=0.001, beta_1=0.9,
                                           beta_2=0.999, epsilon=1e-08),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def __init__(self, vocabulary_size,
             embedding_dimensions=EMBEDDING_DIM,
             cnn_filters=50,
             dnn_units=512,
             model_output_classes=3,
             dropout_rate=0.1,
             training=False,
             name="custom_imdb_model"):
    super(CustomSentiCorefModel, self).__init__(name=name)
    self.embedding = layers.Embedding(vocabulary_size, embedding_dimensions)
    self.cnn_layer1 = layers.Conv1D(filters=cnn_filters, kernel_size=2,
                                    padding="valid", activation="relu")
    self.cnn_layer2 = layers.Conv1D(filters=cnn_filters, kernel_size=3,
                                    padding="valid", activation="relu")
    self.pool = layers.GlobalMaxPool1D()
    self.dense_1 = layers.Dense(units=dnn_units, activation="relu")
    self.dropout = layers.Dropout(rate=dropout_rate)
    if model_output_classes == 2:
        self.last_dense = layers.Dense(units=1, activation="sigmoid")
    else:
        self.last_dense = layers.Dense(units=model_output_classes,
                                       activation="softmax")
def model_construct(self, detector_length=24, num_detector=32,
                    num_hidden_unit=32, weight_decay=0.01):
    self.model.add(layers.Conv1D(num_detector, detector_length,
                                 input_shape=self.train_x.shape[1:],
                                 activation='relu',
                                 kernel_regularizer=regularizers.l2(weight_decay)))
    self.model.add(layers.GlobalMaxPool1D())
    self.model.add(layers.Dense(num_hidden_unit, activation='relu',
                                kernel_regularizer=regularizers.l2(weight_decay)))
    self.model.add(layers.Dropout(0.5))
    self.model.add(layers.Dense(1, activation='sigmoid'))
    self.model_constructed = True
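# model_construct references self.model and self.train_x, which the snippet
# does not define. A minimal hypothetical host class (name and attributes are
# assumptions, not from the original source) that makes the method runnable:
import numpy as np
from tensorflow.keras import Sequential, layers, regularizers

class DetectorModel:  # hypothetical class name
    def __init__(self, train_x):
        self.model = Sequential()
        self.train_x = train_x  # e.g. shape (n_samples, seq_len, channels)
        self.model_constructed = False

    # model_construct (defined above) would live here as a method

detector = DetectorModel(np.zeros((10, 100, 4), dtype="float32"))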
def create_cnn_model1():
    model = Sequential()
    # model.add(layers.Flatten(input_shape=(3000, 1)))
    model.add(layers.Input(shape=(3000, 1)))
    model.add(layers.Convolution1D(16, kernel_size=5,
                                   activation=activations.relu, padding="valid"))
    model.add(layers.Convolution1D(16, kernel_size=5,
                                   activation=activations.relu, padding="valid"))
    model.add(layers.MaxPool1D(pool_size=2))
    model.add(layers.Convolution1D(32, kernel_size=3,
                                   activation=activations.relu, padding="valid"))
    model.add(layers.Convolution1D(32, kernel_size=3,
                                   activation=activations.relu, padding="valid"))
    # model.add(layers.Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid"))
    model.add(layers.MaxPool1D(pool_size=2))
    model.add(layers.SpatialDropout1D(rate=0.01))
    model.add(layers.Convolution1D(32, kernel_size=3,
                                   activation=activations.relu, padding="valid"))
    model.add(layers.Convolution1D(32, kernel_size=3,
                                   activation=activations.relu, padding="valid"))
    model.add(layers.MaxPool1D(pool_size=2))
    model.add(layers.SpatialDropout1D(rate=0.01))
    model.add(layers.Convolution1D(256, kernel_size=3,
                                   activation=activations.relu, padding="valid"))
    model.add(layers.Convolution1D(256, kernel_size=3,
                                   activation=activations.relu, padding="valid"))
    model.add(layers.GlobalMaxPool1D())
    # model.add(layers.Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(5, activation=activations.softmax))
    model.compile(optimizer=optimizers.Adam(0.001),
                  loss=losses.sparse_categorical_crossentropy,
                  metrics=['acc'])
    # model.summary()
    return model
def create_model():
    model = tf.keras.Sequential([
        layers.Conv1D(64, 3, activation="relu", input_shape=(8192, 1)),
        # layers.Conv1D(64, 1, activation="relu"),
        layers.BatchNormalization(),
        layers.MaxPooling1D(2),
        layers.Conv1D(32, 3, activation="relu"),
        # layers.Conv1D(32, 1, activation="relu"),
        layers.Conv1D(16, 3, activation="relu"),
        layers.Dropout(0.4),
        layers.BatchNormalization(),
        layers.MaxPooling1D(2),
        # layers.Conv1D(16, 1, activation="relu"),
        layers.Conv1D(8, 3, activation="relu"),
        # layers.GlobalMaxPool1D(),
        # layers.Conv1D(8, 1, activation="relu"),
        layers.BatchNormalization(),
        layers.GlobalMaxPool1D(),
        # tf.keras.layers.Flatten(),
        layers.Dense(8, activation="relu"),
        layers.Dense(4, activation="relu",
                     kernel_regularizer=tf.keras.regularizers.l2(0.0001)),
        layers.Dense(2, activation='softmax',
                     kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    ])
    # Dense(2, kernel_initializer='he_normal', activation='softmax', kernel_regularizer=l2(0.0001))
    model.summary()
    return model
def cnn_base():
    model = Sequential(layers=[
        layers.Convolution1D(16, kernel_size=5, activation='relu',
                             padding='valid', input_shape=(3000, 1)),
        layers.Convolution1D(16, kernel_size=5, activation='relu', padding='valid'),
        layers.MaxPool1D(pool_size=2),
        layers.SpatialDropout1D(rate=0.01),
        layers.Convolution1D(32, kernel_size=3, activation='relu', padding='valid'),
        layers.Convolution1D(32, kernel_size=3, activation='relu', padding='valid'),
        layers.MaxPool1D(pool_size=2),
        layers.SpatialDropout1D(rate=0.01),
        layers.Convolution1D(32, kernel_size=3, activation='relu', padding='valid'),
        layers.Convolution1D(32, kernel_size=3, activation='relu', padding='valid'),
        layers.MaxPool1D(pool_size=2),
        layers.Convolution1D(256, kernel_size=3, activation='relu', padding='valid'),
        layers.Convolution1D(256, kernel_size=3, activation='relu', padding='valid'),
        layers.GlobalMaxPool1D(),
        layers.Dropout(rate=0.01),
        layers.Dense(64, activation='relu'),
    ])
    model.compile(optimizer=optimizers.Adam(0.001),
                  loss=losses.sparse_categorical_crossentropy,
                  metrics=['acc'])  # ,class_model='categorical'
    return model
def __init__(self,
             vocab_size,        # Size of the vocabulary used; given by the tokenizer
             emb_dim=128,       # 128 is just an intuitive default; it is used so often for embeddings
             nb_filters=50,     # Number of times we want to apply each filter
             FFN_units=512,     # Number of units of the feedforward network at the end
             nb_classes=2,      # Binary classification as default
             dropout_rate=0.1,  # Turn off certain units/parameters to avoid overfitting
             training=False,    # True while the network is training; dropout is applied only during training
             name="dcnn"):      # A name for the network
    super(DCNN, self).__init__(name=name)
    self.embedding = layers.Embedding(vocab_size, emb_dim)
    self.bigram = layers.Conv1D(filters=nb_filters,
                                kernel_size=2,
                                padding="valid",  # "valid" means no zero-padding is added
                                activation="relu")
    self.trigram = layers.Conv1D(filters=nb_filters,
                                 kernel_size=3,
                                 padding="valid",
                                 activation="relu")
    self.fourgram = layers.Conv1D(filters=nb_filters,
                                  kernel_size=4,
                                  padding="valid",
                                  activation="relu")
    self.pool = layers.GlobalMaxPool1D()  # We'll reuse this layer for all pooling steps
    self.dense_1 = layers.Dense(units=FFN_units, activation="relu")
    self.dropout = layers.Dropout(rate=dropout_rate)  # A good place for dropout, since dense_1 will create a lot of params
    if nb_classes == 2:  # Easy way to handle multiple classes
        self.last_dense = layers.Dense(units=1, activation="sigmoid")
    else:
        self.last_dense = layers.Dense(units=nb_classes, activation="softmax")
def create_cnn(total_words=1000, embedded_dimension=300, embedding_matrix=None,
               input_length=100, optimizer='adam'):
    # Add an input layer
    input_layer = layers.Input((input_length,))

    # Add the word embedding layer
    embedding_layer = layers.Embedding(total_words + 1, embedded_dimension,
                                       weights=[embedding_matrix],
                                       trainable=False)(input_layer)
    embedding_layer = layers.SpatialDropout1D(0.5)(embedding_layer)

    # Add the convolutional layer
    conv_layer = layers.Convolution1D(100, 3, activation="relu")(embedding_layer)

    # Add the pooling layer
    pooling_layer = layers.GlobalMaxPool1D()(conv_layer)

    # Add the output layers
    output_layer1 = layers.Dense(50, activation="relu")(pooling_layer)
    output_layer1 = layers.Dropout(0.6)(output_layer1)
    output_layer2 = layers.Dense(1, activation="sigmoid")(output_layer1)

    # Compile the model
    model = models.Model(inputs=input_layer, outputs=output_layer2)
    model.compile(optimizer=optimizer, loss='binary_crossentropy')
    return model
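# A minimal usage sketch (assumed): create_cnn expects embedding_matrix to have
# one row per vocabulary index, i.e. shape (total_words + 1, embedded_dimension),
# so it matches the Embedding layer it initializes. The random matrix below is
# a placeholder for real pretrained vectors.
import numpy as np

embedding_matrix = np.random.normal(size=(1000 + 1, 300))
model = create_cnn(total_words=1000, embedded_dimension=300,
                   embedding_matrix=embedding_matrix, input_length=100)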
def __init__(self, config, cnn_filters=50, dnn_units=512,
             model_output_classes=2, dropout_rate=0.1, training=False,
             name="text_model", *inputs, **kwargs):
    super().__init__(config, *inputs, **kwargs)
    self.bert = TFBertMainLayer(config, name="bert", trainable=False)
    self.cnn_layer1 = layers.Conv1D(filters=cnn_filters, kernel_size=2,
                                    padding="valid", activation="relu")
    self.cnn_layer2 = layers.Conv1D(filters=cnn_filters, kernel_size=3,
                                    padding="valid", activation="relu")
    self.cnn_layer3 = layers.Conv1D(filters=cnn_filters, kernel_size=4,
                                    padding="valid", activation="relu")
    self.pool = layers.GlobalMaxPool1D()
    self.dense_1 = layers.Dense(units=dnn_units, activation="relu")
    self.dropout = layers.Dropout(rate=dropout_rate)
    if model_output_classes == 2:
        self.last_dense = layers.Dense(units=1, activation="sigmoid")
    else:
        self.last_dense = layers.Dense(units=model_output_classes,
                                       activation="softmax")
def __init__(self, vocabulary_size,
             embedding_dimensions=128,
             cnn_filters=100,
             dnn_units=512,
             model_output_classes=5,
             dropout_rate=0.2,
             training=False,
             name="text_model"):
    super(TEXT_MODEL, self).__init__(name=name)
    self.embedding = layers.Embedding(vocabulary_size, embedding_dimensions)
    self.cnn_layer1 = layers.Conv1D(filters=cnn_filters, kernel_size=2,
                                    padding="valid", activation="relu")
    self.cnn_layer2 = layers.Conv1D(filters=cnn_filters, kernel_size=3,
                                    padding="valid", activation="relu")
    self.cnn_layer3 = layers.Conv1D(filters=cnn_filters, kernel_size=4,
                                    padding="valid", activation="relu")
    self.pool = layers.GlobalMaxPool1D()
    self.dense_1 = layers.Dense(units=dnn_units, activation="relu")
    self.dropout = layers.Dropout(rate=dropout_rate)
    if model_output_classes == 2:
        self.last_dense = layers.Dense(units=1, activation="sigmoid")
    else:
        self.last_dense = layers.Dense(units=model_output_classes,
                                       activation="softmax")
def keras_model_cnn(embedding_matrix, config):
    """ Creates a CNN model for sentiment modeling. """
    cnn_model = models.Sequential()
    cnn_model.add(layers.Embedding(input_length=config['padding_size'],
                                   input_dim=config['embeddings_dictionary_size'],
                                   output_dim=config['embeddings_vector_size'],
                                   weights=[embedding_matrix],
                                   trainable=True,
                                   name='embedding'))
    cnn_model.add(layers.Conv1D(filters=100, kernel_size=2, strides=1,
                                padding='valid', activation='relu'))
    cnn_model.add(layers.GlobalMaxPool1D())
    cnn_model.add(layers.Dense(units=100, activation='relu'))
    cnn_model.add(layers.Dense(units=1, activation='sigmoid'))
    cnn_model.compile(optimizer='adam',
                      loss=tf.keras.losses.binary_crossentropy,
                      metrics=['accuracy'])
    return cnn_model
def __init__(self, vocab_size, emb_dim=128, nb_filters=64, FFN_units=512,
             nb_classes=2, dropout_rate=0.1, training=False, name="dcnn"):
    super(DCNN, self).__init__(name=name)
    self.embedding = layers.Embedding(vocab_size, emb_dim)
    self.bigram = layers.Conv1D(filters=nb_filters, kernel_size=2,
                                padding="valid", activation="relu")
    self.trigram = layers.Conv1D(filters=nb_filters, kernel_size=3,
                                 padding="valid", activation="relu")
    self.fourgram = layers.Conv1D(filters=nb_filters, kernel_size=4,
                                  padding="valid", activation="relu")
    self.pool = layers.GlobalMaxPool1D()  # no training variable, so we can
                                          # use the same layer for each
                                          # pooling step
    self.dense_1 = layers.Dense(units=FFN_units, activation="relu")
    self.dropout = layers.Dropout(rate=dropout_rate)
    if nb_classes == 2:
        self.last_dense = layers.Dense(units=1, activation="sigmoid")
    else:
        self.last_dense = layers.Dense(units=nb_classes, activation="softmax")
def __init__(self, config, conv_filters=100, dense_units=256,
             dropout_rate=0.2, *inputs, **kwargs):
    super().__init__(config, *inputs, **kwargs)
    self.bert = TFBertMainLayer(config, name='bert', trainable=False)
    self.conv_1 = layers.Conv1D(filters=conv_filters, kernel_size=2,
                                padding='valid', activation='relu')
    self.conv_2 = layers.Conv1D(filters=conv_filters, kernel_size=3,
                                padding='valid', activation='relu')
    self.conv_3 = layers.Conv1D(filters=conv_filters, kernel_size=4,
                                padding='valid', activation='relu')
    self.pool = layers.GlobalMaxPool1D()
    self.dense_1 = layers.Dense(units=dense_units, activation='relu')
    self.dropout = layers.Dropout(rate=dropout_rate)
    self.dense_2 = layers.Dense(units=5, activation='softmax')
def build_model_arc(self) -> None:
    # Compare version components numerically; the original compared tuples of
    # strings, which mis-orders multi-digit components (e.g. '10' < '2').
    if tuple(int(v) for v in tf.__version__.split('.')[:2]) < (2, 1):
        logger.warning("Attention layer not serializable because it takes init args "
                       "but doesn't implement get_config. "
                       "Please try the Attention layer with tf versions >= 2.1.0. "
                       "Issue: https://github.com/tensorflow/tensorflow/issues/32662")
    output_dim = self.label_processor.vocab_size
    config = self.hyper_parameters
    embed_model = self.embedding.embed_model

    # Query embeddings of shape [batch_size, Tq, dimension].
    query_embeddings = embed_model.output
    # Value embeddings of shape [batch_size, Tv, dimension].
    value_embeddings = embed_model.output

    # CNN layer.
    cnn_layer_1 = L.Conv1D(**config['conv_layer1'])
    # Query encoding of shape [batch_size, Tq, filters].
    query_seq_encoding = cnn_layer_1(query_embeddings)
    # Value encoding of shape [batch_size, Tv, filters].
    value_seq_encoding = cnn_layer_1(value_embeddings)

    cnn_layer_2 = L.Conv1D(**config['conv_layer2'])
    query_seq_encoding = cnn_layer_2(query_seq_encoding)
    value_seq_encoding = cnn_layer_2(value_seq_encoding)

    cnn_layer_3 = L.Conv1D(**config['conv_layer3'])
    query_seq_encoding = cnn_layer_3(query_seq_encoding)
    value_seq_encoding = cnn_layer_3(value_seq_encoding)

    # Query-value attention of shape [batch_size, Tq, filters].
    query_value_attention_seq = L.Attention()(
        [query_seq_encoding, value_seq_encoding])

    # Reduce over the sequence axis to produce encodings of shape
    # [batch_size, filters].
    query_encoding = L.GlobalMaxPool1D()(query_seq_encoding)
    query_value_attention = L.GlobalMaxPool1D()(query_value_attention_seq)

    # Concatenate query and document encodings to produce a DNN input layer.
    input_layer = L.Concatenate(axis=-1)([query_encoding, query_value_attention])
    output = L.Dense(output_dim, **config['layer_output'])(input_layer)
    output = self._activation_layer()(output)
    self.tf_model = keras.Model(embed_model.input, output)
def __init__(self, dim):
    super(_MP, self).__init__()
    if dim == 1:
        self.pool = layers.GlobalMaxPool1D()
    elif dim == 2:
        self.pool = layers.GlobalMaxPool2D()
    elif dim == 3:
        self.pool = layers.GlobalMaxPool3D()
    else:
        # Fail loudly instead of leaving self.pool undefined
        raise ValueError(f"dim must be 1, 2, or 3, got {dim}")
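# A minimal usage sketch (assumed): _MP(dim=1) wraps GlobalMaxPool1D, which
# reduces over the temporal axis, e.g. (batch, steps, channels) -> (batch, channels).
import tensorflow as tf

mp = _MP(dim=1)
x = tf.random.normal((4, 16, 8))  # (batch, steps, channels)
print(mp.pool(x).shape)           # (4, 8)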
def __init__(self, vocab_size, embed_size, class_num):
    super(TextCnn, self).__init__()
    self.embedding_layer = layers.Embedding(vocab_size, embed_size)
    self.conv1d_layer = layers.Conv1D(filters=128, kernel_size=5,
                                      activation='relu')
    self.max_pool_layer = layers.GlobalMaxPool1D()
    self.dense_layer = layers.Dense(units=64, activation='relu')
    self.output_layer = layers.Dense(units=class_num, activation='softmax')
def __init__(self, config):
    super(BiLSTMMaxPoolEncoder, self).__init__()
    self.config = config
    # CuDNNLSTM is the TF 1.x GPU-only LSTM; in TF 2.x, layers.LSTM selects
    # the cuDNN kernel automatically when its arguments allow it.
    self.rnn = layers.CuDNNLSTM(units=config.hidden_dim,
                                return_sequences=True)
    self.bidirectional = layers.Bidirectional(self.rnn)
    self.dropout = layers.Dropout(config.dropout)
    self.max_pool = layers.GlobalMaxPool1D()
def create_cnn(model_params):
    """
    Creates a deep convolutional network based on the model_params dictionary.
    :param model_params: dict of model hyperparameters
    :return: compiled Keras model
    """
    # Add an input layer: a sequence of 70 word indices, one index per word
    input_layer = layers.Input((70,))

    # Add the word embedding layer
    embedding_layer = layers.Embedding(len(word_index) + 1, 300,
                                       weights=[embedding_matrix],
                                       trainable=True)(input_layer)
    embedding_layer = layers.SpatialDropout1D(
        model_params['spatial_dropout'])(embedding_layer)

    # Add the convolutional layers, scaling the filter count with block depth
    for ly in range(model_params['num_conv_blocks']):
        if ly == 0:
            conv_layer = layers.Convolution1D(
                model_params['num_conv_filters'],
                model_params['filter_size'],
                activation=model_params['activation_func'])(embedding_layer)
        else:
            conv_layer = layers.Convolution1D(
                model_params['num_conv_filters'] * ly * 2,
                model_params['filter_size'],
                activation=model_params['activation_func'])(conv_layer)

    # Add the pooling layer
    pooling_layer = layers.GlobalMaxPool1D()(conv_layer)

    # Add the output layers
    for ly in range(model_params['num_dense_layers']):
        if ly == 0:
            output_layer1 = layers.Dense(
                model_params['num_dense_neurons'],
                activation=model_params['activation_func'])(pooling_layer)
        else:
            output_layer1 = layers.Dense(
                model_params['num_dense_neurons'],
                activation=model_params['activation_func'])(output_layer1)
        output_layer1 = layers.Dropout(
            model_params['dense_dropout'])(output_layer1)
    output_layer2 = layers.Dense(1, activation="sigmoid")(output_layer1)

    # Compile the model ('lr' is deprecated in tf.keras; use learning_rate)
    model = models.Model(inputs=input_layer, outputs=output_layer2)
    model.compile(optimizer=optimizers.Adam(
                      learning_rate=model_params['learning_rate'],
                      decay=model_params['learning_rate'] / model_params['epochs']),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
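# A hypothetical model_params dict covering every key create_cnn reads; the
# values below are illustrative assumptions, not the original configuration.
# Note that create_cnn also depends on module-level word_index and
# embedding_matrix globals.
model_params = {
    'spatial_dropout': 0.3,
    'num_conv_blocks': 2,
    'num_conv_filters': 64,
    'filter_size': 3,
    'activation_func': 'relu',
    'num_dense_layers': 2,
    'num_dense_neurons': 50,
    'dense_dropout': 0.4,
    'learning_rate': 1e-3,
    'epochs': 10,
}
model = create_cnn(model_params)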
def create_model(vocab_size, num_labels):
    model = tf.keras.Sequential([
        layers.Embedding(vocab_size, 64, mask_zero=True),
        layers.Conv1D(64, 5, padding='valid', activation='relu', strides=2),
        layers.GlobalMaxPool1D(),
        layers.Dense(num_labels)
    ])
    return model
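# A usage sketch (assumed, with illustrative sizes): the final Dense layer has
# no activation, so it emits logits; pair it with a from_logits=True loss when
# compiling.
model = create_model(vocab_size=10000, num_labels=4)
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              optimizer='adam',
              metrics=['accuracy'])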
def LSTM(sequence_length, embedding_layer):
    return keras.Sequential([
        keras.Input(shape=(sequence_length,)),
        embedding_layer,
        layers.SpatialDropout1D(0.3),
        layers.LSTM(100, return_sequences=True),
        layers.GlobalMaxPool1D(),
        layers.Dense(50, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(6, activation="sigmoid")
    ])
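# A minimal sketch (assumed, with illustrative sizes) of building the
# embedding_layer argument these Sequential builders expect; the same pattern
# applies to the BGRU_CNN builder further below. The six sigmoid outputs
# suggest multi-label classification, hence binary crossentropy.
from tensorflow.keras import layers

embedding_layer = layers.Embedding(input_dim=20000, output_dim=128)
model = LSTM(sequence_length=200, embedding_layer=embedding_layer)
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])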
def create_model(max_seq_len, cnn_filters, dropout_rate, dnn_units):
    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32',
                                   name="input_ids")
    input_mask = keras.layers.Input(shape=(max_seq_len,), dtype=tf.int32,
                                    name="input_mask")
    segment_ids = keras.layers.Input(shape=(max_seq_len,), dtype=tf.int32,
                                     name="segment_ids")
    _, bert_output = bert_layer([input_ids, input_mask, segment_ids])
    # bert_output = bert_layer(input_ids)
    print("bert shape", bert_output.shape)
    cnn_layer1 = layers.Conv1D(filters=cnn_filters, kernel_size=2,
                               padding="valid", activation="relu")(bert_output)
    cnn_layer1 = layers.GlobalMaxPool1D()(cnn_layer1)
    cnn_layer2 = layers.Conv1D(filters=cnn_filters, kernel_size=3,
                               padding="valid", activation="relu")(bert_output)
    cnn_layer2 = layers.GlobalMaxPool1D()(cnn_layer2)
    cnn_layer3 = layers.Conv1D(filters=cnn_filters, kernel_size=4,
                               padding="valid", activation="relu")(bert_output)
    cnn_layer3 = layers.GlobalMaxPool1D()(cnn_layer3)
    concatenated = tf.concat([cnn_layer1, cnn_layer2, cnn_layer3],
                             axis=-1)  # (batch_size, 3 * cnn_filters)
    dense_1 = layers.Dense(units=dnn_units, activation="relu")(concatenated)
    # Apply dropout to the dense layer's output; the original applied it to
    # `concatenated`, which left dense_1 disconnected from the graph.
    dropout = layers.Dropout(rate=dropout_rate)(dense_1)
    last_dense = layers.Dense(units=1, activation="sigmoid")(dropout)
    model = keras.Model(inputs=[input_ids, input_mask, segment_ids],
                        outputs=last_dense)
    model.build(input_shape=(None, max_seq_len))
    return model
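# create_model assumes a module-level bert_layer whose second return value is
# the sequence output. One way to obtain such a layer from TF Hub -- the URL,
# version, and two-output signature are assumptions about the original setup,
# not confirmed by the snippet:
import tensorflow_hub as hub

bert_layer = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2",
    trainable=False)  # returns (pooled_output, sequence_output)
model = create_model(max_seq_len=128, cnn_filters=100,
                     dropout_rate=0.2, dnn_units=256)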
def __init__(self, vocab_size: int, embed_dim: int, training: bool = False):
    super(MyAdvancedModel, self).__init__()
    ### TODO(Students) START
    # ...
    self.vocab_size = vocab_size
    self.embed_dim = embed_dim
    self.training = training
    # window size = 2
    self.first_cnn_layer = layers.Conv1D(128, kernel_size=2,
                                         input_shape=(None, self.embed_dim * 2),
                                         activation="tanh")
    self.first_max_pool = layers.GlobalMaxPool1D()
    # window size = 3
    self.second_cnn_layer = layers.Conv1D(128, kernel_size=3,
                                          input_shape=(None, self.embed_dim * 2),
                                          activation="tanh")
    self.second_max_pool = layers.GlobalMaxPool1D()
    # window size = 4
    self.third_cnn_layer = layers.Conv1D(128, kernel_size=4,
                                         input_shape=(None, self.embed_dim * 2),
                                         activation="tanh")
    self.third_max_pool = layers.GlobalMaxPool1D()
    self.num_classes = len(ID_TO_CLASS)
    self.dropout_layer = layers.Dropout(0.5)
    self.decoder = layers.Dense(units=self.num_classes)
    self.embeddings = tf.Variable(tf.random.normal((vocab_size, embed_dim)))
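# Only __init__ is shown above. A forward pass consistent with these layers --
# a sketch, not the source's code; the embed_dim * 2 channel convention
# suggests two embedded feature streams concatenated along the feature axis,
# and the sketch assumes inputs arrive already embedded in that form:
def call(self, inputs, training=False):
    # inputs: (batch, seq_len, embed_dim * 2)
    x1 = self.first_max_pool(self.first_cnn_layer(inputs))    # (batch, 128)
    x2 = self.second_max_pool(self.second_cnn_layer(inputs))  # (batch, 128)
    x3 = self.third_max_pool(self.third_cnn_layer(inputs))    # (batch, 128)
    features = tf.concat([x1, x2, x3], axis=-1)               # (batch, 384)
    features = self.dropout_layer(features, training=training)
    return self.decoder(features)                             # (batch, num_classes)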
def __init__(self, vocab_size, embed_size, class_num):
    super(TextMultiKernalCnn, self).__init__()
    self.embedding_layer = layers.Embedding(vocab_size, embed_size)
    self.conv_layer1 = layers.Conv1D(filters=32, kernel_size=3,
                                     activation='relu', padding='same')
    self.pool_layer1 = layers.GlobalMaxPool1D()
    self.conv_layer2 = layers.Conv1D(filters=32, kernel_size=4,
                                     activation='relu', padding='same')
    self.pool_layer2 = layers.GlobalMaxPool1D()
    self.conv_layer3 = layers.Conv1D(filters=32, kernel_size=5,
                                     activation='relu', padding='same')
    self.pool_layer3 = layers.GlobalMaxPool1D()
    self.dense_layer = layers.Dense(units=64, activation='relu')
    self.output_layer = layers.Dense(class_num, activation='softmax')
def __init__(self, vocab_size, emb_dim=128, nb_filters=50, FFN_units=512,
             dropout_rate=0.1, name="dcnn"):
    super(DCNN, self).__init__(name=name)
    self.embedding = layers.Embedding(vocab_size, emb_dim)
    self.bigram = layers.Conv1D(filters=nb_filters, kernel_size=2,
                                padding="valid", activation="relu")
    self.trigram = layers.Conv1D(filters=nb_filters, kernel_size=3,
                                 padding="valid", activation="relu")
    self.fourgram = layers.Conv1D(filters=nb_filters, kernel_size=4,
                                  padding="valid", activation="relu")
    self.pool = layers.GlobalMaxPool1D()
    # Note: the L2 strength is coupled to the dropout rate (2 * dropout_rate)
    self.dense = layers.Dense(units=FFN_units,
                              activation="relu",
                              kernel_regularizer=tf.keras.regularizers.l2(2 * dropout_rate))
    self.dropout = layers.Dropout(rate=dropout_rate)
    self.last_dense = layers.Dense(units=1, activation="sigmoid")
def BGRU_CNN(sequence_length, embedding_layer):
    return keras.Sequential([
        keras.Input(shape=(sequence_length,)),
        embedding_layer,
        layers.SpatialDropout1D(0.3),
        layers.Bidirectional(layers.GRU(100, return_sequences=True)),
        layers.Conv1D(100, 3, activation="relu"),
        layers.GlobalMaxPool1D(),
        layers.Dense(50, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(6, activation="sigmoid")
    ])
def keras_model_fn_cpu(model_config, vocab_size, embedding_size, embeddings):
    """ CPU version of stacked Bi-LSTM and Bi-GRU with two fastText embeddings. """
    ## hyperparams
    model_name = model_config['model_name']
    num_class = model_config['num_class']
    lstm_hs = model_config['lstm_hs']
    gru_hs = model_config['gru_hs']
    learning_rate = model_config['learning_rate']

    with tf.device('/cpu:0'):
        ## build model
        inputs = ks.Input(shape=(None,), dtype='int32', name='inputs')
        embedded_sequences_ft1 = layers.Embedding(vocab_size, embedding_size,
                                                  trainable=False,
                                                  mask_zero=False)(inputs)
        embedded_sequences_ft2 = layers.Embedding(vocab_size, embedding_size,
                                                  trainable=False,
                                                  mask_zero=False)(inputs)
        concat_embed = layers.concatenate(
            [embedded_sequences_ft1, embedded_sequences_ft2])
        concat_embed = layers.SpatialDropout1D(0.5)(concat_embed)
        x = layers.Bidirectional(
            layers.LSTM(lstm_hs, recurrent_activation='sigmoid',
                        return_sequences=True))(concat_embed)
        # For a bidirectional GRU with return_state=True, the extra outputs
        # are the forward and backward final states (GRUs have no cell state),
        # despite the LSTM-style x_h/x_c names.
        x, x_h, x_c = layers.Bidirectional(
            layers.GRU(gru_hs, reset_after=True,
                       recurrent_activation='sigmoid',
                       return_sequences=True, return_state=True))(x)
        x_1 = layers.GlobalMaxPool1D()(x)
        x_2 = layers.GlobalAvgPool1D()(x)
        x_out = layers.concatenate([x_1, x_2, x_h])
        x_out = layers.BatchNormalization()(x_out)
        outputs = layers.Dense(num_class, activation='softmax',
                               name='outputs')(x_out)
        model = ks.Model(inputs, outputs, name=model_name)

    ## compile ('lr' is deprecated in tf.keras optimizers; use learning_rate)
    model.compile(loss='categorical_crossentropy',
                  optimizer=ks.optimizers.Adam(learning_rate=learning_rate,
                                               clipnorm=.25,
                                               beta_1=0.7, beta_2=0.99),
                  metrics=['categorical_accuracy',
                           ks.metrics.TopKCategoricalAccuracy(k=3)])
    return model