def create_model(albert_config, is_training, a_input_ids, a_input_mask,
                 a_segment_ids, b_input_ids, b_input_mask, b_segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
  """Creates a two-tower (siamese) ALBERT model scored by cosine similarity."""
  a_model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=a_input_ids,
      input_mask=a_input_mask,
      token_type_ids=a_segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  b_model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=b_input_ids,
      input_mask=b_input_mask,
      token_type_ids=b_segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  if FLAGS.use_pooled_output:
    tf.logging.info("using pooled output")
    a_output_layer = a_model.get_pooled_output()
    b_output_layer = b_model.get_pooled_output()
  else:
    tf.logging.info("using mean-pooled output")
    a_output_layer = tf.reduce_mean(a_model.get_sequence_output(), axis=1)
    b_output_layer = tf.reduce_mean(b_model.get_sequence_output(), axis=1)

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      a_output_layer = tf.nn.dropout(a_output_layer, keep_prob=0.9,
                                     name='a_dropout')
      b_output_layer = tf.nn.dropout(b_output_layer, keep_prob=0.9,
                                     name='b_dropout')

    # Cosine similarity between the two L2-normalized sentence embeddings.
    a_l2_norm = tf.math.l2_normalize(a_output_layer, axis=-1)
    b_l2_norm = tf.math.l2_normalize(b_output_layer, axis=-1)
    predictions = tf.reduce_sum(a_l2_norm * b_l2_norm, axis=-1)  # shape: (batch_size,)

    # Regress the cosine similarity against the labels with mean squared error.
    loss = tf.keras.losses.MSE(labels, predictions)

  return (a_output_layer, loss, predictions)
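# ---------------------------------------------------------------------------
# Rough numerical sketch (NumPy, not part of the model code above) of the
# objective computed in create_model: each pooled output is L2-normalized,
# the dot product of the two unit vectors gives a cosine similarity in
# [-1, 1], and the loss is the mean squared error against the labels. The
# vectors and label below are made-up illustration values.
import numpy as np

a = np.array([[1.0, 2.0, 2.0]])   # stand-in for a pooled output of tower A
b = np.array([[2.0, 2.0, 1.0]])   # stand-in for a pooled output of tower B
labels = np.array([1.0])          # stand-in similarity target

a_norm = a / np.linalg.norm(a, axis=-1, keepdims=True)
b_norm = b / np.linalg.norm(b, axis=-1, keepdims=True)
cosine = np.sum(a_norm * b_norm, axis=-1)      # shape: (batch_size,)
loss = np.mean((labels - cosine) ** 2)
print(cosine, loss)                            # ~0.889, ~0.0123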
def call(self, inputs, mask=None):
    # Drop masked positions, re-pad to a dense tensor, then rescale each
    # feature vector to unit L2 norm.
    if mask is not None:
        inputs = ragged.boolean_mask(inputs, mask).to_tensor()
    return math.l2_normalize(inputs, axis=-1)
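# ---------------------------------------------------------------------------
# Minimal eager-mode sketch (TF 2.x assumed) of the masked-normalization call
# above: masked positions are dropped, the remaining rows are re-padded with
# zeros, and every feature vector is rescaled to unit L2 norm. The input and
# mask values are made up for illustration.
import tensorflow as tf

inputs = tf.constant([[[3.0, 4.0], [1.0, 0.0], [0.0, 2.0]]])   # (1, 3, 2)
mask = tf.constant([[True, False, True]])                      # keep rows 0 and 2

kept = tf.ragged.boolean_mask(inputs, mask).to_tensor()        # (1, 2, 2)
normalized = tf.math.l2_normalize(kept, axis=-1)
print(normalized.numpy())                                      # [[[0.6, 0.8], [0.0, 1.0]]]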
def create_model(self):
    if self.model_number == 0:
        model = Sequential()
        model.add(
            Dense(64, activation='relu',
                  input_shape=(self.input_feature_size, )))
        model.add(Dense(self.embedding_size, activation='sigmoid'))
    if self.model_number == 1:
        model = Sequential()
        model.add(
            Dense(256, activation='relu',
                  input_shape=(self.input_feature_size, )))
        model.add(Dense(256, activation='relu'))
        model.add(Dense(self.embedding_size, activation='sigmoid'))
    if self.model_number == 2:
        model = Sequential()
        model.add(
            Reshape(self.input_feature_dim,
                    input_shape=(self.input_feature_size, )))
    if self.model_number == 3:
        model = Sequential()
        model.add(
            Reshape(self.input_feature_dim,
                    input_shape=(self.input_feature_size, )))
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.1))
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.1))
        model.add(Dense(self.embedding_size, activation='sigmoid'))
    if self.model_number == 4:
        model = Sequential()
        model.add(
            Reshape(self.input_feature_dim,
                    input_shape=(self.input_feature_size, )))
        model.add(
            Conv2D(64, (10, 10), activation='relu',
                   input_shape=self.input_feature_dim))
        model.add(MaxPooling2D())
        model.add(Conv2D(128, (7, 7), activation='relu'))
        model.add(MaxPooling2D())
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(self.embedding_size, activation='sigmoid'))
    if self.model_number == 5:
        model = Sequential()
        model.add(
            Reshape(self.input_feature_dim,
                    input_shape=(self.input_feature_size, )))
        model.add(
            Conv2D(64, (10, 10), activation='relu',
                   input_shape=self.input_feature_dim))
        model.add(MaxPooling2D())
        model.add(Conv2D(128, (7, 7), activation='relu'))
        model.add(MaxPooling2D())
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dense(128, activation='relu'))
        # No activation on the final dense layer; it used to be sigmoid.
        model.add(Dense(self.embedding_size, activation=None))
        # L2-normalize the embeddings. Whether this normalization layer is
        # strictly necessary is still an open question.
        model.add(Lambda(lambda x: l2_normalize(x, axis=1)))

    # Save initial weights.
    self.init_model_weights = model.get_weights()
    return model
                                              weights='imagenet',
                                              input_shape=IMG_SHAPE)
elif BASE_MODEL == 'MobileNetV2':
    base_model = tf.keras.applications.MobileNetV2(include_top=False,
                                                   weights='imagenet',
                                                   input_shape=IMG_SHAPE)
else:
    raise ValueError('BASE_MODEL has an invalid string.')

base_model.trainable = False

NetVLAD_layer = netvlad.NetVLAD(K=k_value)
# Alternative reduction layers; not used in the Sequential stack below.
dim_expansion = Lambda(lambda a: tf.expand_dims(a, axis=-2))
reduction_layer = Conv2D(embed_dimension, (1, 1))
# Dense reduction to the target embedding size, followed by L2 normalization.
dense_reduction_layer = Dense(embed_dimension)
l2_normalization_layer = Lambda(lambda a: l2_normalize(a, axis=-1))

model = tf.keras.Sequential(
    [base_model, NetVLAD_layer, dense_reduction_layer, l2_normalization_layer])
### ---------------------------------------------------------------------------

### Use pre-trained model or train conv5 too ----------------------------------
if USE_TRAINED_WEIGHTS:
    model = tf.keras.models.load_model(
        trained_weights_path, custom_objects={'NetVLAD': netvlad.NetVLAD})
else:
    pass

if TRAIN_CONV5:
    model.layers[0].trainable = True
    fine_tune_at = 15
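# ---------------------------------------------------------------------------
# The snippet stops at `fine_tune_at = 15`. A common Keras pattern for such an
# index (an assumption for illustration, not the author's actual follow-up
# code) is to unfreeze the base model while keeping the layers below that
# index frozen:
#
#     base = model.layers[0]
#     base.trainable = True
#     for layer in base.layers[:fine_tune_at]:
#         layer.trainable = False   # keep the early blocks frozen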