def init_model(backbone_model_name, freeze_backbone_for_N_epochs, input_shape,
               region_num, attribute_name_to_label_encoder_dict,
               kernel_regularization_factor, bias_regularization_factor,
               gamma_regularization_factor, beta_regularization_factor,
               use_adaptive_l1_l2_regularizer, min_value_in_clipping,
               max_value_in_clipping, share_last_block=False):

    def _add_objective_module(input_tensor):
        # Add GlobalAveragePooling2D
        if len(K.int_shape(input_tensor)) == 4:
            global_average_pooling_tensor = GlobalAveragePooling2D()(
                input_tensor)
        else:
            global_average_pooling_tensor = input_tensor
        if min_value_in_clipping is not None and max_value_in_clipping is not None:
            global_average_pooling_tensor = Lambda(
                lambda x: K.clip(x,
                                 min_value=min_value_in_clipping,
                                 max_value=max_value_in_clipping))(
                                     global_average_pooling_tensor)

        # https://arxiv.org/abs/1801.07698v1 Section 3.2.2 Output setting
        # https://arxiv.org/abs/1807.11042
        classification_input_tensor = global_average_pooling_tensor
        classification_embedding_tensor = BatchNormalization(
            scale=True, epsilon=2e-5)(classification_input_tensor)

        # Add categorical crossentropy loss
        assert len(attribute_name_to_label_encoder_dict) == 1
        # label_encoder = attribute_name_to_label_encoder_dict["identity_ID"]
        # class_num = len(label_encoder.classes_)
        # TODO: hardcoded for the Market1501 model
        class_num = 751
        classification_output_tensor = Dense(
            units=class_num,
            use_bias=False,
            kernel_initializer=RandomNormal(
                mean=0.0, stddev=0.001))(classification_embedding_tensor)
        classification_output_tensor = Activation("softmax")(
            classification_output_tensor)

        # Add miscellaneous loss
        miscellaneous_input_tensor = global_average_pooling_tensor
        miscellaneous_embedding_tensor = miscellaneous_input_tensor
        miscellaneous_output_tensor = miscellaneous_input_tensor
        return (classification_output_tensor, classification_embedding_tensor,
                miscellaneous_output_tensor, miscellaneous_embedding_tensor)

    def _apply_concatenation(tensor_list):
        if len(tensor_list) == 1:
            return tensor_list[0]
        else:
            return Concatenate()(tensor_list)

    def _triplet_hermans_loss(y_true,
                              y_pred,
                              metric="euclidean",
                              margin="soft"):
        # Create the loss in two steps:
        # 1. Compute all pairwise distances according to the specified metric.
        # 2. For each anchor along the first dimension, compute its loss.
        dists = cdist(y_pred, y_pred, metric=metric)
        loss = batch_hard(dists=dists,
                          pids=tf.argmax(y_true, axis=-1),
                          margin=margin)
        return loss

    # Initialization
    classification_output_tensor_list = []
    classification_embedding_tensor_list = []
    miscellaneous_output_tensor_list = []
    miscellaneous_embedding_tensor_list = []

    # Initialize the early blocks
    model_instantiation = getattr(applications, backbone_model_name, None)
    assert model_instantiation is not None, \
        "Backbone {} is not supported.".format(backbone_model_name)
    submodel_list, preprocess_input = model_instantiation(
        input_shape=input_shape)
    vanilla_input_tensor = Input(shape=K.int_shape(submodel_list[0].input)[1:])
    intermediate_output_tensor = vanilla_input_tensor
    for submodel in submodel_list[:-1]:
        if freeze_backbone_for_N_epochs > 0:
            submodel.trainable = False
        intermediate_output_tensor = submodel(intermediate_output_tensor)

    # Initialize the last blocks
    last_block = submodel_list[-1]
    last_block_for_global_branch_model = replicate_model(
        last_block, name="last_block_for_global_branch")
    if freeze_backbone_for_N_epochs > 0:
        last_block_for_global_branch_model.trainable = False
    if share_last_block:
        last_block_for_regional_branch_model = last_block_for_global_branch_model
    else:
        last_block_for_regional_branch_model = replicate_model(
            last_block, name="last_block_for_regional_branch")
        if freeze_backbone_for_N_epochs > 0:
            last_block_for_regional_branch_model.trainable = False

    # Add the global branch
    (classification_output_tensor, classification_embedding_tensor,
     miscellaneous_output_tensor,
     miscellaneous_embedding_tensor) = _add_objective_module(
         last_block_for_global_branch_model(intermediate_output_tensor))
    classification_output_tensor_list.append(classification_output_tensor)
    classification_embedding_tensor_list.append(
        classification_embedding_tensor)
    miscellaneous_output_tensor_list.append(miscellaneous_output_tensor)
    miscellaneous_embedding_tensor_list.append(miscellaneous_embedding_tensor)

    # Add the regional branch
    if region_num > 0:
        # Process each region
        regional_branch_output_tensor = last_block_for_regional_branch_model(
            intermediate_output_tensor)
        total_height = K.int_shape(regional_branch_output_tensor)[1]
        region_size = total_height // region_num
        for region_index in np.arange(region_num):
            # Get a slice of feature maps
            start_index = region_index * region_size
            end_index = (region_index + 1) * region_size
            if region_index == region_num - 1:
                end_index = total_height
            sliced_regional_branch_output_tensor = Lambda(
                lambda x, start_index=start_index, end_index=end_index:
                x[:, start_index:end_index])(regional_branch_output_tensor)

            # Downsampling
            sliced_regional_branch_output_tensor = Conv2D(
                filters=K.int_shape(sliced_regional_branch_output_tensor)[-1]
                // region_num,
                kernel_size=3,
                padding="same")(sliced_regional_branch_output_tensor)
            sliced_regional_branch_output_tensor = Activation("relu")(
                sliced_regional_branch_output_tensor)

            # Add the objective module for this region
            (classification_output_tensor, classification_embedding_tensor,
             miscellaneous_output_tensor,
             miscellaneous_embedding_tensor) = _add_objective_module(
                 sliced_regional_branch_output_tensor)
            classification_output_tensor_list.append(
                classification_output_tensor)
            classification_embedding_tensor_list.append(
                classification_embedding_tensor)
            miscellaneous_output_tensor_list.append(
                miscellaneous_output_tensor)
            miscellaneous_embedding_tensor_list.append(
                miscellaneous_embedding_tensor)

    # Define the merged model
    embedding_tensor_list = [
        _apply_concatenation(miscellaneous_embedding_tensor_list)
    ]
    embedding_size_list = [
        K.int_shape(embedding_tensor)[1]
        for embedding_tensor in embedding_tensor_list
    ]
    merged_embedding_tensor = _apply_concatenation(embedding_tensor_list)
    merged_model = Model(inputs=[vanilla_input_tensor],
                         outputs=classification_output_tensor_list +
                         miscellaneous_output_tensor_list +
                         [merged_embedding_tensor])
    merged_model = specify_regularizers(merged_model,
                                        kernel_regularization_factor,
                                        bias_regularization_factor,
                                        gamma_regularization_factor,
                                        beta_regularization_factor,
                                        use_adaptive_l1_l2_regularizer)

    # Define the models for training/inference
    training_model = Model(inputs=[merged_model.input],
                           outputs=merged_model.output[:-1],
                           name="training_model")
    inference_model = Model(inputs=[merged_model.input],
                            outputs=[merged_model.output[-1]],
                            name="inference_model")
    inference_model.embedding_size_list = embedding_size_list

    # Compile the model
    classification_loss_function_list = [
        "categorical_crossentropy"
    ] * len(classification_output_tensor_list)
    triplet_hermans_loss_function = lambda y_true, y_pred: 1.0 * _triplet_hermans_loss(
        y_true, y_pred)
    miscellaneous_loss_function_list = [
        triplet_hermans_loss_function
    ] * len(miscellaneous_output_tensor_list)
    training_model.compile_kwargs = {
        "optimizer": Adam(),
        "loss": classification_loss_function_list +
                miscellaneous_loss_function_list
    }
    training_model.compile(**training_model.compile_kwargs)

    # Print the summary of the models
    # summarize_model(training_model)
    # summarize_model(inference_model)

    return training_model, inference_model, preprocess_input
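

# --- Illustrative sketch (not part of the original source) ---
# `_triplet_hermans_loss` above delegates to `cdist` and `batch_hard`, which
# this module imports from elsewhere in the project. Below is a minimal
# re-implementation sketch of those helpers, following Hermans et al.,
# "In Defense of the Triplet Loss for Person Re-Identification"
# (https://arxiv.org/abs/1703.07737). The `_sketch` suffix avoids shadowing
# the real helpers; only the "euclidean" metric and "soft" margin are covered.
def _cdist_sketch(a, b, metric="euclidean"):
    # All pairwise distances between the rows of `a` and the rows of `b`.
    diffs = tf.expand_dims(a, axis=1) - tf.expand_dims(b, axis=0)
    if metric == "euclidean":
        # A small epsilon keeps the gradient of sqrt finite at zero distance.
        return tf.sqrt(tf.reduce_sum(tf.square(diffs), axis=-1) + 1e-12)
    raise NotImplementedError(
        "The '{}' metric is not covered by this sketch.".format(metric))


def _batch_hard_sketch(dists, pids, margin="soft"):
    # For each anchor, pick the furthest positive (same pid, excluding self)
    # and the closest negative (different pid), then apply the margin.
    same_identity_mask = tf.equal(tf.expand_dims(pids, axis=1),
                                  tf.expand_dims(pids, axis=0))
    diagonal_mask = tf.cast(tf.eye(tf.shape(pids)[0]), tf.bool)
    positive_mask = tf.logical_and(same_identity_mask,
                                   tf.logical_not(diagonal_mask))
    furthest_positive = tf.reduce_max(
        dists * tf.cast(positive_mask, dists.dtype), axis=1)
    # Push same-identity entries (including the diagonal) out of the minimum.
    closest_negative = tf.reduce_min(
        dists + 1e9 * tf.cast(same_identity_mask, dists.dtype), axis=1)
    diff = furthest_positive - closest_negative
    if margin == "soft":
        # Soft margin: log(1 + exp(x)), a smooth variant of the hinge.
        return tf.nn.softplus(diff)
    return tf.maximum(diff + float(margin), 0.0)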
def init_model(backbone_model_name, freeze_backbone_for_N_epochs, input_shape,
               region_num, attribute_name_to_label_encoder_dict,
               kernel_regularization_factor, bias_regularization_factor,
               gamma_regularization_factor, beta_regularization_factor,
               pooling_mode, min_value, max_value, use_horizontal_flipping):

    def _add_pooling_module(input_tensor):
        # Add a global pooling layer
        output_tensor = input_tensor
        if len(K.int_shape(output_tensor)) == 4:
            if pooling_mode == "Average":
                output_tensor = GlobalAveragePooling2D()(output_tensor)
            elif pooling_mode == "Max":
                output_tensor = GlobalMaxPooling2D()(output_tensor)
            elif pooling_mode == "GeM":
                output_tensor = GlobalGeMPooling2D()(output_tensor)
            else:
                assert False, "{} is an invalid argument!".format(pooling_mode)

        # Add the clipping operation
        if min_value is not None and max_value is not None:
            output_tensor = Lambda(lambda x: K.clip(
                x, min_value=min_value, max_value=max_value))(output_tensor)
        return output_tensor

    def _add_classification_module(input_tensor):
        # Add a batch normalization layer
        output_tensor = input_tensor
        output_tensor = BatchNormalization(epsilon=2e-5)(output_tensor)

        # Add a dense layer with softmax activation
        label_encoder = attribute_name_to_label_encoder_dict["identity_ID"]
        class_num = len(label_encoder.classes_)
        output_tensor = Dense(units=class_num,
                              use_bias=False,
                              kernel_initializer=RandomNormal(
                                  mean=0.0, stddev=0.001))(output_tensor)
        output_tensor = Activation("softmax")(output_tensor)
        return output_tensor

    def _triplet_hermans_loss(y_true,
                              y_pred,
                              metric="euclidean",
                              margin="soft"):
        # Create the loss in two steps:
        # 1. Compute all pairwise distances according to the specified metric.
        # 2. For each anchor along the first dimension, compute its loss.
        dists = cdist(y_pred, y_pred, metric=metric)
        loss = batch_hard(dists=dists,
                          pids=tf.argmax(y_true, axis=-1),
                          margin=margin)
        return loss

    # Initialization
    miscellaneous_output_tensor_list = []

    # Initialize the early blocks
    applications_instance = Applications()
    model_name_to_model_function = (
        applications_instance.get_model_name_to_model_function())
    assert backbone_model_name in model_name_to_model_function.keys(), \
        "Backbone {} is not supported.".format(backbone_model_name)
    model_function = model_name_to_model_function[backbone_model_name]
    blocks = applications_instance.get_model_in_blocks(
        model_function=model_function, include_top=False)
    vanilla_input_tensor = Input(shape=input_shape)
    intermediate_output_tensor = vanilla_input_tensor
    for block in blocks[:-1]:
        block = Applications.wrap_block(block, intermediate_output_tensor)
        intermediate_output_tensor = block(intermediate_output_tensor)

    # Initialize the last blocks
    last_block = Applications.wrap_block(blocks[-1],
                                         intermediate_output_tensor)
    last_block_for_global_branch_model = replicate_model(
        model=last_block, suffix="global_branch")
    last_block_for_regional_branch_model = replicate_model(
        model=last_block, suffix="regional_branch")

    # Add the global branch
    miscellaneous_output_tensor = _add_pooling_module(
        input_tensor=last_block_for_global_branch_model(
            intermediate_output_tensor))
    miscellaneous_output_tensor_list.append(miscellaneous_output_tensor)

    # Add the regional branch
    if region_num > 0:
        # Process each region
        regional_branch_output_tensor = last_block_for_regional_branch_model(
            intermediate_output_tensor)
        total_height = K.int_shape(regional_branch_output_tensor)[1]
        region_size = total_height // region_num
        for region_index in np.arange(region_num):
            # Get a slice of feature maps
            start_index = region_index * region_size
            end_index = (region_index + 1) * region_size
            if region_index == region_num - 1:
                end_index = total_height
            sliced_regional_branch_output_tensor = Lambda(
                lambda x, start_index=start_index, end_index=end_index:
                x[:, start_index:end_index])(regional_branch_output_tensor)

            # Downsampling
            sliced_regional_branch_output_tensor = Conv2D(
                filters=K.int_shape(sliced_regional_branch_output_tensor)[-1]
                // region_num,
                kernel_size=3,
                padding="same")(sliced_regional_branch_output_tensor)
            sliced_regional_branch_output_tensor = Activation("relu")(
                sliced_regional_branch_output_tensor)

            # Add the pooling module for this region
            miscellaneous_output_tensor = _add_pooling_module(
                input_tensor=sliced_regional_branch_output_tensor)
            miscellaneous_output_tensor_list.append(
                miscellaneous_output_tensor)

    # Define the model used in inference
    inference_model = Model(inputs=[vanilla_input_tensor],
                            outputs=miscellaneous_output_tensor_list,
                            name="inference_model")
    specify_regularizers(inference_model, kernel_regularization_factor,
                         bias_regularization_factor,
                         gamma_regularization_factor,
                         beta_regularization_factor)

    # Define the model used in classification
    classification_input_tensor_list = [
        Input(shape=K.int_shape(item)[1:])
        for item in miscellaneous_output_tensor_list
    ]
    classification_output_tensor_list = []
    for classification_input_tensor in classification_input_tensor_list:
        classification_output_tensor = _add_classification_module(
            input_tensor=classification_input_tensor)
        classification_output_tensor_list.append(classification_output_tensor)
    classification_model = Model(inputs=classification_input_tensor_list,
                                 outputs=classification_output_tensor_list,
                                 name="classification_model")
    specify_regularizers(classification_model, kernel_regularization_factor,
                         bias_regularization_factor,
                         gamma_regularization_factor,
                         beta_regularization_factor)

    # Define the model used in training
    expand = lambda x: x if isinstance(x, list) else [x]
    vanilla_input_tensor = Input(shape=K.int_shape(inference_model.input)[1:])
    vanilla_feature_tensor_list = expand(inference_model(vanilla_input_tensor))
    if use_horizontal_flipping:
        flipped_input_tensor = tf.image.flip_left_right(vanilla_input_tensor)
        flipped_feature_tensor_list = expand(
            inference_model(flipped_input_tensor))
        merged_feature_tensor_list = [
            sum(item_tuple) / 2 for item_tuple in zip(
                vanilla_feature_tensor_list, flipped_feature_tensor_list)
        ]
    else:
        merged_feature_tensor_list = vanilla_feature_tensor_list
    miscellaneous_output_tensor_list = merged_feature_tensor_list
    classification_output_tensor_list = expand(
        classification_model(merged_feature_tensor_list))
    training_model = Model(inputs=[vanilla_input_tensor],
                           outputs=miscellaneous_output_tensor_list +
                           classification_output_tensor_list,
                           name="training_model")

    # Add the flipping loss
    if use_horizontal_flipping:
        flipping_loss_list = [
            K.mean(mean_squared_error(*item_tuple)) for item_tuple in zip(
                vanilla_feature_tensor_list, flipped_feature_tensor_list)
        ]
        flipping_loss = sum(flipping_loss_list)
        training_model.add_metric(flipping_loss,
                                  name="flipping",
                                  aggregation="mean")
        training_model.add_loss(1.0 * flipping_loss)

    # Compile the model
    triplet_hermans_loss_function = lambda y_true, y_pred: 1.0 * _triplet_hermans_loss(
        y_true, y_pred)
    miscellaneous_loss_function_list = [
        triplet_hermans_loss_function
    ] * len(miscellaneous_output_tensor_list)
    categorical_crossentropy_loss_function = lambda y_true, y_pred: 1.0 * categorical_crossentropy(
        y_true, y_pred, from_logits=False, label_smoothing=0.0)
    classification_loss_function_list = [
        categorical_crossentropy_loss_function
    ] * len(classification_output_tensor_list)
    training_model.compile_kwargs = {
        "optimizer": Adam(),
        "loss": miscellaneous_loss_function_list +
                classification_loss_function_list
    }
    if freeze_backbone_for_N_epochs > 0:
        specify_trainable(model=training_model,
                          trainable=False,
                          keywords=[block.name for block in blocks])
    training_model.compile(**training_model.compile_kwargs)

    # Print the summary of the training model
    summarize_model(training_model)

    return training_model, inference_model
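

# --- Usage sketch (illustrative; every concrete value below is an
# assumption, not taken from the original source) ---
# Builds the training/inference pair for a Market-1501-like setup. The label
# encoder is assumed to come from scikit-learn, and "ResNet50" is assumed to
# be one of the backbone names supported by the project's Applications class.
if __name__ == "__main__":
    from sklearn.preprocessing import LabelEncoder

    # Market-1501 has 751 training identities; dummy IDs stand in for the
    # real annotations here.
    identity_label_encoder = LabelEncoder().fit(np.arange(751))
    training_model, inference_model = init_model(
        backbone_model_name="ResNet50",
        freeze_backbone_for_N_epochs=20,
        input_shape=(384, 128, 3),
        region_num=2,
        attribute_name_to_label_encoder_dict={
            "identity_ID": identity_label_encoder
        },
        kernel_regularization_factor=0.0005,
        bias_regularization_factor=0.0005,
        gamma_regularization_factor=0.0005,
        beta_regularization_factor=0.0005,
        pooling_mode="GeM",
        min_value=None,
        max_value=None,
        use_horizontal_flipping=True)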