def CompoundNet_VGG19(include_top=True, weights=None,
                      input_tensor=None, input_shape=None,
                      fusion_strategy='concatenate', mode='fine_tuning',
                      pooling_mode='avg', classes=9, data_augm_enabled=False):
    """Instantiates the CompoundNet-VGG19 architecture fine-tuned (2 steps) on the Human Rights Archive dataset.

    Optionally loads weights pre-trained on the Human Rights Archive database.

    # Arguments
        include_top: whether to include the 3 fully-connected layers at the top of the network.
        weights: one of `None` (random initialization),
            'HRA' (pre-training on Human Rights Archive),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 48.
            E.g. `(200, 200, 3)` would be one valid value.
        fusion_strategy: one of `concatenate` (feature vectors of different sources
            are concatenated into one super-vector), `average` (the feature set is averaged)
            or `maximum` (selects the highest value from the corresponding features).
        mode: one of `feature_extraction` (freeze all but the penultimate layer and
            re-train the last Dense layer) or `fine_tuning` (unfreeze the lower
            convolutional layers and retrain more layers).
        pooling_mode: pooling mode applied to the output of the last convolutional
            layer, making the output of the model a 2D tensor.
            - `avg` means that global average pooling will be applied.
            - `max` means that global max pooling will be applied.
            - `flatten` means that the output will be flattened,
                resulting in a larger Dense layer afterwards.
        classes: optional number of classes to classify images into,
            only to be specified if `weights` argument is `None`.
        data_augm_enabled: whether to use the augmented samples during training.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            `fusion_strategy` or `pooling_mode`.
    """
    if not (weights in {'HRA', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `HRA` '
                         '(pre-training on Human Rights Archive), '
                         'or the path to the weights file to be loaded.')

    if fusion_strategy not in {'concatenate', 'average', 'maximum'}:
        raise ValueError('The `fusion_strategy` argument should be either '
                         '`concatenate` (feature vectors of different sources are '
                         'concatenated into one super-vector), '
                         '`average` (the feature set is averaged) '
                         'or `maximum` (selects the highest value from the corresponding features).')

    if pooling_mode not in {'avg', 'max', 'flatten'}:
        raise ValueError('The `pooling_mode` argument should be either '
                         '`avg` (GlobalAveragePooling2D), `max` (GlobalMaxPooling2D), '
                         'or `flatten` (Flatten).')

    if weights == 'HRA' and classes != 9:
        raise ValueError('If using `weights` as Human Rights Archive, `classes` should be 9.')

    cache_subdir = 'HRA_models'

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=48,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Both branches consume the same image input.
    object_centric_model = VGG19(input_tensor=img_input, weights='imagenet', include_top=False)
    scene_centric_model = VGG16_Places365(input_tensor=img_input, weights='places', include_top=False)

    # retrieve the outputs
    object_model_output = object_centric_model.output
    scene_model_output = scene_centric_model.output

    # We will feed the extracted features to a merging layer
    if fusion_strategy == 'concatenate':
        merged = concatenate([object_model_output, scene_model_output])
    elif fusion_strategy == 'average':
        merged = average([object_model_output, scene_model_output])
    else:
        merged = maximum([object_model_output, scene_model_output])

    if include_top:
        if pooling_mode == 'avg':
            x = GlobalAveragePooling2D(name='GAP')(merged)
        elif pooling_mode == 'max':
            x = GlobalMaxPooling2D(name='GMP')(merged)
        elif pooling_mode == 'flatten':
            x = Flatten(name='FLATTEN')(merged)

        x = Dense(256, activation='relu', name='FC1')(x)  # let's add a fully-connected layer

        # When random init is enabled, we want to include Dropout,
        # otherwise when loading a pre-trained HRA model we want to omit the
        # Dropout layer so the visualisations are done properly (there is an issue if it is included)
        if weights is None:
            x = Dropout(0.5, name='DROPOUT')(x)

        # and a logistic layer with the number of classes defined by the `classes` argument
        x = Dense(classes, activation='softmax', name='PREDICTIONS')(x)  # new softmax layer
    else:
        x = merged  # without the classifier top, expose the merged feature maps

    # Ensure that the model takes into account any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # this is the transfer learning model we will train
    model = Model(inputs=inputs, outputs=x, name='CompoundNet-VGG19')

    # load weights
    if weights == 'HRA':
        if include_top:
            if mode == 'feature_extraction':
                for layer in object_centric_model.layers:
                    layer.trainable = False
                for layer in scene_centric_model.layers:
                    layer.trainable = False

                model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                              loss='categorical_crossentropy')

                # (fname, origin) pairs for every fusion/pooling combination
                if data_augm_enabled:
                    weight_files = {
                        ('concatenate', 'avg'): (AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_AVG_POOL_fname,
                                                 AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_AVG_POOL_WEIGHTS_PATH),
                        ('concatenate', 'flatten'): (AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_FLATTEN_fname,
                                                     AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_FLATTEN_WEIGHTS_PATH),
                        ('concatenate', 'max'): (AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_MAX_POOL_fname,
                                                 AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_MAX_POOL_WEIGHTS_PATH),
                        ('average', 'avg'): (AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_AVG_POOL_fname,
                                             AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_AVG_POOL_WEIGHTS_PATH),
                        ('average', 'flatten'): (AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_FLATTEN_fname,
                                                 AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_FLATTEN_WEIGHTS_PATH),
                        ('average', 'max'): (AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_MAX_POOL_fname,
                                             AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_MAX_POOL_WEIGHTS_PATH),
                        ('maximum', 'avg'): (AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_AVG_POOL_fname,
                                             AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_AVG_POOL_WEIGHTS_PATH),
                        ('maximum', 'flatten'): (AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_FLATTEN_fname,
                                                 AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_FLATTEN_WEIGHTS_PATH),
                        ('maximum', 'max'): (AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_MAX_POOL_fname,
                                             AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_MAX_POOL_WEIGHTS_PATH),
                    }
                else:
                    weight_files = {
                        ('concatenate', 'avg'): (FEATURE_EXTRACTION_CONCATENATE_FUSION_AVG_POOL_fname,
                                                 FEATURE_EXTRACTION_CONCATENATE_FUSION_AVG_POOL_WEIGHTS_PATH),
                        ('concatenate', 'flatten'): (FEATURE_EXTRACTION_CONCATENATE_FUSION_FLATTEN_fname,
                                                     FEATURE_EXTRACTION_CONCATENATE_FUSION_FLATTEN_WEIGHTS_PATH),
                        ('concatenate', 'max'): (FEATURE_EXTRACTION_CONCATENATE_FUSION_MAX_POOL_fname,
                                                 FEATURE_EXTRACTION_CONCATENATE_FUSION_MAX_POOL_WEIGHTS_PATH),
                        ('average', 'avg'): (FEATURE_EXTRACTION_AVERAGE_FUSION_AVG_POOL_fname,
                                             FEATURE_EXTRACTION_AVERAGE_FUSION_AVG_POOL_WEIGHTS_PATH),
                        ('average', 'flatten'): (FEATURE_EXTRACTION_AVERAGE_FUSION_FLATTEN_fname,
                                                 FEATURE_EXTRACTION_AVERAGE_FUSION_FLATTEN_WEIGHTS_PATH),
                        ('average', 'max'): (FEATURE_EXTRACTION_AVERAGE_FUSION_MAX_POOL_fname,
                                             FEATURE_EXTRACTION_AVERAGE_FUSION_MAX_POOL_WEIGHTS_PATH),
                        ('maximum', 'avg'): (FEATURE_EXTRACTION_MAXIMUM_FUSION_AVG_POOL_fname,
                                             FEATURE_EXTRACTION_MAXIMUM_FUSION_AVG_POOL_WEIGHTS_PATH),
                        ('maximum', 'flatten'): (FEATURE_EXTRACTION_MAXIMUM_FUSION_FLATTEN_fname,
                                                 FEATURE_EXTRACTION_MAXIMUM_FUSION_FLATTEN_WEIGHTS_PATH),
                        ('maximum', 'max'): (FEATURE_EXTRACTION_MAXIMUM_FUSION_MAX_POOL_fname,
                                             FEATURE_EXTRACTION_MAXIMUM_FUSION_MAX_POOL_WEIGHTS_PATH),
                    }

            elif mode == 'fine_tuning':
                for layer in model.layers[:36]:
                    layer.trainable = False
                for layer in model.layers[36:]:
                    layer.trainable = True

                model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                              loss='categorical_crossentropy')

                if data_augm_enabled:
                    weight_files = {
                        ('concatenate', 'avg'): (AUGM_FINE_TUNING_CONCATENATE_FUSION_AVG_POOL_fname,
                                                 AUGM_FINE_TUNING_CONCATENATE_FUSION_AVG_POOL_WEIGHTS_PATH),
                        ('concatenate', 'flatten'): (AUGM_FINE_TUNING_CONCATENATE_FUSION_FLATTEN_fname,
                                                     AUGM_FINE_TUNING_CONCATENATE_FUSION_FLATTEN_WEIGHTS_PATH),
                        ('concatenate', 'max'): (AUGM_FINE_TUNING_CONCATENATE_FUSION_MAX_POOL_fname,
                                                 AUGM_FINE_TUNING_CONCATENATE_FUSION_MAX_POOL_WEIGHTS_PATH),
                        ('average', 'avg'): (AUGM_FINE_TUNING_AVERAGE_FUSION_AVG_POOL_fname,
                                             AUGM_FINE_TUNING_AVERAGE_FUSION_AVG_POOL_WEIGHTS_PATH),
                        ('average', 'flatten'): (AUGM_FINE_TUNING_AVERAGE_FUSION_FLATTEN_fname,
                                                 AUGM_FINE_TUNING_AVERAGE_FUSION_FLATTEN_WEIGHTS_PATH),
                        ('average', 'max'): (AUGM_FINE_TUNING_AVERAGE_FUSION_MAX_POOL_fname,
                                             AUGM_FINE_TUNING_AVERAGE_FUSION_MAX_POOL_WEIGHTS_PATH),
                        ('maximum', 'avg'): (AUGM_FINE_TUNING_MAXIMUM_FUSION_AVG_POOL_fname,
                                             AUGM_FINE_TUNING_MAXIMUM_FUSION_AVG_POOL_WEIGHTS_PATH),
                        ('maximum', 'flatten'): (AUGM_FINE_TUNING_MAXIMUM_FUSION_FLATTEN_fname,
                                                 AUGM_FINE_TUNING_MAXIMUM_FUSION_FLATTEN_WEIGHTS_PATH),
                        ('maximum', 'max'): (AUGM_FINE_TUNING_MAXIMUM_FUSION_MAX_POOL_fname,
                                             AUGM_FINE_TUNING_MAXIMUM_FUSION_MAX_POOL_WEIGHTS_PATH),
                    }
                else:
                    weight_files = {
                        ('concatenate', 'avg'): (FINE_TUNING_CONCATENATE_FUSION_AVG_POOL_fname,
                                                 FINE_TUNING_CONCATENATE_FUSION_AVG_POOL_WEIGHTS_PATH),
                        ('concatenate', 'flatten'): (FINE_TUNING_CONCATENATE_FUSION_FLATTEN_fname,
                                                     FINE_TUNING_CONCATENATE_FUSION_FLATTEN_WEIGHTS_PATH),
                        ('concatenate', 'max'): (FINE_TUNING_CONCATENATE_FUSION_MAX_POOL_fname,
                                                 FINE_TUNING_CONCATENATE_FUSION_MAX_POOL_WEIGHTS_PATH),
                        ('average', 'avg'): (FINE_TUNING_AVERAGE_FUSION_AVG_POOL_fname,
                                             FINE_TUNING_AVERAGE_FUSION_AVG_POOL_WEIGHTS_PATH),
                        ('average', 'flatten'): (FINE_TUNING_AVERAGE_FUSION_FLATTEN_fname,
                                                 FINE_TUNING_AVERAGE_FUSION_FLATTEN_WEIGHTS_PATH),
                        ('average', 'max'): (FINE_TUNING_AVERAGE_FUSION_MAX_POOL_fname,
                                             FINE_TUNING_AVERAGE_FUSION_MAX_POOL_WEIGHTS_PATH),
                        ('maximum', 'avg'): (FINE_TUNING_MAXIMUM_FUSION_AVG_POOL_fname,
                                             FINE_TUNING_MAXIMUM_FUSION_AVG_POOL_WEIGHTS_PATH),
                        ('maximum', 'flatten'): (FINE_TUNING_MAXIMUM_FUSION_FLATTEN_fname,
                                                 FINE_TUNING_MAXIMUM_FUSION_FLATTEN_WEIGHTS_PATH),
                        ('maximum', 'max'): (FINE_TUNING_MAXIMUM_FUSION_MAX_POOL_fname,
                                             FINE_TUNING_MAXIMUM_FUSION_MAX_POOL_WEIGHTS_PATH),
                    }

            fname, origin = weight_files[(fusion_strategy, pooling_mode)]
            weights_path = get_file(fname, origin, cache_subdir=cache_subdir)

        else:
            # no-top weights are only released for the fine-tuned models
            if fusion_strategy == 'average':
                weights_path = get_file(FINE_TUNING_AVERAGE_FUSION_NO_TOP_fname,
                                        FINE_TUNING_AVERAGE_FUSION_WEIGHTS_PATH_NO_TOP,
                                        cache_subdir=cache_subdir)
            elif fusion_strategy == 'concatenate':
                weights_path = get_file(FINE_TUNING_CONCATENATE_FUSION_NO_TOP_fname,
                                        FINE_TUNING_CONCATENATE_FUSION_WEIGHTS_PATH_NO_TOP,
                                        cache_subdir=cache_subdir)
            elif fusion_strategy == 'maximum':
                weights_path = get_file(FINE_TUNING_MAXIMUM_FUSION_NO_TOP_fname,
                                        FINE_TUNING_MAXIMUM_FUSION_WEIGHTS_PATH_NO_TOP,
                                        cache_subdir=cache_subdir)

        model.load_weights(weights_path)

    return model
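
# Usage sketch (not part of the original module): load the HRA-trained compound
# model and classify a single image. `img_path` is a placeholder for a local
# image file; the 1/255 rescaling mirrors the ImageDataGenerator used for
# training elsewhere in this project.
if __name__ == '__main__':
    import numpy as np
    from keras.preprocessing import image

    model = CompoundNet_VGG19(weights='HRA', fusion_strategy='concatenate',
                              mode='fine_tuning', pooling_mode='avg')

    img_path = 'test_image.jpg'  # placeholder path
    img = image.load_img(img_path, target_size=(224, 224))
    x = np.expand_dims(image.img_to_array(img), axis=0) / 255.

    preds = model.predict(x)  # shape (1, 9): probabilities over the 9 HRA classes
    print(np.argmax(preds[0]))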
def __init__(self, hdf5_file, body_backbone_CNN, image_backbone_CNN,
             nb_of_epochs, weights_to_file,
             modelCheckpoint_quantity, earlyStopping_quantity, CSVLogger_filename):

    if body_backbone_CNN not in {'VGG16', 'VGG19', 'ResNet50', 'VGG16_Places365'}:
        raise ValueError('The `body_backbone_CNN` argument should be either '
                         '`VGG16`, `VGG19`, `ResNet50` or `VGG16_Places365`.')

    if image_backbone_CNN not in {'VGG16', 'VGG19', 'ResNet50', 'VGG16_Places365'}:
        raise ValueError('The `image_backbone_CNN` argument should be either '
                         '`VGG16`, `VGG19`, `ResNet50` or `VGG16_Places365`.')

    self.body_backbone_CNN = body_backbone_CNN
    self.image_backbone_CNN = image_backbone_CNN

    # -------------------------------------------------------------------------------- #
    #                               Construct EMOTIC model                               #
    # -------------------------------------------------------------------------------- #
    body_inputs = Input(shape=(224, 224, 3), name='INPUT')
    image_inputs = Input(shape=(224, 224, 3), name='INPUT')

    # Body module
    if 'VGG16' == body_backbone_CNN:
        self.body_truncated_model = VGG16(include_top=False, weights='imagenet',
                                          input_tensor=body_inputs, pooling='avg')
    elif 'VGG19' == body_backbone_CNN:
        self.body_truncated_model = VGG19(include_top=False, weights='imagenet',
                                          input_tensor=body_inputs, pooling='avg')
    elif 'ResNet50' == body_backbone_CNN:
        tmp_model = ResNet50(include_top=False, weights='imagenet',
                             input_tensor=body_inputs, pooling='avg')
        self.body_truncated_model = Model(inputs=tmp_model.input,
                                          outputs=tmp_model.get_layer('activation_48').output)
    elif 'VGG16_Places365' == body_backbone_CNN:
        self.body_truncated_model = VGG16_Places365(include_top=False, weights='places',
                                                    input_tensor=body_inputs, pooling='avg')

    # Prefixing the layer names also disambiguates the two `INPUT` layers
    # before the combined two-input model is built.
    for layer in self.body_truncated_model.layers:
        layer.name = 'body-' + layer.name

    print('[INFO] The plain, body `' + body_backbone_CNN + '` pre-trained convnet was successfully initialised.')

    # Image module
    if 'VGG16' == image_backbone_CNN:
        self.image_truncated_model = VGG16(include_top=False, weights='imagenet',
                                           input_tensor=image_inputs, pooling='avg')
    elif 'VGG19' == image_backbone_CNN:
        self.image_truncated_model = VGG19(include_top=False, weights='imagenet',
                                           input_tensor=image_inputs, pooling='avg')
    elif 'ResNet50' == image_backbone_CNN:
        tmp_model = ResNet50(include_top=False, weights='imagenet',
                             input_tensor=image_inputs, pooling='avg')
        self.image_truncated_model = Model(inputs=tmp_model.input,
                                           outputs=tmp_model.get_layer('activation_48').output)
    elif 'VGG16_Places365' == image_backbone_CNN:
        self.image_truncated_model = VGG16_Places365(include_top=False, weights='places',
                                                     input_tensor=image_inputs, pooling='avg')

    for layer in self.image_truncated_model.layers:
        layer.name = 'image-' + layer.name

    print('[INFO] The plain, image `' + image_backbone_CNN + '` pre-trained convnet was successfully initialised.')

    # retrieve the outputs
    body_plain_model_output = self.body_truncated_model.output
    image_plain_model_output = self.image_truncated_model.output

    # In case ResNet50 is selected, an explicit global average pooling layer is needed
    # to follow the process used for the other CNNs. Distinct layer names are used here
    # so the two pooling layers cannot clash when both backbones are ResNet50.
    if 'ResNet50' == body_backbone_CNN:
        body_plain_model_output = GlobalAveragePooling2D(name='BODY_GAP')(body_plain_model_output)

    if 'ResNet50' == image_backbone_CNN:
        image_plain_model_output = GlobalAveragePooling2D(name='IMAGE_GAP')(image_plain_model_output)

    merged = concatenate([body_plain_model_output, image_plain_model_output])

    x = Dense(256, activation='relu', name='FC1',
              kernel_regularizer=regularizers.l2(0.01),
              kernel_initializer='random_normal')(merged)
    x = Dropout(0.5, name='DROPOUT')(x)
    vad_cont_prediction = Dense(units=3, kernel_initializer='random_normal', name='VAD')(x)

    # At model instantiation, you specify the two inputs and the output.
    self.model = Model(inputs=[body_inputs, image_inputs],
                       outputs=vad_cont_prediction,
                       name='EMOTIC-VAD-regression')

    print('[INFO] Randomly initialised classifier was successfully added on top of the merged modules.')

    print('[INFO] Number of trainable weights before freezing the conv. bases of the respective original models: '
          + str(len(self.model.trainable_weights)))

    # first: train only the top layers (which were randomly initialized),
    # i.e. freeze all convolutional layers of the preliminary base model
    for layer in self.body_truncated_model.layers:
        layer.trainable = False
    for layer in self.image_truncated_model.layers:
        layer.trainable = False

    print('[INFO] Number of trainable weights after freezing the conv. bases of the respective original models: '
          + str(len(self.model.trainable_weights)))

    # reference: https://github.com/keras-team/keras/issues/4735#issuecomment-267472549
    # self.class_weight = {
    #     'VALENCE':   {0: 36.00, 1: 36.00, 2: 12.00, 3: 5.14, 4: 2.25, 5: 1.00, 6: 1.89, 7: 2.57, 8: 12.00, 9: 36.00},
    #     'AROUSAL':   {0: 23.00, 1: 11.50, 2: 4.60, 3: 1.00, 4: 2.09, 5: 1.64, 6: 1.14, 7: 2.09, 8: 3.83, 9: 4.60},
    #     'DOMINANCE': {0: 34.00, 1: 17.00, 2: 11.33, 3: 6.80, 4: 5.66, 5: 1.70, 6: 1.00, 7: 2.42, 8: 3.40, 9: 6.80}
    # }

    self.model.compile(optimizer=SGD(lr=1e-5, momentum=0.9),
                       # loss='mse',
                       loss=euclidean_distance_loss,
                       metrics=['mae', 'mse', rmse])

    # print('[INFO] Metrics names: ', self.model.metrics_names)

    print('[INFO] End-to-end `EMOTIC-VAD-regression` model has been successfully compiled.')

    # -------------------------------------------------------------------------------- #
    #                                   Configurations                                   #
    # -------------------------------------------------------------------------------- #
    nb_train_samples = 23706
    nb_val_samples = 3332
    nb_test_samples = 7280

    train_generator_batch_size = 54
    val_generator_batch_size = 49
    test_generator_batch_size = 52

    self.steps_per_epoch = nb_train_samples // train_generator_batch_size
    self.validation_steps = nb_val_samples // val_generator_batch_size
    self.test_steps = nb_test_samples // test_generator_batch_size

    # -------------------------------------------------------------------------------- #
    #                                 Read the HDF5 file                                 #
    # -------------------------------------------------------------------------------- #
    # open the hdf5 file
    hdf5_file = h5py.File(hdf5_file, "r")

    self.nb_train_data = hdf5_file["x_image_train"].shape[0]
    self.nb_val_data = hdf5_file["x_image_val"].shape[0]
    self.nb_test_data = hdf5_file["x_image_test"].shape[0]

    # -------------------------------------------------------------------------------- #
    #                          Instantiate the custom generators                         #
    # -------------------------------------------------------------------------------- #
    print('[INFO] Setting up custom generators...')

    self.train_generator = custom_generator_single_output(hdf5_file=hdf5_file,
                                                          nb_data=self.nb_train_data,
                                                          batch_size=train_generator_batch_size,
                                                          mode='train')

    self.val_generator = custom_generator_single_output(hdf5_file=hdf5_file,
                                                        nb_data=self.nb_val_data,
                                                        batch_size=val_generator_batch_size,
                                                        mode='val')

    self.test_generator = custom_generator_single_output(hdf5_file=hdf5_file,
                                                         nb_data=self.nb_test_data,
                                                         batch_size=test_generator_batch_size,
                                                         mode='test')

    # -------------------------------------------------------------------------------- #
    #                                  Usage of callbacks                                #
    # -------------------------------------------------------------------------------- #
    self.weights_to_file = weights_to_file
    self.nb_of_epochs = nb_of_epochs

    # CSVLogger
    model_log = 'trained_models/logs/' + CSVLogger_filename
    csv_logger = CSVLogger(model_log, append=True, separator=',')

    # ModelCheckpoint
    checkpointer = ModelCheckpoint(filepath=weights_to_file,
                                   monitor=modelCheckpoint_quantity,
                                   verbose=1,
                                   save_best_only=True,
                                   mode='auto',
                                   period=1,
                                   save_weights_only=True)

    early_stop = EarlyStopping(monitor=earlyStopping_quantity, patience=5, mode='auto')

    self.callbacks_list = [checkpointer, early_stop, csv_logger]
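
# Usage sketch (assumption, not from the original source): `EMOTIC_VAD` is a
# hypothetical name for the class whose `__init__` is defined above; the HDF5
# path and log/weights filenames are placeholders.
if __name__ == '__main__':
    trainer = EMOTIC_VAD(hdf5_file='EMOTIC.hdf5',
                         body_backbone_CNN='ResNet50',
                         image_backbone_CNN='VGG16_Places365',
                         nb_of_epochs=50,
                         weights_to_file='trained_models/emotic_vad_weights.h5',
                         modelCheckpoint_quantity='val_loss',
                         earlyStopping_quantity='val_loss',
                         CSVLogger_filename='emotic_vad_log.csv')

    # Train with the generators and callbacks prepared in `__init__`.
    trainer.model.fit_generator(trainer.train_generator,
                                steps_per_epoch=trainer.steps_per_epoch,
                                epochs=trainer.nb_of_epochs,
                                validation_data=trainer.val_generator,
                                validation_steps=trainer.validation_steps,
                                callbacks=trainer.callbacks_list)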
def compoundNet_feature_extraction(object_centric_model='VGG16',
                                   scene_centric_model='VGG16_Places365',
                                   fusion_strategy='concatenate',
                                   pooling_mode='avg',
                                   classes=9,
                                   data_augm_enabled=False):
    """ConvNet as fixed feature extractor: consists of taking the convolutional base of a
    previously-trained network, running the new data through it, and training a new classifier
    on top of the output (i.e. train only the randomly initialised top layers while freezing
    all convolutional layers of the original model).

    # Arguments
        object_centric_model: one of `VGG16`, `VGG19` or `ResNet50`
        scene_centric_model: `VGG16_Places365`
        fusion_strategy: one of `concatenate` (feature vectors of different sources
            are concatenated into one super-vector), `average` (the feature set is averaged)
            or `maximum` (selects the highest value from the corresponding features).
        pooling_mode: pooling mode applied to the output of the last convolutional
            layer, making the output of the model a 2D tensor.
            - `avg` means that global average pooling will be applied.
            - `max` means that global max pooling will be applied.
            - `flatten` means that the output will be flattened,
                resulting in a larger Dense layer afterwards.
        classes: optional number of classes to classify images into,
            only to be specified if `weights` argument is `None`.
        data_augm_enabled: whether to use the augmented samples during training.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `object_centric_model`, `pooling_mode`,
            `fusion_strategy`, `scene_centric_model` or invalid input shape.
    """
    if object_centric_model not in {'VGG16', 'VGG19', 'ResNet50'}:
        raise ValueError('The `object_centric_model` argument should be either '
                         '`VGG16`, `VGG19` or `ResNet50`. Other models will be supported in future releases.')

    if pooling_mode not in {'avg', 'max', 'flatten'}:
        raise ValueError('The `pooling_mode` argument should be either '
                         '`avg` (GlobalAveragePooling2D), `max` (GlobalMaxPooling2D), '
                         'or `flatten` (Flatten).')

    if fusion_strategy not in {'concatenate', 'average', 'maximum'}:
        raise ValueError('The `fusion_strategy` argument should be either '
                         '`concatenate` (feature vectors of different sources are '
                         'concatenated into one super-vector), '
                         '`average` (the feature set is averaged) '
                         'or `maximum` (selects the highest value from the corresponding features).')

    if scene_centric_model not in {'VGG16_Places365'}:
        raise ValueError('The `scene_centric_model` argument should be '
                         '`VGG16_Places365`. Other models will be supported in future releases.')

    # Define the name of the model and its weights
    weights_name = 'compoundNet_feature_extraction_' \
                   + object_centric_model + '_' \
                   + fusion_strategy + '_fusion_' \
                   + pooling_mode + '_pool_weights_tf_dim_ordering_tf_kernels.h5'

    augm_samples_weights_name = 'augm_compoundNet_feature_extraction_' \
                                + object_centric_model + '_' \
                                + fusion_strategy + '_fusion_' \
                                + pooling_mode + '_pool_weights_tf_dim_ordering_tf_kernels.h5'

    model_log = logs_dir + 'compoundNet_feature_extraction_' \
                + object_centric_model + '_' \
                + fusion_strategy + '_fusion_' \
                + pooling_mode + '_pool_log.csv'

    csv_logger = CSVLogger(model_log, append=True, separator=',')

    input_tensor = Input(shape=(224, 224, 3))

    # create the base object_centric_model pre-trained model for warm-up
    if object_centric_model == 'VGG16':
        object_base_model = VGG16(input_tensor=input_tensor, weights='imagenet', include_top=False)
    elif object_centric_model == 'VGG19':
        object_base_model = VGG19(input_tensor=input_tensor, weights='imagenet', include_top=False)
    elif object_centric_model == 'ResNet50':
        tmp_model = ResNet50(input_tensor=input_tensor, weights='imagenet', include_top=False)
        object_base_model = Model(inputs=tmp_model.input,
                                  outputs=tmp_model.get_layer('activation_48').output)

    print('\n \n')
    print('The plain, object-centric `' + object_centric_model + '` pre-trained convnet was successfully initialised.\n')

    scene_base_model = VGG16_Places365(input_tensor=input_tensor, weights='places', include_top=False)

    print('The plain, scene-centric `' + scene_centric_model + '` pre-trained convnet was successfully initialised.\n')

    # retrieve the outputs
    object_base_model_output = object_base_model.output
    scene_base_model_output = scene_base_model.output

    # We will feed the extracted features to a merging layer
    if fusion_strategy == 'concatenate':
        merged = concatenate([object_base_model_output, scene_base_model_output])
    elif fusion_strategy == 'average':
        merged = average([object_base_model_output, scene_base_model_output])
    else:
        merged = maximum([object_base_model_output, scene_base_model_output])

    if pooling_mode == 'avg':
        x = GlobalAveragePooling2D(name='GAP')(merged)
    elif pooling_mode == 'max':
        x = GlobalMaxPooling2D(name='GMP')(merged)
    elif pooling_mode == 'flatten':
        x = Flatten(name='FLATTEN')(merged)

    x = Dense(256, activation='relu', name='FC1')(x)  # let's add a fully-connected layer

    # When random init is enabled, we want to include Dropout,
    # otherwise when loading a pre-trained HRA model we want to omit the
    # Dropout layer so the visualisations are done properly (there is an issue if it is included)
    x = Dropout(0.5, name='DROPOUT')(x)

    # and a logistic layer with the number of classes defined by the `classes` argument
    predictions = Dense(classes, activation='softmax', name='PREDICTIONS')(x)  # new softmax layer

    # this is the transfer learning model we will train
    model = Model(inputs=object_base_model.input, outputs=predictions)

    print('Randomly initialised classifier was successfully added on top of the merged outputs. \n')

    print('Number of trainable weights before freezing the conv. bases of the respective original models: '
          + str(len(model.trainable_weights)))

    # first: train only the top layers (which were randomly initialized),
    # i.e. freeze all convolutional layers of the preliminary base model
    for layer in object_base_model.layers:
        layer.trainable = False
    for layer in scene_base_model.layers:
        layer.trainable = False

    print('Number of trainable weights after freezing the conv. bases of the respective original models: '
          + str(len(model.trainable_weights)))

    print('\n')

    # compile the warm_up_model (should be done *after* setting layers to non-trainable)
    model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    # The attribute model.metrics_names will give you the display labels for the scalar outputs.
    # print warm_up_model.metrics_names

    if data_augm_enabled:
        print('Using augmented samples for training. This may take a while ! \n')

        t = now()
        history = model.fit_generator(augmented_train_generator,
                                      steps_per_epoch=nb_train_samples // batch_size,
                                      epochs=feature_extraction_epochs,
                                      callbacks=[csv_logger],
                                      class_weight=class_weight)

        print('Training time for re-training the last Dense layer using augmented samples: %s' % (now() - t))

        model.save_weights(feature_extraction_dir + augm_samples_weights_name)
        print('Model weights using augmented samples were saved as `' + augm_samples_weights_name + '`')
        print('\n')
    else:
        t = now()
        history = model.fit_generator(train_generator,
                                      steps_per_epoch=nb_train_samples // batch_size,
                                      epochs=feature_extraction_epochs,
                                      callbacks=[csv_logger],
                                      class_weight=class_weight)

        print('Training time for re-training the last Dense layer: %s' % (now() - t))

        model.save_weights(feature_extraction_dir + weights_name)
        print('Model weights were saved as `' + weights_name + '`')
        print('\n')

    return model
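
# Usage sketch (assumption): warm-up training of the compound classifier with
# averaged VGG19/VGG16_Places365 features. Relies on the module-level generators
# and configuration that the function itself references.
if __name__ == '__main__':
    model = compoundNet_feature_extraction(object_centric_model='VGG19',
                                           fusion_strategy='average',
                                           pooling_mode='avg',
                                           classes=9,
                                           data_augm_enabled=False)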
def feature_extraction(pre_trained_model='VGG16', pooling_mode='avg', classes=9, data_augm_enabled=False):
    """ConvNet as fixed feature extractor: consists of taking the convolutional base of a
    previously-trained network, running the new data through it, and training a new classifier
    on top of the output (i.e. train only the randomly initialised top layers while freezing
    all convolutional layers of the original model).

    # Arguments
        pre_trained_model: one of `VGG16`, `VGG19`, `ResNet50`, `VGG16_Places365`
        pooling_mode: pooling mode for feature extraction, applied to the output
            of the last convolutional layer.
            - `avg` means that global average pooling will be applied,
                and thus the output of the model will be a 2D tensor.
            - `max` means that global max pooling will be applied.
            - `flatten` means that the output will be flattened,
                resulting in a larger Dense layer afterwards.
        classes: optional number of classes to classify images into,
            only to be specified if `weights` argument is `None`.
        data_augm_enabled: whether to augment the samples during training

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `pre_trained_model`, `pooling_mode`
            or invalid input shape.
    """
    if pre_trained_model not in {'VGG16', 'VGG19', 'ResNet50', 'VGG16_Places365'}:
        raise ValueError('The `pre_trained_model` argument should be either '
                         '`VGG16`, `VGG19`, `ResNet50`, '
                         'or `VGG16_Places365`. Other models will be supported in future releases.')

    if pooling_mode not in {'avg', 'max', 'flatten'}:
        raise ValueError('The `pooling_mode` argument should be either '
                         '`avg` (GlobalAveragePooling2D), `max` (GlobalMaxPooling2D), '
                         'or `flatten` (Flatten).')

    # Define the name of the model and its weights
    weights_name = 'feature_extraction_' + pre_trained_model + '_' + pooling_mode + '_pool_weights_tf_dim_ordering_tf_kernels.h5'
    augm_samples_weights_name = 'augm_feature_extraction_' + pre_trained_model + '_' + pooling_mode + '_pool_weights_tf_dim_ordering_tf_kernels.h5'

    model_log = logs_dir + 'feature_extraction_' + pre_trained_model + '_' + pooling_mode + '_pool_log.csv'
    csv_logger = CSVLogger(model_log, append=True, separator=',')

    input_tensor = Input(shape=(224, 224, 3))

    # create the base pre-trained model for warm-up
    if pre_trained_model == 'VGG16':
        base_model = VGG16(weights='imagenet', include_top=False, input_tensor=input_tensor)
    elif pre_trained_model == 'VGG19':
        base_model = VGG19(weights='imagenet', include_top=False, input_tensor=input_tensor)
    elif pre_trained_model == 'ResNet50':
        base_model = ResNet50(weights='imagenet', include_top=False, input_tensor=input_tensor)
    elif pre_trained_model == 'VGG16_Places365':
        base_model = VGG16_Places365(weights='places', include_top=False, input_tensor=input_tensor)

    print('\n \n')
    print('The plain `' + pre_trained_model + '` pre-trained convnet was successfully initialised.\n')

    x = base_model.output

    # Now we set up the transfer learning process - freeze all but the penultimate layer
    # and re-train the last Dense layer with 9 final outputs representing probabilities for HRA classes.

    # Build a randomly initialised classifier model to put on top of the convolutional model.
    # Both `avg` and `max` result in the same size of the Dense layer afterwards.
    # Flatten, GlobalAveragePooling2D and GlobalMaxPooling2D are all valid options.
    # Flatten will result in a larger Dense layer afterwards, which is more expensive
    # and may result in worse overfitting. But if you have lots of data, it might also perform better.
    # https://github.com/keras-team/keras/issues/8470
    if pooling_mode == 'avg':
        x = GlobalAveragePooling2D(name='GAP')(x)
    elif pooling_mode == 'max':
        x = GlobalMaxPooling2D(name='GMP')(x)
    elif pooling_mode == 'flatten':
        x = Flatten(name='FLATTEN')(x)

    x = Dense(256, activation='relu', name='FC1')(x)  # let's add a fully-connected layer

    # When random init is enabled, we want to include Dropout,
    # otherwise when loading a pre-trained HRA model we want to omit the
    # Dropout layer so the visualisations are done properly (there is an issue if it is included)
    x = Dropout(0.5, name='DROPOUT')(x)

    # and a logistic layer with the number of classes defined by the `classes` argument
    predictions = Dense(classes, activation='softmax', name='PREDICTIONS')(x)  # new softmax layer

    # this is the transfer learning model we will train
    model = Model(inputs=base_model.input, outputs=predictions)

    print('Randomly initialised classifier was successfully added on top of the original pre-trained conv. base. \n')

    print('Number of trainable weights before freezing the conv. base of the original pre-trained convnet: '
          + str(len(model.trainable_weights)))

    # first: train only the top layers (which were randomly initialized),
    # i.e. freeze all convolutional layers of the preliminary base model
    for layer in base_model.layers:
        layer.trainable = False

    print('Number of trainable weights after freezing the conv. base of the pre-trained convnet: '
          + str(len(model.trainable_weights)))

    print('\n')

    # compile the warm_up_model (should be done *after* setting layers to non-trainable)
    model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    # The attribute model.metrics_names will give you the display labels for the scalar outputs.
    # print warm_up_model.metrics_names

    if data_augm_enabled:
        print('Using augmented samples for training. This may take a while ! \n')

        t = now()
        history = model.fit_generator(augmented_train_generator,
                                      steps_per_epoch=nb_train_samples // batch_size,
                                      epochs=feature_extraction_epochs,
                                      callbacks=[csv_logger])

        print('Training time for re-training the last Dense layer using augmented samples: %s' % (now() - t))

        model.save_weights(feature_extraction_dir + augm_samples_weights_name)
        print('Model weights using augmented samples were saved as `' + augm_samples_weights_name + '`')
        print('\n')
    else:
        t = now()
        history = model.fit_generator(train_generator,
                                      steps_per_epoch=nb_train_samples // batch_size,
                                      epochs=feature_extraction_epochs,
                                      callbacks=[csv_logger])

        print('Training time for re-training the last Dense layer: %s' % (now() - t))

        model.save_weights(feature_extraction_dir + weights_name)
        print('Model weights were saved as `' + weights_name + '`')
        print('\n')

    return model
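
# Usage sketch (assumption): re-train only the classifier on top of a frozen
# VGG16 base. Relies on the module-level generators and configuration that the
# function itself references.
if __name__ == '__main__':
    model = feature_extraction(pre_trained_model='VGG16',
                               pooling_mode='avg',
                               classes=9,
                               data_augm_enabled=False)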
def HRA_VGG16_Places365(include_top=True, weights='HRA',
                        input_tensor=None, input_shape=None,
                        nb_of_conv_layers_to_fine_tune=None,
                        first_phase_trained_weights=None,
                        violation_class='cl',
                        verbose=0):
    """Instantiates the VGG16-Places365 architecture fine-tuned (2 steps) on the Human Rights Archive dataset.

    Optionally loads weights pre-trained on the 2-class version of the Human Rights Archive database.

    # Arguments
        include_top: whether to include the 3 fully-connected layers at the top of the network.
        weights: one of `None` (random initialization),
            'HRA' (pre-training on Human Rights Archive),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 48.
            E.g. `(200, 200, 3)` would be one valid value.
        nb_of_conv_layers_to_fine_tune: integer to indicate the number of convolutional layers to fine-tune.
            One of `1` (2,499,360 trainable params), `2` (4,859,168 trainable params)
            or `3` (7,218,976 trainable params).
        first_phase_trained_weights: weights of an already trained Keras model instance.
            Only relevant when using `fine_tuning` as train_mode after `feature_extraction`
            weights have been saved.
        violation_class: one of `cl` (HRA dataset with 2 classes - [i] 'child_labour' and [ii] 'no violation')
            or `dp` (HRA dataset with 2 classes - [i] 'displaced_populations' and [ii] 'no violation').
        verbose: Integer. 0 or 1. Verbosity mode. 0 = silent, 1 = model summary and weights info.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`, `violation_class`,
            `nb_of_conv_layers_to_fine_tune` or invalid input shape.
    """
    if not (weights in {'HRA', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `HRA` '
                         '(pre-training on Human Rights Archive two-class), '
                         'or the path to the weights file to be loaded.')

    if violation_class not in {'cl', 'dp'}:
        raise ValueError("The `violation_class` argument should be either "
                         "`cl` (HRA dataset with 2 classes - [i] 'child_labour' and [ii] 'no violation') "
                         "or `dp` (HRA dataset with 2 classes - [i] 'displaced_populations' and [ii] 'no violation').")

    if nb_of_conv_layers_to_fine_tune is None and include_top is False:
        raise ValueError('Setting the `include_top` argument as false '
                         'is only relevant when the `nb_of_conv_layers_to_fine_tune` argument '
                         'is not None (fine-tuning), otherwise the returned model would be '
                         'exactly the default keras-applications model.')

    if weights == 'HRA' and first_phase_trained_weights is not None:
        raise ValueError('Setting the `first_phase_trained_weights` argument as the path to the weights file '
                         'obtained from utilising feature_extraction '
                         'is only relevant when the `weights` argument is `None`. '
                         'If the `weights` argument is `HRA`, the model has already been trained on the HRA dataset '
                         'and there is no need to provide a path to the weights file (saved from feature_extraction) to be loaded.')

    if nb_of_conv_layers_to_fine_tune not in {1, 2, 3, None}:
        raise ValueError('The `nb_of_conv_layers_to_fine_tune` argument should be either '
                         '`None` (indicates feature extraction mode), `1`, `2` or `3`. '
                         'Fine-tuning more than 3 conv. layers is not supported, because the more '
                         'parameters we train, the greater the risk of overfitting.')

    cache_subdir = 'AbuseNet'

    mode = _obtain_train_mode(nb_of_conv_layers_to_fine_tune=nb_of_conv_layers_to_fine_tune)

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=48,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Ensure that the model takes into account any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # create the base pre-trained model
    base_model = VGG16_Places365(weights='places', include_top=False, input_tensor=img_input)
    x = base_model.output

    # Classification block - build a classifier model to put on top of the convolutional model
    if include_top:
        # add a global spatial pooling layer (which seems to have the best performance)
        x = GlobalAveragePooling2D(name='GAP')(x)

        # add a fully-connected layer
        x = Dense(256, activation='relu', name='FC1')(x)

        # When random init is enabled, we want to include Dropout,
        # otherwise when loading a pre-trained HRA model we want to omit that layer,
        # so the visualisations are done properly (there is an issue if it is included)
        if weights is None:
            x = Dropout(0.5, name='DROPOUT')(x)

        # and a logistic layer with the number of classes defined by the `classes` argument
        x = Dense(2, activation='softmax', name='PREDICTIONS')(x)

        model = Model(inputs=inputs, outputs=x, name='HRA-2CLASS-VGG16_Places365')
    else:
        model = Model(inputs=inputs, outputs=x, name='HRA-2CLASS-VGG16_Places365-NO-TOP')

    if mode == 'feature_extraction':
        print('[INFO] Feature extraction mode. \n')

        if verbose == 1:
            print('[INFO] Number of trainable weights before freezing the conv. base of the original pre-trained convnet: '
                  + str(len(model.trainable_weights)))

        for layer in base_model.layers:
            layer.trainable = False

        if verbose == 1:
            print('[INFO] Number of trainable weights after freezing the conv. base of the original pre-trained convnet: '
                  + str(len(model.trainable_weights)))

        model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

    elif mode == 'fine_tuning':
        if nb_of_conv_layers_to_fine_tune == 1:
            # Uncomment for extra verbosity
            # print('[INFO] Fine-tuning of the last one (1) conv. layer. \n')
            if verbose == 1:
                print('[INFO] Number of trainable weights before unfreezing the last conv. layer of the model with the retrained classifier: '
                      + str(len(model.trainable_weights)))

            for layer in model.layers[:17]:
                layer.trainable = False
            for layer in model.layers[17:]:
                layer.trainable = True

            if verbose == 1:
                print('[INFO] Number of trainable weights after unfreezing the last conv. layer of the model with the retrained classifier: '
                      + str(len(model.trainable_weights)))

        elif nb_of_conv_layers_to_fine_tune == 2:
            # Uncomment for extra verbosity
            # print('[INFO] Fine-tuning of the last two (2) conv. layers. \n')
            if verbose == 1:
                print('[INFO] Number of trainable weights before unfreezing the last two (2) conv. layers of the model with the retrained classifier: '
                      + str(len(model.trainable_weights)))

            for layer in model.layers[:16]:
                layer.trainable = False
            for layer in model.layers[16:]:
                layer.trainable = True

            if verbose == 1:
                print('[INFO] Number of trainable weights after unfreezing the last two (2) conv. layers of the model with the retrained classifier: '
                      + str(len(model.trainable_weights)))

        elif nb_of_conv_layers_to_fine_tune == 3:
            # Uncomment for extra verbosity
            # print('[INFO] Fine-tuning of the last three (3) conv. layers. \n')
            if verbose == 1:
                print('[INFO] Number of trainable weights before unfreezing the last three (3) conv. layers of the model with the retrained classifier: '
                      + str(len(model.trainable_weights)))

            for layer in model.layers[:15]:
                layer.trainable = False
            for layer in model.layers[15:]:
                layer.trainable = True

            if verbose == 1:
                print('[INFO] Number of trainable weights after unfreezing the last three (3) conv. layers of the model with the retrained classifier: '
                      + str(len(model.trainable_weights)))

        model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

    if verbose == 1:
        model.summary()

    # load weights
    if weights == 'HRA':
        # Child labour
        if violation_class == 'cl':
            if include_top:
                if mode == 'feature_extraction':
                    weights_path = get_file(CL_FEATURE_EXTRACTION_FNAME,
                                            CL_WEIGHTS_FEATURE_EXTRACTION_PATH,
                                            cache_subdir=cache_subdir)
                elif mode == 'fine_tuning':
                    if nb_of_conv_layers_to_fine_tune == 1:
                        weights_path = get_file(CL_PATH_ONE_CONV_LAYER_FNAME,
                                                CL_WEIGHTS_PATH_ONE_CONV_LAYER,
                                                cache_subdir=cache_subdir)
                    elif nb_of_conv_layers_to_fine_tune == 2:
                        weights_path = get_file(CL_PATH_TWO_CONV_LAYERS_FNAME,
                                                CL_WEIGHTS_PATH_TWO_CONV_LAYERS,
                                                cache_subdir=cache_subdir)
                    elif nb_of_conv_layers_to_fine_tune == 3:
                        weights_path = get_file(CL_PATH_THREE_CONV_LAYERS_FNAME,
                                                CL_WEIGHTS_PATH_THREE_CONV_LAYERS,
                                                cache_subdir=cache_subdir)
            # no top
            else:
                if nb_of_conv_layers_to_fine_tune == 1:
                    weights_path = get_file(CL_PATH_ONE_CONV_LAYER_NO_TOP_FNAME,
                                            CL_WEIGHTS_PATH_ONE_CONV_LAYER_NO_TOP,
                                            cache_subdir=cache_subdir)
                elif nb_of_conv_layers_to_fine_tune == 2:
                    weights_path = get_file(CL_PATH_TWO_CONV_LAYERS_NO_TOP_FNAME,
                                            CL_WEIGHTS_PATH_TWO_CONV_LAYERS_NO_TOP,
                                            cache_subdir=cache_subdir)
                elif nb_of_conv_layers_to_fine_tune == 3:
                    weights_path = get_file(CL_PATH_THREE_CONV_LAYERS_NO_TOP_FNAME,
                                            CL_WEIGHTS_PATH_THREE_CONV_LAYERS_NO_TOP,
                                            cache_subdir=cache_subdir)

        # Displaced populations
        elif violation_class == 'dp':
            if include_top:
                if mode == 'feature_extraction':
                    weights_path = get_file(DP_FEATURE_EXTRACTION_FNAME,
                                            DP_WEIGHTS_FEATURE_EXTRACTION_PATH,
                                            cache_subdir=cache_subdir)
                elif mode == 'fine_tuning':
                    if nb_of_conv_layers_to_fine_tune == 1:
                        weights_path = get_file(DP_PATH_ONE_CONV_LAYER_FNAME,
                                                DP_WEIGHTS_PATH_ONE_CONV_LAYER,
                                                cache_subdir=cache_subdir)
                    elif nb_of_conv_layers_to_fine_tune == 2:
                        weights_path = get_file(DP_PATH_TWO_CONV_LAYERS_FNAME,
                                                DP_WEIGHTS_PATH_TWO_CONV_LAYERS,
                                                cache_subdir=cache_subdir)
                    elif nb_of_conv_layers_to_fine_tune == 3:
                        weights_path = get_file(DP_PATH_THREE_CONV_LAYERS_FNAME,
                                                DP_WEIGHTS_PATH_THREE_CONV_LAYERS,
                                                cache_subdir=cache_subdir)
            # no top
            else:
                if nb_of_conv_layers_to_fine_tune == 1:
                    weights_path = get_file(DP_PATH_ONE_CONV_LAYER_NO_TOP_FNAME,
                                            DP_WEIGHTS_PATH_ONE_CONV_LAYER_NO_TOP,
                                            cache_subdir=cache_subdir)
                elif nb_of_conv_layers_to_fine_tune == 2:
                    weights_path = get_file(DP_PATH_TWO_CONV_LAYERS_NO_TOP_FNAME,
                                            DP_WEIGHTS_PATH_TWO_CONV_LAYERS_NO_TOP,
                                            cache_subdir=cache_subdir)
                elif nb_of_conv_layers_to_fine_tune == 3:
                    weights_path = get_file(DP_PATH_THREE_CONV_LAYERS_NO_TOP_FNAME,
                                            DP_WEIGHTS_PATH_THREE_CONV_LAYERS_NO_TOP,
                                            cache_subdir=cache_subdir)

        model.load_weights(weights_path)

    elif weights is not None:
        model.load_weights(weights)

    return model
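
# Usage sketch (assumption): load the two-class child-labour detector with the
# last conv. layer fine-tuned and score one placeholder batch.
if __name__ == '__main__':
    import numpy as np

    model = HRA_VGG16_Places365(weights='HRA',
                                nb_of_conv_layers_to_fine_tune=1,
                                violation_class='cl',
                                verbose=1)

    dummy_batch = np.random.rand(1, 224, 224, 3)  # placeholder input
    print(model.predict(dummy_batch))  # (1, 2): probabilities over the 2 classes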
def __init__(self, pre_trained_model):
    """Base class for feature extraction.

    :param pre_trained_model: one of `VGG16`, `VGG19`, `ResNet50`, `VGG16_Places365`
    """
    # Base directory of raw jpg/png images
    base_dir = '/home/gkallia/git/Human-Rights-Archive-CNNs/datasets/Human_Rights_Archive_DB'

    train_dir = os.path.join(base_dir, 'train_val')
    test_dir = os.path.join(base_dir, 'test')
    self.nb_train_samples = 3050
    self.nb_test_samples = 270

    # human_rights_classes = ['arms', 'child_labour', 'child_marriage', 'detention_centres',
    #                         'disability_rights', 'displaced_populations', 'environment',
    #                         'no_violation', 'out_of_school']

    # Augmentation configuration with only rescaling.
    # Rescale is a value by which we will multiply the data before any other processing.
    # Our original images consist of RGB values in the 0-255 range, but such values would
    # be too high for our models to process (given a typical learning rate),
    # so we target values between 0 and 1 instead by scaling with a 1/255. factor.
    datagen = ImageDataGenerator(rescale=1. / 255)

    img_width, img_height = 224, 224

    self.train_batch_size = 25
    self.test_batch_size = 15

    self.train_generator = datagen.flow_from_directory(train_dir,
                                                       target_size=(img_width, img_height),
                                                       class_mode='categorical',
                                                       batch_size=self.train_batch_size)

    self.test_generator = datagen.flow_from_directory(test_dir,
                                                      target_size=(img_width, img_height),
                                                      class_mode='categorical',
                                                      batch_size=self.test_batch_size)

    if pre_trained_model not in {'VGG16', 'VGG19', 'ResNet50', 'VGG16_Places365'}:
        raise ValueError('The `pre_trained_model` argument should be either '
                         '`VGG16`, `VGG19`, `ResNet50`, '
                         'or `VGG16_Places365`. Other models will be supported in future releases.')

    input_tensor = Input(shape=(224, 224, 3))

    # create the base pre-trained model for warm-up
    if pre_trained_model == 'VGG16':
        self.conv_base = VGG16(weights='imagenet', include_top=False, input_tensor=input_tensor)
    elif pre_trained_model == 'VGG19':
        self.conv_base = VGG19(weights='imagenet', include_top=False, input_tensor=input_tensor)
    elif pre_trained_model == 'ResNet50':
        self.conv_base = ResNet50(weights='imagenet', include_top=False, input_tensor=input_tensor)
    elif pre_trained_model == 'VGG16_Places365':
        self.conv_base = VGG16_Places365(weights='places', include_top=False, input_tensor=input_tensor)

    self.bottleneck_train_features_filename = 'bottleneck_train_features_' + pre_trained_model + '.npy'
    self.bottleneck_train_labels_filename = 'bottleneck_train_labels_' + pre_trained_model + '.npy'
    self.bottleneck_test_features_filename = 'bottleneck_test_features_' + pre_trained_model + '.npy'
    self.bottleneck_test_labels_filename = 'bottleneck_test_labels_' + pre_trained_model + '.npy'

    self.cache_subdir = 'HRA_models'
    self.pre_trained_model = pre_trained_model
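
# Usage sketch (assumption): `FeatureExtraction` is a hypothetical name for the
# class whose `__init__` is defined above. Extract bottleneck features from the
# frozen conv. base for the whole train split.
if __name__ == '__main__':
    extractor = FeatureExtraction(pre_trained_model='VGG16')

    bottleneck_features = extractor.conv_base.predict_generator(
        extractor.train_generator,
        steps=extractor.nb_train_samples // extractor.train_batch_size)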
def EMOTIC_VAD_ResNet50(include_top=True, weights='emotic'):
    """Instantiates the EMOTIC_VAD_ResNet50 architecture.

    Optionally loads weights pre-trained on EMOTIC.
    Note that when using TensorFlow, for best performance you should set
    `image_data_format="channels_last"` in your Keras config at ~/.keras/keras.json.

    The model and the weights are compatible with both TensorFlow and Theano.
    The data format convention used by the model is the one specified in your Keras config file.

    # Arguments
        include_top: whether to include the 3 fully-connected layers at the top of the network.
        weights: one of `None` (random initialization),
            'emotic' (pre-training on EMOTIC),
            or the path to the weights file to be loaded.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`
    """
    if not (weights in {'emotic', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `emotic` '
                         '(pre-training on EMOTIC dataset), '
                         'or the path to the weights file to be loaded.')

    body_inputs = Input(shape=(224, 224, 3), name='INPUT')
    image_inputs = Input(shape=(224, 224, 3), name='INPUT')

    # Body module
    tmp_model = ResNet50(include_top=False, weights='imagenet', input_tensor=body_inputs, pooling='avg')
    body_truncated_model = Model(inputs=tmp_model.input, outputs=tmp_model.get_layer(index=169).output)
    # body_truncated_model = Model(inputs=tmp_model.input, outputs=tmp_model.get_layer('activation_48').output)

    # Prefixing the layer names also disambiguates the two `INPUT` layers
    # before the combined two-input model is built.
    for layer in body_truncated_model.layers:
        layer.name = 'body-' + layer.name

    # Image module
    image_truncated_model = VGG16_Places365(include_top=False, weights='places',
                                            input_tensor=image_inputs, pooling='avg')

    for layer in image_truncated_model.layers:
        layer.name = 'image-' + layer.name

    # retrieve the outputs
    body_plain_model_output = body_truncated_model.output
    image_plain_model_output = image_truncated_model.output

    # The truncated ResNet50 ends before its pooling layer, so an explicit global
    # average pooling layer is needed to follow the process used for the other CNNs.
    body_plain_model_output = GlobalAveragePooling2D(name='GAP')(body_plain_model_output)

    merged = concatenate([body_plain_model_output, image_plain_model_output])

    x = Dense(256, activation='relu', name='FC1',
              kernel_regularizer=regularizers.l2(0.01),
              kernel_initializer='random_normal')(merged)
    x = Dropout(0.5, name='DROPOUT')(x)
    vad_cont_prediction = Dense(units=3, kernel_initializer='random_normal', name='VAD')(x)

    # At model instantiation, you specify the two inputs and the output.
    model = Model(inputs=[body_inputs, image_inputs], outputs=vad_cont_prediction,
                  name='EMOTIC-VAD-regression-ResNet50')

    for layer in body_truncated_model.layers:
        layer.trainable = False
    for layer in image_truncated_model.layers:
        layer.trainable = False

    model.compile(optimizer=SGD(lr=1e-5, momentum=0.9),
                  loss=euclidean_distance_loss,
                  metrics=['mae', 'mse', rmse])

    # load weights
    if weights == 'emotic':
        if include_top:
            weights_path = get_file('emotic_vad_ResNet50_weights_tf_dim_ordering_tf_kernels.h5',
                                    WEIGHTS_PATH,
                                    cache_subdir='AbuseNet')
        else:
            weights_path = get_file('emotic_vad_ResNet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
                                    WEIGHTS_PATH_NO_TOP,
                                    cache_subdir='AbuseNet')

        model.load_weights(weights_path)

    elif weights is not None:
        model.load_weights(weights)

    return model
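
# Usage sketch (assumption): predict continuous valence/arousal/dominance
# scores for one placeholder body crop and its whole-image context.
if __name__ == '__main__':
    import numpy as np

    model = EMOTIC_VAD_ResNet50(weights='emotic')

    body_crop = np.random.rand(1, 224, 224, 3)    # placeholder body region
    whole_image = np.random.rand(1, 224, 224, 3)  # placeholder full image
    vad = model.predict([body_crop, whole_image])  # shape (1, 3): V, A, D
    print(vad)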
def HRA_VGG16_Places365(include_top=True, weights='HRA',
                        input_tensor=None, input_shape=None,
                        mode='fine_tuning', pooling_mode='avg',
                        classes=9, data_augm_enabled=False):
    """Instantiates the VGG16_Places365 architecture fine-tuned (2 steps) on the Human Rights Archive dataset.

    Optionally loads weights pre-trained on the Human Rights Archive database.

    # Arguments
        include_top: whether to include the 3 fully-connected layers at the top of the network.
        weights: one of `None` (random initialization),
            'HRA' (pre-training on Human Rights Archive),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 48.
            E.g. `(200, 200, 3)` would be one valid value.
        mode: one of `feature_extraction` (freeze all but the penultimate layer and
            re-train the last Dense layer) or `fine_tuning` (unfreeze the lower
            convolutional layers and retrain more layers).
        pooling_mode: pooling mode that will be applied to the output of the last
            convolutional layer of the original model, making the output of the model a 2D tensor.
            - `avg` means that a global average pooling operation for spatial data will be applied.
            - `max` means that a global max pooling operation for spatial data will be applied.
            - `flatten` means that the output of the last convolutional layer of the
                original model will be flattened, resulting in a larger Dense layer afterwards.
        classes: optional number of classes to classify images into.
        data_augm_enabled: whether to use the augmented samples during training.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`, or invalid input shape.
    """
    if not (weights in {'HRA', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `HRA` '
                         '(pre-training on Human Rights Archive), '
                         'or the path to the weights file to be loaded.')

    if mode not in {'feature_extraction', 'fine_tuning'}:
        raise ValueError('The `mode` argument should be either '
                         '`feature_extraction` (freeze all but the penultimate layer and re-train the last Dense layer), '
                         'or `fine_tuning` (unfreeze the lower convolutional layers and retrain more layers).')

    if pooling_mode not in {'avg', 'max', 'flatten'}:
        raise ValueError('The `pooling_mode` argument should be either '
                         '`avg` (global average pooling), `max` (global max pooling) '
                         'or `flatten` (the output will be flattened).')

    if mode == 'feature_extraction' and include_top is False:
        raise ValueError('The `include_top` argument can be set as false only '
                         'when the `mode` argument is `fine_tuning`. '
                         'If not, the returned model would literally be the default '
                         'keras-applications model and not the one trained on HRA.')

    cache_subdir = 'hra_models_fewer_params'

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=48,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Ensure that the model takes into account any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # create the base pre-trained model
    base_model = VGG16_Places365(weights='places', include_top=False, input_tensor=img_input)
    x = base_model.output

    # Classification block - build a classifier model to put on top of the convolutional model
    if include_top:
        # add a global spatial pooling layer or flatten the obtained output from the original model
        if pooling_mode == 'avg':
            x = GlobalAveragePooling2D(name='GAP')(x)
        elif pooling_mode == 'max':
            x = GlobalMaxPooling2D(name='GMP')(x)
        elif pooling_mode == 'flatten':
            x = Flatten(name='FLATTEN')(x)

        # add a fully-connected layer
        x = Dense(256, activation='relu', name='FC1')(x)

        # When random init is enabled, we want to include Dropout,
        # otherwise when loading a pre-trained HRA model we want to omit that layer,
        # so the visualisations are done properly (there is an issue if it is included)
        if weights is None:
            x = Dropout(0.5, name='DROPOUT')(x)

        # and a logistic layer with the number of classes defined by the `classes` argument
        x = Dense(classes, activation='softmax', name='PREDICTIONS')(x)

    model = Model(inputs=inputs, outputs=x, name='HRA-VGG16_Places365')

    # load weights
    if weights == 'HRA':
        if include_top:
            if mode == 'feature_extraction':
                for layer in base_model.layers:
                    layer.trainable = False

                model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                              loss='categorical_crossentropy')

                if data_augm_enabled:
                    if pooling_mode == 'avg':
                        weights_path = get_file(AUGM_FEATURE_EXTRACTION_AVG_POOL_fname,
                                                AUGM_FEATURE_EXTRACTION_AVG_POOL_WEIGHTS_PATH,
                                                cache_subdir=cache_subdir)
                    elif pooling_mode == 'flatten':
                        weights_path = get_file(AUGM_FEATURE_EXTRACTION_FLATTEN_POOL_fname,
                                                AUGM_FEATURE_EXTRACTION_FLATTEN_POOL_WEIGHTS_PATH,
                                                cache_subdir=cache_subdir)
                    elif pooling_mode == 'max':
                        weights_path = get_file(AUGM_FEATURE_EXTRACTION_MAX_POOL_fname,
                                                AUGM_FEATURE_EXTRACTION_MAX_POOL_WEIGHTS_PATH,
                                                cache_subdir=cache_subdir)
                else:
                    if pooling_mode == 'avg':
                        weights_path = get_file(FEATURE_EXTRACTION_AVG_POOL_fname,
                                                FEATURE_EXTRACTION_AVG_POOL_WEIGHTS_PATH,
                                                cache_subdir=cache_subdir)
                    elif pooling_mode == 'flatten':
                        weights_path = get_file(FEATURE_EXTRACTION_FLATTEN_POOL_fname,
                                                FEATURE_EXTRACTION_FLATTEN_POOL_WEIGHTS_PATH,
                                                cache_subdir=cache_subdir)
                    elif pooling_mode == 'max':
                        weights_path = get_file(FEATURE_EXTRACTION_MAX_POOL_fname,
                                                FEATURE_EXTRACTION_MAX_POOL_WEIGHTS_PATH,
                                                cache_subdir=cache_subdir)

            elif mode == 'fine_tuning':
                if pooling_mode == 'flatten':
                    for layer in model.layers[:17]:
                        layer.trainable = False
                    for layer in model.layers[17:]:
                        layer.trainable = True
                else:
                    for layer in model.layers[:16]:
                        layer.trainable = False
                    for layer in model.layers[16:]:
                        layer.trainable = True

                model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                              loss='categorical_crossentropy',
                              metrics=['accuracy'])

                if data_augm_enabled:
                    if pooling_mode == 'avg':
                        weights_path = get_file(AUGM_FINE_TUNING_AVG_POOL_fname,
                                                AUGM_FINE_TUNING_AVG_POOL_WEIGHTS_PATH,
                                                cache_subdir=cache_subdir)
                    elif pooling_mode == 'flatten':
                        weights_path = get_file(AUGM_FINE_TUNING_FLATTEN_POOL_fname,
                                                AUGM_FINE_TUNING_FLATTEN_POOL_WEIGHTS_PATH,
                                                cache_subdir=cache_subdir)
                    elif pooling_mode == 'max':
                        weights_path = get_file(AUGM_FINE_TUNING_MAX_POOL_fname,
                                                AUGM_FINE_TUNING_MAX_POOL_WEIGHTS_PATH,
                                                cache_subdir=cache_subdir)
                else:
                    if pooling_mode == 'avg':
                        weights_path = get_file(FINE_TUNING_AVG_POOL_fname,
                                                FINE_TUNING_AVG_POOL_WEIGHTS_PATH,
                                                cache_subdir=cache_subdir)
                    elif pooling_mode == 'flatten':
                        weights_path = get_file(FINE_TUNING_FLATTEN_POOL_fname,
                                                FINE_TUNING_FLATTEN_POOL_WEIGHTS_PATH,
                                                cache_subdir=cache_subdir)
                    elif pooling_mode == 'max':
                        weights_path = get_file(FINE_TUNING_MAX_POOL_fname,
                                                FINE_TUNING_MAX_POOL_WEIGHTS_PATH,
                                                cache_subdir=cache_subdir)
        else:
            weights_path = get_file(FINE_TUNING_WEIGHTS_PATH_NO_TOP_fname,
                                    FINE_TUNING_WEIGHTS_PATH_NO_TOP,
                                    cache_subdir=cache_subdir)

        model.load_weights(weights_path)

    elif weights is not None:
        model.load_weights(weights)

    return model
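
# Usage sketch (assumption): load the 9-class HRA model fine-tuned with global
# average pooling and run it on one placeholder batch.
if __name__ == '__main__':
    import numpy as np

    model = HRA_VGG16_Places365(weights='HRA', mode='fine_tuning', pooling_mode='avg')

    dummy_batch = np.random.rand(1, 224, 224, 3)  # placeholder input
    preds = model.predict(dummy_batch)  # (1, 9): probabilities over the 9 HRA classes
    print(np.argmax(preds[0]))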