def CompoundNet_VGG19(include_top=True,
                      weights=None,
                      input_tensor=None,
                      input_shape=None,
                      fusion_strategy='concatenate',
                      mode='fine_tuning',
                      pooling_mode='avg',
                      classes=9,
                      data_augm_enabled=False):
    """Instantiates the CompoundNet VGG19 architecture fine-tuned (2 steps) on Human Rights Archive dataset.

        Optionally loads weights pre-trained on Human Rights Archive Database.

        # Arguments
            include_top: whether to include the fully-connected
                layers at the top of the network.
            weights: one of `None` (random initialization),
                'HRA' (pre-training on Human Rights Archive),
                or the path to the weights file to be loaded.
            input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
                to use as image input for the model.
            input_shape: optional shape tuple, only to be specified
                if `include_top` is False (otherwise the input shape
                has to be `(224, 224, 3)` with `channels_last` data format
                or `(3, 224, 224)` with `channels_first` data format).
                It should have exactly 3 input channels,
                and width and height should be no smaller than 48.
                E.g. `(200, 200, 3)` would be one valid value.
            fusion_strategy: one of `concatenate` (feature vectors of different sources are concatenated into one super-vector),
                `average` (the feature set is averaged)
                or `maximum` (selects the highest value from the corresponding features).
            mode: one of `feature_extraction` (freeze all but the penultimate layer and re-train the last Dense layer)
                or `fine_tuning` (also unfreeze some of the upper convolutional layers and retrain them together with the classifier).
            pooling_mode: pooling applied to the merged feature maps
                before the fully-connected classifier.
                - `avg` means that global average pooling
                    will be applied to the output of the
                    last convolutional layer, and thus
                    the output of the model will be a 2D tensor.
                - `max` means that global max pooling will
                    be applied.
                - `flatten` means that the feature maps will simply
                    be flattened into a 2D tensor.
            classes: optional number of classes to classify images into,
                only to be specified if `weights` argument is `None`.
            data_augm_enabled: whether to use the augmented samples during training.

        # Returns
            A Keras model instance.

        # Raises
            ValueError: in case of invalid argument for `weights`,
                `fusion_strategy` or `pooling_mode`.
        """

    if not (weights in {'HRA', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `HRA` '
                         '(pre-training on Human Rights Archive), '
                         'or the path to the weights file to be loaded.')

    if not (fusion_strategy in {'concatenate', 'average', 'maximum'}):
        raise ValueError(
            'The `fusion_strategy` argument should be either '
            '`concatenate` (feature vectors of different sources are concatenated into one super-vector), '
            '`average` (the feature set is averaged) '
            'or `maximum` (selects the highest value from the corresponding features).'
        )

    if not (pooling_mode in {'avg', 'max', 'flatten'}):
        raise ValueError('The `pooling_mode` argument should be either '
                         '`avg` (GlobalAveragePooling2D), `max` '
                         '(GlobalMaxPooling2D), '
                         'or `flatten` (Flatten).')

    if not (mode in {'feature_extraction', 'fine_tuning'}):
        raise ValueError('The `mode` argument should be either '
                         '`feature_extraction` or `fine_tuning`.')

    if weights == 'HRA' and classes != 9:
        raise ValueError(
            'If using `weights` as Human Rights Archive, `classes` should be 9.'
        )

    cache_subdir = 'HRA_models'

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=48,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Build the two backbones on the shared image input so that their
    # feature maps can later be merged. (Previously a hard-coded
    # `Input(shape=(224, 224, 3))` silently discarded the `input_tensor`
    # and `input_shape` handling above.)
    object_centric_model = VGG19(input_tensor=img_input,
                                 weights='imagenet',
                                 include_top=False)

    scene_centric_model = VGG16_Places365(input_tensor=img_input,
                                          weights='places',
                                          include_top=False)

    # retrieve the outputs
    object_model_output = object_centric_model.output
    scene_model_output = scene_centric_model.output

    # We will feed the extracted features to a merging layer
    if fusion_strategy == 'concatenate':
        merged = concatenate([object_model_output, scene_model_output])

    elif fusion_strategy == 'average':
        merged = average([object_model_output, scene_model_output])

    else:
        merged = maximum([object_model_output, scene_model_output])
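    # For a 224x224 input, both VGG-style backbones emit 7x7x512 feature maps:
    # `concatenate` (channel-wise) yields a 7x7x1024 tensor, while the
    # element-wise `average` and `maximum` merges keep the 7x7x512 shape.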

    if include_top:
        if pooling_mode == 'avg':
            x = GlobalAveragePooling2D(name='GAP')(merged)
        elif pooling_mode == 'max':
            x = GlobalMaxPooling2D(name='GMP')(merged)
        elif pooling_mode == 'flatten':
            x = Flatten(name='FLATTEN')(merged)

        x = Dense(256, activation='relu',
                  name='FC1')(x)  # let's add a fully-connected layer

        # When random init is enabled, we want to include Dropout,
        # otherwise when loading a pre-trained HRA model we want to omit
        # Dropout layer so the visualisations are done properly (there is an issue if it is included)
        if weights is None:
            x = Dropout(0.5, name='DROPOUT')(x)
        # and a logistic layer with the number of classes defined by the `classes` argument
        x = Dense(classes, activation='softmax',
                  name='PREDICTIONS')(x)  # new softmax layer
    else:
        # Without the classifier head, expose the merged feature maps directly
        # (otherwise `x` would be unbound when building the Model below).
        x = merged

    # Ensure that the model takes into account any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # this is the transfer learning model we will train
    model = Model(inputs=inputs, outputs=x, name='CompoundNet-VGG19')

    # load weights
    if weights == 'HRA':
        if include_top:
            if mode == 'feature_extraction':
                for layer in object_centric_model.layers:
                    layer.trainable = False

                for layer in scene_centric_model.layers:
                    layer.trainable = False

                model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                              loss='categorical_crossentropy')

                if data_augm_enabled:

                    if fusion_strategy == 'concatenate':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_AVG_POOL_fname,
                                AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_FLATTEN_fname,
                                AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_MAX_POOL_fname,
                                AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'average':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_AVG_POOL_fname,
                                AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_FLATTEN_fname,
                                AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_MAX_POOL_fname,
                                AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'maximum':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_AVG_POOL_fname,
                                AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_FLATTEN_fname,
                                AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_MAX_POOL_fname,
                                AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)
                else:
                    if fusion_strategy == 'concatenate':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_CONCATENATE_FUSION_AVG_POOL_fname,
                                FEATURE_EXTRACTION_CONCATENATE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_CONCATENATE_FUSION_FLATTEN_fname,
                                FEATURE_EXTRACTION_CONCATENATE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_CONCATENATE_FUSION_MAX_POOL_fname,
                                FEATURE_EXTRACTION_CONCATENATE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'average':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_AVERAGE_FUSION_AVG_POOL_fname,
                                FEATURE_EXTRACTION_AVERAGE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_AVERAGE_FUSION_FLATTEN_fname,
                                FEATURE_EXTRACTION_AVERAGE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_AVERAGE_FUSION_MAX_POOL_fname,
                                FEATURE_EXTRACTION_AVERAGE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'maximum':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_MAXIMUM_FUSION_AVG_POOL_fname,
                                FEATURE_EXTRACTION_MAXIMUM_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_MAXIMUM_FUSION_FLATTEN_fname,
                                FEATURE_EXTRACTION_MAXIMUM_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_MAXIMUM_FUSION_MAX_POOL_fname,
                                FEATURE_EXTRACTION_MAXIMUM_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

            elif mode == 'fine_tuning':
                for layer in model.layers[:36]:
                    layer.trainable = False
                for layer in model.layers[36:]:
                    layer.trainable = True

                model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                              loss='categorical_crossentropy')

                if data_augm_enabled:

                    if fusion_strategy == 'concatenate':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_CONCATENATE_FUSION_AVG_POOL_fname,
                                AUGM_FINE_TUNING_CONCATENATE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_CONCATENATE_FUSION_FLATTEN_fname,
                                AUGM_FINE_TUNING_CONCATENATE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_CONCATENATE_FUSION_MAX_POOL_fname,
                                AUGM_FINE_TUNING_CONCATENATE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'average':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_AVERAGE_FUSION_AVG_POOL_fname,
                                AUGM_FINE_TUNING_AVERAGE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_AVERAGE_FUSION_FLATTEN_fname,
                                AUGM_FINE_TUNING_AVERAGE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_AVERAGE_FUSION_MAX_POOL_fname,
                                AUGM_FINE_TUNING_AVERAGE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'maximum':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_MAXIMUM_FUSION_AVG_POOL_fname,
                                AUGM_FINE_TUNING_MAXIMUM_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_MAXIMUM_FUSION_FLATTEN_fname,
                                AUGM_FINE_TUNING_MAXIMUM_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_MAXIMUM_FUSION_MAX_POOL_fname,
                                AUGM_FINE_TUNING_MAXIMUM_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)
                else:

                    if fusion_strategy == 'concatenate':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                FINE_TUNING_CONCATENATE_FUSION_AVG_POOL_fname,
                                FINE_TUNING_CONCATENATE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                FINE_TUNING_CONCATENATE_FUSION_FLATTEN_fname,
                                FINE_TUNING_CONCATENATE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                FINE_TUNING_CONCATENATE_FUSION_MAX_POOL_fname,
                                FINE_TUNING_CONCATENATE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'average':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                FINE_TUNING_AVERAGE_FUSION_AVG_POOL_fname,
                                FINE_TUNING_AVERAGE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                FINE_TUNING_AVERAGE_FUSION_FLATTEN_fname,
                                FINE_TUNING_AVERAGE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                FINE_TUNING_AVERAGE_FUSION_MAX_POOL_fname,
                                FINE_TUNING_AVERAGE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'maximum':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                FINE_TUNING_MAXIMUM_FUSION_AVG_POOL_fname,
                                FINE_TUNING_MAXIMUM_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                FINE_TUNING_MAXIMUM_FUSION_FLATTEN_fname,
                                FINE_TUNING_MAXIMUM_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                FINE_TUNING_MAXIMUM_FUSION_MAX_POOL_fname,
                                FINE_TUNING_MAXIMUM_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

        else:
            if fusion_strategy == 'average':
                weights_path = get_file(
                    FINE_TUNING_AVERAGE_FUSION_NO_TOP_fname,
                    FINE_TUNING_AVERAGE_FUSION_WEIGHTS_PATH_NO_TOP,
                    cache_subdir=cache_subdir)

            elif fusion_strategy == 'concatenate':
                weights_path = get_file(
                    FINE_TUNING_CONCATENATE_FUSION_NO_TOP_fname,
                    FINE_TUNING_CONCATENATE_FUSION_WEIGHTS_PATH_NO_TOP,
                    cache_subdir=cache_subdir)

            elif fusion_strategy == 'maximum':
                weights_path = get_file(
                    FINE_TUNING_MAXIMUM_FUSION_NO_TOP_fname,
                    FINE_TUNING_MAXIMUM_FUSION_WEIGHTS_PATH_NO_TOP,
                    cache_subdir=cache_subdir)

        model.load_weights(weights_path)

    return model
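
# A minimal usage sketch (hypothetical `preprocessed_batch`; assumes the HRA
# weight files are reachable through the module-level *_WEIGHTS_PATH constants):
#
#   model = CompoundNet_VGG19(weights='HRA',
#                             fusion_strategy='concatenate',
#                             mode='fine_tuning',
#                             pooling_mode='avg')
#   preds = model.predict(preprocessed_batch)  # shape: (batch_size, 9)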

# Example 2

    def __init__(self,
                 hdf5_file,
                 body_backbone_CNN,
                 image_backbone_CNN,
                 nb_of_epochs,
                 weights_to_file,
                 modelCheckpoint_quantity,
                 earlyStopping_quantity,
                 CSVLogger_filename):


        if not (body_backbone_CNN in {'VGG16', 'VGG19', 'ResNet50', 'VGG16_Places365'}):
            raise ValueError('The `body_backbone_CNN` argument should be either '
                             '`VGG16`, `VGG19`, `ResNet50` or `VGG16_Places365`. ')

        if not (image_backbone_CNN in {'VGG16', 'VGG19', 'ResNet50', 'VGG16_Places365'}):
            raise ValueError('The `image_backbone_CNN` argument should be either '
                             '`VGG16`, `VGG19`, `ResNet50` or `VGG16_Places365`. ')

        self.body_backbone_CNN = body_backbone_CNN
        self.image_backbone_CNN = image_backbone_CNN

        # -------------------------------------------------------------------------------- #
        #                             Construct EMOTIC model
        # -------------------------------------------------------------------------------- #

        body_inputs = Input(shape=(224, 224, 3), name='BODY_INPUT')
        image_inputs = Input(shape=(224, 224, 3), name='IMAGE_INPUT')

        # Body module
        if 'VGG16' == body_backbone_CNN:
            self.body_truncated_model = VGG16(include_top=False, weights='imagenet', input_tensor=body_inputs, pooling='avg')

        elif 'VGG19' == body_backbone_CNN:
            self.body_truncated_model = VGG19(include_top=False, weights='imagenet', input_tensor=body_inputs, pooling='avg')

        elif 'ResNet50' == body_backbone_CNN:
            tmp_model = ResNet50(include_top=False, weights='imagenet', input_tensor=body_inputs, pooling='avg')
            self.body_truncated_model = Model(inputs=tmp_model.input, outputs=tmp_model.get_layer('activation_48').output)

        elif 'VGG16_Places365' == body_backbone_CNN:
            self.body_truncated_model = VGG16_Places365(include_top=False, weights='places', input_tensor=body_inputs, pooling='avg')

        for layer in self.body_truncated_model.layers:
            layer.name = str("body-") + layer.name


        print('[INFO] The plain, body `' + body_backbone_CNN + '` pre-trained convnet was successfully initialised.')

        # Image module
        if 'VGG16' == image_backbone_CNN:
            self.image_truncated_model = VGG16(include_top=False, weights='imagenet', input_tensor=image_inputs, pooling='avg')

        elif 'VGG19' == image_backbone_CNN:
            self.image_truncated_model = VGG19(include_top=False, weights='imagenet', input_tensor=image_inputs, pooling='avg')

        elif 'ResNet50' == image_backbone_CNN:
            tmp_model = ResNet50(include_top=False, weights='imagenet',input_tensor=image_inputs, pooling='avg')
            self.image_truncated_model = Model(inputs=tmp_model.input, outputs=tmp_model.get_layer('activation_48').output)

        elif 'VGG16_Places365' == image_backbone_CNN:
            self.image_truncated_model = VGG16_Places365(include_top=False, weights='places', input_tensor=image_inputs, pooling='avg')

        for layer in self.image_truncated_model.layers:
            layer.name = str("image-") + layer.name

        print('[INFO] The plain, image `' + image_backbone_CNN + '` pre-trained convnet was successfully initialised.')

        # retrieve the outputs
        body_plain_model_output = self.body_truncated_model.output
        image_plain_model_output = self.image_truncated_model.output


        # When ResNet50 is selected, we need an explicit global average pooling
        # layer to match the pooled outputs of the other CNNs.
        if 'ResNet50' == body_backbone_CNN:
            body_plain_model_output = GlobalAveragePooling2D(name='BODY_GAP')(body_plain_model_output)

        if 'ResNet50' == image_backbone_CNN:
            image_plain_model_output = GlobalAveragePooling2D(name='IMAGE_GAP')(image_plain_model_output)

        merged = concatenate([body_plain_model_output, image_plain_model_output])

        x = Dense(256, activation='relu', name='FC1', kernel_regularizer=regularizers.l2(0.01), kernel_initializer='random_normal')(merged)

        x = Dropout(0.5, name='DROPOUT')(x)

        vad_cont_prediction = Dense(units=3, kernel_initializer='random_normal', name='VAD')(x)
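        # `Dense(units=3)` keeps the default linear activation, as expected for
        # regressing the three continuous VAD (valence/arousal/dominance) values.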

        # At model instantiation, you specify the two inputs and the output.
        self.model = Model(inputs=[body_inputs, image_inputs], outputs=vad_cont_prediction, name='EMOTIC-VAD-regression')

        print('[INFO] Randomly initialised classifier was successfully added on top of the merged modules.')

        print('[INFO] Number of trainable weights before freezing the conv. bases of the respective original models: '
              '' + str(len(self.model.trainable_weights)))

        # first: train only the top layers (which were randomly initialized)
        # i.e. freeze all convolutional layers of the preliminary base model
        for layer in self.body_truncated_model.layers:
            layer.trainable = False

        for layer in self.image_truncated_model.layers:
            layer.trainable = False

        print('[INFO] Number of trainable weights after freezing the conv. bases of the respective original models: '
              '' + str(len(self.model.trainable_weights)))

        # # reference https://github.com/keras-team/keras/issues/4735#issuecomment-267472549
        # self.class_weight = { 'VALENCE': {0: 36.00, 1: 36.00, 2: 12.00, 3: 5.14, 4: 2.25, 5: 1.00, 6: 1.89, 7: 2.57, 8: 12.00, 9: 36.00},
        #                       'AROUSAL': {0: 23.00, 1: 11.50, 2: 4.60, 3: 1.00, 4: 2.09, 5: 1.64, 6: 1.14, 7: 2.09, 8: 3.83, 9: 4.60},
        #                       'DOMINANCE': {0: 34.00, 1: 17.00, 2: 11.33, 3: 6.80, 4: 5.66, 5: 1.70, 6: 1.00, 7: 2.42, 8: 3.40, 9: 6.80}
        #                     }


        self.model.compile(optimizer=SGD(lr=1e-5, momentum=0.9),
                           # loss='mse',
                           loss=euclidean_distance_loss,
                           metrics=['mae', 'mse', rmse])
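        # `euclidean_distance_loss` and `rmse` are assumed to be defined at
        # module level; a minimal sketch consistent with how they are used here:
        #
        #   def euclidean_distance_loss(y_true, y_pred):
        #       return K.sqrt(K.sum(K.square(y_pred - y_true), axis=-1))
        #
        #   def rmse(y_true, y_pred):
        #       return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))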

        # print ('[INFO] Metrics names: ',self.model.metrics_names )

        print('[INFO] End-to-end `EMOTIC-VAD-regression` model has been successfully compiled.')

        # -------------------------------------------------------------------------------- #
        #                                    Configurations
        # -------------------------------------------------------------------------------- #


        nb_train_samples = 23706
        nb_val_samples = 3332
        nb_test_samples = 7280

        train_generator_batch_size = 54
        val_generator_batch_size = 49
        test_generator_batch_size = 52

        self.steps_per_epoch = nb_train_samples // train_generator_batch_size
        self.validation_steps = nb_val_samples // val_generator_batch_size
        self.test_steps = nb_test_samples // test_generator_batch_size
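        # The batch sizes above divide the split sizes exactly, so no samples
        # are dropped: 23706 // 54 = 439, 3332 // 49 = 68, 7280 // 52 = 140.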


        # -------------------------------------------------------------------------------- #
        #                                Read the HDF5 file
        # -------------------------------------------------------------------------------- #
        # open the hdf5 file
        hdf5_file = h5py.File(hdf5_file, "r")

        self.nb_train_data = hdf5_file["x_image_train"].shape[0]

        self.nb_val_data = hdf5_file["x_image_val"].shape[0]

        self.nb_test_data = hdf5_file["x_image_test"].shape[0]



        # -------------------------------------------------------------------------------- #
        #                         Instantiate the custom generators
        # -------------------------------------------------------------------------------- #

        print('[INFO] Setting up custom generators...')

        self.train_generator = custom_generator_single_output(hdf5_file=hdf5_file,
                                                              nb_data=self.nb_train_data,
                                                              batch_size=train_generator_batch_size,
                                                              mode='train')

        self.val_generator = custom_generator_single_output(hdf5_file=hdf5_file,
                                                            nb_data=self.nb_val_data,
                                                            batch_size=val_generator_batch_size,
                                                            mode='val')

        self.test_generator = custom_generator_single_output(hdf5_file=hdf5_file,
                                                             nb_data=self.nb_test_data,
                                                             batch_size=test_generator_batch_size,
                                                             mode='test')
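        # `custom_generator_single_output` is assumed to yield two-input batches
        # matching the model signature; a hypothetical sketch (dataset key names
        # other than `x_image_*` are assumptions):
        #
        #   def custom_generator_single_output(hdf5_file, nb_data, batch_size, mode):
        #       while True:
        #           for i in range(0, nb_data, batch_size):
        #               x_body = hdf5_file['x_body_' + mode][i:i + batch_size]
        #               x_image = hdf5_file['x_image_' + mode][i:i + batch_size]
        #               y = hdf5_file['y_' + mode][i:i + batch_size]
        #               yield [x_body, x_image], y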




        # -------------------------------------------------------------------------------- #
        #                                Usage of callbacks
        # -------------------------------------------------------------------------------- #

        self.weights_to_file = weights_to_file
        self.nb_of_epochs = nb_of_epochs

        # CSVLogger
        model_log = 'trained_models/logs/' + CSVLogger_filename
        csv_logger = CSVLogger(model_log, append=True, separator=',')


        # ModelCheckpoint
        checkpointer = ModelCheckpoint(filepath=weights_to_file,
                                       monitor=modelCheckpoint_quantity,
                                       verbose=1,
                                       save_best_only=True,
                                       mode='auto',
                                       period=1,
                                       save_weights_only=True)

        early_stop = EarlyStopping(monitor=earlyStopping_quantity, patience=5, mode='auto')

        self.callbacks_list = [checkpointer, early_stop, csv_logger]
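
        # These callbacks are consumed by a later training call; a hypothetical
        # entry point using the attributes configured above might look like:
        #
        #   self.model.fit_generator(self.train_generator,
        #                            steps_per_epoch=self.steps_per_epoch,
        #                            epochs=self.nb_of_epochs,
        #                            validation_data=self.val_generator,
        #                            validation_steps=self.validation_steps,
        #                            callbacks=self.callbacks_list)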

# Example 3

def compoundNet_feature_extraction(object_centric_model='VGG16',
                                   scene_centric_model='VGG16_Places365',
                                   fusion_strategy='concatenate',
                                   pooling_mode='avg',
                                   classes=9,
                                   data_augm_enabled=False):
    """ConvNet as fixed feature extractor, consist of taking the convolutional base of a previously-trained network,
    running the new data through it, and training a new classifier on top of the output.
    (i.e. train only the randomly initialized top layers while freezing all convolutional layers of the original model).

    # Arguments
        object_centric_model: one of `VGG16`, `VGG19` or `ResNet50`
        scene_centric_model: `VGG16_Places365`
        fusion_strategy: one of `concatenate` (feature vectors of different sources are concatenated into one super-vector),
            `average` (the feature set is averaged) or `maximum` (selects the highest value from the corresponding features).
        pooling_mode: pooling applied to the merged feature maps
            before the fully-connected classifier.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
            - `flatten` means that the feature maps will simply
                be flattened into a 2D tensor.
        classes: optional number of classes to classify images into,
            only to be specified if `weights` argument is `None`.
        data_augm_enabled: whether to use the augmented samples during training.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `object_centric_model`, `pooling_mode`,
            `fusion_strategy`, `scene_centric_model` or invalid input shape.
    """

    if not (object_centric_model in {'VGG16', 'VGG19', 'ResNet50'}):
        raise ValueError(
            'The `object_centric_model` argument should be either '
            '`VGG16`, `VGG19` or `ResNet50`. Other models will be supported in future releases. '
        )

    if not (pooling_mode in {'avg', 'max', 'flatten'}):
        raise ValueError('The `pooling_mode` argument should be either '
                         '`avg` (GlobalAveragePooling2D), `max` '
                         '(GlobalMaxPooling2D), '
                         'or `flatten` (Flatten).')

    if not (fusion_strategy in {'concatenate', 'average', 'maximum'}):
        raise ValueError(
            'The `fusion_strategy` argument should be either '
            '`concatenate` (feature vectors of different sources are concatenated into one super-vector),'
            ' `average` (the feature set is averaged) '
            'or `maximum` (selects the highest value from the corresponding features).'
        )

    if not (scene_centric_model in {'VGG16_Places365'}):
        raise ValueError(
            'The `scene_centric_model` argument should be '
            '`VGG16_Places365`. Other models will be supported in future releases.'
        )

    # Define the name of the model and its weights
    weights_name = 'compoundNet_feature_extraction_' \
                   + object_centric_model + '_' \
                   + fusion_strategy + '_fusion_' \
                   + pooling_mode + '_pool_weights_tf_dim_ordering_tf_kernels.h5'


    augm_samples_weights_name = 'augm_compoundNet_feature_extraction_' \
                                + object_centric_model + '_' \
                                + fusion_strategy + '_fusion_' \
                                + pooling_mode + '_pool_weights_tf_dim_ordering_tf_kernels.h5'

    model_log = logs_dir + 'compoundNet_feature_extraction_' \
                                + object_centric_model + '_' \
                                + fusion_strategy + '_fusion_' \
                                + pooling_mode + '_pool_log.csv'
    csv_logger = CSVLogger(model_log, append=True, separator=',')

    input_tensor = Input(shape=(224, 224, 3))

    # create the base object_centric_model pre-trained model for warm-up
    if object_centric_model == 'VGG16':
        object_base_model = VGG16(input_tensor=input_tensor,
                                  weights='imagenet',
                                  include_top=False)

    elif object_centric_model == 'VGG19':
        object_base_model = VGG19(input_tensor=input_tensor,
                                  weights='imagenet',
                                  include_top=False)

    elif object_centric_model == 'ResNet50':
        tmp_model = ResNet50(input_tensor=input_tensor,
                             weights='imagenet',
                             include_top=False)
        object_base_model = Model(
            inputs=tmp_model.input,
            outputs=tmp_model.get_layer('activation_48').output)

    print('\n \n')
    print('The plain, object-centric `' + object_centric_model +
          '` pre-trained convnet was successfully initialised.\n')

    scene_base_model = VGG16_Places365(input_tensor=input_tensor,
                                       weights='places',
                                       include_top=False)

    print('The plain, scene-centric `' + scene_centric_model +
          '` pre-trained convnet was successfully initialised.\n')

    # retrieve the outputs
    object_base_model_output = object_base_model.output
    scene_base_model_output = scene_base_model.output

    # We will feed the extracted features to a merging layer
    if fusion_strategy == 'concatenate':
        merged = concatenate(
            [object_base_model_output, scene_base_model_output])

    elif fusion_strategy == 'average':
        merged = average([object_base_model_output, scene_base_model_output])

    else:
        merged = maximum([object_base_model_output, scene_base_model_output])

    if pooling_mode == 'avg':
        x = GlobalAveragePooling2D(name='GAP')(merged)
    elif pooling_mode == 'max':
        x = GlobalMaxPooling2D(name='GMP')(merged)
    elif pooling_mode == 'flatten':
        x = Flatten(name='FLATTEN')(merged)

    x = Dense(256, activation='relu',
              name='FC1')(x)  # let's add a fully-connected layer

    # When random init is enabled, we want to include Dropout,
    # otherwise when loading a pre-trained HRA model we want to omit
    # Dropout layer so the visualisations are done properly (there is an issue if it is included)
    x = Dropout(0.5, name='DROPOUT')(x)
    # and a logistic layer with the number of classes defined by the `classes` argument
    predictions = Dense(classes, activation='softmax',
                        name='PREDICTIONS')(x)  # new softmax layer

    # this is the transfer learning model we will train
    model = Model(inputs=object_base_model.input, outputs=predictions)

    print(
        'Randomly initialised classifier was successfully added on top of the merged outputs. \n'
    )

    print(
        'Number of trainable weights before freezing the conv. bases of the respective original models: '
        '' + str(len(model.trainable_weights)))

    # first: train only the top layers (which were randomly initialized)
    # i.e. freeze all convolutional layers of the preliminary base model
    for layer in object_base_model.layers:
        layer.trainable = False

    for layer in scene_base_model.layers:
        layer.trainable = False

    print(
        'Number of trainable weights after freezing the conv. bases of the respective original models: '
        '' + str(len(model.trainable_weights)))

    print('\n')

    # compile the warm_up_model (should be done *after* setting layers to non-trainable)

    model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    # # The attribute model.metrics_names will give you the display labels for the scalar outputs.
    # print warm_up_model.metrics_names

    if data_augm_enabled:
        print(
            'Using augmented samples for training. This may take a while ! \n')

        t = now()

        history = model.fit_generator(augmented_train_generator,
                                      steps_per_epoch=nb_train_samples //
                                      batch_size,
                                      epochs=feature_extraction_epochs,
                                      callbacks=[csv_logger],
                                      class_weight=class_weight)

        print(
            'Training time for re-training the last Dense layer using augmented samples: %s'
            % (now() - t))

        model.save_weights(feature_extraction_dir + augm_samples_weights_name)
        print('Model weights using augmented samples were saved as `' +
              augm_samples_weights_name + '`')
        print('\n')

    else:
        t = now()
        history = model.fit_generator(train_generator,
                                      steps_per_epoch=nb_train_samples //
                                      batch_size,
                                      epochs=feature_extraction_epochs,
                                      callbacks=[csv_logger],
                                      class_weight=class_weight)

        print('Training time for re-training the last Dense layer: %s' %
              (now() - t))

        model.save_weights(feature_extraction_dir + weights_name)
        print('Model weights were saved as `' + weights_name + '`')
        print('\n')

    return model
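
# A minimal usage sketch (assumes the module-level generators, `logs_dir`,
# `feature_extraction_dir` and training constants referenced above are set up):
#
#   model = compoundNet_feature_extraction(object_centric_model='VGG19',
#                                          fusion_strategy='average',
#                                          pooling_mode='avg')
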
def feature_extraction(pre_trained_model='VGG16',
                       pooling_mode='avg',
                       classes=9,
                       data_augm_enabled = False):
    """ConvNet as fixed feature extractor, consist of taking the convolutional base of a previously-trained network,
    running the new data through it, and training a new classifier on top of the output.
    (i.e. train only the randomly initialized top layers while freezing all convolutional layers of the original model).

    # Arguments
        pre_trained_model: one of `VGG16`, `VGG19`, `ResNet50`, `VGG16_Places365`
        pooling_mode: pooling applied to the extracted feature maps
            before the fully-connected classifier.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
            - `flatten` means that the feature maps will simply
                be flattened into a 2D tensor.
        classes: optional number of classes to classify images into,
            only to be specified if `weights` argument is `None`.
        data_augm_enabled: whether to augment the samples during training

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `pre_trained_model`, `pooling_mode` or invalid input shape.
    """


    if not (pre_trained_model in {'VGG16', 'VGG19', 'ResNet50', 'VGG16_Places365'}):
        raise ValueError('The `pre_trained_model` argument should be either '
                         '`VGG16`, `VGG19`, `ResNet50`, '
                         'or `VGG16_Places365`. Other models will be supported in future releases. ')

    if not (pooling_mode in {'avg', 'max', 'flatten'}):
        raise ValueError('The `pooling_mode` argument should be either '
                         '`avg` (GlobalAveragePooling2D), `max` '
                         '(GlobalMaxPooling2D), '
                         'or `flatten` (Flatten).')

    # Define the name of the model and its weights
    weights_name = 'feature_extraction_' + pre_trained_model + '_' + pooling_mode + '_pool_weights_tf_dim_ordering_tf_kernels.h5'

    augm_samples_weights_name = 'augm_feature_extraction_' + pre_trained_model + '_' + pooling_mode + '_pool_weights_tf_dim_ordering_tf_kernels.h5'

    model_log = logs_dir + 'feature_extraction_' + pre_trained_model + '_' + pooling_mode + '_pool_log.csv'
    csv_logger = CSVLogger(model_log, append=True, separator=',')

    input_tensor = Input(shape=(224, 224, 3))

    # create the base pre-trained model for warm-up
    if pre_trained_model == 'VGG16':
        base_model = VGG16(weights='imagenet', include_top=False, input_tensor=input_tensor)

    elif pre_trained_model == 'VGG19':
        base_model = VGG19(weights='imagenet', include_top=False, input_tensor=input_tensor)

    elif pre_trained_model == 'ResNet50':
        base_model = ResNet50(weights='imagenet', include_top=False, input_tensor=input_tensor)

    elif pre_trained_model == 'VGG16_Places365':
        base_model = VGG16_Places365(weights='places', include_top=False, input_tensor=input_tensor)

    print ('\n \n')
    print('The plain `' + pre_trained_model + '` pre-trained convnet was successfully initialised.\n')


    x = base_model.output

    # Now we set up the transfer-learning process - freeze all but the penultimate layer
    # and re-train the last Dense layer with 9 final outputs representing probabilities for HRA classes.
    # Build a randomly initialised classifier model to put on top of the convolutional model.

    # Both `avg` and `max` result in the same size of the following Dense layer.
    # Flatten and GlobalAveragePooling2D are both valid options, as is GlobalMaxPooling2D.
    # Flatten results in a larger Dense layer afterwards, which is more expensive
    # and may result in worse overfitting; but if you have lots of data, it might perform better.
    # https://github.com/keras-team/keras/issues/8470
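    # Concretely, the VGG backbones output 7x7x512 feature maps for a 224x224
    # input: Flatten gives a 25088-dim vector (25088 x 256 ~ 6.4M FC1 weights),
    # while GAP/GMP give a 512-dim vector (512 x 256 ~ 131K FC1 weights).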
    if pooling_mode == 'avg':
        x = GlobalAveragePooling2D(name='GAP')(x)
    elif pooling_mode == 'max':
        x = GlobalMaxPooling2D(name='GMP')(x)
    elif pooling_mode == 'flatten':
        x = Flatten(name='FLATTEN')(x)


    x = Dense(256, activation='relu', name='FC1')(x)  # let's add a fully-connected layer

    # When random init is enabled, we want to include Dropout,
    # otherwise when loading a pre-trained HRA model we want to omit
    # Dropout layer so the visualisations are done properly (there is an issue if it is included)
    x = Dropout(0.5, name='DROPOUT')(x)
    # and a logistic layer with the number of classes defined by the `classes` argument
    predictions = Dense(classes, activation='softmax', name='PREDICTIONS')(x)  # new softmax layer

    # this is the transfer learning model we will train
    model = Model(inputs=base_model.input, outputs=predictions)

    print('Randomly initialised classifier was successfully added on top of the original pre-trained conv. base. \n')

    print('Number of trainable weights before freezing the conv. base of the original pre-trained convnet: '
          '' + str(len(model.trainable_weights)))

    # first: train only the top layers (which were randomly initialized)
    # i.e. freeze all convolutional layers of the preliminary base model
    for layer in base_model.layers:
        layer.trainable = False

    print('Number of trainable weights after freezing the conv. base of the pre-trained convnet: '
          '' + str(len(model.trainable_weights)))

    print ('\n')

    # compile the warm_up_model (should be done *after* setting layers to non-trainable)

    model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()


    # # The attribute model.metrics_names will give you the display labels for the scalar outputs.
    # print warm_up_model.metrics_names

    if data_augm_enabled:
        print('Using augmented samples for training. This may take a while ! \n')

        t = now()

        history = model.fit_generator(augmented_train_generator,
                                      steps_per_epoch=nb_train_samples // batch_size,
                                      epochs=feature_extraction_epochs,
                                      callbacks=[csv_logger])

        print('Training time for re-training the last Dense layer using augmented samples: %s' % (now() - t))

        model.save_weights(feature_extraction_dir + augm_samples_weights_name)
        print(
            'Model weights using augmented samples were saved as `' + augm_samples_weights_name + '`')
        print ('\n')


    else:
        t = now()
        history = model.fit_generator(train_generator,
                                      steps_per_epoch=nb_train_samples // batch_size,
                                      epochs=feature_extraction_epochs,
                                      callbacks=[csv_logger])

        print('Training time for re-training the last Dense layer: %s' % (now() - t))

        model.save_weights(feature_extraction_dir + weights_name)
        print('Model weights were saved as `' + weights_name + '`')
        print ('\n')

    return model
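
# A minimal usage sketch (assumes the same module-level training setup as above):
#
#   model = feature_extraction(pre_trained_model='ResNet50',
#                              pooling_mode='flatten',
#                              classes=9)
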
def HRA_VGG16_Places365(include_top=True,
                        weights='HRA',
                        input_tensor=None,
                        input_shape=None,
                        nb_of_conv_layers_to_fine_tune=None,
                        first_phase_trained_weights=None,
                        violation_class='cl',
                        verbose=0):
    """Instantiates the VGG16-Places365 architecture fine-tuned (2 steps) on Human Rights Archive dataset.

    Optionally loads weights pre-trained on the 2 class version of Human Rights Archive Database.

    # Arguments
        include_top: whether to include the fully-connected
            layers at the top of the network.
        weights: one of `None` (random initialization),
            'HRA' (pre-training on Human Rights Archive),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` with `channels_last` data format
            or `(3, 224, 224)` with `channels_first` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 48.
            E.g. `(200, 200, 3)` would be one valid value.
        nb_of_conv_layers_to_fine_tune: integer to indicate the number of convolutional
            layers to fine-tune. One of `1` (2,499,360 trainable params), `2` (4,859,168 trainable params) or `3` (7,218,976 trainable params).
        first_phase_trained_weights: Weights of an already trained Keras model instance.
            Only relevant when using `fine_tuning` as train_mode after `feature_extraction` weights have been saved.
        violation_class: one of `cl` (HRA dataset with 2 classes - [i]'child_labour' and [ii]'no violation')
            or `dp` (HRA dataset with 2 classes - [i]'displaced_populations' and [ii]'no violation')
        verbose: Integer. 0, or 1. Verbosity mode. 0 = silent, 1 = model summary and weights info.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`, `violation_class`,
            `nb_of_conv_layers_to_fine_tune` or invalid input shape.
    """
    if not (weights in {'HRA', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `HRA` '
                         '(pre-training on Human Rights Archive two-class), '
                         'or the path to the weights file to be loaded.')

    if not (violation_class in {'cl', 'dp'}):
        raise ValueError(
            "The `violation_class` argument should be either "
            "`cl` (HRA dataset with 2 classes - [i]'child_labour' and [ii]'no violation') "
            "or `dp` (HRA dataset with 2 classes - [i]'displaced_populations' and [ii]'no violation')."
        )

    if nb_of_conv_layers_to_fine_tune is None and include_top is False:
        raise ValueError(
            'Setting the `include_top` argument as False '
            'is only relevant when the `nb_of_conv_layers_to_fine_tune` argument is not None (fine-tuning), '
            'otherwise the returned model would be exactly the default '
            'keras-applications model.')

    if weights == 'HRA' and first_phase_trained_weights is not None:
        raise ValueError(
            'Setting the `first_phase_trained_weights` argument as the path to the weights file '
            'obtained from utilising feature_extraction '
            'is only relevant when the `weights` argument is `None`. '
            'If the `weights` argument is `HRA`, it means the model has already been trained on HRA dataset '
            'and there is no need to provide a path to the weights file (saved from feature_extraction) to be loaded.'
        )

    if not (nb_of_conv_layers_to_fine_tune in {1, 2, 3, None}):
        raise ValueError(
            'The `nb_of_conv_layers_to_fine_tune` argument should be either '
            '`None` (indicates feature extraction mode), '
            '`1`, `2` or `3`. '
            'More than 3 conv. layers are not supported because the more parameters we are training, '
            'the more we are at risk of overfitting.')

    cache_subdir = 'AbuseNet'

    mode = _obtain_train_mode(
        nb_of_conv_layers_to_fine_tune=nb_of_conv_layers_to_fine_tune)
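    # `_obtain_train_mode` is assumed to map the argument to a mode string;
    # a minimal sketch consistent with its use below:
    #
    #   def _obtain_train_mode(nb_of_conv_layers_to_fine_tune):
    #       if nb_of_conv_layers_to_fine_tune is None:
    #           return 'feature_extraction'
    #       return 'fine_tuning'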

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=48,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Ensure that the model takes into account any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # create the base pre-trained model
    base_model = VGG16_Places365(weights='places',
                                 include_top=False,
                                 input_tensor=img_input)
    x = base_model.output

    # Classification block - build a classifier model to put on top of the convolutional model
    if include_top:

        # add a global spatial pooling layer (which seems to have the best performance)
        x = GlobalAveragePooling2D(name='GAP')(x)

        # add a fully-connected layer
        x = Dense(256, activation='relu', name='FC1')(x)

        # When random init is enabled, we want to include Dropout,
        # otherwise when loading a pre-trained HRA model we want to omit that layer,
        # so the visualisations are done properly (there is an issue if it is included)
        if weights is None:
            x = Dropout(0.5, name='DROPOUT')(x)
        # and a logistic layer with 2 outputs, one per class of the selected two-class HRA setting
        x = Dense(2, activation='softmax', name='PREDICTIONS')(x)

        model = Model(inputs=inputs,
                      outputs=x,
                      name='HRA-2CLASS-VGG16_Places365')

    else:
        model = Model(inputs=inputs,
                      outputs=x,
                      name='HRA-2CLASS-VGG16_Places365-NO-TOP')
        model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        return model

    if mode == 'feature_extraction':

        print('[INFO] Feature extraction mode. \n')

        if verbose == 1:
            print(
                '[INFO] Number of trainable weights before freezing the conv. base of the original pre-trained convnet: '
                '' + str(len(model.trainable_weights)))

        for layer in base_model.layers:
            layer.trainable = False

        if verbose == 1:
            print(
                '[INFO] Number of trainable weights after freezing the conv. base of the original pre-trained convnet: '
                '' + str(len(model.trainable_weights)))

        model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

    elif mode == 'fine_tuning':

        if nb_of_conv_layers_to_fine_tune == 1:
            # Uncomment for extra verbosity
            # print('[INFO] Fine-tuning of the last one (1) conv. layer. \n')

            if verbose == 1:
                print(
                    '[INFO] Number of trainable weights before unfreezing the last conv. layer of the model with the retrained classifier: '
                    '' + str(len(model.trainable_weights)))

            for layer in model.layers[:17]:
                layer.trainable = False
            for layer in model.layers[17:]:
                layer.trainable = True

            if verbose == 1:
                print(
                    '[INFO] Number of trainable weights after unfreezing the last conv. layer of the model with the retrained classifier: '
                    '' + str(len(model.trainable_weights)))

        elif nb_of_conv_layers_to_fine_tune == 2:
            # Uncomment for extra verbosity
            # print('[INFO] Fine-tuning of the last two (2) conv. layers. \n')
            if verbose == 1:
                print(
                    '[INFO] Number of trainable weights before unfreezing the last two (2) conv. layers of the model with the retrained classifier: '
                    '' + str(len(model.trainable_weights)))

            for layer in model.layers[:16]:
                layer.trainable = False
            for layer in model.layers[16:]:
                layer.trainable = True

            if verbose == 1:
                print(
                    '[INFO] Number of trainable weights after unfreezing the last two (2) conv. layers of the model with the retrained classifier: '
                    '' + str(len(model.trainable_weights)))

        elif nb_of_conv_layers_to_fine_tune == 3:
            # Uncomment for extra verbosity
            # print('[INFO] Fine-tuning of the last three (3) conv. layers. \n')
            if verbose == 1:
                print(
                    '[INFO] Number of trainable weights before unfreezing the last three (3) conv. layers of the model with the retrained classifier: '
                    '' + str(len(model.trainable_weights)))

            for layer in model.layers[:15]:
                layer.trainable = False
            for layer in model.layers[15:]:
                layer.trainable = True

            if verbose == 1:
                print(
                    '[INFO] Number of trainable weights after unfreezing the last three (3) conv. layers of the model with the retrained classifier: '
                    '' + str(len(model.trainable_weights)))
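
        # NOTE: the three branches above differ only in the freeze boundary.
        # With the standard VGG16 layer ordering (block5_conv3 at index 17),
        # fine-tuning n conv. layers is equivalent to:
        #
        #     boundary = 18 - nb_of_conv_layers_to_fine_tune
        #     for i, layer in enumerate(model.layers):
        #         layer.trainable = i >= boundary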

        model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

    if verbose == 1:
        model.summary()

    # load weights
    if weights == 'HRA':

        # Child labour
        if violation_class == 'cl':
            if include_top:
                if mode == 'feature_extraction':
                    weights_path = get_file(CL_FEATURE_EXTRACTION_FNAME,
                                            CL_WEIGHTS_FEATURE_EXTRACTION_PATH,
                                            cache_subdir=cache_subdir)

                elif mode == 'fine_tuning':

                    if nb_of_conv_layers_to_fine_tune == 1:
                        weights_path = get_file(CL_PATH_ONE_CONV_LAYER_FNAME,
                                                CL_WEIGHTS_PATH_ONE_CONV_LAYER,
                                                cache_subdir=cache_subdir)
                    elif nb_of_conv_layers_to_fine_tune == 2:
                        weights_path = get_file(
                            CL_PATH_TWO_CONV_LAYERS_FNAME,
                            CL_WEIGHTS_PATH_TWO_CONV_LAYERS,
                            cache_subdir=cache_subdir)
                    elif nb_of_conv_layers_to_fine_tune == 3:
                        weights_path = get_file(
                            CL_PATH_THREE_CONV_LAYERS_FNAME,
                            CL_WEIGHTS_PATH_THREE_CONV_LAYERS,
                            cache_subdir=cache_subdir)

            # no top
            else:
                if nb_of_conv_layers_to_fine_tune == 1:
                    weights_path = get_file(
                        CL_PATH_ONE_CONV_LAYER_NO_TOP_FNAME,
                        CL_WEIGHTS_PATH_ONE_CONV_LAYER_NO_TOP,
                        cache_subdir=cache_subdir)
                elif nb_of_conv_layers_to_fine_tune == 2:
                    weights_path = get_file(
                        CL_PATH_TWO_CONV_LAYERS_NO_TOP_FNAME,
                        CL_WEIGHTS_PATH_TWO_CONV_LAYERS_NO_TOP,
                        cache_subdir=cache_subdir)
                elif nb_of_conv_layers_to_fine_tune == 3:
                    weights_path = get_file(
                        CL_PATH_THREE_CONV_LAYERS_NO_TOP_FNAME,
                        CL_WEIGHTS_PATH_THREE_CONV_LAYERS_NO_TOP,
                        cache_subdir=cache_subdir)
        # Displaced populations
        elif violation_class == 'dp':
            if include_top:
                if mode == 'feature_extraction':
                    weights_path = get_file(DP_FEATURE_EXTRACTION_FNAME,
                                            DP_WEIGHTS_FEATURE_EXTRACTION_PATH,
                                            cache_subdir=cache_subdir)

                elif mode == 'fine_tuning':

                    if nb_of_conv_layers_to_fine_tune == 1:
                        weights_path = get_file(DP_PATH_ONE_CONV_LAYER_FNAME,
                                                DP_WEIGHTS_PATH_ONE_CONV_LAYER,
                                                cache_subdir=cache_subdir)
                    elif nb_of_conv_layers_to_fine_tune == 2:
                        weights_path = get_file(
                            DP_PATH_TWO_CONV_LAYERS_FNAME,
                            DP_WEIGHTS_PATH_TWO_CONV_LAYERS,
                            cache_subdir=cache_subdir)
                    elif nb_of_conv_layers_to_fine_tune == 3:
                        weights_path = get_file(
                            DP_PATH_THREE_CONV_LAYERS_FNAME,
                            DP_WEIGHTS_PATH_THREE_CONV_LAYERS,
                            cache_subdir=cache_subdir)

            # no top
            else:
                if nb_of_conv_layers_to_fine_tune == 1:
                    weights_path = get_file(
                        DP_PATH_ONE_CONV_LAYER_NO_TOP_FNAME,
                        DP_WEIGHTS_PATH_ONE_CONV_LAYER_NO_TOP,
                        cache_subdir=cache_subdir)
                elif nb_of_conv_layers_to_fine_tune == 2:
                    weights_path = get_file(
                        DP_PATH_TWO_CONV_LAYERS_NO_TOP_FNAME,
                        DP_WEIGHTS_PATH_TWO_CONV_LAYERS_NO_TOP,
                        cache_subdir=cache_subdir)
                elif nb_of_conv_layers_to_fine_tune == 3:
                    weights_path = get_file(
                        DP_PATH_THREE_CONV_LAYERS_NO_TOP_FNAME,
                        DP_WEIGHTS_PATH_THREE_CONV_LAYERS_NO_TOP,
                        cache_subdir=cache_subdir)

        model.load_weights(weights_path)

    elif weights is not None:
        model.load_weights(weights)

    return model
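
# `_obtain_train_mode` is defined elsewhere in this repository. Judging from
# the calls above, a plausible sketch (an assumption, not the verbatim
# implementation) maps the requested number of conv. layers to a training mode:
#
#     def _obtain_train_mode(nb_of_conv_layers_to_fine_tune):
#         if nb_of_conv_layers_to_fine_tune is None:
#             return 'feature_extraction'
#         return 'fine_tuning'
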
class FeatureExtraction(object):
    # NOTE: the original class header was not captured in this snippet; the
    # name `FeatureExtraction` is assumed from the docstring below.
    def __init__(self, pre_trained_model):
        """
        Base class for feature extraction.

        :param pre_trained_model: one of `VGG16`, `VGG19`, `ResNet50`, `VGG16_Places365`
        """

        # Base directory of raw jpg/png images
        base_dir = '/home/gkallia/git/Human-Rights-Archive-CNNs/datasets/Human_Rights_Archive_DB'

        train_dir = os.path.join(base_dir, 'train_val')
        test_dir = os.path.join(base_dir, 'test')
        self.nb_train_samples = 3050
        self.nb_test_samples = 270

        # human_rights_classes = ['arms', 'child_labour', 'child_marriage', 'detention_centres',
        #                         'disability_rights', 'displaced_populations', 'environment',
        #                         'no_violation', 'out_of_school']

        # Augmentation configuration with only rescaling.
        # Rescale is a value by which we multiply the data before any other
        # processing. Our original images consist of RGB coefficients in the
        # 0-255 range, but such values would be too high for our models to
        # process (given a typical learning rate), so we target values between
        # 0 and 1 instead by scaling with a 1/255 factor.
        datagen = ImageDataGenerator(rescale=1. / 255)

        img_width, img_height = 224, 224

        self.train_batch_size = 25
        self.test_batch_size = 15
        self.train_generator = datagen.flow_from_directory(
            train_dir,
            target_size=(img_width, img_height),
            class_mode='categorical',
            batch_size=self.train_batch_size)

        self.test_generator = datagen.flow_from_directory(
            test_dir,
            target_size=(img_width, img_height),
            class_mode='categorical',
            batch_size=self.test_batch_size)
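
        # With these batch sizes one pass over each split is an integer
        # number of batches: 3050 / 25 = 122 train steps and
        # 270 / 15 = 18 test steps.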

        if not (pre_trained_model
                in {'VGG16', 'VGG19', 'ResNet50', 'VGG16_Places365'}):
            raise ValueError(
                'The `pre_trained_model` argument should be either '
                '`VGG16`, `VGG19`, `ResNet50`, '
                'or `VGG16_Places365`. Other models will be supported in future releases. '
            )

        input_tensor = Input(shape=(224, 224, 3))

        # create the base pre-trained model for warm-up
        if pre_trained_model == 'VGG16':
            self.conv_base = VGG16(weights='imagenet',
                                   include_top=False,
                                   input_tensor=input_tensor)

        elif pre_trained_model == 'VGG19':
            self.conv_base = VGG19(weights='imagenet',
                                   include_top=False,
                                   input_tensor=input_tensor)

        elif pre_trained_model == 'ResNet50':
            self.conv_base = ResNet50(weights='imagenet',
                                      include_top=False,
                                      input_tensor=input_tensor)

        elif pre_trained_model == 'VGG16_Places365':
            self.conv_base = VGG16_Places365(weights='places',
                                             include_top=False,
                                             input_tensor=input_tensor)

        self.bottleneck_train_features_filename = 'bottleneck_train_features_' + pre_trained_model + '.npy'
        self.bottleneck_train_labels_filename = 'bottleneck_train_labels_' + pre_trained_model + '.npy'
        self.bottleneck_test_features_filename = 'bottleneck_test_features_' + pre_trained_model + '.npy'
        self.bottleneck_test_labels_filename = 'bottleneck_test_labels_' + pre_trained_model + '.npy'

        self.cache_subdir = 'HRA_models'
        self.pre_trained_model = pre_trained_model
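
    # The bottleneck filenames above imply a feature-dumping step performed
    # elsewhere in the class. A minimal sketch of such a method follows; the
    # method name is hypothetical and labels are left out for brevity.
    def extract_bottleneck_features(self):
        """Hypothetical sketch: run the frozen conv. base over both generators
        and save the resulting bottleneck features (assumes numpy as np)."""
        train_steps = self.nb_train_samples // self.train_batch_size
        test_steps = self.nb_test_samples // self.test_batch_size

        # a single forward pass through the frozen convolutional base
        train_features = self.conv_base.predict_generator(self.train_generator,
                                                          steps=train_steps)
        test_features = self.conv_base.predict_generator(self.test_generator,
                                                         steps=test_steps)

        np.save(self.bottleneck_train_features_filename, train_features)
        np.save(self.bottleneck_test_features_filename, test_features)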
Example #7
def EMOTIC_VAD_ResNet50(include_top=True, weights='emotic'):
    """Instantiates the EMOTIC_VAD_ResNet50 architecture.

    Optionally loads weights pre-trained
    on EMOTIC. Note that when using TensorFlow,
    for best performance you should set
    `image_data_format="channels_last"` in your Keras config
    at ~/.keras/keras.json.

    The model and the weights are compatible with both
    TensorFlow and Theano. The data format
    convention used by the model is the one
    specified in your Keras config file.

    # Arguments
        include_top: whether to include the 3 fully-connected
            layers at the top of the network.
        weights: one of `None` (random initialization),
                 'emotic' (pre-training on EMOTIC),
                 or the path to the weights file to be loaded.
    # Returns
        A Keras model instance.
    # Raises
        ValueError: in case of invalid argument for `weights`.
    """

    if not (weights in {'emotic', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `emotic` '
                         '(pre-training on EMOTIC dataset), '
                         'or the path to the weights file to be loaded.')

    # Keras requires unique layer names within a model, so the two inputs
    # cannot both be named 'INPUT'.
    body_inputs = Input(shape=(224, 224, 3), name='BODY_INPUT')
    image_inputs = Input(shape=(224, 224, 3), name='IMAGE_INPUT')

    # Body module
    tmp_model = ResNet50(include_top=False,
                         weights='imagenet',
                         input_tensor=body_inputs,
                         pooling='avg')

    body_truncated_model = Model(inputs=tmp_model.input,
                                 outputs=tmp_model.get_layer(index=169).output)

    # body_truncated_model = Model(inputs=tmp_model.input, outputs=tmp_model.get_layer('activation_48').output)

    # prefix layer names so they remain unique when the two branches are merged
    for layer in body_truncated_model.layers:
        layer.name = 'body-' + layer.name

    # Image module
    image_truncated_model = VGG16_Places365(include_top=False,
                                            weights='places',
                                            input_tensor=image_inputs,
                                            pooling='avg')

    for layer in image_truncated_model.layers:
        layer.name = 'image-' + layer.name

    # retrieve the outputs
    body_plain_model_output = body_truncated_model.output
    image_plain_model_output = image_truncated_model.output

    # Because ResNet50 was truncated above (cutting off its pooling layer), we add a global average pooling layer here to follow the process used for the other CNNs.
    body_plain_model_output = GlobalAveragePooling2D(
        name='GAP')(body_plain_model_output)

    merged = concatenate([body_plain_model_output, image_plain_model_output])
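
    # Assuming the usual feature widths (2048 channels after the ResNet50 GAP,
    # 512 from the average-pooled VGG16_Places365), `merged` is a 2560-d
    # fusion vector feeding the regression head below.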

    x = Dense(256,
              activation='relu',
              name='FC1',
              kernel_regularizer=regularizers.l2(0.01),
              kernel_initializer='random_normal')(merged)

    x = Dropout(0.5, name='DROPOUT')(x)

    vad_cont_prediction = Dense(units=3,
                                kernel_initializer='random_normal',
                                name='VAD')(x)

    # At model instantiation, you specify the two inputs and the output.
    model = Model(inputs=[body_inputs, image_inputs],
                  outputs=vad_cont_prediction,
                  name='EMOTIC-VAD-regression-ResNet50')

    for layer in body_truncated_model.layers:
        layer.trainable = False

    for layer in image_truncated_model.layers:
        layer.trainable = False
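
    # With both backbones frozen, only the fusion head
    # (FC1 -> DROPOUT -> VAD) is trained.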

    model.compile(optimizer=SGD(lr=1e-5, momentum=0.9),
                  loss=euclidean_distance_loss,
                  metrics=['mae', 'mse', rmse])

    # load weights
    if weights == 'emotic':
        if include_top:
            weights_path = get_file(
                'emotic_vad_ResNet50_weights_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH,
                cache_subdir='AbuseNet')
        else:
            weights_path = get_file(
                'emotic_vad_ResNet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
                WEIGHTS_PATH_NO_TOP,
                cache_subdir='AbuseNet')

        model.load_weights(weights_path)

    elif weights is not None:
        model.load_weights(weights)

    return model
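
# `euclidean_distance_loss` and `rmse` above are custom objects defined
# elsewhere in this repository. A common Keras-backend formulation looks like
# the sketch below (an assumption, not necessarily the repo's exact code):
#
#     from keras import backend as K
#
#     def euclidean_distance_loss(y_true, y_pred):
#         # L2 distance between the predicted and target VAD triplets
#         return K.sqrt(K.sum(K.square(y_pred - y_true), axis=-1))
#
#     def rmse(y_true, y_pred):
#         return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))
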
def HRA_VGG16_Places365(include_top=True,
                        weights='HRA',
                        input_tensor=None,
                        input_shape=None,
                        mode='fine_tuning',
                        pooling_mode='avg',
                        classes=9,
                        data_augm_enabled=False):
    """Instantiates the VGG16_Places365 architecture fine-tuned (2 steps) on Human Rights Archive dataset.

    Optionally loads weights pre-trained on Human Rights Archive Database.

    # Arguments
        include_top: whether to include the 3 fully-connected
            layers at the top of the network.
        weights: one of `None` (random initialization),
            'HRA' (pre-training on Human Rights Archive),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 48.
            E.g. `(200, 200, 3)` would be one valid value.
        mode: one of `feature_extraction` (freeze all but the penultimate layer and re-train the last Dense layer)
            or `fine_tuning` (unfreeze the lower convolutional layers and retrain more layers).
        pooling_mode: Pooling mode that will be applied to the output of the last convolutional layer of the original model,
            and thus the output of the model will be a 2D tensor.
            - `avg` means that a global average pooling operation for spatial data will be applied.
            - `max` means that a global max pooling operation for spatial data will be applied.
            - `flatten` means that the output of the last convolutional
                layer of the original model will be flattened,
                resulting in a larger Dense layer afterwards.
        classes: optional number of classes to classify images into.
        data_augm_enabled: whether to use the augmented samples during training.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`, or invalid input shape
        """
    if not (weights in {'HRA', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `HRA` '
                         '(pre-training on Human Rights Archive), '
                         'or the path to the weights file to be loaded.')

    if not (mode in {'feature_extraction', 'fine_tuning'}):
        raise ValueError(
            'The `mode` argument should be either '
            '`feature_extraction` (freeze all but the penultimate layer and re-train the last Dense layer), '
            'or `fine_tuning` (unfreeze the lower convolutional layers and retrain more layers). '
        )

    if not (pooling_mode in {'avg', 'max', 'flatten'}):
        raise ValueError(
            'The `pooling_mode` argument should be either '
            '`avg` (global average pooling), `max` (global max pooling), '
            'or `flatten` (the output will be flattened). ')

    if mode == 'feature_extraction' and include_top is False:
        raise ValueError(
            'The `include_top` argument can be set to False only '
            'when the `mode` argument is `fine_tuning`. '
            'Otherwise the returned model would simply be the stock '
            'keras-applications model rather than one trained on HRA.')

    cache_subdir = 'hra_models_fewer_params'

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=48,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Ensure that the model takes into account any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # create the base pre-trained model
    base_model = VGG16_Places365(weights='places',
                                 include_top=False,
                                 input_tensor=img_input)
    x = base_model.output

    # Classification block - build a classifier model to put on top of the convolutional model
    if include_top:

        # add a global spatial pooling layer, or flatten the output obtained from the original model
        if pooling_mode == 'avg':
            x = GlobalAveragePooling2D(name='GAP')(x)
        elif pooling_mode == 'max':
            x = GlobalMaxPooling2D(name='GMP')(x)
        elif pooling_mode == 'flatten':
            x = Flatten(name='FLATTEN')(x)
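
        # For a 224x224 input the last conv block yields a 7x7x512 tensor, so
        # `avg`/`max` give 512-d vectors while `flatten` gives a 25088-d one
        # (hence the larger FC1 mentioned in the docstring).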

        # add a fully-connected layer
        x = Dense(256, activation='relu', name='FC1')(x)

        # When random init is enabled, we want to include Dropout,
        # otherwise when loading a pre-trained HRA model we want to omit that layer,
        # so the visualisations are done properly (there is an issue if it is included)
        if weights is None:
            x = Dropout(0.5, name='DROPOUT')(x)
        # and a logistic layer with the number of classes defined by the `classes` argument
        x = Dense(classes, activation='softmax', name='PREDICTIONS')(x)

    model = Model(inputs=inputs, outputs=x, name='HRA-VGG16_Places365')

    # load weights
    if weights == 'HRA':
        if include_top:
            if mode == 'feature_extraction':
                for layer in base_model.layers:
                    layer.trainable = False

                model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                              loss='categorical_crossentropy')

                if data_augm_enabled:
                    if pooling_mode == 'avg':
                        weights_path = get_file(
                            AUGM_FEATURE_EXTRACTION_AVG_POOL_fname,
                            AUGM_FEATURE_EXTRACTION_AVG_POOL_WEIGHTS_PATH,
                            cache_subdir=cache_subdir)
                    elif pooling_mode == 'flatten':
                        weights_path = get_file(
                            AUGM_FEATURE_EXTRACTION_FLATTEN_POOL_fname,
                            AUGM_FEATURE_EXTRACTION_FLATTEN_POOL_WEIGHTS_PATH,
                            cache_subdir=cache_subdir)

                    elif pooling_mode == 'max':
                        weights_path = get_file(
                            AUGM_FEATURE_EXTRACTION_MAX_POOL_fname,
                            AUGM_FEATURE_EXTRACTION_MAX_POOL_WEIGHTS_PATH,
                            cache_subdir=cache_subdir)

                else:
                    if pooling_mode == 'avg':
                        weights_path = get_file(
                            FEATURE_EXTRACTION_AVG_POOL_fname,
                            FEATURE_EXTRACTION_AVG_POOL_WEIGHTS_PATH,
                            cache_subdir=cache_subdir)
                    elif pooling_mode == 'flatten':
                        weights_path = get_file(
                            FEATURE_EXTRACTION_FLATTEN_POOL_fname,
                            FEATURE_EXTRACTION_FLATTEN_POOL_WEIGHTS_PATH,
                            cache_subdir=cache_subdir)

                    elif pooling_mode == 'max':
                        weights_path = get_file(
                            FEATURE_EXTRACTION_MAX_POOL_fname,
                            FEATURE_EXTRACTION_MAX_POOL_WEIGHTS_PATH,
                            cache_subdir=cache_subdir)

            elif mode == 'fine_tuning':
                if pooling_mode == 'flatten':
                    for layer in model.layers[:17]:
                        layer.trainable = False
                    for layer in model.layers[17:]:
                        layer.trainable = True
                else:
                    for layer in model.layers[:16]:
                        layer.trainable = False
                    for layer in model.layers[16:]:
                        layer.trainable = True

                model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                              loss='categorical_crossentropy',
                              metrics=['accuracy'])

                if data_augm_enabled:
                    if pooling_mode == 'avg':
                        weights_path = get_file(
                            AUGM_FINE_TUNING_AVG_POOL_fname,
                            AUGM_FINE_TUNING_AVG_POOL_WEIGHTS_PATH,
                            cache_subdir=cache_subdir)
                    elif pooling_mode == 'flatten':
                        weights_path = get_file(
                            AUGM_FINE_TUNING_FLATTEN_POOL_fname,
                            AUGM_FINE_TUNING_FLATTEN_POOL_WEIGHTS_PATH,
                            cache_subdir=cache_subdir)

                    elif pooling_mode == 'max':
                        weights_path = get_file(
                            AUGM_FINE_TUNING_MAX_POOL_fname,
                            AUGM_FINE_TUNING_MAX_POOL_WEIGHTS_PATH,
                            cache_subdir=cache_subdir)

                else:
                    if pooling_mode == 'avg':
                        weights_path = get_file(
                            FINE_TUNING_AVG_POOL_fname,
                            FINE_TUNING_AVG_POOL_WEIGHTS_PATH,
                            cache_subdir=cache_subdir)
                    elif pooling_mode == 'flatten':
                        weights_path = get_file(
                            FINE_TUNING_FLATTEN_POOL_fname,
                            FINE_TUNING_FLATTEN_POOL_WEIGHTS_PATH,
                            cache_subdir=cache_subdir)

                    elif pooling_mode == 'max':
                        weights_path = get_file(
                            FINE_TUNING_MAX_POOL_fname,
                            FINE_TUNING_MAX_POOL_WEIGHTS_PATH,
                            cache_subdir=cache_subdir)

        else:
            weights_path = get_file(FINE_TUNING_WEIGHTS_PATH_NO_TOP_fname,
                                    FINE_TUNING_WEIGHTS_PATH_NO_TOP,
                                    cache_subdir=cache_subdir)

        model.load_weights(weights_path)

    elif weights is not None:
        model.load_weights(weights)

    return model
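
if __name__ == '__main__':
    # Example usage sketch: build the fine-tuned 9-class model with the
    # pre-trained HRA weights (downloads them on first use).
    example_model = HRA_VGG16_Places365(include_top=True,
                                        weights='HRA',
                                        mode='fine_tuning',
                                        pooling_mode='avg',
                                        classes=9)
    example_model.summary()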