Example 1
 def call(self, xs, mask=None):
     assert len(xs) == 2
     # separate out input matrices
     # x1, x2: (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE)
     x1, x2 = xs
     # build alignment matrix
     # alpha: (BATCH_SIZE, MAX_TIMESTEPS, MAX_TIMESTEPS)
     alpha = K.softmax(
         K.batch_dot(K.dot(x2, self.W), K.permute_dimensions(x1,
                                                             (0, 2, 1))))
     # build context vectors
     # c1, c2: (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE)
     c1 = K.repeat_elements(K.sum(K.batch_dot(alpha, x2),
                                  axis=1,
                                  keepdims=True),
                            self.max_timesteps,
                            axis=1)
     c2 = K.repeat_elements(K.sum(K.batch_dot(
         K.permute_dimensions(alpha, (0, 2, 1)), x1),
                                  axis=1,
                                  keepdims=True),
                            self.max_timesteps,
                            axis=1)
     # build attention vector
     # o1t, o2t: (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE)
     o1t = K.tanh(K.dot(K.concatenate([c1, x1], axis=2), self.U1))
     o2t = K.tanh(K.dot(K.concatenate([c2, x2], axis=2), self.U2))
     # masking
     if mask is not None and mask[0] is not None:
         o1t *= K.cast(
             K.repeat_elements(K.expand_dims(mask[0], axis=2), o1t.shape[2],
                               2), K.floatx())
     if mask is not None and mask[1] is not None:
         o2t *= K.cast(
             K.repeat_elements(K.expand_dims(mask[1], axis=2), o2t.shape[2],
                               2), K.floatx())
     # sum over timesteps
     # o1, o2: (BATCH_SIZE, EMBED_SIZE)
     o1 = K.sum(o1t, axis=1)
     o2 = K.sum(o2t, axis=1)
     # merge the attention vectors according to merge_mode
     if self.merge_mode == "concat":
         return concatenate([o1, o2], axis=1)
     elif self.merge_mode == "diff":
         return add([o1, -o2])
     elif self.merge_mode == "prod":
         return multiply([o1, o2])
     elif self.merge_mode == "avg":
         return average([o1, o2])
     else:  # max
         return maximum([o1, o2])
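The shape comments above can be checked offline. Below is a minimal NumPy sketch of the same computation, assuming hypothetical sizes B, T, E for BATCH_SIZE, MAX_TIMESTEPS and EMBED_SIZE, with random stand-ins for the layer's trainable weights W and U1:

import numpy as np

B, T, E = 2, 5, 4                      # hypothetical batch/timestep/embed sizes
x1 = np.random.rand(B, T, E)
x2 = np.random.rand(B, T, E)
W = np.random.rand(E, E)               # stand-in for self.W
U1 = np.random.rand(2 * E, E)          # stand-in for self.U1

# alignment matrix, softmax over the last axis: (B, T, T)
scores = np.einsum('bte,es,bus->btu', x2, W, x1)
alpha = np.exp(scores) / np.exp(scores).sum(axis=-1, keepdims=True)

# context vector, summed over timesteps and broadcast back: (B, T, E)
c1 = np.repeat(np.einsum('btu,bue->bte', alpha, x2).sum(axis=1, keepdims=True),
               T, axis=1)

# attention vector: (B, T, E)
o1t = np.tanh(np.concatenate([c1, x1], axis=2) @ U1)
assert o1t.shape == (B, T, E)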
Example 2
def Model_sent2tag_MLP_1(sentvocabsize, tagvocabsize, sent_W, tag_W, s2v_k,
                         tag2v_k):

    input_sent = Input(shape=(1, ), dtype='int32')
    sent_embedding = Embedding(input_dim=sentvocabsize,
                               output_dim=s2v_k,
                               input_length=1,
                               mask_zero=False,
                               trainable=False,
                               weights=[sent_W])(input_sent)

    input_tag = Input(shape=(1, ), dtype='int32')
    tag_embedding = Embedding(input_dim=tagvocabsize,
                              output_dim=tag2v_k,
                              input_length=1,
                              mask_zero=False,
                              trainable=False,
                              weights=[tag_W])(input_tag)

    x1_1 = Flatten()(sent_embedding)
    x2_0 = Flatten()(tag_embedding)

    # x1_1 = Dense(100, activation='tanh')(x1_0)

    sub = subtract([x2_0, x1_1])
    mul = multiply([x2_0, x1_1])
    maxi = maximum([x2_0, x1_1])  # avoid shadowing the builtin `max`
    avg = average([x2_0, x1_1])
    class_input = concatenate([x2_0, x1_1, sub, mul, maxi, avg], axis=-1)
    # class_input = Flatten()(class_input)
    class_mlp1 = Dense(200, activation='tanh')(class_input)
    class_mlp1 = Dropout(0.5)(class_mlp1)
    class_mlp2 = Dense(2)(class_mlp1)
    class_output = Activation('softmax', name='CLASS')(class_mlp2)

    # distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([mlp_x1_2, x2_0])
    # distance = dot([x1_0, x2_0], axes=-1, normalize=True)

    mymodel = Model([input_sent, input_tag], class_output)

    mymodel.compile(loss='categorical_crossentropy',
                    optimizer=optimizers.Adam(lr=0.001),
                    metrics=['acc'])

    return mymodel
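A quick smoke test, assuming the Keras imports used by the function are in scope, with random stand-ins for the pre-trained embedding matrices. Note that `s2v_k` must equal `tag2v_k`, since `subtract`/`multiply`/`maximum`/`average` require equal shapes:

import numpy as np

sentvocabsize, tagvocabsize = 100, 20   # hypothetical vocabulary sizes
s2v_k = tag2v_k = 50                    # embedding dims must match
sent_W = np.random.rand(sentvocabsize, s2v_k)
tag_W = np.random.rand(tagvocabsize, tag2v_k)

model = Model_sent2tag_MLP_1(sentvocabsize, tagvocabsize,
                             sent_W, tag_W, s2v_k, tag2v_k)
preds = model.predict([np.array([[1], [2]]), np.array([[3], [4]])])
print(preds.shape)  # (2, 2): one softmax over 2 classes per pair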
Example 3
 def call(self, xs, mask=None):
     assert len(xs) == 2
     # separate out input matrices
     # x1.shape == (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE)
     # x2.shape == (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE)
     x1, x2 = xs
     # build alignment matrix
     alpha = K.softmax(K.batch_dot(x1, x2, axes=(2, 2)))
     # align inputs
     # a1t, a2t: (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE)
     a1t = K.batch_dot(alpha, x2, axes=(1, 1))
     a2t = K.batch_dot(alpha, x1, axes=(2, 1))
     # produce aligned outputs
     # o1t, o2t: (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE)
     o1t = K.tanh(K.dot(x1, self.U1) + K.dot(a1t, self.V1))
     o2t = K.tanh(K.dot(x2, self.U2) + K.dot(a2t, self.V2))
     # masking
     if mask is not None and mask[0] is not None:
         o1t *= K.cast(
             K.repeat_elements(K.expand_dims(mask[0], axis=2), o1t.shape[2],
                               2), K.floatx())
     if mask is not None and mask[1] is not None:
         o2t *= K.cast(
             K.repeat_elements(K.expand_dims(mask[1], axis=2), o2t.shape[2],
                               2), K.floatx())
     # o1, o2: (BATCH_SIZE, EMBED_SIZE)
     o1 = K.mean(o1t, axis=1)
     o2 = K.mean(o2t, axis=1)
     # merge the attention vectors according to merge_mode
     if self.merge_mode == "concat":
         return concatenate([o1, o2], axis=1)
     elif self.merge_mode == "diff":
         return add([o1, -o2])
     elif self.merge_mode == "prod":
         return multiply([o1, o2])
     elif self.merge_mode == "avg":
         return average([o1, o2])
     else:  # max
         return maximum([o1, o2])
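The `axes` arguments to `K.batch_dot` above are easy to misread. The following NumPy/einsum sketch spells out, on my reading of the Keras backend semantics (worth double-checking), which time axes each call contracts; raw scores stand in for the softmaxed alpha, since the shapes are identical:

import numpy as np

B, T, E = 2, 5, 4
x1 = np.random.rand(B, T, E)
x2 = np.random.rand(B, T, E)

# K.batch_dot(x1, x2, axes=(2, 2)) contracts the embedding axes: (B, T, T)
scores = np.einsum('bte,bse->bts', x1, x2)

# K.batch_dot(alpha, x2, axes=(1, 1)) contracts alpha's first time axis
a1t = np.einsum('bst,bse->bte', scores, x2)   # (B, T, E)

# K.batch_dot(alpha, x1, axes=(2, 1)) contracts alpha's second time axis
a2t = np.einsum('bts,bse->bte', scores, x1)   # (B, T, E)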
Example 4
def CompoundNet_VGG19(include_top=True,
                      weights=None,
                      input_tensor=None,
                      input_shape=None,
                      fusion_strategy='concatenate',
                      mode='fine_tuning',
                      pooling_mode='avg',
                      classes=9,
                      data_augm_enabled=False):
    """Instantiates the CompoundNet VGG19 architecture fine-tuned (2 steps) on Human Rights Archive dataset.

        Optionally loads weights pre-trained on Human Rights Archive Database.

        # Arguments
            include_top: whether to include the fully-connected
                classifier layers at the top of the network.
            weights: one of `None` (random initialization),
                'HRA' (pre-training on Human Rights Archive),
                or the path to the weights file to be loaded.
            input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
                to use as image input for the model.
            input_shape: optional shape tuple, only to be specified
                if `include_top` is False (otherwise the input shape
                has to be `(224, 224, 3)` (with `channels_last` data format)
                or `(3, 224, 224)` (with `channels_first` data format)).
                It should have exactly 3 input channels,
                and width and height should be no smaller than 48.
                E.g. `(200, 200, 3)` would be one valid value.
            fusion_strategy: one of `concatenate` (feature vectors of different sources are concatenated into one super-vector),
                `average` (the feature set is averaged)
                or `maximum` (selects the highest value from the corresponding features).
            mode: one of `feature_extraction` (freeze all but the penultimate layer and re-train the last Dense layer)
                or `fine_tuning` (unfreeze the lower convolutional layers and retrain more layers).
            pooling_mode: pooling applied to the merged feature maps
                before the fully-connected classifier
                when `include_top` is True.
                - `avg` means that global average pooling
                    will be applied to the output of the
                    merging layer.
                - `max` means that global max pooling will
                    be applied.
                - `flatten` means that the output of the
                    merging layer will be flattened.
            classes: optional number of classes to classify images into,
                only to be specified if `weights` argument is `None`.
            data_augm_enabled: whether to use the augmented samples during training.

        # Returns
            A Keras model instance.

        # Raises
            ValueError: in case of invalid argument for `weights`.
        """

    if not (weights in {'HRA', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `HRA` '
                         '(pre-training on Human Rights Archive), '
                         'or the path to the weights file to be loaded.')

    if not (fusion_strategy in {'concatenate', 'average', 'maximum'}):
        raise ValueError(
            'The `fusion_strategy` argument should be either '
            '`concatenate` (feature vectors of different sources are concatenated into one super-vector), '
            '`average` (the feature set is averaged) '
            'or `maximum` (selects the highest value from the corresponding features).'
        )

    if not (pooling_mode in {'avg', 'max', 'flatten'}):
        raise ValueError('The `pooling_mode` argument should be either '
                         '`avg` (GlobalAveragePooling2D), `max` '
                         '(GlobalMaxPooling2D), '
                         'or `flatten` (Flatten).')

    if weights == 'HRA' and classes != 9:
        raise ValueError(
            'If using `weights` as Human Rights Archive, `classes` should be 9.'
        )

    cache_subdir = 'HRA_models'

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=48,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        input_tensor = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        input_tensor = Input(tensor=input_tensor, shape=input_shape)

    object_centric_model = VGG19(input_tensor=input_tensor,
                                 weights='imagenet',
                                 include_top=False)

    scene_centric_model = VGG16_Places365(input_tensor=input_tensor,
                                          weights='places',
                                          include_top=False)

    # retrieve the outputs
    object_model_output = object_centric_model.output
    scene_model_output = scene_centric_model.output

    # We will feed the extracted features to a merging layer
    if fusion_strategy == 'concatenate':
        merged = concatenate([object_model_output, scene_model_output])

    elif fusion_strategy == 'average':
        merged = average([object_model_output, scene_model_output])

    else:
        merged = maximum([object_model_output, scene_model_output])

    if include_top:
        if pooling_mode == 'avg':
            x = GlobalAveragePooling2D(name='GAP')(merged)
        elif pooling_mode == 'max':
            x = GlobalMaxPooling2D(name='GMP')(merged)
        elif pooling_mode == 'flatten':
            x = Flatten(name='FLATTEN')(merged)

        x = Dense(256, activation='relu',
                  name='FC1')(x)  # let's add a fully-connected layer

        # When random init is enabled, we want to include Dropout,
        # otherwise when loading a pre-trained HRA model we want to omit
        # Dropout layer so the visualisations are done properly (there is an issue if it is included)
        if weights is None:
            x = Dropout(0.5, name='DROPOUT')(x)
        # and a logistic layer with the number of classes defined by the `classes` argument
        x = Dense(classes, activation='softmax',
                  name='PREDICTIONS')(x)  # new softmax layer
    else:
        # no classifier head requested; expose the merged feature maps
        x = merged

    # Ensure that the model takes into account any potential predecessors of `input_tensor`.
    inputs = get_source_inputs(input_tensor)

    # this is the transfer learning model we will train
    model = Model(inputs=inputs, outputs=x, name='CompoundNet-VGG19')

    # load weights
    if weights == 'HRA':
        if include_top:
            if mode == 'feature_extraction':
                for layer in object_centric_model.layers:
                    layer.trainable = False

                for layer in scene_centric_model.layers:
                    layer.trainable = False

                model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                              loss='categorical_crossentropy')

                if data_augm_enabled:

                    if fusion_strategy == 'concatenate':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_AVG_POOL_fname,
                                AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_FLATTEN_fname,
                                AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_MAX_POOL_fname,
                                AUGM_FEATURE_EXTRACTION_CONCATENATE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'average':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_AVG_POOL_fname,
                                AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_FLATTEN_fname,
                                AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_MAX_POOL_fname,
                                AUGM_FEATURE_EXTRACTION_AVERAGE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'maximum':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_AVG_POOL_fname,
                                AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_FLATTEN_fname,
                                AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_MAX_POOL_fname,
                                AUGM_FEATURE_EXTRACTION_MAXIMUM_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)
                else:
                    if fusion_strategy == 'concatenate':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_CONCATENATE_FUSION_AVG_POOL_fname,
                                FEATURE_EXTRACTION_CONCATENATE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_CONCATENATE_FUSION_FLATTEN_fname,
                                FEATURE_EXTRACTION_CONCATENATE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_CONCATENATE_FUSION_MAX_POOL_fname,
                                FEATURE_EXTRACTION_CONCATENATE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'average':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_AVERAGE_FUSION_AVG_POOL_fname,
                                FEATURE_EXTRACTION_AVERAGE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_AVERAGE_FUSION_FLATTEN_fname,
                                FEATURE_EXTRACTION_AVERAGE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_AVERAGE_FUSION_MAX_POOL_fname,
                                FEATURE_EXTRACTION_AVERAGE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'maximum':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_MAXIMUM_FUSION_AVG_POOL_fname,
                                FEATURE_EXTRACTION_MAXIMUM_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_MAXIMUM_FUSION_FLATTEN_fname,
                                FEATURE_EXTRACTION_MAXIMUM_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                FEATURE_EXTRACTION_MAXIMUM_FUSION_MAX_POOL_fname,
                                FEATURE_EXTRACTION_MAXIMUM_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

            elif mode == 'fine_tuning':
                for layer in model.layers[:36]:
                    layer.trainable = False
                for layer in model.layers[36:]:
                    layer.trainable = True

                model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                              loss='categorical_crossentropy')

                if data_augm_enabled:

                    if fusion_strategy == 'concatenate':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_CONCATENATE_FUSION_AVG_POOL_fname,
                                AUGM_FINE_TUNING_CONCATENATE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_CONCATENATE_FUSION_FLATTEN_fname,
                                AUGM_FINE_TUNING_CONCATENATE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_CONCATENATE_FUSION_MAX_POOL_fname,
                                AUGM_FINE_TUNING_CONCATENATE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'average':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_AVERAGE_FUSION_AVG_POOL_fname,
                                AUGM_FINE_TUNING_AVERAGE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_AVERAGE_FUSION_FLATTEN_fname,
                                AUGM_FINE_TUNING_AVERAGE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_AVERAGE_FUSION_MAX_POOL_fname,
                                AUGM_FINE_TUNING_AVERAGE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'maximum':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_MAXIMUM_FUSION_AVG_POOL_fname,
                                AUGM_FINE_TUNING_MAXIMUM_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_MAXIMUM_FUSION_FLATTEN_fname,
                                AUGM_FINE_TUNING_MAXIMUM_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                AUGM_FINE_TUNING_MAXIMUM_FUSION_MAX_POOL_fname,
                                AUGM_FINE_TUNING_MAXIMUM_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)
                else:

                    if fusion_strategy == 'concatenate':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                FINE_TUNING_CONCATENATE_FUSION_AVG_POOL_fname,
                                FINE_TUNING_CONCATENATE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                FINE_TUNING_CONCATENATE_FUSION_FLATTEN_fname,
                                FINE_TUNING_CONCATENATE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                FINE_TUNING_CONCATENATE_FUSION_MAX_POOL_fname,
                                FINE_TUNING_CONCATENATE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'average':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                FINE_TUNING_AVERAGE_FUSION_AVG_POOL_fname,
                                FINE_TUNING_AVERAGE_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                FINE_TUNING_AVERAGE_FUSION_FLATTEN_fname,
                                FINE_TUNING_AVERAGE_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                FINE_TUNING_AVERAGE_FUSION_MAX_POOL_fname,
                                FINE_TUNING_AVERAGE_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                    elif fusion_strategy == 'maximum':
                        if pooling_mode == 'avg':
                            weights_path = get_file(
                                FINE_TUNING_MAXIMUM_FUSION_AVG_POOL_fname,
                                FINE_TUNING_MAXIMUM_FUSION_AVG_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'flatten':
                            weights_path = get_file(
                                FINE_TUNING_MAXIMUM_FUSION_FLATTEN_fname,
                                FINE_TUNING_MAXIMUM_FUSION_FLATTEN_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

                        elif pooling_mode == 'max':
                            weights_path = get_file(
                                FINE_TUNING_MAXIMUM_FUSION_MAX_POOL_fname,
                                FINE_TUNING_MAXIMUM_FUSION_MAX_POOL_WEIGHTS_PATH,
                                cache_subdir=cache_subdir)

        else:
            if fusion_strategy == 'average':
                weights_path = get_file(
                    FINE_TUNING_AVERAGE_FUSION_NO_TOP_fname,
                    FINE_TUNING_AVERAGE_FUSION_WEIGHTS_PATH_NO_TOP,
                    cache_subdir=cache_subdir)

            elif fusion_strategy == 'concatenate':
                weights_path = get_file(
                    FINE_TUNING_CONCATENATE_FUSION_NO_TOP_fname,
                    FINE_TUNING_CONCATENATE_FUSION_WEIGHTS_PATH_NO_TOP,
                    cache_subdir=cache_subdir)

            elif fusion_strategy == 'maximum':
                weights_path = get_file(
                    FINE_TUNING_MAXIMUM_FUSION_NO_TOP_fname,
                    FINE_TUNING_MAXIMUM_FUSION_WEIGHTS_PATH_NO_TOP,
                    cache_subdir=cache_subdir)

        model.load_weights(weights_path)

    return model
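A hypothetical construction call; it assumes this module's imports (`VGG19`, `VGG16_Places365`, the HRA weight-file constants) are available. With `weights=None` the loading branch is skipped and the model comes back uncompiled, so compile it before training:

model = CompoundNet_VGG19(weights=None,
                          fusion_strategy='average',
                          pooling_mode='avg',
                          classes=9)
model.compile(optimizer='sgd', loss='categorical_crossentropy')
model.summary()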
Example 5
def compoundNet_feature_extraction(object_centric_model='VGG16',
                                   scene_centric_model='VGG16_Places365',
                                   fusion_strategy='concatenate',
                                   pooling_mode='avg',
                                   classes=9,
                                   data_augm_enabled=False):
    """ConvNet as fixed feature extractor, consist of taking the convolutional base of a previously-trained network,
    running the new data through it, and training a new classifier on top of the output.
    (i.e. train only the randomly initialized top layers while freezing all convolutional layers of the original model).

    # Arguments
        object_centric_model: one of `VGG16`, `VGG19` or `ResNet50`
        scene_centric_model: `VGG16_Places365`
        fusion_strategy: one of `concatenate` (feature vectors of different sources are concatenated into one super-vector),
            `average` (the feature set is averaged) or `maximum` (selects the highest value from the corresponding features).
        pooling_mode: pooling applied to the merged feature maps
            before the fully-connected classifier.
            - `avg` means that global average pooling
                will be applied to the output of the
                merging layer.
            - `max` means that global max pooling will
                be applied.
            - `flatten` means that the output of the
                merging layer will be flattened.
        classes: optional number of classes to classify images into,
            only to be specified if `weights` argument is `None`.
        data_augm_enabled: whether to use the augmented samples during training.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `object_centric_model`, `pooling_mode`,
        `fusion_strategy` , `scene_centric_model` or invalid input shape.
    """

    if not (object_centric_model in {'VGG16', 'VGG19', 'ResNet50'}):
        raise ValueError(
            'The `object_centric_model` argument should be either '
            '`VGG16`, `VGG19` or `ResNet50`. Other models will be supported in future releases.'
        )

    if not (pooling_mode in {'avg', 'max', 'flatten'}):
        raise ValueError('The `pooling_mode` argument should be either '
                         '`avg` (GlobalAveragePooling2D), `max` '
                         '(GlobalMaxPooling2D), '
                         'or `flatten` (Flatten).')

    if not (fusion_strategy in {'concatenate', 'average', 'maximum'}):
        raise ValueError(
            'The `fusion_strategy` argument should be either '
            '`concatenate` (feature vectors of different sources are concatenated into one super-vector),'
            ' `average` (the feature set is averaged) '
            'or `maximum` (selects the highest value from the corresponding features).'
        )

    if not (scene_centric_model in {'VGG16_Places365'}):
        raise ValueError(
            'The `scene_centric_model` argument should be '
            '`VGG16_Places365`. Other models will be supported in future releases.'
        )

    # Define the name of the model and its weights
    weights_name = 'compoundNet_feature_extraction_' \
                   + object_centric_model + '_' \
                   + fusion_strategy + '_fusion_' \
                   + pooling_mode + '_pool_weights_tf_dim_ordering_tf_kernels.h5'


    augm_samples_weights_name = 'augm_compoundNet_feature_extraction_' \
                                + object_centric_model + '_' \
                                + fusion_strategy + '_fusion_' \
                                + pooling_mode + '_pool_weights_tf_dim_ordering_tf_kernels.h5'

    model_log = logs_dir + 'compoundNet_feature_extraction_' \
                + object_centric_model + '_' \
                + fusion_strategy + '_fusion_' \
                + pooling_mode + '_pool_log.csv'
    csv_logger = CSVLogger(model_log, append=True, separator=',')

    input_tensor = Input(shape=(224, 224, 3))

    # create the base object_centric_model pre-trained model for warm-up
    if object_centric_model == 'VGG16':
        object_base_model = VGG16(input_tensor=input_tensor,
                                  weights='imagenet',
                                  include_top=False)

    elif object_centric_model == 'VGG19':
        object_base_model = VGG19(input_tensor=input_tensor,
                                  weights='imagenet',
                                  include_top=False)

    elif object_centric_model == 'ResNet50':
        tmp_model = ResNet50(input_tensor=input_tensor,
                             weights='imagenet',
                             include_top=False)
        object_base_model = Model(
            inputs=tmp_model.input,
            outputs=tmp_model.get_layer('activation_48').output)

    print('\n \n')
    print('The plain, object-centric `' + object_centric_model +
          '` pre-trained convnet was successfully initialised.\n')

    scene_base_model = VGG16_Places365(input_tensor=input_tensor,
                                       weights='places',
                                       include_top=False)

    print('The plain, scene-centric `' + scene_centric_model +
          '` pre-trained convnet was successfully initialised.\n')

    # retrieve the outputs
    object_base_model_output = object_base_model.output
    scene_base_model_output = scene_base_model.output

    # We will feed the extracted features to a merging layer
    if fusion_strategy == 'concatenate':
        merged = concatenate(
            [object_base_model_output, scene_base_model_output])

    elif fusion_strategy == 'average':
        merged = average([object_base_model_output, scene_base_model_output])

    else:
        merged = maximum([object_base_model_output, scene_base_model_output])

    if pooling_mode == 'avg':
        x = GlobalAveragePooling2D(name='GAP')(merged)
    elif pooling_mode == 'max':
        x = GlobalMaxPooling2D(name='GMP')(merged)
    elif pooling_mode == 'flatten':
        x = Flatten(name='FLATTEN')(merged)

    x = Dense(256, activation='relu',
              name='FC1')(x)  # let's add a fully-connected layer

    # When random init is enabled, we want to include Dropout,
    # otherwise when loading a pre-trained HRA model we want to omit
    # Dropout layer so the visualisations are done properly (there is an issue if it is included)
    x = Dropout(0.5, name='DROPOUT')(x)
    # and a logistic layer with the number of classes defined by the `classes` argument
    predictions = Dense(classes, activation='softmax',
                        name='PREDICTIONS')(x)  # new softmax layer

    # this is the transfer learning model we will train
    model = Model(inputs=object_base_model.input, outputs=predictions)

    print(
        'Randomly initialised classifier was successfully added on top of the merged outputs. \n'
    )

    print(
        'Number of trainable weights before freezing the conv. bases of the respective original models: '
        '' + str(len(model.trainable_weights)))

    # first: train only the top layers (which were randomly initialized)
    # i.e. freeze all convolutional layers of the preliminary base model
    for layer in object_base_model.layers:
        layer.trainable = False

    for layer in scene_base_model.layers:
        layer.trainable = False

    print(
        'Number of trainable weights after freezing the conv. bases of the respective original models: '
        '' + str(len(model.trainable_weights)))

    print('\n')

    # compile the warm_up_model (should be done *after* setting layers to non-trainable)

    model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    # # The attribute model.metrics_names will give you the display labels for the scalar outputs.
    # print warm_up_model.metrics_names

    if data_augm_enabled:
        print(
            'Using augmented samples for training. This may take a while ! \n')

        t = now()

        history = model.fit_generator(augmented_train_generator,
                                      steps_per_epoch=nb_train_samples //
                                      batch_size,
                                      epochs=feature_extraction_epochs,
                                      callbacks=[csv_logger],
                                      class_weight=class_weight)

        print(
            'Training time for re-training the last Dense layer using augmented samples: %s'
            % (now() - t))

        model.save_weights(feature_extraction_dir + augm_samples_weights_name)
        print('Model weights using augmented samples were saved as `' +
              augm_samples_weights_name + '`')
        print('\n')

    else:
        t = now()
        history = model.fit_generator(train_generator,
                                      steps_per_epoch=nb_train_samples //
                                      batch_size,
                                      epochs=feature_extraction_epochs,
                                      callbacks=[csv_logger],
                                      class_weight=class_weight)

        print('Training time for re-training the last Dense layer: %s' %
              (now() - t))

        model.save_weights(feature_extraction_dir + weights_name)
        print('Model weights were saved as `' + weights_name + '`')
        print('\n')

    return model
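The three `fusion_strategy` options differ only in how the two 4D feature maps are combined. A small sketch of the resulting shapes, using `(7, 7, 512)` stand-ins for the backbone outputs (what VGG16/VGG19 emit for a 224x224 input; the truncated ResNet50 differs):

from keras import backend as K
from keras.layers import Input, concatenate, average, maximum

obj_feat = Input(shape=(7, 7, 512))   # stand-in for the object-centric stream
scn_feat = Input(shape=(7, 7, 512))   # stand-in for the scene-centric stream

print(K.int_shape(concatenate([obj_feat, scn_feat])))  # (None, 7, 7, 1024) super-vector
print(K.int_shape(average([obj_feat, scn_feat])))      # (None, 7, 7, 512)
print(K.int_shape(maximum([obj_feat, scn_feat])))      # (None, 7, 7, 512)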
Example 6
def ParallelDenseNet121(num_inputs=4,
                        input_size=224,
                        nchannels=3,
                        nb_dense_block=4,
                        growth_rate=32,
                        nb_filter=64,
                        reduction=0.0,
                        dropout_rate=0.0,
                        weight_decay=1e-4,
                        classes=1000,
                        num_gpu=1):
    '''Instantiate a multi-input (parallel) DenseNet-121 architecture.
        # Arguments
            num_inputs: number of parallel input branches
            input_size: spatial size of the (square) input images
            nchannels: number of image channels
            nb_dense_block: number of dense blocks to add to end
            growth_rate: number of filters to add per dense block
            nb_filter: initial number of filters
            reduction: reduction factor of transition blocks.
            dropout_rate: dropout rate
            weight_decay: weight decay factor
            classes: optional number of classes to classify images
            num_gpu: number of GPUs to spread the parallel branches across
        # Returns
            A Keras model instance.
    '''
    eps = 1.1e-5

    # compute compression factor
    compression = 1.0 - reduction

    # Handle Dimension Ordering for different backends
    global concat_axis
    if K.image_dim_ordering() == 'tf':
        concat_axis = 3
        img_inputs = [
            Input(shape=(input_size, input_size, nchannels), name=f'input_{i}')
            for i in range(num_inputs)
        ]
    else:
        concat_axis = 1
        img_inputs = [
            Input(shape=(nchannels, input_size, input_size),
                  name=f'input_{i}') for i in range(num_inputs)
        ]

    # From architecture for ImageNet (Table 1 in the paper)
    nb_layers = [6, 12, 24, 16]  # For DenseNet-121

    # Initial convolution
    init_conv_layers = [
        ZeroPadding2D((3, 3)),
        Convolution2D(nb_filter, 7, 7, subsample=(2, 2), bias=False),
        BatchNormalization(epsilon=eps, axis=concat_axis),
        Scale(axis=concat_axis),
        Activation('relu'),
        ZeroPadding2D((1, 1)),
        MaxPooling2D((3, 3), strides=(2, 2), name='pool1')
    ]
    x = img_inputs.copy()
    x = allocate_layers(x=x, layers=init_conv_layers, num_gpu=num_gpu)
    #     for j in range(len(x)):
    #         for layer in init_conv_layers:
    #             x[j] = layer(x[j])

    # Add dense blocks
    for block_idx in range(nb_dense_block - 1):
        stage = block_idx + 2
        x, nb_filter = dense_block(x,
                                   stage,
                                   nb_layers[block_idx],
                                   nb_filter,
                                   growth_rate,
                                   dropout_rate=dropout_rate,
                                   weight_decay=weight_decay,
                                   num_gpu=num_gpu)

        # Add transition_block
        x = transition_block(x,
                             stage,
                             nb_filter,
                             compression=compression,
                             dropout_rate=dropout_rate,
                             weight_decay=weight_decay,
                             num_gpu=num_gpu)
        nb_filter = int(nb_filter * compression)

    final_stage = stage + 1
    x, nb_filter = dense_block(x,
                               final_stage,
                               nb_layers[-1],
                               nb_filter,
                               growth_rate,
                               dropout_rate=dropout_rate,
                               weight_decay=weight_decay,
                               num_gpu=num_gpu)

    top_activation = 'softmax' if classes > 1 else 'sigmoid'

    final_conv_layers = [
        BatchNormalization(epsilon=eps, axis=concat_axis),
        Scale(axis=concat_axis),
        Activation('relu'),
        GlobalAveragePooling2D(name='pool' + str(final_stage)),
        Dense(classes),
        Activation(top_activation, name='prob')
    ]
    x = allocate_layers(x=x, layers=final_conv_layers, num_gpu=num_gpu)
    #     for j in range(len(x)):
    #         for layer in finl_conv_layers:
    #             x[j] = layer(x[j])

    x = merge.maximum(x)
    model = Model(img_inputs, x, name='parallel_dense')

    return model
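`allocate_layers` is not defined in this snippet. Judging from the commented-out loops it replaced, a minimal single-device version might look like the sketch below; a real multi-GPU implementation would additionally pin each branch to a device (e.g. with `tf.device`) when `num_gpu > 1`:

def allocate_layers(x, layers, num_gpu=1):
    # Apply the shared layer stack to every input branch, reusing the
    # same layer instances (and therefore the same weights) across branches.
    outputs = []
    for branch in x:
        for layer in layers:
            branch = layer(branch)
        outputs.append(branch)
    return outputs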
Example 7
def Model3_LSTM_BiLSTM_LSTM(wordvocabsize,
                            targetvocabsize,
                            charvobsize,
                            word_W,
                            char_W,
                            input_fragment_lenth,
                            input_leftcontext_lenth,
                            input_rightcontext_lenth,
                            input_maxword_length,
                            w2v_k,
                            c2v_k,
                            hidden_dim=200,
                            batch_size=32,
                            optimizer='rmsprop'):
    hidden_dim = 100  # note: fixed here, overriding the hidden_dim argument above

    word_input_fragment = Input(shape=(input_fragment_lenth, ), dtype='int32')
    word_embedding_fragment = Embedding(input_dim=wordvocabsize + 1,
                                        output_dim=w2v_k,
                                        input_length=input_fragment_lenth,
                                        mask_zero=False,
                                        trainable=True,
                                        weights=[word_W])(word_input_fragment)
    word_embedding_fragment = Dropout(0.5)(word_embedding_fragment)

    char_input_fragment = Input(shape=(
        input_fragment_lenth,
        input_maxword_length,
    ),
                                dtype='int32')
    char_embedding_fragment = TimeDistributed(
        Embedding(input_dim=charvobsize,
                  output_dim=c2v_k,
                  batch_input_shape=(batch_size, input_fragment_lenth,
                                     input_maxword_length),
                  mask_zero=False,
                  trainable=True,
                  weights=[char_W]))(char_input_fragment)

    char_cnn_fragment = TimeDistributed(
        Conv1D(50, 3, activation='relu', padding='valid'))
    char_embedding_fragment = char_cnn_fragment(char_embedding_fragment)
    char_embedding_fragment = TimeDistributed(
        GlobalMaxPooling1D())(char_embedding_fragment)
    char_embedding_fragment = Dropout(0.25)(char_embedding_fragment)

    word_input_leftcontext = Input(shape=(input_leftcontext_lenth, ),
                                   dtype='int32')
    word_embedding_leftcontext = Embedding(
        input_dim=wordvocabsize + 1,
        output_dim=w2v_k,
        input_length=input_leftcontext_lenth,
        mask_zero=True,
        trainable=True,
        weights=[word_W])(word_input_leftcontext)
    word_embedding_leftcontext = Dropout(0.5)(word_embedding_leftcontext)

    char_input_leftcontext = Input(shape=(
        input_leftcontext_lenth,
        input_maxword_length,
    ),
                                   dtype='int32')
    char_input_rightcontext = Input(shape=(
        input_rightcontext_lenth,
        input_maxword_length,
    ),
                                    dtype='int32')

    word_input_rightcontext = Input(shape=(input_rightcontext_lenth, ),
                                    dtype='int32')
    word_embedding_rightcontext = Embedding(
        input_dim=wordvocabsize + 1,
        output_dim=w2v_k,
        input_length=input_rightcontext_lenth,
        mask_zero=True,
        trainable=True,
        weights=[word_W])(word_input_rightcontext)
    word_embedding_rightcontext = Dropout(0.5)(word_embedding_rightcontext)

    embedding_fragment = concatenate(
        [word_embedding_fragment, char_embedding_fragment], axis=-1)
    embedding_leftcontext = word_embedding_leftcontext
    embedding_rightcontext = word_embedding_rightcontext

    LSTM_leftcontext = LSTM(hidden_dim, go_backwards=False,
                            activation='tanh')(embedding_leftcontext)
    Rep_LSTM_leftcontext = RepeatVector(input_fragment_lenth)(LSTM_leftcontext)
    LSTM_rightcontext = LSTM(hidden_dim, go_backwards=True,
                             activation='tanh')(embedding_rightcontext)
    Rep_LSTM_rightcontext = RepeatVector(input_fragment_lenth)(
        LSTM_rightcontext)

    BiLSTM_fragment = Bidirectional(LSTM(hidden_dim // 2,
                                         activation='tanh',
                                         return_sequences=True),
                                    merge_mode='concat')(embedding_fragment)
    # use the RepeatVector'd context states so shapes match BiLSTM_fragment
    context_ADD = add(
        [Rep_LSTM_leftcontext, BiLSTM_fragment, Rep_LSTM_rightcontext])
    context_subtract_l = subtract([BiLSTM_fragment, Rep_LSTM_leftcontext])
    context_subtract_r = subtract([BiLSTM_fragment, Rep_LSTM_rightcontext])
    context_average = average(
        [Rep_LSTM_leftcontext, BiLSTM_fragment, Rep_LSTM_rightcontext])
    context_maximum = maximum(
        [Rep_LSTM_leftcontext, BiLSTM_fragment, Rep_LSTM_rightcontext])

    embedding_mix = concatenate([
        embedding_fragment, BiLSTM_fragment, context_ADD, context_subtract_l,
        context_subtract_r, context_average, context_maximum
    ],
                                axis=-1)

    # BiLSTM_fragment = Bidirectional(LSTM(hidden_dim // 2, activation='tanh'), merge_mode='concat')(embedding_fragment)

    decoderlayer1 = Conv1D(50, 1, activation='relu', strides=1,
                           padding='same')(embedding_mix)
    decoderlayer2 = Conv1D(50, 2, activation='relu', strides=1,
                           padding='same')(embedding_mix)
    decoderlayer3 = Conv1D(50, 3, activation='relu', strides=1,
                           padding='same')(embedding_mix)
    decoderlayer4 = Conv1D(50, 4, activation='relu', strides=1,
                           padding='same')(embedding_mix)

    CNNs_fragment = concatenate(
        [decoderlayer1, decoderlayer2, decoderlayer3, decoderlayer4], axis=-1)
    CNNs_fragment = Dropout(0.5)(CNNs_fragment)
    CNNs_fragment = GlobalMaxPooling1D()(CNNs_fragment)

    concat = Dropout(0.3)(CNNs_fragment)

    output = Dense(targetvocabsize, activation='softmax')(concat)

    Models = Model([
        word_input_fragment, word_input_leftcontext, word_input_rightcontext,
        char_input_fragment, char_input_leftcontext, char_input_rightcontext
    ], output)

    Models.compile(loss='categorical_crossentropy',
                   optimizer=optimizers.RMSprop(lr=0.001),
                   metrics=['acc'])

    return Models
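A hypothetical smoke test with random embedding matrices; the sizes are arbitrary and only need to be mutually consistent (the word matrix has `wordvocabsize + 1` rows to match the Embedding layer above):

import numpy as np

wordvocabsize, targetvocabsize, charvobsize = 1000, 5, 60
w2v_k, c2v_k = 100, 25
word_W = np.random.rand(wordvocabsize + 1, w2v_k)
char_W = np.random.rand(charvobsize, c2v_k)

model = Model3_LSTM_BiLSTM_LSTM(wordvocabsize, targetvocabsize, charvobsize,
                                word_W, char_W,
                                input_fragment_lenth=6,
                                input_leftcontext_lenth=10,
                                input_rightcontext_lenth=10,
                                input_maxword_length=12,
                                w2v_k=w2v_k, c2v_k=c2v_k)
model.summary()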