input_shape_img = (3, None, None)
    input_shape_features = (1024, None, None)
else:
    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, 1024)

# Symbolic inputs for the two models built below: the image, the ROIs
# (shape (C.num_rois, 4) per sample) and a feature map that is fed to the
# classifier directly instead of being computed from img_input.
img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(C.num_rois, 4))
feature_map_input = Input(shape=input_shape_features)

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn_layers = nn.rpn(shared_layers, num_anchors)

# The classifier head is wired to feature_map_input (not to shared_layers),
# so it is a separate graph from the RPN. The second value returned by
# nn.classifier is discarded here.
classifier, _ = nn.classifier(feature_map_input,
                              roi_input,
                              C.num_rois,
                              nb_classes=len(class_mapping))

model_rpn = Model(img_input, rpn_layers)
model_classifier_only = Model([feature_map_input, roi_input], classifier)

# NOTE(review): built from exactly the same inputs and outputs as
# model_classifier_only above, so both wrap the same layers.
model_classifier = Model([feature_map_input, roi_input], classifier)

# Load the same weight file into both models; by_name=True asks Keras to
# match weights to layers by layer name.
model_rpn.load_weights(weight_path, by_name=True)
model_classifier.load_weights(weight_path, by_name=True)

# NOTE(review): only model_rpn is compiled in the visible part of this
# snippet; presumably the loss is a placeholder for inference-only use --
# confirm against the rest of the script.
model_rpn.compile(optimizer='sgd', loss='mse')
Esempio n. 2
0
def build_models(weight_path,
                 init_models=False,
                 train_view_only=False,
                 create_siam=False):
    """Build and compile the detection models, or the siamese view models.

    The function relies on names defined outside of it: the Keras inputs
    (``img_input``, ``roi_input``, ``labels_input`` and the ``*_ref`` /
    ``*_dp`` / ``*_dm`` inputs), the config object ``C``, the ``nn`` and
    ``losses`` modules, ``class_mapping``, the three optimizers and the
    distance helpers used by the ``Lambda`` layers.

    Args:
        weight_path: Weight file loaded (by layer name) into the RPN and the
            classifier models.
        init_models: When true, ``C.base_net_weights`` is loaded first into
            both models. A failure is reported with ``print`` and otherwise
            ignored (best effort).
        train_view_only: Selects the trainable flags passed to the network
            builders. NOTE(review): both branches currently set the same
            values, so this flag has no effect as written.
        create_siam: When true, build and return the view-only, inner-layer
            and triplet models instead of the detection models.

    Returns:
        ``(model_view_only, model_inner, model_trip)`` when ``create_siam``
        is true, otherwise ``(model_rpn, model_classifier, model_all)``.
    """
    # NOTE(review): the two branches are identical (everything frozen).
    # Kept as-is because the intended values cannot be told from this code.
    if train_view_only:
        trainable_cls = False
        trainable_view = False
    else:
        trainable_cls = False
        trainable_view = False
    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=trainable_cls)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    # classifier = nn.classifier(shared_layers, roi_input, C.num_rois, nb_classes=len(classes_count), trainable_cls=trainable_cls,trainable_view=trainable_view)
    classifier, inner_layer = nn.classifier(shared_layers,
                                            roi_input,
                                            C.num_rois,
                                            nb_classes=C.num_classes,
                                            trainable_cls=trainable_cls,
                                            trainable_view=trainable_view)

    # L2 normalization for inner layer
    # NOTE(review): `dim` is the older spelling of `axis` for l2_normalize;
    # left unchanged to stay compatible with the TensorFlow version in use.
    inner_layer = Lambda(lambda x: tf.nn.l2_normalize(x, dim=2))(inner_layer)

    # Only the first two RPN outputs are exposed by model_rpn.
    model_rpn = Model(img_input, rpn[:2])

    model_classifier = Model([img_input, roi_input], classifier)
    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    if init_models:
        try:
            print('loading weights from {}'.format(C.base_net_weights))
            model_rpn.load_weights(C.base_net_weights, by_name=True)
            model_classifier.load_weights(C.base_net_weights, by_name=True)
        except Exception as err:
            # Best effort: keep going without the base weights, but do not
            # swallow KeyboardInterrupt / SystemExit, and report the cause.
            print(
                'Could not load pretrained model weights. Weights can be found at {} and {}'
                .format(
                    'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5',
                    'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
                ))
            print('Reason: {}'.format(err))

    ## load pre-trained net

    # roi_helpers.compere_weights(model_classifier.get_weights(),model_rpn.get_weights(),0,0)
    model_rpn.load_weights(weight_path, by_name=True)
    model_classifier.load_weights(weight_path, by_name=True)

    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses.rpn_loss_cls(num_anchors),
                          losses.rpn_loss_regr(num_anchors)
                      ])
    ## no weights
    model_classifier.compile(optimizer=optimizer_classifier,
                             loss=[
                                 losses.class_loss_cls,
                                 losses.class_loss_regr(C.num_classes - 1),
                                 losses.class_loss_view_weight(
                                     C.num_classes, roi_num=C.num_rois)
                             ],
                             metrics=['accuracy'])
    ## with weights
    # model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_cls, losses.class_loss_regr(len(classes_count)-1),losses.class_loss_view_weight(len(classes_count),roi_num=C.num_rois)], metrics=['accuracy'])

    model_all.compile(optimizer='sgd', loss='mae')

    if create_siam:
        # classifier[2] is the third classifier output; it is the one
        # compiled with class_loss_view_weight above.
        model_view_only = Model([img_input, roi_input], classifier[2])
        model_inner = Model([img_input, roi_input], inner_layer)

        ## use the feature map after rpn, train only the view module
        # NOTE(review): inner_ref / inner_dp / inner_dm are not used by the
        # active L2 variant; only the disabled cosine variant below reads
        # them. The calls are kept so the graph is built exactly as before.
        inner_ref = model_inner([img_input_ref, roi_input_ref])
        inner_dp = model_inner([img_input_dp, roi_input_dp])
        inner_dm = model_inner([img_input_dm, roi_input_dm])
        view_ref_base = model_view_only([img_input_ref, roi_input_ref])
        view_dp_base = model_view_only([img_input_dp, roi_input_dp])
        view_dm_base = model_view_only([img_input_dm, roi_input_dm])

        ## first version - l2 distance

        view_ref = SliceTensor(len(class_mapping),
                               C.num_rois)([view_ref_base, labels_input])
        view_dp = SliceTensor(len(class_mapping),
                              C.num_rois)([view_dp_base, labels_input])
        view_dm = SliceTensor(len(class_mapping),
                              C.num_rois)([view_dm_base, labels_input])

        distance_dp = Lambda(euclidean_distance,
                             output_shape=eucl_dist_output_shape)(
                                 [view_dp, view_ref])

        distance_dm = Lambda(euclidean_distance,
                             output_shape=eucl_dist_output_shape)(
                                 [view_dm, view_ref])
        distance_dp = Lambda(l2_layer,
                             output_shape=l2_layer_output_shape,
                             name='dp_l2_layer')(distance_dp)
        distance_dm = Lambda(l2_layer,
                             output_shape=l2_layer_output_shape,
                             name='dm_l2_layer')(distance_dm)

        # trip = Lambda(trip_layer, output_shape=[1, 2], name='concat_layer')([distance_dp, distance_dm]) # should be compared to [0,1] in MSE
        # Ratio of the two distances; K.epsilon() guards against a zero
        # denominator.
        trip = Lambda(lambda x: x[0] / (x[1] + K.epsilon()))(
            [distance_dp, distance_dm])

        ## second version for trip distance - cosine distance with softmax
        # cos_dp = Lambda(cosine_distance,
        #                 output_shape=cosine_dist_output_shape)([inner_ref, inner_dp])  # cosine dist <X_ref,X_dp>
        #
        # cos_dm = Lambda(cosine_distance,
        #                 output_shape=cosine_dist_output_shape)([inner_ref, inner_dm])  # cosine dist <X_ref,X_dm>
        # # soft_param = K.ones([1,1])*2
        # soft_param = tf.Variable(initial_value=[8.])
        #
        # # soft_param = K.repeat(soft_param,2)
        # dist = Concatenate(axis=2)([cos_dm, cos_dp])
        # dist = Lambda(lambda x: x * soft_param)(dist)
        # trip = Activation('softmax')(dist)  # should be compared to [0,1] because dp should be small and dm large so after softmax it
        # model_trip = Model([img_input_ref, roi_input_ref, img_input_dp, roi_input_dp, img_input_dm, roi_input_dm], trip)
        # # model_trip.layers[10].trainable_weights.extend([soft_param])
        # model_trip.compile(optimizer=optimizer_trip, loss='categorical_crossentropy')

        ## third version cosine on last layer
        # slice the correct 360 slice
        # view_ref = SliceTensor(len(class_mapping),C.num_rois)([view_ref,labels_input])
        # view_dp = SliceTensor(len(class_mapping),C.num_rois)([view_dp,labels_input])
        # view_dm = SliceTensor(len(class_mapping),C.num_rois)([view_dm,labels_input])
        #
        # # l2 normalize in order to use cosine dist
        # view_ref = Lambda(lambda x: tf.nn.l2_normalize(x, dim=2))(view_ref)
        # view_dp = Lambda(lambda x: tf.nn.l2_normalize(x, dim=2))(view_dp)
        # view_dm = Lambda(lambda x: tf.nn.l2_normalize(x, dim=2))(view_dm)
        #
        #
        # cos_dp = Lambda(cosine_distance,
        #                 output_shape=cosine_dist_output_shape)([view_ref, view_dp])  # cosine dist <X_ref,X_dp>
        #
        # cos_dm = Lambda(cosine_distance,
        #                 output_shape=cosine_dist_output_shape)([view_ref, view_dm])  # cosine dist <X_ref,X_dm>
        # # soft_param = K.ones([1,1])*2
        # soft_param = tf.Variable(initial_value=[8.])
        #
        # # soft_param = K.repeat(soft_param,2)
        # dist = Concatenate(axis=2)([cos_dm, cos_dp])
        # dist = Lambda(lambda x: x * soft_param)(dist)
        # trip = Activation('softmax')(dist)  # should be compared to [0,1] because dp should be small and dm large so after softmax it

        # Two outputs: the reference view prediction and the distance ratio.
        model_trip = Model([
            img_input_ref, roi_input_ref, img_input_dp, roi_input_dp,
            img_input_dm, roi_input_dm, labels_input
        ], [view_ref_base, trip])
        # model_trip.layers[10].trainable_weights.extend([soft_param])

        ## cosine
        # model_trip.compile(optimizer=optimizer_trip, loss=[losses.class_loss_view_weight(C.num_classes,roi_num=C.num_rois),losses.class_loss_view_weight(C.num_classes,roi_num=C.num_rois),losses.class_loss_view_weight(C.num_classes,roi_num=C.num_rois),'categorical_crossentropy'])

        ## l2
        # One loss per output, in the same order as the model outputs.
        model_trip.compile(optimizer=optimizer_trip,
                           loss=[
                               losses.class_loss_view_weight(
                                   C.num_classes, roi_num=C.num_rois), 'mse'
                           ])

        return model_view_only, model_inner, model_trip

    else:
        return model_rpn, model_classifier, model_all
Esempio n. 3
0
    input_shape_img = (3, None, None)
    input_shape_features = (1024, None, None)
else:
    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, 1024)

# Symbolic inputs: the image, the ROIs (shape (C.num_rois, 4) per sample)
# and a feature map that is fed to the classifier directly.
img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(C.num_rois, 4))
feature_map_input = Input(shape=input_shape_features)

# Base network, also wrapped as its own model (image -> base output).
base_output = nn.nn_base(img_input, trainable=True)
model_base = Model(img_input, base_output)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(base_output, num_anchors)

# The classifier head is wired to feature_map_input rather than to
# base_output, and is built with both trainable flags switched off.
# NOTE(review): nb_classes is hard-coded to 21 here -- confirm it matches
# the class count of the weights being loaded.
classifier_only, inner = nn.classifier(feature_map_input,
                                       roi_input,
                                       C.num_rois,
                                       nb_classes=21,
                                       trainable_cls=False,
                                       trainable_view=False)
model_classifier_only = Model([feature_map_input, roi_input], classifier_only)

# model_inner exposes the second value returned by nn.classifier.
model_inner = Model([feature_map_input, roi_input], inner)
# model_rpn exposes every RPN output; model_rpn_features only those from
# index 2 onwards (presumably the shared feature map -- TODO confirm
# against nn.rpn).
model_rpn = Model(img_input, rpn)
model_rpn_features = Model(img_input, rpn[2:])

# NOTE(review): built from exactly the same inputs and outputs as
# model_classifier_only above, so both wrap the same layers.
model_classifier = Model([feature_map_input, roi_input], classifier_only)
# model_base.load_weights(weight_path, by_name=True)