def build_models(weight_path, init_models=False, train_view_only=False, create_siam=False):
    # NOTE: in the original code both branches of this conditional set the same
    # flags (False/False), making the switch a no-op; the values below restore
    # the presumable intent of train_view_only (freeze the classifier branch,
    # keep the view branch trainable).
    if train_view_only:
        trainable_cls = False
        trainable_view = True
    else:
        trainable_cls = True
        trainable_view = True

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=trainable_cls)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    # classifier = nn.classifier(shared_layers, roi_input, C.num_rois, nb_classes=len(classes_count), trainable_cls=trainable_cls, trainable_view=trainable_view)
    classifier, inner_layer = nn.classifier(shared_layers, roi_input, C.num_rois,
                                            nb_classes=C.num_classes,
                                            trainable_cls=trainable_cls,
                                            trainable_view=trainable_view)

    # L2-normalize the inner (embedding) layer
    inner_layer = Lambda(lambda x: tf.nn.l2_normalize(x, dim=2))(inner_layer)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    if init_models:
        try:
            print('loading weights from {}'.format(C.base_net_weights))
            model_rpn.load_weights(C.base_net_weights, by_name=True)
            model_classifier.load_weights(C.base_net_weights, by_name=True)
        except Exception:
            print('Could not load pretrained model weights. Weights can be found at {} and {}'.format(
                'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5',
                'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'))

    ## load the pre-trained net
    # roi_helpers.compere_weights(model_classifier.get_weights(), model_rpn.get_weights(), 0, 0)
    model_rpn.load_weights(weight_path, by_name=True)
    model_classifier.load_weights(weight_path, by_name=True)

    model_rpn.compile(optimizer=optimizer,
                      loss=[losses.rpn_loss_cls(num_anchors),
                            losses.rpn_loss_regr(num_anchors)])

    ## no weights
    model_classifier.compile(optimizer=optimizer_classifier,
                             loss=[losses.class_loss_cls,
                                   losses.class_loss_regr(C.num_classes - 1),
                                   losses.class_loss_view_weight(C.num_classes, roi_num=C.num_rois)],
                             metrics=['accuracy'])
    ## with weights
    # model_classifier.compile(optimizer=optimizer_classifier,
    #                          loss=[losses.class_loss_cls,
    #                                losses.class_loss_regr(len(classes_count) - 1),
    #                                losses.class_loss_view_weight(len(classes_count), roi_num=C.num_rois)],
    #                          metrics=['accuracy'])
    model_all.compile(optimizer='sgd', loss='mae')

    if create_siam:
        model_view_only = Model([img_input, roi_input], classifier[2])
        model_inner = Model([img_input, roi_input], inner_layer)

        ## use the feature map after the RPN; train only the view module
        inner_ref = model_inner([img_input_ref, roi_input_ref])
        inner_dp = model_inner([img_input_dp, roi_input_dp])
        inner_dm = model_inner([img_input_dm, roi_input_dm])

        view_ref_base = model_view_only([img_input_ref, roi_input_ref])
        view_dp_base = model_view_only([img_input_dp, roi_input_dp])
        view_dm_base = model_view_only([img_input_dm, roi_input_dm])

        ## first version - L2 distance
        view_ref = SliceTensor(len(class_mapping), C.num_rois)([view_ref_base, labels_input])
        view_dp = SliceTensor(len(class_mapping), C.num_rois)([view_dp_base, labels_input])
        view_dm = SliceTensor(len(class_mapping), C.num_rois)([view_dm_base, labels_input])

        distance_dp = Lambda(euclidean_distance,
                             output_shape=eucl_dist_output_shape)([view_dp, view_ref])
        distance_dm = Lambda(euclidean_distance,
                             output_shape=eucl_dist_output_shape)([view_dm, view_ref])
        distance_dp = Lambda(l2_layer, output_shape=l2_layer_output_shape,
                             name='dp_l2_layer')(distance_dp)
        distance_dm = Lambda(l2_layer, output_shape=l2_layer_output_shape,
                             name='dm_l2_layer')(distance_dm)

        # trip = Lambda(trip_layer, output_shape=[1, 2], name='concat_layer')([distance_dp, distance_dm])  # should be compared to [0, 1] with MSE
        trip = Lambda(lambda x: x[0] / (x[1] + K.epsilon()))([distance_dp, distance_dm])

        ## second version of the triplet distance - cosine distance with softmax
        # cos_dp = Lambda(cosine_distance,
        #                 output_shape=cosine_dist_output_shape)([inner_ref, inner_dp])  # cosine dist <X_ref, X_dp>
        # cos_dm = Lambda(cosine_distance,
        #                 output_shape=cosine_dist_output_shape)([inner_ref, inner_dm])  # cosine dist <X_ref, X_dm>
        # soft_param = K.ones([1, 1]) * 2
        # soft_param = tf.Variable(initial_value=[8.])
        # soft_param = K.repeat(soft_param, 2)
        # dist = Concatenate(axis=2)([cos_dm, cos_dp])
        # dist = Lambda(lambda x: x * soft_param)(dist)
        # trip = Activation('softmax')(dist)  # compared to [0, 1] because dp should be small and dm large after the softmax
        # model_trip = Model([img_input_ref, roi_input_ref, img_input_dp, roi_input_dp,
        #                     img_input_dm, roi_input_dm], trip)
        # model_trip.layers[10].trainable_weights.extend([soft_param])
        # model_trip.compile(optimizer=optimizer_trip, loss='categorical_crossentropy')

        ## third version - cosine distance on the last layer
        # slice the correct 360-bin slice for the ground-truth class
        # view_ref = SliceTensor(len(class_mapping), C.num_rois)([view_ref, labels_input])
        # view_dp = SliceTensor(len(class_mapping), C.num_rois)([view_dp, labels_input])
        # view_dm = SliceTensor(len(class_mapping), C.num_rois)([view_dm, labels_input])
        #
        # # l2-normalize in order to use the cosine distance
        # view_ref = Lambda(lambda x: tf.nn.l2_normalize(x, dim=2))(view_ref)
        # view_dp = Lambda(lambda x: tf.nn.l2_normalize(x, dim=2))(view_dp)
        # view_dm = Lambda(lambda x: tf.nn.l2_normalize(x, dim=2))(view_dm)
        #
        # cos_dp = Lambda(cosine_distance,
        #                 output_shape=cosine_dist_output_shape)([view_ref, view_dp])  # cosine dist <X_ref, X_dp>
        # cos_dm = Lambda(cosine_distance,
        #                 output_shape=cosine_dist_output_shape)([view_ref, view_dm])  # cosine dist <X_ref, X_dm>
        # soft_param = K.ones([1, 1]) * 2
        # soft_param = tf.Variable(initial_value=[8.])
        # soft_param = K.repeat(soft_param, 2)
        # dist = Concatenate(axis=2)([cos_dm, cos_dp])
        # dist = Lambda(lambda x: x * soft_param)(dist)
        # trip = Activation('softmax')(dist)  # compared to [0, 1] because dp should be small and dm large after the softmax

        model_trip = Model([img_input_ref, roi_input_ref,
                            img_input_dp, roi_input_dp,
                            img_input_dm, roi_input_dm,
                            labels_input],
                           [view_ref_base, trip])
        # model_trip.layers[10].trainable_weights.extend([soft_param])

        ## cosine
        # model_trip.compile(optimizer=optimizer_trip,
        #                    loss=[losses.class_loss_view_weight(C.num_classes, roi_num=C.num_rois),
        #                          losses.class_loss_view_weight(C.num_classes, roi_num=C.num_rois),
        #                          losses.class_loss_view_weight(C.num_classes, roi_num=C.num_rois),
        #                          'categorical_crossentropy'])
        ## l2
        model_trip.compile(optimizer=optimizer_trip,
                           loss=[losses.class_loss_view_weight(C.num_classes, roi_num=C.num_rois),
                                 'mse'])
        return model_view_only, model_inner, model_trip
    else:
        return model_rpn, model_classifier, model_all
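def _example_build_models_usage():
    """Usage sketch, NOT part of the original source: illustrates how
    build_models() is intended to be called, assuming the module-level inputs
    (img_input, roi_input, the *_ref/*_dp/*_dm triplet inputs, labels_input),
    the config object C, and the optimizers above are already defined.
    The weight path is a placeholder taken from elsewhere in this file."""
    # standard training: RPN, classifier, and the combined save/load model
    model_rpn, model_classifier, model_all = build_models(
        'models/model_FC_init.hdf5', init_models=True)

    # siamese triplet training of the view module: model_trip consumes a
    # reference/positive/negative triplet of images and ROIs plus class labels
    model_view_only, model_inner, model_trip = build_models(
        'models/model_FC_init.hdf5', train_view_only=True, create_siam=True)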
input_shape_img = (None, None, 3)  # added: used below but missing from the original snippet
input_shape_features = (None, None, 1024)

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(C.num_rois, 4))
feature_map_input = Input(shape=input_shape_features)

base_output = nn.nn_base(img_input, trainable=True)
model_base = Model(img_input, base_output)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(base_output, num_anchors)

classifier_only, inner = nn.classifier(feature_map_input, roi_input, C.num_rois,
                                       nb_classes=21,
                                       trainable_cls=False, trainable_view=False)

model_classifier_only = Model([feature_map_input, roi_input], classifier_only)
model_inner = Model([feature_map_input, roi_input], inner)

model_rpn = Model(img_input, rpn)
model_rpn_features = Model(img_input, rpn[2:])
model_classifier = Model([feature_map_input, roi_input], classifier_only)

# model_base.load_weights(weight_path, by_name=True)
model_classifier_only.load_weights(weight_path, by_name=True)
model_rpn.load_weights(weight_path, by_name=True)
model_classifier.load_weights(weight_path, by_name=True)

# compiled with a dummy loss; this model is only used for prediction
model_rpn.compile(optimizer='sgd', loss='mse')
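def _example_two_stage_predict(img):
    """Inference sketch, NOT part of the original source: shows how the
    decoupled models above fit together at test time. `format_img` and
    `roi_helpers.rpn_to_roi` are assumed from the standard keras-frcnn helpers;
    numpy as np is assumed imported; output ordering follows nn.rpn /
    nn.classifier above."""
    X, ratio = format_img(img, C)         # resize + channel-mean preprocessing
    Y1, Y2 = model_rpn.predict(X)[:2]     # objectness scores + box regressions
    F = model_rpn_features.predict(X)     # shared feature map for the classifier head
    # turn raw RPN outputs into proposal boxes on the feature-map grid
    R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)
    ROIs = np.expand_dims(R[:C.num_rois, :], axis=0)
    # classify, box-regress, and predict viewpoint for the first C.num_rois proposals
    P_cls, P_regr, P_view = model_classifier_only.predict([F, ROIs])
    return P_cls, P_regr, P_view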
input_shape_img = (None, None, 3)
input_shape_features = (None, None, 1024)

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(C.num_rois, 4))
feature_map_input = Input(shape=input_shape_features)

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn_layers = nn.rpn(shared_layers, num_anchors)
classifier, _ = nn.classifier(feature_map_input, roi_input, C.num_rois,
                              nb_classes=len(class_mapping))

model_rpn = Model(img_input, rpn_layers)
model_classifier_only = Model([feature_map_input, roi_input], classifier)
model_classifier = Model([feature_map_input, roi_input], classifier)

model_rpn.load_weights(weight_path, by_name=True)
model_classifier.load_weights(weight_path, by_name=True)

# compiled with dummy losses; these models are only used for prediction
model_rpn.compile(optimizer='sgd', loss='mse')
model_classifier.compile(optimizer='sgd', loss='mse')

all_imgs = []
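def _example_detection_loop(img_path):
    """Loop sketch, NOT part of the original source: the `all_imgs = []` above
    suggests a per-image detection loop follows. `format_img` and
    `roi_helpers.rpn_to_roi` are assumed keras-frcnn helpers; os/cv2/np imports
    are assumed; nn.rpn's third output is taken to be the shared feature map."""
    for img_name in sorted(os.listdir(img_path)):
        img = cv2.imread(os.path.join(img_path, img_name))
        X, ratio = format_img(img, C)
        # one forward pass gives the RPN outputs plus the feature map to reuse
        [Y1, Y2, F] = model_rpn.predict(X)
        R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)
        ROIs = np.expand_dims(R[:C.num_rois, :], axis=0)
        [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])[:2]
        all_imgs.append((img_name, P_cls, P_regr))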
else:
    input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=False)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier, _ = nn.classifier(shared_layers, roi_input, C.num_rois,
                                  nb_classes=len(classes_count),
                                  trainable_cls=False, trainable_view=False)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    init_weights = os.path.join(base_path, 'models/model_FC_init.hdf5')
    # init_weights = os.path.join(base_path, 'model_frcnn_simple_just_train_pascal_flip.hdf5')

    try:
        print('loading weights from {}'.format(C.base_net_weights))
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)