def inference(self, mode, inputs):
    is_training = mode == 'TRAIN'

    ### decode your inputs
    [image, im_info, gt_boxes] = inputs
    image.set_shape([None, None, None, 3])
    im_info.set_shape([None, cfg.nr_info_dim])
    if mode == 'TRAIN':
        gt_boxes.set_shape([None, None, 5])
    ## end of decode

    num_anchors = len(cfg.anchor_scales) * len(cfg.anchor_ratios)
    bottleneck = resnet_v1.bottleneck
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(144, 24, 2, 1)] + [(144, 24, 1, 1)] * 3),
        resnet_utils.Block('block2', bottleneck,
                           [(288, 144, 2, 1)] + [(288, 144, 1, 1)] * 7),
        resnet_utils.Block('block3', bottleneck,
                           [(576, 288, 1, 1)] + [(576, 288, 1, 1)] * 3),
    ]

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope('resnet_v1_xception', 'resnet_v1_xception'):
            # the rate argument (atrous conv) has to be removed here
            net = resnet_utils.conv2d_same(
                image, 24, 3, stride=2, scope='conv1')
            net = slim.max_pool2d(
                net, [3, 3], stride=2, padding='SAME', scope='pool1')
        net, _ = resnet_v1.resnet_v1(
            net, blocks[0:1], global_pool=False,
            include_root_block=False, scope='resnet_v1_xception')

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv3, _ = resnet_v1.resnet_v1(
            net, blocks[1:2], global_pool=False,
            include_root_block=False, scope='resnet_v1_xception')

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv4, _ = resnet_v1.resnet_v1(
            net_conv3, blocks[2:3], global_pool=False,
            include_root_block=False, scope='resnet_v1_xception')

    initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
    initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

    with tf.variable_scope(
            'resnet_v1_xception', 'resnet_v1_xception',
            regularizer=tf.contrib.layers.l2_regularizer(cfg.weight_decay)):
        # rpn
        rpn = slim.conv2d(
            net_conv3, 256, [3, 3], trainable=is_training,
            weights_initializer=initializer, activation_fn=nn_ops.relu,
            scope="rpn_conv/3x3")
        rpn_cls_score = slim.conv2d(
            rpn, num_anchors * 2, [1, 1], trainable=is_training,
            weights_initializer=initializer, padding='VALID',
            activation_fn=None, scope='rpn_cls_score')
        rpn_bbox_pred = slim.conv2d(
            rpn, num_anchors * 4, [1, 1], trainable=is_training,
            weights_initializer=initializer, padding='VALID',
            activation_fn=None, scope='rpn_bbox_pred')

        # generate anchors
        height = tf.cast(tf.shape(rpn)[1], tf.float32)
        width = tf.cast(tf.shape(rpn)[2], tf.float32)
        anchors = generate_anchors_opr(
            height, width, cfg.stride[0], cfg.anchor_scales,
            cfg.anchor_ratios)

        # change it so that the score has 2 as its channel size
        rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
        rpn_cls_prob = tf.nn.softmax(rpn_cls_prob, name='rpn_cls_prob')
        rpn_cls_prob = tf.reshape(rpn_cls_prob, tf.shape(rpn_cls_score))

        rois, roi_scores = proposal_opr(
            rpn_cls_prob, rpn_bbox_pred, im_info, mode, cfg.stride,
            anchors, num_anchors, is_tfchannel=True, is_tfnms=True)

        if is_training:
            with tf.variable_scope('anchor') as scope:
                rpn_labels, rpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gt_boxes, im_info, cfg.stride, anchors,
                         num_anchors],
                        [tf.float32, tf.float32])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")

            with tf.control_dependencies([rpn_labels]):
                with tf.variable_scope('rpn_rois') as scope:
                    rois, labels, bbox_targets = \
                        tf.py_func(
                            proposal_target_layer,
                            [rois, gt_boxes, im_info],
                            [tf.float32, tf.float32, tf.float32])
                    labels = tf.to_int32(labels, name="to_int32")

    with tf.variable_scope(
            'resnet_v1_xception', 'resnet_v1_xception',
            regularizer=tf.contrib.layers.l2_regularizer(cfg.weight_decay)):
        ps_chl = 7 * 7 * 10
        ps_fm = rfcn_plus_plus_opr.global_context_module(
            net_conv4, prefix='conv_new_1',
            # ks=15, chl_mid=256, chl_out=ps_chl)
            ks=15, chl_mid=64, chl_out=ps_chl)
        ps_fm = nn_ops.relu(ps_fm)

        [psroipooled_rois, _, _] = psalign_pooling_op.psalign_pool(
            ps_fm, rois, group_size=7, sample_height=2, sample_width=2,
            spatial_scale=1.0 / 16.0)
        # [psroipooled_rois, _] = psroi_pooling_op.psroi_pool(
        #     ps_fm, rois, group_size=7, spatial_scale=1.0 / 16.0)
        psroipooled_rois = slim.flatten(psroipooled_rois)

        ps_fc_1 = slim.fully_connected(
            psroipooled_rois, 2048, weights_initializer=initializer,
            activation_fn=nn_ops.relu, trainable=is_training,
            scope='ps_fc_1')
        cls_score = slim.fully_connected(
            ps_fc_1, cfg.num_classes, weights_initializer=initializer,
            activation_fn=None, trainable=is_training, scope='cls_fc')
        bbox_pred = slim.fully_connected(
            ps_fc_1, 4 * cfg.num_classes,
            weights_initializer=initializer_bbox, activation_fn=None,
            trainable=is_training, scope='bbox_fc')
        cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob")

        # conv_new_1 = slim.conv2d(
        #     net_conv5, 1024, [1, 1], trainable=is_training,
        #     weights_initializer=initializer, activation_fn=nn_ops.relu,
        #     scope="conv_new_1")
        # rfcn_cls = slim.conv2d(
        #     conv_new_1, 7 * 7 * cfg.num_classes, [1, 1],
        #     trainable=is_training, weights_initializer=initializer,
        #     activation_fn=None, scope="rfcn_cls")
        # rfcn_bbox = slim.conv2d(
        #     conv_new_1, 7 * 7 * 4, [1, 1], trainable=is_training,
        #     weights_initializer=initializer,
        #     activation_fn=None, scope="rfcn_bbox")
        # [psroipooled_cls_rois, _] = psroi_pooling_op.psroi_pool(
        #     rfcn_cls, rois, group_size=7, spatial_scale=1.0 / 16.0)
        # [psroipooled_loc_rois, _] = psroi_pooling_op.psroi_pool(
        #     rfcn_bbox, rois, group_size=7, spatial_scale=1.0 / 16.0)
        # cls_score = tf.reduce_mean(psroipooled_cls_rois, axis=[1, 2])
        # bbox_pred = tf.reduce_mean(psroipooled_loc_rois, axis=[1, 2])
        # cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob")
        # cls_prob = tf.nn.softmax(cls_score, name="cls_prob")
        # bbox_pred = tf.tile(bbox_pred, [1, cfg.num_classes])

    if not is_training:
        stds = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (cfg.num_classes))
        means = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (cfg.num_classes))
        bbox_pred *= stds
        bbox_pred += means

        ############## add prediction #####################
        tf.add_to_collection("rpn_cls_score", rpn_cls_score)
        tf.add_to_collection("rpn_cls_prob", rpn_cls_prob)
        tf.add_to_collection("rpn_bbox_pred", rpn_bbox_pred)
        tf.add_to_collection("cls_score", cls_score)
        tf.add_to_collection("cls_prob", cls_prob)
        tf.add_to_collection("bbox_pred", bbox_pred)
        tf.add_to_collection("rois", rois)
    else:
        # -------------------- rpn loss ---------------------------------#
        from detection_opr.utils import loss_opr_without_box_weight
        rpn_loss_box = loss_opr_without_box_weight.smooth_l1_loss_rpn(
            tf.reshape(rpn_bbox_pred, [-1, 4]),
            tf.reshape(rpn_bbox_targets, [-1, 4]),
            tf.reshape(rpn_labels, [-1]), sigma=cfg.simga_rpn)

        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        rpn_label = tf.reshape(rpn_labels, [-1])
        rpn_select = tf.where(tf.not_equal(rpn_label, -1))
        rpn_cls_score = tf.reshape(
            tf.gather(rpn_cls_score, rpn_select), [-1, 2])
        rpn_label = tf.reshape(tf.gather(rpn_label, rpn_select), [-1])
        rpn_cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=rpn_cls_score, labels=rpn_label))

        # -------------------- rcnn loss --------------------------------#
        label = tf.reshape(labels, [-1])
        cross_entropy, loss_box = loss_opr_without_box_weight.sum_ohem_loss(
            tf.reshape(cls_score, [-1, cfg.num_classes]), label,
            bbox_pred, bbox_targets,
            cfg.TRAIN.nr_ohem_sampling, cfg.num_classes)
        loss_box *= 2

        # -------------------- add to collection ------------------------#
        tf.add_to_collection('loss_cls', cross_entropy)
        tf.add_to_collection('loss_box', loss_box)
        tf.add_to_collection('rpn_loss_cls', rpn_cross_entropy)
        tf.add_to_collection('rpn_loss_box', rpn_loss_box)

        loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
        tf.add_to_collection('losses', loss)
        return loss
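# ---------------------------------------------------------------------------
# Usage note (not part of the original file). In test mode inference() does
# not return the predictions; they are only exposed through the graph
# collections registered above ("rois", "cls_prob", "bbox_pred", ...). The
# sketch below shows one way those collections might be fetched once the
# graph has been built and a checkpoint restored. The helper name and the
# feed_dict contents are illustrative assumptions, not repository API.
# ---------------------------------------------------------------------------
import tensorflow as tf


def fetch_predictions(sess, feed_dict):
    # Tensors registered via tf.add_to_collection(...) inside inference().
    rois = tf.get_collection("rois")[0]
    cls_prob = tf.get_collection("cls_prob")[0]
    bbox_pred = tf.get_collection("bbox_pred")[0]
    # feed_dict maps the image / im_info placeholders used to build the
    # graph to concrete numpy arrays for one batch.
    return sess.run([rois, cls_prob, bbox_pred], feed_dict=feed_dict)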
def inference(self, mode, inputs):
    is_training = mode == 'TRAIN'

    ### decode your inputs
    [image, im_info, gt_boxes] = inputs
    image.set_shape([None, None, None, 3])
    im_info.set_shape([None, cfg.nr_info_dim])
    if mode == 'TRAIN':
        gt_boxes.set_shape([None, None, 5])
    ## end of decode

    num_anchors = len(cfg.anchor_scales) * len(cfg.anchor_ratios)
    bottleneck = resnet_v1.bottleneck
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1, 1)] * 2 + [(256, 64, 1, 1)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 2, 1)] + [(512, 128, 1, 1)] * 3),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 2, 1)] + [(1024, 256, 1, 1)] * 22),
        resnet_utils.Block('block4', bottleneck,
                           [(2048, 512, 1, 2)] + [(2048, 512, 1, 2)] * 2)
    ]

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope('resnet_v1_101', 'resnet_v1_101'):
            net = resnet_utils.conv2d_same(
                image, 64, 7, stride=2, scope='conv1')
            net = slim.max_pool2d(
                net, [3, 3], stride=2, padding='SAME', scope='pool1')
        net, _ = resnet_v1.resnet_v1(
            net, blocks[0:1], global_pool=False,
            include_root_block=False, scope='resnet_v1_101')

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv3, _ = resnet_v1.resnet_v1(
            net, blocks[1:2], global_pool=False,
            include_root_block=False, scope='resnet_v1_101')

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv4, _ = resnet_v1.resnet_v1(
            net_conv3, blocks[2:3], global_pool=False,
            include_root_block=False, scope='resnet_v1_101')

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv5, _ = resnet_v1.resnet_v1(
            net_conv4, blocks[-1:], global_pool=False,
            include_root_block=False, scope='resnet_v1_101')

    initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)

    with tf.variable_scope(
            'resnet_v1_101', 'resnet_v1_101',
            regularizer=tf.contrib.layers.l2_regularizer(cfg.weight_decay)):
        # rpn
        rpn = slim.conv2d(
            net_conv4, 512, [3, 3], trainable=is_training,
            weights_initializer=initializer, activation_fn=nn_ops.relu,
            scope="rpn_conv/3x3")
        rpn_cls_score = slim.conv2d(
            rpn, num_anchors * 2, [1, 1], trainable=is_training,
            weights_initializer=initializer, padding='VALID',
            activation_fn=None, scope='rpn_cls_score')
        rpn_bbox_pred = slim.conv2d(
            rpn, num_anchors * 4, [1, 1], trainable=is_training,
            weights_initializer=initializer, padding='VALID',
            activation_fn=None, scope='rpn_bbox_pred')

        # generate anchors
        height = tf.cast(tf.shape(rpn)[1], tf.float32)
        width = tf.cast(tf.shape(rpn)[2], tf.float32)
        anchors = generate_anchors_opr(
            height, width, cfg.stride[0], cfg.anchor_scales,
            cfg.anchor_ratios)

        # change it so that the score has 2 as its channel size
        rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
        rpn_cls_prob = tf.nn.softmax(rpn_cls_prob, name='rpn_cls_prob')
        rpn_cls_prob = tf.reshape(rpn_cls_prob, tf.shape(rpn_cls_score))

        rois, roi_scores = proposal_opr(
            rpn_cls_prob, rpn_bbox_pred, im_info, mode, cfg.stride,
            anchors, num_anchors, is_tfchannel=True, is_tfnms=False)

        if is_training:
            with tf.variable_scope('anchor') as scope:
                rpn_labels, rpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gt_boxes, im_info, cfg.stride, anchors,
                         num_anchors],
                        [tf.float32, tf.float32])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")

            with tf.control_dependencies([rpn_labels]):
                with tf.variable_scope('rpn_rois') as scope:
                    rois, labels, bbox_targets = \
                        tf.py_func(
                            proposal_target_layer,
                            [rois, gt_boxes, im_info],
                            [tf.float32, tf.float32, tf.float32])
                    labels = tf.to_int32(labels, name="to_int32")

    with tf.variable_scope(
            'resnet_v1_101', 'resnet_v1_101',
            regularizer=tf.contrib.layers.l2_regularizer(cfg.weight_decay)):
        # R-FCN head: position-sensitive score and box maps
        conv_new_1 = slim.conv2d(
            net_conv5, 1024, [1, 1], trainable=is_training,
            weights_initializer=initializer, activation_fn=nn_ops.relu,
            scope="conv_new_1")
        rfcn_cls = slim.conv2d(
            conv_new_1, 7 * 7 * cfg.num_classes, [1, 1],
            trainable=is_training, weights_initializer=initializer,
            activation_fn=None, scope="rfcn_cls")
        rfcn_bbox = slim.conv2d(
            conv_new_1, 7 * 7 * 4, [1, 1], trainable=is_training,
            weights_initializer=initializer,
            activation_fn=None, scope="rfcn_bbox")

        [psroipooled_cls_rois, _] = psroi_pooling_op.psroi_pool(
            rfcn_cls, rois, group_size=7, spatial_scale=1.0 / 16.0)
        [psroipooled_loc_rois, _] = psroi_pooling_op.psroi_pool(
            rfcn_bbox, rois, group_size=7, spatial_scale=1.0 / 16.0)
        cls_score = tf.reduce_mean(psroipooled_cls_rois, axis=[1, 2])
        bbox_pred = tf.reduce_mean(psroipooled_loc_rois, axis=[1, 2])
        cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob")
        # cls_prob = tf.nn.softmax(cls_score, name="cls_prob")
        bbox_pred = tf.tile(bbox_pred, [1, cfg.num_classes])

    if not is_training:
        stds = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (cfg.num_classes))
        means = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (cfg.num_classes))
        bbox_pred *= stds
        bbox_pred += means

        ############## add prediction #####################
        tf.add_to_collection("rpn_cls_score", rpn_cls_score)
        tf.add_to_collection("rpn_cls_prob", rpn_cls_prob)
        tf.add_to_collection("rpn_bbox_pred", rpn_bbox_pred)
        tf.add_to_collection("cls_score", cls_score)
        tf.add_to_collection("cls_prob", cls_prob)
        tf.add_to_collection("bbox_pred", bbox_pred)
        tf.add_to_collection("rois", rois)
    else:
        # -------------------- rpn loss ---------------------------------#
        from detection_opr.utils import loss_opr_without_box_weight
        rpn_loss_box = loss_opr_without_box_weight.smooth_l1_loss_rpn(
            tf.reshape(rpn_bbox_pred, [-1, 4]),
            tf.reshape(rpn_bbox_targets, [-1, 4]),
            tf.reshape(rpn_labels, [-1]), sigma=cfg.simga_rpn)

        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        rpn_label = tf.reshape(rpn_labels, [-1])
        rpn_select = tf.where(tf.not_equal(rpn_label, -1))
        rpn_cls_score = tf.reshape(
            tf.gather(rpn_cls_score, rpn_select), [-1, 2])
        rpn_label = tf.reshape(tf.gather(rpn_label, rpn_select), [-1])
        rpn_cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=rpn_cls_score, labels=rpn_label))

        # -------------------- rcnn loss --------------------------------#
        label = tf.reshape(labels, [-1])
        cross_entropy, loss_box = loss_opr_without_box_weight.sum_ohem_loss(
            tf.reshape(cls_score, [-1, cfg.num_classes]), label,
            bbox_pred, bbox_targets,
            cfg.TRAIN.nr_ohem_sampling, cfg.num_classes)
        loss_box *= 2

        # -------------------- add to collection ------------------------#
        tf.add_to_collection('loss_cls', cross_entropy)
        tf.add_to_collection('loss_box', loss_box)
        tf.add_to_collection('rpn_loss_cls', rpn_cross_entropy)
        tf.add_to_collection('rpn_loss_box', rpn_loss_box)

        loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
        tf.add_to_collection('losses', loss)
        return loss
def inference(self, mode, inputs):
    is_training = mode == 'TRAIN'

    ### decode your inputs; these are basically the tensors produced by
    ### get_inputs above
    [image, im_info, gt_boxes] = inputs
    image.set_shape([None, None, None, 3])
    im_info.set_shape([None, cfg.nr_info_dim])
    if mode == 'TRAIN':
        gt_boxes.set_shape([None, None, 5])
    ## end of decode

    # num_anchors = 15
    num_anchors = len(cfg.anchor_scales) * len(cfg.anchor_ratios)

    # this defines the ResNet backbone structure, built from bottleneck blocks
    bottleneck = resnet_v1.bottleneck
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1, 1)] * 2 + [(256, 64, 1, 1)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 2, 1)] + [(512, 128, 1, 1)] * 3),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 2, 1)] + [(1024, 256, 1, 1)] * 22),
        resnet_utils.Block('block4', bottleneck,
                           [(2048, 512, 1, 2)] + [(2048, 512, 1, 2)] * 2)
    ]

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope('light_resnet_v1_101',
                               'light_resnet_v1_101'):
            net = resnet_utils.conv2d_same(
                image, 64, 7, stride=2, scope='conv1')
            net = slim.max_pool2d(
                net, [3, 3], stride=2, padding='SAME', scope='pool1')
        net, _ = resnet_v1.resnet_v1(
            net, blocks[0:1], global_pool=False,
            include_root_block=False, scope='light_resnet_v1_101')

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv3, _ = resnet_v1.resnet_v1(
            net, blocks[1:2], global_pool=False,
            include_root_block=False, scope='light_resnet_v1_101')

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv4, _ = resnet_v1.resnet_v1(
            net_conv3, blocks[2:3], global_pool=False,
            include_root_block=False, scope='light_resnet_v1_101')

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv5, _ = resnet_v1.resnet_v1(
            net_conv4, blocks[-1:], global_pool=False,
            include_root_block=False, scope='light_resnet_v1_101')

    initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
    initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

    with tf.variable_scope(
            'light_resnet_v1_101', 'light_resnet_v1_101',
            regularizer=tf.contrib.layers.l2_regularizer(cfg.weight_decay)):
        # RPN applied on the C4 features; its proposals feed the R-CNN subnet
        rpn = slim.conv2d(
            net_conv4, 512, [3, 3], trainable=is_training,
            weights_initializer=initializer, activation_fn=nn_ops.relu,
            scope="rpn_conv/3x3")
        # this branch gives the class scores
        rpn_cls_score = slim.conv2d(
            rpn, num_anchors * 2, [1, 1], trainable=is_training,
            weights_initializer=initializer, padding='VALID',
            activation_fn=None, scope='rpn_cls_score')
        # this branch gives the bounding-box coordinates
        rpn_bbox_pred = slim.conv2d(
            rpn, num_anchors * 4, [1, 1], trainable=is_training,
            weights_initializer=initializer, padding='VALID',
            activation_fn=None, scope='rpn_bbox_pred')

        # generate anchors
        # rpn above is the output tensor of the 3x3 conv layer; height is the
        # size of its second dimension, width the size of its third dimension
        height = tf.cast(tf.shape(rpn)[1], tf.float32)
        width = tf.cast(tf.shape(rpn)[2], tf.float32)
        anchors = generate_anchors_opr(
            height, width, cfg.stride[0], cfg.anchor_scales,
            cfg.anchor_ratios)

        # change it so that the score has 2 as its channel size
        rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
        rpn_cls_prob = tf.nn.softmax(rpn_cls_prob, name='rpn_cls_prob')
        rpn_cls_prob = tf.reshape(rpn_cls_prob, tf.shape(rpn_cls_score))

        # this is what actually produces the regions of interest
        rois, roi_scores = proposal_opr(
            rpn_cls_prob, rpn_bbox_pred, im_info, mode, cfg.stride,
            anchors, num_anchors, is_tfchannel=True, is_tfnms=True)

        if is_training:
            with tf.variable_scope('anchor') as scope:
                rpn_labels, rpn_bbox_targets = \
                    tf.py_func(
                        anchor_target_layer,
                        [gt_boxes, im_info, cfg.stride, anchors,
                         num_anchors],
                        [tf.float32, tf.float32])
                rpn_labels = tf.to_int32(rpn_labels, name="to_int32")

            with tf.control_dependencies([rpn_labels]):
                with tf.variable_scope('rpn_rois') as scope:
                    rois, labels, bbox_targets = \
                        tf.py_func(
                            proposal_target_layer,
                            [rois, gt_boxes, im_info],
                            [tf.float32, tf.float32, tf.float32])
                    labels = tf.to_int32(labels, name="to_int32")

    with tf.variable_scope(
            'light_resnet_v1_101', 'light_resnet_v1_101',
            regularizer=tf.contrib.layers.l2_regularizer(cfg.weight_decay)):
        ps_chl = 7 * 7 * 10
        ps_fm = rfcn_plus_plus_opr.global_context_module(
            net_conv5, prefix='conv_new_1',
            ks=15, chl_mid=256, chl_out=ps_chl)
        ps_fm = nn_ops.relu(ps_fm)

        # position-sensitive pooling of the RoIs obtained above
        [psroipooled_rois, _, _] = psalign_pooling_op.psalign_pool(
            ps_fm, rois, group_size=7, sample_height=2, sample_width=2,
            spatial_scale=1.0 / 16.0)
        # [psroipooled_rois, _] = psroi_pooling_op.psroi_pool(
        #     ps_fm, rois, group_size=7, spatial_scale=1.0 / 16.0)
        psroipooled_rois = slim.flatten(psroipooled_rois)

        # the single 2048-channel fully connected layer described at the
        # beginning of page 4 of the paper
        ps_fc_1 = slim.fully_connected(
            psroipooled_rois, 2048, weights_initializer=initializer,
            activation_fn=nn_ops.relu, trainable=is_training,
            scope='ps_fc_1')
        # class scores, i.e. the classification branch
        cls_score = slim.fully_connected(
            ps_fc_1, cfg.num_classes, weights_initializer=initializer,
            activation_fn=None, trainable=is_training, scope='cls_fc')
        # bbox coordinates, i.e. the regression branch
        bbox_pred = slim.fully_connected(
            ps_fc_1, 4 * cfg.num_classes,
            weights_initializer=initializer_bbox, activation_fn=None,
            trainable=is_training, scope='bbox_fc')
        cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob")

        # (old R-FCN head kept for reference)
        # conv_new_1 = slim.conv2d(
        #     net_conv5, 1024, [1, 1], trainable=is_training,
        #     weights_initializer=initializer, activation_fn=nn_ops.relu,
        #     scope="conv_new_1")
        # rfcn_cls = slim.conv2d(
        #     conv_new_1, 7 * 7 * cfg.num_classes, [1, 1],
        #     trainable=is_training, weights_initializer=initializer,
        #     activation_fn=None, scope="rfcn_cls")
        # rfcn_bbox = slim.conv2d(
        #     conv_new_1, 7 * 7 * 4, [1, 1], trainable=is_training,
        #     weights_initializer=initializer,
        #     activation_fn=None, scope="rfcn_bbox")
        # [psroipooled_cls_rois, _] = psroi_pooling_op.psroi_pool(
        #     rfcn_cls, rois, group_size=7, spatial_scale=1.0 / 16.0)
        # [psroipooled_loc_rois, _] = psroi_pooling_op.psroi_pool(
        #     rfcn_bbox, rois, group_size=7, spatial_scale=1.0 / 16.0)
        # cls_score = tf.reduce_mean(psroipooled_cls_rois, axis=[1, 2])
        # bbox_pred = tf.reduce_mean(psroipooled_loc_rois, axis=[1, 2])
        # cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob")
        # cls_prob = tf.nn.softmax(cls_score, name="cls_prob")
        # bbox_pred = tf.tile(bbox_pred, [1, cfg.num_classes])

    # runs when the mode is not TRAIN, e.g. when starting from the initial
    # person detector
    if not is_training:
        # np.tile repeats the first argument as many times as the second says
        stds = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (cfg.num_classes))
        means = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (cfg.num_classes))
        # un-normalize: multiply the bbox_pred values by the standard
        # deviations and add the means
        bbox_pred *= stds
        bbox_pred += means

        ############## add prediction #####################
        tf.add_to_collection("rpn_cls_score", rpn_cls_score)
        tf.add_to_collection("rpn_cls_prob", rpn_cls_prob)
        tf.add_to_collection("rpn_bbox_pred", rpn_bbox_pred)
        tf.add_to_collection("cls_score", cls_score)
        tf.add_to_collection("cls_prob", cls_prob)
        tf.add_to_collection("bbox_pred", bbox_pred)
        tf.add_to_collection("rois", rois)
    else:
        # -------------------- rpn loss ---------------------------------#
        from detection_opr.utils import loss_opr_without_box_weight
        rpn_loss_box = loss_opr_without_box_weight.smooth_l1_loss_rpn(
            tf.reshape(rpn_bbox_pred, [-1, 4]),
            tf.reshape(rpn_bbox_targets, [-1, 4]),
            tf.reshape(rpn_labels, [-1]), sigma=cfg.simga_rpn)

        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        rpn_label = tf.reshape(rpn_labels, [-1])
        rpn_select = tf.where(tf.not_equal(rpn_label, -1))
        rpn_cls_score = tf.reshape(
            tf.gather(rpn_cls_score, rpn_select), [-1, 2])
        rpn_label = tf.reshape(tf.gather(rpn_label, rpn_select), [-1])
        rpn_cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=rpn_cls_score, labels=rpn_label))

        # -------------------- rcnn loss --------------------------------#
        label = tf.reshape(labels, [-1])
        cross_entropy, loss_box = loss_opr_without_box_weight.sum_ohem_loss(
            tf.reshape(cls_score, [-1, cfg.num_classes]), label,
            bbox_pred, bbox_targets,
            cfg.TRAIN.nr_ohem_sampling, cfg.num_classes)
        loss_box *= 2

        # -------------------- add to collection ------------------------#
        tf.add_to_collection('loss_cls', cross_entropy)
        tf.add_to_collection('loss_box', loss_box)
        tf.add_to_collection('rpn_loss_cls', rpn_cross_entropy)
        tf.add_to_collection('rpn_loss_box', rpn_loss_box)

        loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
        tf.add_to_collection('losses', loss)
        return loss
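# ---------------------------------------------------------------------------
# Training note (not part of the original file). In TRAIN mode inference()
# returns the summed detection loss (RPN classification + RPN box + R-CNN
# OHEM classification + R-CNN box) and also stores it in the 'losses'
# collection. The sketch below is a minimal, hypothetical example of how that
# scalar might be minimized; the optimizer choice, learning rate, momentum and
# placeholder names are assumptions, not the repository's training script.
# ---------------------------------------------------------------------------
import tensorflow as tf


def build_train_op(network, image_ph, im_info_ph, gt_boxes_ph,
                   learning_rate=1e-3):
    # Build the training graph; inference() returns the total loss scalar.
    total_loss = network.inference(
        'TRAIN', [image_ph, im_info_ph, gt_boxes_ph])
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    train_op = optimizer.minimize(total_loss)
    return total_loss, train_op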