def get_vgg_text_rpn(data, label, bbox_target, bbox_weight, num_anchors=10):
    """
    Build the training symbol of the text Region Proposal Network.

    Despite the "vgg" in the name, the shared convolutional features are
    produced by ``get_mobilenet_text_conv``. On top of them the network runs
    a 3x3 convolution, a bidirectional LSTM over each feature-map row, a
    512-unit projection and the two 1x1 RPN heads (class scores and box
    regression).

    :param data: input symbol fed to the shared convolutional layers
    :param label: anchor labels for the softmax loss; entries equal to -1
        are ignored
    :param bbox_target: regression targets subtracted from ``rpn_bbox_pred``
    :param bbox_weight: element-wise weights multiplied into the smooth-L1 loss
    :param num_anchors: used to determine output size
    :return: Symbol grouping ``cls_prob`` and ``bbox_loss``
    """
    # shared convolutional layers
    relu5_3 = get_mobilenet_text_conv(data)

    # NOTE(review): infer_shape_partial() is called without input shapes, so
    # the feature-map shape must be derivable from the symbol itself (e.g. a
    # shape attached to `data`) -- confirm against the caller.
    _, relu_shape, _ = relu5_3.infer_shape_partial()
    shape = relu_shape[0]
    # The reshapes below treat the feature map as (batch, channels, height,
    # width); presumably NCHW.
    batch_size, height, width = shape[0], shape[2], shape[3]

    # RPN
    rpn_conv = mx.symbol.Convolution(
        data=relu5_3, kernel=(3, 3), pad=(1, 1), num_filter=512,
        name="rpn_conv_3x3")

    bilstm = rnn.BidirectionalCell(
        rnn.LSTMCell(128, prefix="l_"),
        rnn.LSTMCell(128, prefix='r_'))

    # Move channels last, then merge the first two axes (batch and height)
    # so that every feature-map row becomes one sequence of `width` steps.
    rpn_conv_t = mx.symbol.transpose(rpn_conv, axes=(0, 2, 3, 1))
    rpn_conv_t = mx.symbol.reshape(rpn_conv_t, shape=(-3, -2))

    # The time axis ('T' in 'NTC') of the reshaped tensor is the width axis,
    # so the unroll length is `width`. The previous `shape[0] * shape[3]`
    # only matched it when the batch size was 1.
    lstm_o, _ = bilstm.unroll(
        width, rpn_conv_t, layout='NTC', merge_outputs=True)

    # Project the LSTM outputs to 512 features per position and restore the
    # (batch, 512, height, width) layout expected by the 1x1 heads.
    pred = mx.sym.FullyConnected(
        data=lstm_o, num_hidden=512, flatten=False, name='lstm_proj')
    pred = mx.sym.reshape(pred, shape=(batch_size, height, width, 512))
    pred = mx.symbol.transpose(pred, axes=(0, 3, 1, 2))
    rpn_relu = mx.symbol.Activation(
        data=pred, act_type="relu", name="rpn_relu")

    rpn_cls_score = mx.symbol.Convolution(
        data=rpn_relu, kernel=(1, 1), pad=(0, 0),
        num_filter=2 * num_anchors, name="rpn_cls_score")
    rpn_bbox_pred = mx.symbol.Convolution(
        data=rpn_relu, kernel=(1, 1), pad=(0, 0),
        num_filter=4 * num_anchors, name="rpn_bbox_pred")

    # prepare rpn data: fold the 2 * num_anchors score channels into a
    # 2-channel tensor so the softmax is taken over two classes
    rpn_cls_score_reshape = mx.symbol.Reshape(
        data=rpn_cls_score, shape=(0, 2, -1, 0),
        name="rpn_cls_score_reshape")

    # classification
    cls_prob = mx.symbol.SoftmaxOutput(
        data=rpn_cls_score_reshape, label=label, multi_output=True,
        normalization='valid', use_ignore=True, ignore_label=-1,
        name="cls_prob")

    # bounding box regression
    bbox_loss_ = bbox_weight * mx.symbol.smooth_l1(
        name='bbox_loss_', scalar=3.0, data=(rpn_bbox_pred - bbox_target))
    bbox_loss = mx.sym.MakeLoss(
        name='bbox_loss', data=bbox_loss_,
        grad_scale=1.0 / config.TRAIN.RPN_BATCH_SIZE)

    # group output
    return mx.symbol.Group([cls_prob, bbox_loss])
def get_vgg_text_rpn_test(data, im_info, num_anchors=10):
    """
    Build the inference symbol of the text Region Proposal Network.

    Despite the "vgg" in the name, the shared convolutional features are
    produced by ``get_mobilenet_text_conv``. On top of them the network runs
    a 3x3 convolution, a bidirectional LSTM over each feature-map row, a
    512-unit projection and the two 1x1 RPN heads, and finally a proposal
    operator that turns the head outputs into ROIs.

    :param data: input symbol fed to the shared convolutional layers
    :param im_info: image information symbol forwarded to the proposal operator
    :param num_anchors: used to determine output size
    :return: Symbol produced by the proposal operator (created with
        ``output_score=True``)
    """
    # shared convolutional layers
    relu5_3 = get_mobilenet_text_conv(data)

    # NOTE(review): infer_shape_partial() is called without input shapes, so
    # the feature-map shape must be derivable from the symbol itself (e.g. a
    # shape attached to `data`) -- confirm against the caller.
    _, relu_shape, _ = relu5_3.infer_shape_partial()
    shape = relu_shape[0]
    # The reshapes below treat the feature map as (batch, channels, height,
    # width); presumably NCHW.
    batch_size, height, width = shape[0], shape[2], shape[3]

    # RPN
    rpn_conv = mx.symbol.Convolution(
        data=relu5_3, kernel=(3, 3), pad=(1, 1), num_filter=512,
        name="rpn_conv_3x3")

    bilstm = rnn.BidirectionalCell(
        rnn.LSTMCell(128, prefix="l_"),
        rnn.LSTMCell(128, prefix='r_'))

    # Move channels last, then merge the first two axes (batch and height)
    # so that every feature-map row becomes one sequence of `width` steps.
    rpn_conv_t = mx.symbol.transpose(rpn_conv, axes=(0, 2, 3, 1))
    rpn_conv_t = mx.symbol.reshape(rpn_conv_t, shape=(-3, -2))

    # The time axis ('T' in 'NTC') of the reshaped tensor is the width axis,
    # so the unroll length is `width`. The previous `shape[0] * shape[3]`
    # only matched it when the batch size was 1.
    lstm_o, _ = bilstm.unroll(
        width, rpn_conv_t, layout='NTC', merge_outputs=True)

    # Project the LSTM outputs to 512 features per position and restore the
    # (batch, 512, height, width) layout expected by the 1x1 heads.
    pred = mx.sym.FullyConnected(
        data=lstm_o, num_hidden=512, flatten=False, name='lstm_proj')
    pred = mx.sym.reshape(pred, shape=(batch_size, height, width, 512))
    pred = mx.symbol.transpose(pred, axes=(0, 3, 1, 2))
    rpn_relu = mx.symbol.Activation(
        data=pred, act_type="relu", name="rpn_relu")

    rpn_cls_score = mx.symbol.Convolution(
        data=rpn_relu, kernel=(1, 1), pad=(0, 0),
        num_filter=2 * num_anchors, name="rpn_cls_score")
    rpn_bbox_pred = mx.symbol.Convolution(
        data=rpn_relu, kernel=(1, 1), pad=(0, 0),
        num_filter=4 * num_anchors, name="rpn_bbox_pred")

    # ROI Proposal: fold the scores into 2 channels for the channel-wise
    # softmax, then unfold back to 2 * num_anchors channels.
    rpn_cls_score_reshape = mx.symbol.Reshape(
        data=rpn_cls_score, shape=(0, 2, -1, 0),
        name="rpn_cls_score_reshape")
    rpn_cls_prob = mx.symbol.SoftmaxActivation(
        data=rpn_cls_score_reshape, mode="channel", name="rpn_cls_prob")
    rpn_cls_prob_reshape = mx.symbol.Reshape(
        data=rpn_cls_prob, shape=(0, 2 * num_anchors, -1, 0),
        name='rpn_cls_prob_reshape')

    # Arguments common to both proposal implementations.
    proposal_args = dict(
        cls_prob=rpn_cls_prob_reshape,
        bbox_pred=rpn_bbox_pred,
        im_info=im_info,
        name='rois',
        output_score=True,
        scales=tuple(config.ANCHOR_SCALES),
        ratios=tuple(config.ANCHOR_RATIOS),
        rpn_pre_nms_top_n=config.TEST.PROPOSAL_PRE_NMS_TOP_N,
        rpn_post_nms_top_n=config.TEST.PROPOSAL_POST_NMS_TOP_N,
        threshold=config.TEST.PROPOSAL_NMS_THRESH,
        rpn_min_size=config.TEST.PROPOSAL_MIN_SIZE)

    # The two operators spell the stride keyword differently
    # (`feature_stride` vs `feat_stride`); both receive the same value.
    if config.TEST.CXX_PROPOSAL:
        rois = mx.symbol.contrib.Proposal(
            feature_stride=config.RPN_FEAT_STRIDE, **proposal_args)
    else:
        rois = mx.symbol.Custom(
            op_type='proposal', feat_stride=config.RPN_FEAT_STRIDE,
            **proposal_args)

    return rois