Example #1
0
    def forward(self, inputs):
        """Run the network on ``inputs``.

        When ``self.training`` is set, ``inputs`` is unpacked as
        ``(imgs, annotations)`` and the value of ``self.focalLoss`` is
        returned.  Otherwise ``inputs`` is the image batch itself and the
        method returns ``[scores, classes, boxes]`` for the first batch element
        after thresholding at 0.05 and running ``gpu_nms`` with 0.5; when no
        anchor passes the threshold a tuple of three empty tensors is
        returned instead.
        """
        if self.training:
            imgs, annotations = inputs
        else:
            imgs = inputs

        # Backbone: only the outputs of layer2..layer4 are fed to the FPN.
        stem = self.conv1(imgs)
        c1 = self.layer1(stem)
        c2 = self.layer2(c1)
        c3 = self.layer3(c2)
        c4 = self.layer4(c3)
        pyramid = self.fpn([c2, c3, c4])

        # Apply each head to every pyramid level and join along dim 1.
        reg_per_level = [self.regression(level) for level in pyramid]
        reg_feats = torch.cat(reg_per_level, dim=1)
        cls_per_level = [self.classification(level) for level in pyramid]
        cls_feats = torch.cat(cls_per_level, dim=1)

        anchors = self.anchors(imgs)

        if self.training:
            return self.focalLoss(cls_feats, reg_feats, anchors, annotations)

        boxes = self.clipBoxes(self.regressBoxes(anchors, reg_feats), imgs)

        # Best class score per anchor, kept 3-D so it can be concatenated
        # with the boxes below.
        best_scores = torch.max(cls_feats, dim=2, keepdim=True)[0]
        keep_mask = (best_scores > 0.05)[0, :, 0]

        if keep_mask.sum() == 0:
            return torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)

        cls_feats = cls_feats[:, keep_mask, :]
        boxes = boxes[:, keep_mask, :]
        best_scores = best_scores[:, keep_mask, :]

        # gpu_nms receives one row per surviving anchor: box columns + score.
        detections = torch.cat([boxes, best_scores], dim=2)[0, :, :]
        anchors_nms_idx, _ = gpu_nms(detections, 0.5)

        nms_scores, nms_class = cls_feats[0, anchors_nms_idx, :].max(dim=1)

        return [nms_scores, nms_class, boxes[0, anchors_nms_idx, :]]
Example #2
0
    def nms(self, boxes, scores, classes, confidence, nms_threshold,
            use_gpu_nms=False, max_boxes=50):
        """Run non-maximum suppression through the ``nms`` module.

        ``scores`` and ``classes`` are first expanded into a dense
        ``(len(scores), num_classes)`` matrix that is zero everywhere except
        ``matrix[i][classes[i]] = scores[i]``; that matrix is what the
        ``nms`` module receives.

        Parameters:
        ----------
            boxes: passed through unchanged to the nms routine
            scores: array with one score per box (``scores.shape[0]`` rows)
            classes: iterable of class indices, one per box
            confidence: forwarded as ``score_thresh``
            nms_threshold: forwarded as ``iou_thresh``
            use_gpu_nms: when true, call ``nms.gpu_nms`` and evaluate its
                results with ``self.sess.run``; otherwise (default, the
                previous hard-coded behaviour) call ``nms.cpu_nms``
            max_boxes: forwarded as ``max_boxes`` (default 50, the previous
                hard-coded value)
        Returns:
        -------
            (boxes, scores, classes) as produced by the selected routine
        """
        num_classes = len(self.class_names)

        # Scatter each box's score into the column of its class.
        scores_conv = np.zeros((scores.shape[0], num_classes))
        for i, c_idx in enumerate(classes):
            scores_conv[i][c_idx] = scores[i]

        nms_kwargs = dict(max_boxes=max_boxes,
                          score_thresh=confidence,
                          iou_thresh=nms_threshold)

        if use_gpu_nms:
            boxes, scores, classes = nms.gpu_nms(boxes, scores_conv,
                                                 num_classes, **nms_kwargs)
            # The GPU variant's results are evaluated through the session.
            boxes, scores, classes = self.sess.run([boxes, scores, classes])
        else:
            boxes, scores, classes = nms.cpu_nms(boxes, scores_conv,
                                                 num_classes, **nms_kwargs)
        return boxes, scores, classes
Example #3
0
 def _nms(dets):
     """Run ``nms.gpu_nms`` on ``dets`` with the ``thresh`` and ``device_id``
     names taken from the surrounding scope, and return its result."""
     keep = nms.gpu_nms(dets, thresh, device_id)
     return keep
Example #4
0
# Put this OpenCV 2.4.13.2 lib directory first on sys.path so that the
# `import cv2` below searches it before any other location.
# NOTE(review): `sys` and `np` are used in this snippet but are not imported
# in the visible part of the file.
cv2_path = '/media/disk1/yangfan/opencv-2.4.13.2/lib'
sys.path.insert(0, cv2_path)
import cv2

import mxnet as mx
print mx.__version__
from mtcnn_detector import MtcnnDetector
from time import time
from nms.gpu_nms import *

from config import GPU_ID

# Unconditional setup block (the condition is the literal True).
if True:
    # Call gpu_nms once on ten all-zero float32 rows. `pick` is not used in
    # the visible code -- presumably this is a warm-up of the GPU NMS kernel
    # (TODO confirm).
    boxes = np.zeros((10, 5))
    boxes = boxes.astype('float32')
    pick = gpu_nms(boxes, float(0.7), GPU_ID)
    # Three thresholds handed to MtcnnDetector below.
    threshold = [0.5, 0.5, 0.6]
    ctx = mx.gpu(GPU_ID)
    print ctx
    detector = MtcnnDetector(model_folder='model',
                             ctx=ctx,
                             num_worker=20,
                             threshold=threshold,
                             accurate_landmark=True,
                             minsize=40)
    # Bind the bound method to a short local name.
    detect_face = detector.detect_face
    # Index -> [low, high] pair; presumably inclusive age-bucket bounds in
    # years -- confirm against the code that reads age_dict.
    age_dict = {}
    age_dict[0] = [0, 2]
    age_dict[1] = [3, 7]
    age_dict[2] = [8, 13]
    age_dict[3] = [14, 18]
Example #5
0
def detect_first_stage(img, index, threshold, ctx):
    """
        run PNet for first stage on one entry of the scale list

    Parameters:
    ----------
        img: numpy array, bgr order
            input image, indexed as (height, width, channel)
        index: int
            position in the module-level ``real_scales`` / ``real_executors``
            sequences; selects the scale factor and the executor to run
        threshold: float
            forwarded to ``generate_bbox``
        ctx: mxnet context
            its ``device_id`` selects the GPU used by ``gpu_nms``
    Returns:
    -------
        total_boxes : float32 numpy array holding the rows kept by NMS,
            or None when ``generate_bbox`` returns an empty array
    """
    scale = real_scales[index]
    height, width, _ = img.shape
    hs = int(height * scale)
    ws = int(width * scale)

    # cv2.resize takes the target size as (width, height).
    im_data = cv2.resize(img, (ws, hs))

    # adjust for the network input
    input_buf = adjust_input(im_data)

    executor = real_executors[index]
    executor.forward(is_train=False, data=input_buf)
    output = executor.outputs[0].asnumpy()

    # With a regression head the second network output is passed to
    # generate_bbox; without one the first output is passed in its place.
    if has_reg:
        reg = executor.outputs[1].asnumpy()
    else:
        reg = output[0]
    boxes = generate_bbox(output[0][1, :, :], reg, scale, threshold)
    if boxes.size == 0:
        return None

    # gpu_nms is fed float32 data; only the first five columns take part.
    boxes = boxes.astype('float32')
    # Use the context's device id directly instead of parsing one character
    # out of its string form, which only works for single-digit ids.
    pick = gpu_nms(boxes[:, 0:5], 0.5, ctx.device_id)
    return boxes[pick]
Example #6
0
    def detect_face(self, img):
        """
            detect faces over img and append per-face network outputs
        Parameters:
        ----------
            img: numpy array, bgr order, 3-dimensional, indexed as
                 (row, column, channel)
                input image
        Returns:
        -------
            None when img is None or is not 3-dimensional.
            When no box survives a stage: (None, None) if the module-level
            flag has_landmark is set, otherwise None.
            Otherwise (bboxes, points) if has_landmark is set, else bboxes.
            bboxes: numpy array, one row per face; columns 0..4 are
                (x1, y1, x2, y2, score).  When self.accurate_landmark is
                set the boxes are squared and rounded, at most
                self.fourth_stage_num rows are returned, and the outputs of
                the additional executors are appended as extra columns in
                the order given by the final np.hstack below.
            points: numpy array of landmark coordinates; of the first ten
                columns the even ones are x values and the odd ones are y
                values, both mapped into image coordinates
        """
        # NOTE(review): these two early exits return a bare None even when
        # has_landmark is set, unlike the empty results further down.
        if img is None:
            return None

        # only works for color image
        if len(img.shape) != 3:
            return None

        empty_result = (None, None) if has_landmark else None

        def _crop_patch(pad_info, i, dtype):
            # Copy box i out of img into a zero canvas of the padded box size.
            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad_info
            patch = np.zeros((tmph[i], tmpw[i], 3), dtype=dtype)
            patch[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
                                                               x[i]:ex[i] + 1, :]
            return patch

        #############################################
        # first stage
        #############################################
        # Instance attributes reset on every call; kept because code outside
        # this method may read them.
        self.index = []
        self.t = []

        total_boxes = []
        for i, _ in enumerate(self.scales):
            stage_boxes = detect_first_stage(img, i, self.threshold[0],
                                             self.ctx)
            if stage_boxes is not None:
                total_boxes.append(stage_boxes)

        if not total_boxes:
            return empty_result

        total_boxes = np.vstack(total_boxes)
        if total_boxes.size == 0:
            return empty_result

        # merge the detection from first stage
        # Convert (not reinterpret) to float32 before handing to gpu_nms.
        total_boxes = total_boxes.astype('float32')
        pick = gpu_nms(total_boxes[:, 0:5], float(0.7), GPU_ID)
        total_boxes = total_boxes[pick]

        total_boxes = self.convert_to_square(total_boxes)
        total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

        #############################################
        # second stage
        #############################################
        num_box = total_boxes.shape[0]
        print('first stage num: %d' % (num_box))

        # The executor batch is fixed at second_stage_num rows; boxes beyond
        # that are dropped and unused rows stay zero.
        num_used = min(num_box, self.second_stage_num)
        pad_info = self.pad(total_boxes, self.width, self.height)
        # (3, 24, 24) is the input shape for RNet
        input_buf = np.zeros((self.second_stage_num, 3, 24, 24),
                             dtype=np.float32)
        for i in range(num_used):
            patch = _crop_patch(pad_info, i, np.uint8)
            input_buf[i, :, :, :] = adjust_input(cv2.resize(patch, (24, 24)))

        self.executor2.forward(is_train=False, data=input_buf)
        output1 = self.executor2.outputs[0].asnumpy()
        output2 = self.executor2.outputs[1].asnumpy()

        # filter the total_boxes with threshold; only rows that were actually
        # filled are considered so a zero-padded row can never be selected.
        passed = np.flatnonzero(output1[:num_used, 1] > self.threshold[1])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return empty_result

        total_boxes[:, 4] = output1[passed, 1].reshape((-1, ))
        if has_reg:
            reg = output2[passed]

        # nms
        # NOTE(review): the full-width array is passed here, whereas the
        # first stage passes only columns 0:5 -- confirm gpu_nms accepts it.
        pick = gpu_nms(total_boxes, 0.7, GPU_ID)
        total_boxes = total_boxes[pick]
        if has_reg:
            total_boxes = self.calibrate_box(total_boxes, reg[pick])
        total_boxes = self.convert_to_square(total_boxes)
        total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

        #############################################
        # third stage
        #############################################
        num_box = total_boxes.shape[0]
        num_used = min(num_box, self.third_stage_num)

        pad_info = self.pad(total_boxes, self.width, self.height)
        # (3, 48, 48) is the input shape for ONet
        input_buf = np.zeros((self.third_stage_num, 3, 48, 48),
                             dtype=np.float32)
        for i in range(num_used):
            patch = _crop_patch(pad_info, i, np.float32)
            input_buf[i, :, :, :] = adjust_input(cv2.resize(patch, (48, 48)))

        print('second stage :' + str(num_box))

        self.executor3.forward(is_train=False, data=input_buf)
        output1 = self.executor3.outputs[0].asnumpy()
        output2 = self.executor3.outputs[1].asnumpy()
        output3 = self.executor3.outputs[2].asnumpy()
        output3_1 = self.executor3.outputs[3].asnumpy()
        print(output3_1.shape)

        # filter the total_boxes with threshold (filled rows only, as above)
        passed = np.flatnonzero(output1[:num_used, 1] > self.threshold[2])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return empty_result

        total_boxes[:, 4] = output1[passed, 1].reshape((-1, ))
        if has_reg:
            reg = output2[passed]

        # compute landmark points: scale by the box size and shift by the
        # box origin (even columns use width/x1, odd columns use height/y1).
        if has_landmark:
            points = output3[passed]
            bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
            bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
            points[:, 0:10:2] = (points[:, 0:10:2] * bbw[:, None] +
                                 total_boxes[:, 0:1])
            points[:, 1:10:2] = (points[:, 1:10:2] * bbh[:, None] +
                                 total_boxes[:, 1:2])

        # nms
        if has_reg:
            total_boxes = self.calibrate_box(total_boxes, reg)

        pick = nms(total_boxes, 0.7, 'Min')
        total_boxes = total_boxes[pick]
        if has_landmark:
            points = points[pick]

        if not self.accurate_landmark:
            if has_landmark:
                return total_boxes, points
            return total_boxes

        #############################################
        # extended stage
        #############################################
        total_boxes_tmp = self.convert_to_square(total_boxes)
        total_boxes_tmp[:, 0:4] = np.round(total_boxes_tmp[:, 0:4])

        pad_info = self.pad(total_boxes_tmp, self.width, self.height)
        input_buf = np.zeros((self.fourth_stage_num, 3, 48, 48),
                             dtype=np.float32)
        input_buf3 = np.zeros((self.fourth_stage_num, 3, 64, 64),
                              dtype=np.float32)
        input_buf4 = np.zeros((self.fourth_stage_num, 3, 96, 96),
                              dtype=np.float32)

        # Only the first fourth_stage_num boxes fit into the fixed batch.
        num_box = min(len(total_boxes_tmp), self.fourth_stage_num)
        for i in range(num_box):
            patch = _crop_patch(pad_info, i, np.float32)
            input_buf[i, :, :, :] = adjust_input(cv2.resize(patch, (48, 48)))
            input_buf3[i, :, :, :] = adjust_input(cv2.resize(patch, (64, 64)))
            input_buf4[i, :, :, :] = adjust_input(cv2.resize(patch, (96, 96)))

        self.executor4_0.forward(is_train=False, data=input_buf)
        output0_0 = self.executor4_0.outputs[0].asnumpy()
        output0_1 = self.executor4_0.outputs[1].asnumpy()
        output0_2 = self.executor4_0.outputs[2].asnumpy()
        # Scaled by 90 -- presumably angles in degrees (TODO confirm).
        output0_0 *= 90.
        output0_1 *= 90.
        output0_2 *= 90.

        self.executor4_1.forward(is_train=False, data=input_buf)
        self.executor4_3.forward(is_train=False, data=input_buf)
        out4_1 = self.executor4_1.outputs[0].asnumpy()
        out4_3 = self.executor4_3.outputs[0].asnumpy()

        self.executor4_2.forward(is_train=False, data=input_buf)
        out4_2 = self.executor4_2.outputs[0].asnumpy()

        self.executor4_5.forward(is_train=False, data=input_buf)
        out4_5 = self.executor4_5.outputs[0].asnumpy()

        # executor5 receives the same 48x48 crops as the executors above.
        self.executor5.forward(is_train=False, data=input_buf)
        out5 = self.executor5.outputs[0].asnumpy()

        # Weighted sum of the first eight executor5 outputs.
        age_weights = np.array([1.0, 5.0, 11, 16, 23, 28, 33, 40])
        age = np.zeros((num_box, 1), dtype=np.float32)
        age[:, 0] = np.dot(out5[0:num_box, 0:8], age_weights)

        best_class = np.argmax(out5, axis=1).reshape((-1, 1))
        best_prob = np.max(out5, axis=1).reshape((-1, 1))

        self.executor_true.forward(is_train=False, data=input_buf3)
        out_true = self.executor_true.outputs[0].asnumpy()

        self.executor_clear.forward(is_train=False, data=input_buf4)
        out_clear = self.executor_clear.outputs[0].asnumpy()

        # Column order of the result is fixed by this list.
        total_boxes = np.hstack([
            total_boxes_tmp[0:num_box],
            out4_1[0:num_box, 0:1],
            out4_3[0:num_box, 1:2],
            out4_2[0:num_box, 1:2],
            best_prob[0:num_box],
            best_class[0:num_box],
            age,
            out4_5[0:num_box, 1:2],
            output0_0[0:num_box],
            output0_1[0:num_box],
            output0_2[0:num_box],
            out_true[0:num_box, 1:2],
            out_clear[0:num_box, 1:2],
        ])

        # points only exists when the landmark output is enabled.
        if has_landmark:
            return total_boxes, points[0:num_box]
        return total_boxes