Exemple #1
0
class Handler:
  """Heatmap-based landmark predictor driven by an ESSH face detector.

  The MXNet checkpoint is truncated at its ``heatmap_output`` layer and
  bound for single-image inference on a 128x128 input.
  """

  def __init__(self, prefix, epoch, ctx_id=0):
    """Load the checkpoint ``prefix``/``epoch`` and create the detector.

    Args:
      prefix: checkpoint prefix passed to ``mx.model.load_checkpoint``.
      epoch: checkpoint epoch passed to ``mx.model.load_checkpoint``.
      ctx_id: GPU index; any negative value selects the CPU context.
    """
    print('loading',prefix, epoch)
    if ctx_id>=0:
      ctx = mx.gpu(ctx_id)
    else:
      ctx = mx.cpu()
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    # Only the heatmap head is needed for inference.
    sym = sym.get_internals()['heatmap_output']
    self.image_size = (128, 128)
    model = mx.mod.Module(symbol=sym, context=ctx, label_names = None)
    model.bind(for_training=False,
               data_shapes=[('data', (1, 3, self.image_size[0], self.image_size[1]))])
    model.set_params(arg_params, aux_params)
    self.model = model
    self.detector = ESSHDetector('./essh-model/essh-r50', 0)

  def trans_dot(self, trans1, trans2):
    """Compose two 2x3 affine transforms.

    Returns the 2x3 matrix equivalent to applying ``trans1`` first and
    ``trans2`` second.
    """
    trans1 = np.vstack((trans1, [0,0,1]))
    trans2 = np.vstack((trans2, [0,0,1]))
    return np.dot(trans2, trans1)[0:2]

  def get_landmark(self, img):
    """Predict landmarks for every face detected in ``img``.

    Args:
      img: image passed to the detector and to ``img_helper.preprocess``
        (presumably BGR, since it is converted with COLOR_BGR2RGB before
        inference -- confirm against callers).

    Returns:
      ``None`` when the detector reports no faces; otherwise a float32
      array of shape ``(num_faces, 68, 2)`` with points mapped back into
      the coordinate frame of ``img``. Rows for faces whose second-pass
      detection fails are left as zeros.
    """
    ret = self.detector.detect(img, threshold=0.4)
    if ret is None or ret.shape[0]==0:
      return None
    bbox = ret[:,0:4]
    points = ret[:, 5:15].reshape(-1,5,2)
    landmark_list = np.zeros((bbox.shape[0], 68, 2), dtype=np.float32)
    for i in range(bbox.shape[0]):
      rimg, _, trans1 = img_helper.preprocess(img, points[i], 384)
      # Re-detect on the preprocessed crop to get a box for the final crop.
      ret2 = self.detector.detect(rimg, threshold=0.4)
      if ret2 is None or ret2.shape[0] == 0:
        # Nothing found in the preprocessed crop: skip this face rather than
        # crash on ret2[0]; its landmark row stays zero.
        continue
      rimg, trans2 = img_helper.transform2(rimg, None, self.image_size[0], ret2[0,0:4])
      trans = self.trans_dot(trans1, trans2)
      img2 = cv2.cvtColor(rimg, cv2.COLOR_BGR2RGB)
      img2 = np.transpose(img2, (2,0,1)) #3*128*128, RGB
      input_blob = np.zeros((1, 3, self.image_size[1], self.image_size[0]), dtype=np.uint8)
      input_blob[0] = img2
      ta = datetime.datetime.now()
      data = mx.nd.array(input_blob)
      db = mx.io.DataBatch(data=(data,))
      self.model.forward(db, is_train=False)
      alabel = self.model.get_outputs()[-1].asnumpy()[0]
      tb = datetime.datetime.now()
      print('module time cost', (tb-ta).total_seconds())
      # Inverse of the combined transform maps crop coordinates back to img.
      IM = cv2.invertAffineTransform(trans)
      # NOTE(review): assumes the model emits at most 68 heatmap channels,
      # matching the second dimension of landmark_list -- confirm.
      for j in range(alabel.shape[0]):
        a = cv2.resize(alabel[j], (self.image_size[1], self.image_size[0]))
        ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
        # ind is (row, col); build a homogeneous (x, y, 1) point.
        point = np.dot(IM, (ind[1], ind[0], 1.0))
        landmark_list[i,j] = point[0:2]
    return landmark_list
Exemple #2
0
class ESSHModel:
    """Runs the ESSH face detector on frames and accumulates detection time."""

    def __init__(self):
        """Create a CPU detector (``ctx_id=-1``) and reset the timing counter."""
        self.total_time = 0
        # scales[0] is the target size for the shorter image side,
        # scales[1] the upper bound for the longer side.
        self.scales = [1200, 1600]
        self.t = 10
        self.esshDetector = ESSHDetector('./model/essh-r50', 0, ctx_id=-1)

    def detect_faces(self, video, output):
        """Process ``video`` via the module-level ``detect_faces`` helper.

        Returns:
            Total seconds spent in detection, as accumulated by
            ``detect_faces_on_img`` during this call.
        """
        self.total_time = 0
        detect_faces(video, output, 640, 360, self.detect_faces_on_img)
        return self.total_time

    def detect_faces_on_img(self, img):
        """Detect faces in ``img`` and draw boxes and landmarks onto it.

        The image is downscaled first when its shorter side exceeds
        ``scales[0]`` or its longer side exceeds ``scales[1]``. The time
        spent in detection is added to ``self.total_time``.

        Args:
            img: image array; ``img.shape[0:2]`` are used as its two sizes.

        Returns:
            The (possibly resized) image with annotations drawn in place.
        """
        im_shape = img.shape
        target_size = self.scales[0]
        max_size = self.scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        if im_size_min > target_size or im_size_max > max_size:
            im_scale = float(target_size) / float(im_size_min)
            # prevent bigger axis from being more than max_size:
            if np.round(im_scale * im_size_max) > max_size:
                im_scale = float(max_size) / float(im_size_max)
            img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)
            print('resize to', img.shape)

        timea = datetime.datetime.now()

        faces = self.esshDetector.detect(img, threshold=0.5)
        if faces is None:
            # Treat "no result" like "zero faces" so the slicing below is safe.
            faces = np.zeros((0, 15), dtype=np.float32)
        bbox = np.round(faces[:, 0:5])
        landmark = faces[:, 5:15].reshape(-1, 5, 2)

        timeb = datetime.datetime.now()
        self.total_time += (timeb - timea).total_seconds()

        for b in bbox:
            cv2.rectangle(img, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                          (0, 255, 0), 2)
        for p in landmark:
            for i in range(5):
                # OpenCV drawing calls need integer pixel coordinates.
                cv2.circle(img, (int(p[i][0]), int(p[i][1])), 1, (0, 0, 255),
                           2)
        return img
class Handler:
    """Heatmap-based landmark predictor that crops faces by bounding box.

    The MXNet checkpoint is truncated at its ``heatmap_output`` layer and
    bound for single-image inference on a 128x128 input.
    """

    def __init__(self, prefix, epoch, ctx_id=0):
        """Load the checkpoint ``prefix``/``epoch`` and create the detector.

        Args:
            prefix: checkpoint prefix for ``mx.model.load_checkpoint``.
            epoch: checkpoint epoch for ``mx.model.load_checkpoint``.
            ctx_id: GPU index; any negative value selects the CPU context.
        """
        print('loading', prefix, epoch)
        if ctx_id >= 0:
            ctx = mx.gpu(ctx_id)
        else:
            ctx = mx.cpu()
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        # Only the heatmap head is needed for inference.
        sym = sym.get_internals()['heatmap_output']
        self.image_size = (128, 128)
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        model.bind(for_training=False,
                   data_shapes=[('data', (1, 3, self.image_size[0],
                                          self.image_size[1]))])
        model.set_params(arg_params, aux_params)
        self.model = model
        self.detector = ESSHDetector('./essh-model/essh', 0)

    def get(self, img):
        """Predict landmarks for every detected face, in crop coordinates.

        Args:
            img: image passed to the detector and to ``cv2.warpAffine``
                (presumably BGR, since the crop is converted with
                COLOR_BGR2RGB before inference -- confirm against callers).

        Returns:
            ``None`` when the detector returns ``None``. Otherwise a tuple
            ``(landmarks, M)``: ``landmarks`` is a float32 array of shape
            ``(num_faces, 68, 2)`` holding (x, y) heatmap peaks in the
            128x128 crop, and ``M`` is a float32 array of shape
            ``(num_faces, 2, 3)`` with the affine transform used for each
            crop. Points are NOT mapped back to ``img`` coordinates here.
        """
        ret = self.detector.detect(img, threshold=0.5)
        if ret is None:
            return None
        bbox = ret[:, 0:4]

        M = np.zeros((bbox.shape[0], 2, 3), dtype=np.float32)
        ret = np.zeros((bbox.shape[0], 68, 2), dtype=np.float32)
        for i in range(bbox.shape[0]):
            M[i] = img_helper.estimate_trans_bbox(bbox[i, :],
                                                  self.image_size[0],
                                                  s=1.2)
            rimg = cv2.warpAffine(img, M[i], self.image_size, borderValue=0.0)
            img2 = cv2.cvtColor(rimg, cv2.COLOR_BGR2RGB)
            img2 = np.transpose(img2, (2, 0, 1))  # 3*128*128, RGB
            input_blob = np.zeros(
                (1, 3, self.image_size[1], self.image_size[0]), dtype=np.uint8)
            input_blob[0] = img2
            ta = datetime.datetime.now()
            data = mx.nd.array(input_blob)
            db = mx.io.DataBatch(data=(data, ))
            self.model.forward(db, is_train=False)
            alabel = self.model.get_outputs()[-1].asnumpy()[0]
            tb = datetime.datetime.now()
            print('module time cost', (tb - ta).total_seconds())
            # NOTE(review): assumes the model emits at most 68 heatmap
            # channels, matching the second dimension of ``ret`` -- confirm.
            for j in range(alabel.shape[0]):
                a = cv2.resize(alabel[j],
                               (self.image_size[1], self.image_size[0]))
                ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
                # ind is (row, col); store as (x, y).
                ret[i, j] = (ind[1], ind[0])
        return ret, M
Exemple #4
0
# Benchmark fragment: optionally downscale ``img``, warm the detector up,
# then time a single detection. ``img``, ``scales``, ``t`` and ``detector``
# are expected to be defined earlier in the script.
im_shape = img.shape
print(im_shape)
target_size = scales[0]
max_size = scales[1]
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
if im_size_min > target_size or im_size_max > max_size:
    im_scale = float(target_size) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)
    print('resize to', img.shape)

# Warm-up runs: results are discarded so that only the timed call below
# contributes to the reported duration.
for _ in range(t - 1):
    detector.detect(img)
timea = datetime.datetime.now()

faces = detector.detect(img, threshold=0.5)
bbox = np.round(faces[:, 0:5])
landmark = faces[:, 5:15].reshape(-1, 5, 2)

timeb = datetime.datetime.now()
diff = timeb - timea
print('detection uses', diff.total_seconds(), 'seconds')
print('find', faces.shape[0], 'faces')
print(bbox)
print(landmark)

# for i in xrange(faces.shape[0]):
#   cv2.rectangle(img, (faces[i,0],faces[i,1]), (faces[i,2],faces[i,3]), (0,255,0), 2)
class Handler:
    """Heatmap-based landmark predictor used for evaluation against labels.

    The MXNet checkpoint is truncated at its ``heatmap_output`` layer and
    bound for single-image inference on a 128x128 input.
    """

    def __init__(self, prefix, epoch, ctx_id=0):
        """Load the checkpoint ``prefix``/``epoch`` and create the detector.

        Args:
            prefix: checkpoint prefix for ``mx.model.load_checkpoint``.
            epoch: checkpoint epoch for ``mx.model.load_checkpoint``.
            ctx_id: GPU index; any negative value selects the CPU context.
        """
        print('loading', prefix, epoch)
        if ctx_id >= 0:
            ctx = mx.gpu(ctx_id)
        else:
            ctx = mx.cpu()
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        # Only the heatmap head is needed for inference.
        sym = sym.get_internals()['heatmap_output']
        self.image_size = (128, 128)
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        model.bind(for_training=False,
                   data_shapes=[('data', (1, 3, self.image_size[0],
                                          self.image_size[1]))])
        model.set_params(arg_params, aux_params)
        self.model = model
        self.detector = ESSHDetector('./essh-model/essh', 0)

    def trans_dot(self, trans1, trans2):
        """Compose two 2x3 affine transforms.

        Returns the 2x3 matrix equivalent to applying ``trans1`` first and
        ``trans2`` second.
        """
        trans1 = np.vstack((trans1, [0, 0, 1]))
        trans2 = np.vstack((trans2, [0, 0, 1]))
        return np.dot(trans2, trans1)[0:2]

    def get_maxpos(self, img, det):
        """Return the row index of the detection closest to the image centre.

        Args:
            img: image array; only ``img.shape[0:2]`` (rows, cols) is used.
            det: 2-D array whose columns 0-3 are read as x1, y1, x2, y2.

        Returns:
            Index of the row of ``det`` whose box centre has the smallest
            squared distance to the image centre.
        """
        img_size = np.asarray(img.shape)[0:2]
        img_center = img_size / 2
        # Row 0 holds x offsets (vs. width/2), row 1 y offsets (vs. height/2).
        offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                             (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
        offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
        return np.argmin(offset_dist_squared)

    def get_landmark(self, img, label, dataset, use_essh):
        """Predict landmarks for one face and re-quantise the labels.

        Args:
            img: input image; no colour conversion is applied before
                inference (presumably already in the channel order the
                model expects -- confirm against callers).
            label: per-landmark ground-truth points, indexed ``label[i]``;
                MODIFIED IN PLACE (see Returns).
            dataset: forwarded unchanged to ``img_helper.transform2``.
            use_essh: when true, the crop is derived from ESSH detections
                (most central face); otherwise from ``label``.

        Returns:
            ``(None, None)`` when ``use_essh`` is true and either detection
            pass finds no face. Otherwise ``(landmark, label)`` where
            ``landmark`` is a float32 ``(68, 2)`` array of predictions in
            ``img`` coordinates and ``label`` is the input object with each
            point projected into the crop, floored (``'2d'`` landmark type)
            or rounded, and projected back.
        """
        if use_essh:
            ret = self.detector.detect(img, threshold=0.4)
            if ret is None or ret.shape[0] == 0:
                return None, None
            bindex = self.get_maxpos(img, ret)
            points = ret[bindex][5:15].reshape(5, 2)
            rimg, label2, trans1 = img_helper.preprocess(
                img, points, img.shape[0])
            # Re-detect on the preprocessed image to get the final crop box.
            ret2 = self.detector.detect(rimg, threshold=0.4)
            if ret2 is None or ret2.shape[0] == 0:
                return None, None
            bindex2 = self.get_maxpos(rimg, ret2)
            rimg, trans2 = img_helper.transform2(rimg, None,
                                                 self.image_size[0],
                                                 ret2[bindex2, 0:4], dataset)
        else:
            rimg, label2, trans1 = img_helper.preprocess(
                img, label, img.shape[0])
            rimg, trans2 = img_helper.transform2(rimg, label2,
                                                 self.image_size[0], None,
                                                 dataset)
        trans = self.trans_dot(trans1, trans2)
        img2 = np.transpose(rimg, (2, 0, 1))  # channels first: 3*128*128
        input_blob = np.zeros((1, 3, self.image_size[1], self.image_size[0]),
                              dtype=np.uint8)
        input_blob[0] = img2
        data = mx.nd.array(input_blob)
        db = mx.io.DataBatch(data=(data, ))
        self.model.forward(db, is_train=False)
        alabel = self.model.get_outputs()[-1].asnumpy()[0]
        # Inverse of the combined transform maps crop coordinates back to img.
        IM = cv2.invertAffineTransform(trans)
        landmark = np.zeros((68, 2), dtype=np.float32)
        # NOTE(review): assumes at most 68 heatmap channels and that ``label``
        # has an entry for each of them -- confirm.
        for i in range(alabel.shape[0]):
            a = cv2.resize(alabel[i], (self.image_size[1], self.image_size[0]))
            ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
            # ind is (row, col); build a homogeneous (x, y, 1) point.
            landmark[i] = np.dot(IM, (ind[1], ind[0], 1.0))[0:2]
            # Quantise the label in crop space, then map it back.
            npt = img_helper.transform_pt(label[i], trans)
            if config.landmark_type == '2d':
                npt = np.floor(npt)
            else:
                npt = np.round(npt)
            label[i] = np.dot(IM, (npt[0], npt[1], 1.0))[0:2]
        return landmark, label