Example #1
def PoseEstimatorPredict(image_path,plot = False,resolution ='432x368', scales = '[None]',model = 'mobilenet_thin'):
    '''
    input:
        image_path: path to the image (jpg)
        plot = False: whether to plot; if True, returns two items: keypoint info + annotated image matrix
        resolution = '432x368': network input resolution
        scales = '[None]'
        model = 'mobilenet_thin': model selection

    output:
        plot is False: returns one item: keypoint info
        plot is True: returns two items: keypoint info + annotated image matrix
    '''
    w, h = model_wh(resolution)
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    image = common.read_imgfile(image_path, None, None)
    t = time.time()
    humans = e.inference(image, scales=scales)  # main prediction call
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (image_path, elapsed))
    centers = get_keypoint(image, humans)  # collect the keypoint information

    if plot:
        # plotting case:
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)  # drawing function
        fig = plt.figure()
        a = fig.add_subplot(2, 2, 1)
        a.set_title('Result')
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        return centers,image
    else:
        # non-plotting case:
        return centers
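For reference, a minimal usage sketch of PoseEstimatorPredict, assuming the tf-pose-estimation imports used above are available; the image path is a placeholder.

import matplotlib.pyplot as plt

# Keypoints only (plot=False returns a single value).
centers = PoseEstimatorPredict('./images/p1.jpg')

# Keypoints plus the annotated image matrix (plot=True returns two values).
centers, annotated = PoseEstimatorPredict('./images/p1.jpg', plot=True)
plt.show()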
Example #2
def main(input_img, model, e):
  '''
  Query the model given an image
  '''
  if input_img:
    image = stringToImage(input_img[input_img.find(",")+1:])
    image = toRGB(image)

    if model is None:
      model = 'mobilenet_thin'

    humans = e.inference(image)
    coords = []
    for human in humans:
      coords.append([[HUMAN_COCO_PART[k], b.x, b.y] for k, b in human.body_parts.items()])

    outdata = {
      'humans': coords
    }
    return outdata

  else:
    # Test with a sample image
    image = common.read_imgfile('./images/p1.jpg', None, None)
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(432, 368))
    humans = e.inference(image)
    coords = []
    for human in humans:
      coords.append([[HUMAN_COCO_PART[k], b.x, b.y] for k, b in human.body_parts.items()])

    outdata = {
      'humans': coords
    }
    return outdata
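The function above returns one entry per detected person, each pairing a COCO part name from HUMAN_COCO_PART with the part's relative x and y coordinates. A sketch of the expected output shape, with invented values:

example_output = {
    'humans': [
        # one list per person: [part name, relative x, relative y]
        [['Nose', 0.52, 0.18], ['Neck', 0.51, 0.30], ['RShoulder', 0.44, 0.31]],
    ]
}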
Example #3
class ConvPoseMachine(Detector):
    def __init__(self, input_width=800, input_height=640):
        import sys
        sys.path.append('./src/detector/tf-pose-estimation/src')

        self.model = 'cmu'
        import common
        self.enum_coco_parts = common.CocoPart
        self.enum_coco_colors = common.CocoColors
        self.enum_coco_pairs_render = common.CocoPairsRender

        from estimator import TfPoseEstimator
        from networks import get_graph_path
        self.image_h, self.image_w = input_height, input_width
        if self.image_w % 16 != 0 or self.image_h % 16 != 0:
            raise Exception(
                'Width and height should be multiples of 16. w=%d, h=%d' %
                (self.image_w, self.image_h))
        print('Warming up detector ConvPoseMachine....')
        import time
        s = time.time()
        self.estimator = TfPoseEstimator(get_graph_path(self.model),
                                         target_size=(self.image_w,
                                                      self.image_h))
        e = time.time()
        print('ConvPoseMachine Warmed, Time: {}'.format(e - s))

    def predict(self, imgcv):
        # Build model based on input image size
        img_h, img_w, _ = imgcv.shape

        humans = self.estimator.inference(imgcv)
        formatted_dets = []

        for human in humans:
            key_point = {}
            # draw point
            for i in range(self.enum_coco_parts.Background.value):
                if i not in human.body_parts.keys():
                    continue

                body_part = human.body_parts[i]
                x = int(
                    (body_part.x * self.image_w + 0.5) * img_w / self.image_w)
                y = int(
                    (body_part.y * self.image_h + 0.5) * img_h / self.image_h)
                center = (x, y)

                key_point[i] = center

            detection = get_default_detection()
            detection['person_keypoint'] = key_point
            formatted_dets.append(detection)

        return formatted_dets
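In predict() above, body_part.x and body_part.y are relative coordinates in [0, 1]; they are mapped onto the network input size and then rescaled to the original frame, which amounts to a pixel position in the input image. A standalone sketch of the direct relative-to-pixel conversion (sample values are invented):

def to_pixel(rel_x, rel_y, img_w, img_h):
    # Relative keypoint coordinates -> integer pixel coordinates.
    return int(rel_x * img_w + 0.5), int(rel_y * img_h + 0.5)

print(to_pixel(0.5, 0.25, img_w=640, img_h=480))  # (320, 120)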
Example #4
def index():
    try:
        data = request.data
        with open('/tmp/temp.jpg', 'wb') as f:
            f.write(data)
        img = common.read_imgfile('/tmp/temp.jpg', 432, 368)
        scales = ast.literal_eval(args.scales)
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
        humans = e.inference(img, scales=scales)
        return jsonify({"humans": list(map(lambda x: x.to_dict(), humans))})
    except Exception as e:
        return jsonify({"error": str(traceback.format_exc())})
class tfOpenpose:
    def __init__(self,
                 zoom=1.0,
                 resolution='656x368',
                 model='cmu',
                 show_process=False):
        self.zoom = zoom
        self.resolution = resolution
        self.model = model
        self.show_process = show_process
        logger.debug('initialization %s : %s' % (model, get_graph_path(model)))
        self.w, self.h = model_wh(resolution)
        self.e = TfPoseEstimator(get_graph_path(model),
                                 target_size=(self.w, self.h))

    def detect(self, image):
        logger.debug('image preprocess+')
        if self.zoom < 1.0:
            canvas = np.zeros_like(image)
            img_scaled = cv2.resize(image,
                                    None,
                                    fx=self.zoom,
                                    fy=self.zoom,
                                    interpolation=cv2.INTER_LINEAR)
            dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
            dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
            canvas[dy:dy + img_scaled.shape[0],
                   dx:dx + img_scaled.shape[1]] = img_scaled
            image = canvas
        elif self.zoom > 1.0:
            img_scaled = cv2.resize(image,
                                    None,
                                    fx=self.zoom,
                                    fy=self.zoom,
                                    interpolation=cv2.INTER_LINEAR)
            dx = (img_scaled.shape[1] - image.shape[1]) // 2
            dy = (img_scaled.shape[0] - image.shape[0]) // 2
            image = img_scaled[dy:image.shape[0], dx:image.shape[1]]

        logger.debug('image process+')
        t = time.time()
        humans = self.e.inference(image)

        logger.debug('postprocess+')
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

        logger.debug('show+')
        cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - t)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.imshow('tf-pose-estimation result', image)
        logger.debug('finished+')
Example #6
def main():
    parser = argparse.ArgumentParser(
        description='tf-pose-estimation run by folder')
    parser.add_argument('--folder', type=str, default='./images/')
    parser.add_argument('--resolution',
                        type=str,
                        default='432x368',
                        help='network input resolution. default=432x368')
    parser.add_argument('--model',
                        type=str,
                        default='mobilenet_thin',
                        help='cmu / mobilenet_thin')
    parser.add_argument('--scales',
                        type=str,
                        default='[None]',
                        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    types = ('*.png', '*.jpg')
    files_grabbed = []
    for files in types:
        files_grabbed.extend(glob.glob(os.path.join(args.folder, files)))
    all_humans = dict()
    if not os.path.exists('output'):
        os.mkdir('output')
    for i, file in enumerate(files_grabbed):
        # estimate human poses from a single image !
        image = common.read_imgfile(file, None, None)
        t = time.time()
        humans = e.inference(image, scales=scales)
        elapsed = time.time() - t

        logger.info('inference image #%d: %s in %.4f seconds.' %
                    (i, file, elapsed))

        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        # cv2.imshow('tf-pose-estimation result', image)
        filename = 'pose_{}.png'.format(i)
        output_filepath = os.path.join('output', filename)
        cv2.imwrite(output_filepath, image)
        logger.info('image saved: {}'.format(output_filepath))
        # cv2.waitKey(5000)

        all_humans[file.replace(args.folder, '')] = humans

    with open(os.path.join(args.folder, 'pose.dil'), 'wb') as f:
        dill.dump(all_humans, f, protocol=dill.HIGHEST_PROTOCOL)
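The folder run above serializes every image's detections into pose.dil with dill; a hedged sketch for reading that file back (the path assumes the default --folder value):

import dill

with open('./images/pose.dil', 'rb') as f:
    all_humans = dill.load(f)

for filename, humans in all_humans.items():
    print(filename, len(humans), 'person(s) detected')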
Example #7
def joint():
    try:
        data = request.data
        with open('/tmp/temp.jpg', 'wb') as f:
            f.write(data)
        img = common.read_imgfile('/tmp/temp.jpg', 432, 368)
        scales = ast.literal_eval(args.scales)
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
        humans = e.inference(img, scales=scales)
        image = TfPoseEstimator.draw_humans(img, humans, imgcopy=False)
        b_str = base64.b64encode(img2bytes(
            Image.fromarray(image))).decode('utf-8')
        return jsonify({"image": b_str})
    except Exception as e:
        return jsonify({"error": str(traceback.format_exc())})
Example #8
class Detector:
    def __init__(self, show=True):
        model = 'mobilenet_thin'
        w, h = model_wh('432x368')
        self.estimator = TfPoseEstimator(get_graph_path(model),
                                         target_size=(w, h))
        self.show = show

    def detectCandidates(self, frame):

        cands = []
        humans = self.estimator.inference(frame)
        image_h, image_w = frame.shape[:2]

        feat_list = []
        # Keep only detections that contain every required keypoint; filtering
        # up front avoids deleting from the list while iterating over it.
        humans[:] = [
            h for h in humans
            if not len(np.setdiff1d(needed_elements, list(h.body_parts.keys())))
        ]
        for human in humans:
            neck = human.body_parts[1]
            lhip = human.body_parts[8]
            rhip = human.body_parts[11]
            center = (neck.x + lhip.x + rhip.x) / 3, (neck.y + lhip.y +
                                                      rhip.y) / 3

            feats = []
            for idx in needed_elements:
                part = human.body_parts[idx]
                feats = feats + [part.x - center[0], part.y - center[1]]

            feat_list.append(np.asarray(feats))

            center = image_w * center[0], image_h * center[1]
            cv2.circle(frame, (int(center[0]), int(center[1])), 3, (255, 0, 0),
                       3)
            cands.append(np.asarray(center, dtype=np.float32))

        # print feat_list[0]

        if (self.show):
            frame = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False)
        return cands, feat_list, frame
def cnvt(img, name):
    os.chdir(read_path)
    image = cv2.imread(img)

    os.chdir(old_path)
    model = 'mobilenet_thin'
    resolution = '432x368'
    w, h = model_wh(resolution)

    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    humans = e.inference(image)
    blank_image = np.zeros((h,w,3), np.uint8)
    image = TfPoseEstimator.draw_humans(blank_image, humans, imgcopy=False)

    os.chdir(save_path)
    cv2.imwrite(name, image)
    print("Saved - %s As - %s" % (img, name))

    os.chdir(old_path)
Example #10
def generate_skeletonize_video():
    """
    The method takes images, saves a skeleton image for each one, and creates a video output
    :return:
    """
    input_folder = "./videos/demo/"
    output_folder = "./videos"
    results_folder = "./images/demo/"
    input_video = "./videos/demo.mp4"
    images = video_utils.load_images_from_folder(input_folder)
    w = 432
    h = 368
    estimator = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                                target_size=(w, h))
    count = 1
    for i in images:
        image_parts = estimator.inference(i, scales=None)
        image_skeleton = TfPoseEstimator.draw_humans(i,
                                                     image_parts,
                                                     imgcopy=True)
        cv2.imwrite(r".\images\demo\{}.png".format(count), image_skeleton)
        count = count + 1
    video_utils.create_video(input_video, results_folder, output_folder)
Example #11
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')  # Add arguments to the program
    parser.add_argument('--image', type=str, default='../images/p1.jpg')  # Image name; otherwise the default image is used
    parser.add_argument('--resolution', type=str, default='432x368', help='network input resolution. default=432x368')  # Specify resolution
    parser.add_argument('--model', type=str, default='mobilenet_thin', help='cmu / mobilenet_thin')  # Specify model
    parser.add_argument('--scales', type=str, default='[None]', help='for multiple scales, eg. [1.0, (1.1, 0.05)]')  # Scales for multi-scale inference
    args = parser.parse_args()  # args holds all parsed arguments
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)  # Return width and height into w, h after checking they are multiples of 16
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))  # Model + width and height

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=scales)
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    # cv2.imshow('tf-pose-estimation result', image)
    # cv2.waitKey()

    import matplotlib.pyplot as plt

    fig = plt.figure()
    a = fig.add_subplot(2, 2, 1)
    a.set_title('Result')
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        logger.debug('image preprocess+')
        if args.zoom < 1.0:
            canvas = np.zeros_like(image)
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
            dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
            dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
            canvas[dy:dy + img_scaled.shape[0], dx:dx + img_scaled.shape[1]] = img_scaled
            image = canvas
        elif args.zoom > 1.0:
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
            dx = (img_scaled.shape[1] - image.shape[1]) // 2
            dy = (img_scaled.shape[0] - image.shape[0]) // 2
            image = img_scaled[dy:image.shape[0], dx:image.shape[1]]

        logger.debug('image process+')
        humans = e.inference(image)

        logger.debug('postprocess+')
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

        logger.debug('show+')
        cv2.putText(image,
                    "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10),  cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    (0, 255, 0), 2)
        cv2.imshow('tf-pose-estimation result', image)
        fps_time = time.time()
        if cv2.waitKey(1) == 27:
            break
        logger.debug('finished+')
Example #13
        # get image from the camera
        ret_val, image = camera.read()
        # boilerplate
        canvas = np.zeros_like(image)
        img_scaled = cv2.resize(image,
                                None,
                                fx=args.zoom,
                                fy=args.zoom,
                                interpolation=cv2.INTER_LINEAR)
        dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
        dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
        canvas[dy:dy + img_scaled.shape[0],
               dx:dx + img_scaled.shape[1]] = img_scaled
        image = canvas
        # feed image into the neural network
        humans = e.inference(image)  # list of humans
        for id, human in enumerate(humans):

            Neck = 1
            LWrist = 1
            RWrist = 1
            # this works ready for submission
            # i know it works and we did the screenshots ready to show
            #another change for co author
            for k, v in human.body_parts.items():
                if POSE_COCO_BODY_PARTS[k] == "Neck":
                    Neck = v.y
                elif POSE_COCO_BODY_PARTS[k] == "RWrist":
                    RWrist = v.y
                elif POSE_COCO_BODY_PARTS[k] == "LWrist":
                    LWrist = v.y
Example #14
class Terrain(object):
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """
        # Initialize plot.
        plt.ion()
        f = plt.figure(figsize=(5, 5))
        f2 = plt.figure(figsize=(6, 5))

        self.window3DBody = f.gca(projection='3d')
        self.window3DBody.set_title('3D_Body')
        self.windowStable = f2.add_subplot(1, 1, 1)
        self.windowStable.set_title('Stable')
        self.windowStable.set_xlabel('Time')
        self.windowStable.set_ylabel('Distant')
        self.windowStable.set_ylim([0, 1500])

        #plt.show()
        self.times = [0]
        self.stable = [0]
        self.recordHead = []
        self.fps_time = 0

        model = 'mobilenet_thin_432x368'
        w, h = model_wh(model)
        #model = 'cmu'
        #w,h = 656,368
        camera = 1  # 1 means external camera, 0 means internal camera

        self.lines = {}
        self.connection = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6],
                           [0, 7], [7, 8], [8, 9], [9, 10], [8, 11], [11, 12],
                           [12, 13], [8, 14], [14, 15], [15, 16]]
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.cam = cv2.VideoCapture(camera)
        ret_val, image = self.cam.read()

        self.poseLifting = Prob3dPose(
            './src/lifting/models/prob_model_params.mat')
        self.statusBodyWindow = 0
        try:
            keypoints = self.mesh(image)

        except:
            pass

    def mesh(self, image):
        image_h, image_w = image.shape[:2]
        width = 300
        height = 300
        pose_2d_mpiis = []
        visibilities = []
        zoom = 1.0
        if zoom < 1.0:
            canvas = np.zeros_like(image)
            img_scaled = cv2.resize(image,
                                    None,
                                    fx=zoom,
                                    fy=zoom,
                                    interpolation=cv2.INTER_LINEAR)
            dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
            dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
            canvas[dy:dy + img_scaled.shape[0],
                   dx:dx + img_scaled.shape[1]] = img_scaled
            image = canvas
        elif zoom > 1.0:
            img_scaled = cv2.resize(image,
                                    None,
                                    fx=zoom,
                                    fy=zoom,
                                    interpolation=cv2.INTER_LINEAR)
            dx = (img_scaled.shape[1] - image.shape[1]) // 2
            dy = (img_scaled.shape[0] - image.shape[0]) // 2
            image = img_scaled[dy:image.shape[0], dx:image.shape[1]]

        humans = self.e.inference(image, scales=[None])
        package = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

        image = package[0]
        status_part_body_appear = package[1]
        name_part_body = [
            "Nose",
            "Neck",
            "RShoulder",
            "RElbow",
            "RWrist",
            "LShoulder",
            "LElbow",
            "LWrist",
            "RHip",
            "RKnee",
            "RAnkle",
            "LHip",
            "LKnee",
            "LAnkle",
            "REye",
            "LEye",
            "REar",
            "LEar",
        ]
        detected_part = []
        for human in humans:
            pose_2d_mpii, visibility = common.MPIIPart.from_coco(human)
            pose_2d_mpiis.append([(int(x * width + 0.5), int(y * height + 0.5))
                                  for x, y in pose_2d_mpii])
            visibilities.append(visibility)

        cv2.putText(
            image, "FPS: %f [press 'q'to quit]" %
            (1.0 / (time.time() - self.fps_time)), (10, 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        image = cv2.resize(image, (width, height))
        cv2.imshow('tf-pose-estimation result', image)

        pose_2d_mpiis = np.array(pose_2d_mpiis)
        visibilities = np.array(visibilities)
        transformed_pose2d, weights = self.poseLifting.transform_joints(
            pose_2d_mpiis, visibilities)
        pose_3d = self.poseLifting.compute_3d(transformed_pose2d, weights)
        for i, single_3d in enumerate(pose_3d):
            #plot_pose(single_3d)
            plot_pose_adapt(single_3d, self.window3DBody)
        self.fps_time = time.time()

        #Matt plot lib
        #print(pose_3d)
        #----------------------------------------
        #pyQT graph
        pose_3dqt = np.array(pose_3d[0]).transpose()

        bodyPartName = [
            'C_Hip', 'R_Hip', 'R_Knee', 'R_Ankle', 'L_Hip', 'L_Knee',
            'L_Ankle', 'Center', 'C_Shoulder', 'Neck', 'Head', 'L_Shoulder',
            'L_Elbow', 'L_Wrist', 'R_Shoulder', 'R_Elbow', 'R_Wrist'
        ]
        #for part in range(len(pose_3dqt)):
        #    print(bodyPartName[part],pose_3dqt[part])

        #for id_part in range(len(status_part_body_appear)):
        #check this part body appear or not
        #   if status_part_body_appear[id_part] == 1:
        #      print("%-10s"%name_part_body[id_part],": appear")
        #     detected_part.append(id_part)
        #else:
        #   print("%-10s"%name_part_body[id_part],": disappear")
        #list_to_check_fall_deteced = [[1,8]  , #neck,RHIP
        #                                [1,9],  #neck RKnee
        #                                [1,10], #neck RAnkle
        #                                [1,11], #neck LHip
        #                                [1,12], #neck LKne e
        #                                [1,13]]  #neck LAnkle
        if int(self.fps_time) % 1 == 0:  #every 1 second record
            self.times = self.times + [self.times[-1] + 1]
            if len(self.stable) > 1000:
                self.stable = self.stable[200:]
                self.recordHead = self.recordHead[200:]
            if self.stable == [0]:
                self.stable = self.stable + [0]
                self.recordHead = [pose_3dqt[10][2]] + [pose_3dqt[10][2]]
            else:
                #highest 800  , 550-600 average
                self.stable = self.stable + [
                    abs(pose_3dqt[10][2] - self.recordHead[-1])
                ]
                self.recordHead = self.recordHead + [pose_3dqt[10][2]]

        status_found = 0
        for id_part in detected_part:
            #if id_part in [8,9,10,11,12,13] and 1 in detected_part:
            #    status_found = 1
            if id_part in [8, 11] and 1 in detected_part:
                status_found = 1
        if status_found:
            print("-------Ready for detece--------")
            if self.fall_detection(pose_3dqt):
                print("-----\nFOUND !!!\n-----")
        #----------
        keypoints = pose_3d[0].transpose()

        return keypoints / 80

    def fall_detection(self, pose_3dqt):
        print("VALUE Z : ", "NECK , C_HIP",
              ((pose_3dqt[10][2] - pose_3dqt[0][2])**2)**(1 / 2))
        #print("VALUE Z : ","NECK , R_HIP",((pose_3dqt[10][2] - pose_3dqt[1][2])**2)**(1/2))
        #print("VALUE Z : ","NECK , R_KNEE",((pose_3dqt[10][2] - pose_3dqt[2][2])**2)**(1/2))
        #print("VALUE Z : ","NECK , R_ANKLE",((pose_3dqt[10][2] - pose_3dqt[3][2])**2)**(1/2))
        #print("VALUE Z : ","NECK , L_HIP",((pose_3dqt[10][2] - pose_3dqt[4][2])**2)**(1/2))
        #print("VALUE Z : ","NECK , L_KNEE",((pose_3dqt[10][2] - pose_3dqt[5][2])**2)**(1/2))
        #print("VALUE Z : ","NECK , L_ANKLE",((pose_3dqt[10][2] - pose_3dqt[6][2])**2)**(1/2))
        #NECK C_HIP
        if ((pose_3dqt[10][2] - pose_3dqt[0][2])**2)**(1 / 2) <= 200:
            return True
        #NECK R_HIP    Z-graph
        #elif ((pose_3dqt[10][2] - pose_3dqt[1][2])**2)**(1/2)  <= 200:
        #    return True
        #NECK  R_Knee
        #elif ((pose_3dqt[10][2] - pose_3dqt[2][2])**2)**(1/2)  <= 200:
        #    return True
        #NECK RAnkle
        #elif ((pose_3dqt[10][2] - pose_3dqt[3][2])**2)**(1/2)  <= 200:
        #    return True
        #NECK LHip
        #elif ((pose_3dqt[10][2] - pose_3dqt[4][2])**2)**(1/2)  <= 200:
        #    return True
        #NECK LKnee
        #elif ((pose_3dqt[10][2] - pose_3dqt[5][2])**2)**(1/2)  <= 200:
        #    return True
        #NECK LAnkle
        #elif ((pose_3dqt[10][2] - pose_3dqt[6][2])**2)**(1/2)  <= 200:
        #    return True
        return False

    def update(self):
        """
        update the mesh and shift the noise each time
        """
        ret_val, image = self.cam.read()
        try:
            keypoints = self.mesh(image)
            self.generateGraphStable()

        except AssertionError:
            print('body not in image')
        else:
            pass

    def generateGraphStable(self):
        plt.plot(self.times, self.stable)
        plt.pause(0.1)

    def start(self):
        """
        get the graphics window open and setup
        """
        #if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        #QtGui.QApplication.instance().exec_()

    def animation(self):
        while True:
            self.update()
            if cv2.waitKey(1) == ord('q'):
                self.cam.release()
                cv2.destroyAllWindows()
                break
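The fall_detection() check above reduces to an absolute vertical distance: ((z_neck - z_hip)**2)**(1/2) is simply |z_neck - z_hip|, and a value of 200 or less is treated as a fall. A toy version with invented values:

def is_fall(z_neck, z_hip, threshold=200):
    # A small neck-to-hip height difference suggests the body is horizontal.
    return abs(z_neck - z_hip) <= threshold

print(is_fall(650, 400))  # False: 250 > 200, person upright
print(is_fall(500, 420))  # True: 80 <= 200, possible fall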
Example #15
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    result = []
    for i, k in enumerate(tqdm(keys)):
        img_meta = cocoGt.loadImgs(k)[0]
        img_idx = img_meta['id']

        img_name = os.path.join(image_dir, img_meta['file_name'])
        image = read_imgfile(img_name, None, None)
        if image is None:
            logger.error('image not found, path=%s' % img_name)
            sys.exit(-1)

        # run inference on the image with the specified network
        humans = e.inference(image,
                             resize_to_default=(w > 0 and h > 0),
                             upsample_size=args.resize_out_ratio)

        scores = 0
        ann_idx = cocoGt.getAnnIds(imgIds=[img_idx], catIds=[1])
        anns = cocoGt.loadAnns(ann_idx)
        for human in humans:
            item = {
                'image_id': img_idx,
                'category_id': 1,
                'keypoints': write_coco_json(human, img_meta['width'],
                                             img_meta['height']),
                'score': human.score
class Terrain(object):
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """

        # setup the view window
        self.app = QtGui.QApplication(sys.argv)
        self.window = gl.GLViewWidget()
        self.window.setWindowTitle('Terrain')
        self.window.setGeometry(0, 110, 1920, 1080)
        self.window.setCameraPosition(distance=30, elevation=12)
        self.window.show()

        gx = gl.GLGridItem()
        gy = gl.GLGridItem()
        gz = gl.GLGridItem()
        gx.rotate(90, 0, 1, 0)
        gy.rotate(90, 1, 0, 0)
        gx.translate(-10, 0, 0)
        gy.translate(0, -10, 0)
        gz.translate(0, 0, -10)
        self.window.addItem(gx)
        self.window.addItem(gy)
        self.window.addItem(gz)

        model = 'mobilenet_thin_432x368'
        camera = 0
        w, h = model_wh(model)
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.cam = cv2.VideoCapture(camera)
        ret_val, image = self.cam.read()
        self.poseLifting = Prob3dPose(
            './src/lifting/models/prob_model_params.mat')
        keypoints = self.mesh(image)

        self.points = gl.GLScatterPlotItem(pos=keypoints,
                                           color=pg.glColor((0, 255, 0)),
                                           size=15)
        self.window.addItem(self.points)

    def mesh(self, image):
        image_h, image_w = image.shape[:2]
        width = 640
        height = 480
        pose_2d_mpiis = []
        visibilities = []

        humans = self.e.inference(image, scales=[None])

        for human in humans:
            pose_2d_mpii, visibility = common.MPIIPart.from_coco(human)
            pose_2d_mpiis.append([(int(x * width + 0.5), int(y * height + 0.5))
                                  for x, y in pose_2d_mpii])
            visibilities.append(visibility)

        pose_2d_mpiis = np.array(pose_2d_mpiis)
        visibilities = np.array(visibilities)
        transformed_pose2d, weights = self.poseLifting.transform_joints(
            pose_2d_mpiis, visibilities)
        pose_3d = self.poseLifting.compute_3d(transformed_pose2d, weights)

        keypoints = pose_3d[0].transpose()

        return keypoints / 80

    def update(self):
        """
        update the mesh and shift the noise each time
        """
        ret_val, image = self.cam.read()
        try:
            keypoints = self.mesh(image)
        except AssertionError:
            print('body not in image')
        else:
            self.points.setData(pos=keypoints)

    def start(self):
        """
        get the graphics window open and setup
        """
        if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
            QtGui.QApplication.instance().exec_()

    def animation(self, frametime=10):
        """
        calls the update method to run in a loop
        """
        timer = QtCore.QTimer()
        timer.timeout.connect(self.update)
        timer.start(frametime)
        self.start()
Example #17
def main():
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    parser.add_argument('--image', type=str, default='./images/p3.jpg')
    parser.add_argument(
        '--resolution',
        type=str,
        default='432x368',
        help='network input resolution. default=432x368')
    parser.add_argument(
        '--model',
        type=str,
        default='mobilenet_thin',
        help='cmu / mobilenet_thin')
    parser.add_argument(
        '--scales',
        type=str,
        default='[None]',
        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    parser.add_argument(
        '--stick_only',
        action='store_true',
        help='save output without other analysis')
    parser.add_argument('--plot_3d', action='store_true', help='save 3d poses')

    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=scales)
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    # cv2.imshow('tf-pose-estimation result', image)
    # cv2.waitKey()

    import matplotlib.pyplot as plt

    fig = plt.figure()
    a = fig.add_subplot(2, 2, 1)
    a.set_title('Result')
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    if args.stick_only:
        cv2.imwrite('output/test.png', image)
        logger.info('image saved: {}'.format('output/test.png'))
        import sys
        sys.exit(0)
    plt.imshow(rgb_image)

    bgimg = cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_BGR2RGB)
    bgimg = cv2.resize(
        bgimg, (e.heatMat.shape[1], e.heatMat.shape[0]),
        interpolation=cv2.INTER_AREA)

    # show network output
    a = fig.add_subplot(2, 2, 2)
    plt.imshow(bgimg, alpha=0.5)
    tmp = np.amax(e.heatMat[:, :, :-1], axis=2)
    plt.imshow(tmp, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()

    tmp2 = e.pafMat.transpose((2, 0, 1))
    tmp2_odd = np.amax(np.absolute(tmp2[::2, :, :]), axis=0)
    tmp2_even = np.amax(np.absolute(tmp2[1::2, :, :]), axis=0)

    a = fig.add_subplot(2, 2, 3)
    a.set_title('Vectormap-x')
    # plt.imshow(CocoPose.get_bgimg(inp, target_size=(vectmap.shape[1], vectmap.shape[0])), alpha=0.5)
    plt.imshow(tmp2_odd, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()

    a = fig.add_subplot(2, 2, 4)
    a.set_title('Vectormap-y')
    # plt.imshow(CocoPose.get_bgimg(inp, target_size=(vectmap.shape[1], vectmap.shape[0])), alpha=0.5)
    plt.imshow(tmp2_even, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()
    if not os.path.exists('output'):
        os.mkdir('output')
    plt.savefig('output/test.png')
    logger.info('image saved: {}'.format('output/test.png'))

    if not args.plot_3d:
        import sys
        sys.exit(0)
    #For plotting in 3d
    logger.info('3d lifting initialization.')
    poseLifting = Prob3dPose('./src/lifting/models/prob_model_params.mat')

    image_h, image_w = image.shape[:2]
    standard_w = 640
    standard_h = 480

    pose_2d_mpiis = []
    visibilities = []
    for human in humans:
        pose_2d_mpii, visibility = common.MPIIPart.from_coco(human)
        pose_2d_mpiis.append([(int(x * standard_w + 0.5),
                               int(y * standard_h + 0.5))
                              for x, y in pose_2d_mpii])
        visibilities.append(visibility)

    pose_2d_mpiis = np.array(pose_2d_mpiis)
    visibilities = np.array(visibilities)
    transformed_pose2d, weights = poseLifting.transform_joints(
        pose_2d_mpiis, visibilities)
    pose_3d = poseLifting.compute_3d(transformed_pose2d, weights)

    for i, single_3d in enumerate(pose_3d):
        plot_pose(single_3d)
    plt.draw()
    plt.savefig('output/pose_3d_test.png')
    logger.info('3d plot saved: {}'.format('output/pose_3d_test.png'))
class Terrain(object):
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """
        self.bitFalling = 0
        # Initialize plot.
        self.times = []
        self.recordVelocity = []
        self.recordNeck = []
        self.recordYTopRectangle = []
        self.recordHIP = []
        self.recordTimeList = []
        self.globalTime = 0
        self.highestNeck = 0
        # self.hightestNeckTime = 0
        self.highestHIP = 0
        self.saveTimesStartFalling = -1

        self.quoutaFalling = 0
        self.surpriseMovingTime = -1
        self.detectedHIP_Y = 0
        self.detectedNECK_Y = 0
        self.extraDistance = 0

        self.fgbg = cv2.createBackgroundSubtractorMOG2(history=1,
                                                       varThreshold=500,
                                                       detectShadows=False)
        self.secondNeck = 0
        self.human_in_frame = False
        self.lastTimesFoundNeck = -1
        self.width = 300
        self.height = 200
        self.quotaVirtureNeck = 3
        self.used_quotaVirtureNeck = 0
        model = 'mobilenet_thin_432x368'
        w, h = model_wh(model)
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.recordAcceleration = []

    def reduceRecord(self):
        self.recordNeck = self.recordNeck[-100:]
        self.recordHIP = self.recordHIP[-100:]
        self.times = self.times[-100:]
        self.recordVelocity = self.recordVelocity[-100:]
        self.recordTimeList = self.recordTimeList[-100:]
        self.recordYTopRectangle = self.recordYTopRectangle[-100:]
        self.recordAcceleration = self.recordAcceleration[-100:]

    def getLastRecordTime(self):
        if self.recordTimeList == []:
            return 0
        return self.recordTimeList[-1]

    def addCountTimes(self):
        if self.times == []:
            self.times = self.times + [1]
        else:
            self.times = self.times + [self.times[-1] + 1]

    def addRecordTime(self, time):
        self.recordTimeList = self.recordTimeList + [time]

    def addRecordHIP(self, hip):
        self.recordHIP = self.recordHIP + [hip]

    def addRecordNeck(self, neck):
        self.recordNeck = self.recordNeck + [neck]

    def addRecordVelocity(self, neck, time):
        v = (abs(neck[-1] - neck[-2]) / abs(time[-1] - time[-2]))
        self.recordVelocity = self.recordVelocity + [int(v)]

    def destroyAll(self):
        self.times = []
        self.recordNeck = []
        self.recordHIP = []
        self.recordTimeList = []
        self.recordVelocity = []
        self.recordAcceleration = []
        self.recordYTopRectangle = []
        self.quoutaFalling = 0
        self.resetSurpriseMovingTime()
        self.resetBitFalling()

    def detectedFirstFalling(self):
        self.detectedNECK_Y = self.highestNeck
        self.detectedHIP_Y = self.highestHIP
        print(
            '-----!!!!falling!!!!!!----------------------------------------------'
        )
        self.surpriseMovingTime = self.globalTime
        self.saveTimesStartFalling = self.times[-1]
        print('set extraDistance')
        self.extraDistance = (self.detectedHIP_Y - self.detectedNECK_Y) * (1 /
                                                                           2)
        print('extraDis : ', self.extraDistance)
        print('set complete ')

    def countdownFalling(self):
        # print('StartTime From: ',self.surpriseMovingTime)
        print('!!!!!Countdown[10] : ',
              self.globalTime - self.surpriseMovingTime, '!!!!!------------')
        # print('would like to Cancel Countdown \nTake your neck to same level as NECK , HIP : ',self.detectedNECK_Y,self.detectedHIP_Y)
        # print('current your NECK : ',self.getLastNeck())
        # print('extraTotal:',self.detectedHIP_Y+self.extraDistance)
        #maybe not Falling but make sure with NECK last must move up to this position
        # print('Check in second stage.')
    def resetSurpriseMovingTime(self):
        self.surpriseMovingTime = -1

    def getLastNeck(self):
        return self.recordNeck[-1]

    def getLastTimes(self):
        return self.times[-1]

    def getSecondNeck(self):
        return self.secondNeck

    def getLastTimesFoundNeck(self):
        return self.lastTimesFoundNeck

    def addStatusFall(self, image):
        color = (0, 255, 0)
        if self.surpriseMovingTime != -1:
            color = (0, 0, 255)
        cv2.circle(image, (10, 10), 10, color, -1)

    def savesecondNeck(self, image):
        blur = cv2.GaussianBlur(image, (5, 5), 0)
        fgmask = self.fgbg.apply(blur)
        cnts = cv2.findContours(fgmask.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)
        cnts = cnts[0] if imutils.is_cv2() else cnts[1]
        x_left = -1
        y_left = -1
        x_right = -1
        y_right = -1
        for c in cnts:
            # if the contour is too small, ignore it
            # if cv2.contourArea(c) > 500:
            #     continue
            # compute the bounding box for the contour, draw it on the frame,
            # and update the text
            (x, y, w, h) = cv2.boundingRect(c)
            if x_left == -1:
                x_left = x
                y_left = y
            if x < x_left:
                x_left = x
            if y < y_left:
                y_left = y
            if x + w > x_right:
                x_right = x + w
            if y + h > y_right:
                y_right = y + h
        if (x_left == 0 and y_left == 0 and x_right == self.width
                and y_right == self.height) == False:
            if self.human_in_frame and y_left != -1:
                # cv2.rectangle(image, (x_left, y_left), (x_right, y_right), (0, 255, 0), 2)
                self.secondNeck = y_left
                print('second Neck : ', self.secondNeck)
                self.recordYTopRectangle = self.recordYTopRectangle + [
                    self.secondNeck
                ]
        # cv2.imshow('na',fgmask)
    def processFall(self, image):
        print('processing falling ---------')
        totalTime = 0
        loop = 1
        for i in range(1, len(self.recordTimeList)):
            totalTime += self.recordTimeList[-i] - self.recordTimeList[-(i +
                                                                         1)]
            loop += 1
            if totalTime >= 1:
                break
        print('totalTime(velocity):', totalTime, loop)
        if len(self.recordVelocity) >= 2:

            #calculate acceleration
            ac = (max(self.recordVelocity[-loop:]) - min(
                self.recordVelocity[-loop:])) / abs(self.recordTimeList[-1] -
                                                    self.recordTimeList[-loop])
            self.recordAcceleration += [ac]

            print('acceleration :', self.recordAcceleration[-loop:])
        print('highestNeck', self.highestNeck)
        print('highestHIP', self.highestHIP)
        print('time duration : ',
              (self.recordTimeList[-1] - self.recordTimeList[-2]))
        print('max-Velocity :', max(self.recordVelocity[-loop:]))
        print('velocityCurrent:', self.recordVelocity[-loop:])
        if self.highestHIP != 0 and len(
                self.recordNeck) > 1 and self.surpriseMovingTime == -1:
            # NECK's new y value > NECK's latest y value => falling
            # high position => low y value || low position => high y value
            print('LAST_NECK', self.getLastNeck(), 'HIGHTEST_HIP',
                  self.highestHIP)

            #get max human's velocity in last 1 second
            vHumanFall = max(self.recordVelocity[-loop:])

            t = self.recordTimeList[-1]

            #get minimum time per frame in last 1 second
            for i in range(1, loop):
                if abs(self.recordTimeList[-i] -
                       self.recordTimeList[-(i + 1)]) < t:
                    t = abs(self.recordTimeList[-i] -
                            self.recordTimeList[-(i + 1)])

            print(max(self.recordAcceleration[-loop:]),
                  ((self.highestHIP - self.highestNeck) / (t**2)))

            # Max velocity
            vM = (self.highestHIP - self.highestNeck) / t
            # Max acceleration
            aM = (
                (self.highestHIP - self.highestNeck) /
                t) / abs(self.recordTimeList[-1] - self.recordTimeList[-loop])

            i = 0.3
            print((vHumanFall / vM) * (1 - i) + i *
                  (max(self.recordAcceleration[-loop:]) / (aM)), '> 0.35 ??')
            if self.getLastNeck() < self.highestHIP:
                self.quoutaFalling = 0
            if self.getLastNeck(
            ) >= self.highestHIP and self.quoutaFalling < 2:
                print('~~falling~~')
                self.quoutaFalling += 1

                # final equation after normalized and weight wA at 0.3
                if ((vHumanFall / vM) * (1 - i) + i *
                    (max(self.recordAcceleration[-loop:]) /
                     (aM))) > 0.35:  #0.4
                    self.detectedFirstFalling()

        elif self.surpriseMovingTime != -1:
            self.countdownFalling()
            if self.globalTime - self.surpriseMovingTime >= 2 and (
                    self.getLastNeck() <=
                (self.detectedHIP_Y - self.extraDistance)):
                print('Recover From STATE')
                print('---------------------------------------')
                self.destroyAll()
            # if the person has not moved up within 10 seconds --> FALL DETECTED
            elif self.globalTime - self.surpriseMovingTime >= 10:
                print('Warning : Falling happening')
                self.setFalling()

    def mesh(self, image):
        image = common.read_imgfile(image, None, None)
        image = cv2.resize(image, (self.width, self.height))
        print('start-inference', time.time())
        humans = self.e.inference(image, scales=[None])
        print('end-inference', time.time())
        self.resetBitFalling()
        self.savesecondNeck(image)
        package = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        self.globalTime = time.time()  #time of after drawing
        image = package[0]
        #status_part_body_appear = package[1]
        center_each_body_part = package[2]
        # if the camera has not found the NECK for a long period, reset the lists
        if self.globalTime - self.getLastRecordTime() >= 12:
            print('RESET STABLE,RECORDNECK,HIP,etc. [complete 12 second]')
            self.destroyAll()
        if self.globalTime - self.getLastRecordTime() >= 2:
            print('maybe NECK or HUMAN not found [complete 2 second]')
            self.human_in_frame = False
        print('end Initialize mesh')
        # print(status_part_body_appear)
        #when draw2D stick man
        # name_part_body = ["Nose",  # 0
        #                   "Neck",  # 1
        #                   "RShoulder",  # 2
        #                   "RElbow",  # 3
        #                   "RWrist",  # 4
        #                   "LShoulder",  # 5
        #                   "LElbow",  # 6
        #                   "LWrist",  # 7
        #                   "RHip",  # 8
        #                   "RKnee",  # 9
        #                   "RAnkle",  # 10
        #                   "LHip",  # 11
        #                   "LKnee",  # 12
        #                   "LAnkle",  # 13
        #                   "REye",  # 14
        #                   "LEye",  # 15
        #                   "REar",  # 16
        #                   "LEar",  # 17
        #                   ]
        # detected_part = []
        self.addRecordTime(self.globalTime)
        print('start record everything')
        if 1 in center_each_body_part:
            # print(self.globalTime - self.getLastRecordTime())
            self.addCountTimes()
            self.human_in_frame = True
            self.lastTimesFoundNeck = self.recordTimeList[-1]
            self.used_quotaVirtureNeck = 0
            self.addRecordNeck(center_each_body_part[1][1])
            if len(self.recordNeck) >= 2:
                self.addRecordVelocity(self.recordNeck, self.recordTimeList)
            if 11 in center_each_body_part:
                self.addRecordHIP(center_each_body_part[11][1])
                print('neck :| HIP: ',
                      self.recordHIP[-1] - self.recordNeck[-1])
            elif 8 in center_each_body_part:
                self.addRecordHIP(center_each_body_part[8][1])
                print('neck :| HIP: ',
                      self.recordHIP[-1] - self.recordNeck[-1])
        elif self.used_quotaVirtureNeck < self.quotaVirtureNeck and self.secondNeck >= self.getLastNeck(
        ):
            # print(self.globalTime - self.getLastRecordTime())
            self.addCountTimes()
            self.lastTimesFoundNeck = self.recordTimeList[-1]
            self.addRecordNeck(self.getSecondNeck())
            if len(self.recordNeck) >= 2:
                self.addRecordVelocity(self.recordNeck, self.recordTimeList)
            print('addSecond Neck', self.used_quotaVirtureNeck)
            self.used_quotaVirtureNeck += 1
        if len(self.recordNeck) > 300:
            self.reduceRecord()
        # print('find highest neck , hip')
        totalTime = 0
        loop = 1
        for i in range(1, len(self.recordTimeList)):
            totalTime += self.recordTimeList[-i] - self.recordTimeList[-(i +
                                                                         1)]
            loop += 1
            if totalTime >= 2:
                break
        print('totalTime:', totalTime, loop)
        minNumber = -1
        if len(self.recordNeck) < loop:
            loop = len(self.recordNeck)
        for i in range(1, loop + 1):
            if minNumber == -1 or self.recordNeck[-i] <= minNumber:
                self.highestNeck = self.recordNeck[-i]  # higher position = lower y value
                # self.highestNeckTime = self.recordTimeList[-i]
                minNumber = self.recordNeck[-i]
        if len(self.recordHIP) > 1:
            #11 L_HIP
            if 11 in center_each_body_part:
                self.highestHIP = min(self.recordHIP[-loop:])
            #8 R_HIP
            elif 8 in center_each_body_part:
                self.highestHIP = min(self.recordHIP[-loop:])
        if len(self.recordNeck) > 1:
            self.processFall(image)
            print('end processing falling, end mesh()')

    def setFalling(self):
        self.bitFalling = 1

    def getBitFalling(self):
        return self.bitFalling

    def resetBitFalling(self):
        self.bitFalling = 0
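processFall() above combines a normalized peak velocity and a normalized peak acceleration with weight i = 0.3 and flags a fall when the weighted sum exceeds 0.35. A toy calculation of that score with invented values:

def fall_score(v_fall, v_max, a_fall, a_max, weight=0.3):
    # Weighted sum of normalized velocity and acceleration, mirroring the
    # 0.35 threshold test in processFall().
    return (v_fall / v_max) * (1 - weight) + weight * (a_fall / a_max)

print(fall_score(v_fall=180.0, v_max=300.0, a_fall=40.0, a_max=200.0))  # 0.48 > 0.35 -> fall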
class dataScriptGenerator(object):
    def __init__(self):
        self.parser = argparse.ArgumentParser(
            description='tf-pose-estimation run')
        self.parser.add_argument(
            '--resolution',
            type=str,
            default='432x368',
            help='network input resolution. default=432x368')
        self.parser.add_argument('--model',
                                 type=str,
                                 default='mobilenet_thin',
                                 help='cmu / mobilenet_thin')
        self.parser.add_argument(
            '--scales',
            type=str,
            default='[None]',
            help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
        self.args = self.parser.parse_args()
        self.scales = ast.literal_eval(self.args.scales)

        self.w, self.h = model_wh(self.args.resolution)
        self.e = TfPoseEstimator(get_graph_path(self.args.model),
                                 target_size=(self.w, self.h))

    # This method is called to return all humans found in images within the OurTest images folder
    # Generates a csv file with the latest human skeleton keypoint data
    # Also plots each skeleton onto the images
    def adHocData(self):
        directory_in_str = sys.path[0] + "\\..\\images\\OurTest\\"

        # Delete old csv files
        try:
            os.remove(outputfile)
            os.remove(cleanedOutputfile)
        except OSError:
            pass

        # Run through every image in the folder
        for file in os.listdir(directory_in_str):
            filename = os.fsdecode(file)
            if filename.endswith(".jpg") or filename.endswith(".png"):
                fullpath = directory_in_str + filename

                # Estimate human poses from a single image !
                image = common.read_imgfile(fullpath, None, None)
                # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
                t = time.time()
                humans = self.e.inference(image, scales=self.scales)
                elapsed = time.time() - t

                logger.info('inference image: %s in %.4f seconds.' %
                            (fullpath, elapsed))

                image = TfPoseEstimator.draw_humans(image,
                                                    humans,
                                                    imgcopy=False)
                # cv2.imshow('tf-pose-estimation result', image)
                # cv2.waitKey()

                myFile = open(outputfile, 'a')
                # myFile.write(str(filename) + ',')
                # print(filename)
                myFile.write('\n')
                myFile.close()

                # Attempt to plot our skeletons onto the image
                try:

                    fig = plt.figure()
                    a = fig.add_subplot(2, 2, 1)
                    a.set_title('Result')
                    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

                    bgimg = cv2.cvtColor(image.astype(np.uint8),
                                         cv2.COLOR_BGR2RGB)
                    bgimg = cv2.resize(
                        bgimg,
                        (self.e.heatMat.shape[1], self.e.heatMat.shape[0]),
                        interpolation=cv2.INTER_AREA)

                    # show network output
                    a = fig.add_subplot(2, 2, 2)
                    plt.imshow(bgimg, alpha=0.5)
                    tmp = np.amax(self.e.heatMat[:, :, :-1], axis=2)
                    plt.imshow(tmp, cmap=plt.cm.gray, alpha=0.5)
                    plt.colorbar()

                    tmp2 = self.e.pafMat.transpose((2, 0, 1))
                    tmp2_odd = np.amax(np.absolute(tmp2[::2, :, :]), axis=0)
                    tmp2_even = np.amax(np.absolute(tmp2[1::2, :, :]), axis=0)

                    a = fig.add_subplot(2, 2, 3)
                    a.set_title('Vectormap-x')
                    # plt.imshow(CocoPose.get_bgimg(inp, target_size=(vectmap.shape[1], vectmap.shape[0])), alpha=0.5)
                    plt.imshow(tmp2_odd, cmap=plt.cm.gray, alpha=0.5)
                    plt.colorbar()

                    a = fig.add_subplot(2, 2, 4)
                    a.set_title('Vectormap-y')
                    # plt.imshow(CocoPose.get_bgimg(inp, target_size=(vectmap.shape[1], vectmap.shape[0])), alpha=0.5)
                    plt.imshow(tmp2_even, cmap=plt.cm.gray, alpha=0.5)
                    plt.colorbar()
                    plt.show()

                    logger.info('3d lifting initialization.')
                    poseLifting = Prob3dPose(
                        sys.path[0] +
                        '\\lifting\\models\\prob_model_params.mat')

                    image_h, image_w = image.shape[:2]
                    standard_w = 640
                    standard_h = 480

                    pose_2d_mpiis = []
                    visibilities = []
                    for human in humans:
                        pose_2d_mpii, visibility = common.MPIIPart.from_coco(
                            human)
                        pose_2d_mpiis.append([(int(x * standard_w + 0.5),
                                               int(y * standard_h + 0.5))
                                              for x, y in pose_2d_mpii])
                        visibilities.append(visibility)

                    pose_2d_mpiis = np.array(pose_2d_mpiis)
                    visibilities = np.array(visibilities)
                    transformed_pose2d, weights = poseLifting.transform_joints(
                        pose_2d_mpiis, visibilities)
                    pose_3d = poseLifting.compute_3d(transformed_pose2d,
                                                     weights)

                    for i, single_3d in enumerate(pose_3d):
                        plot_pose(single_3d)
                    plt.show()

                except:
                    print("Error when plotting image ")

        dataScriptGenerator.dataCleanup(self)

    # This method is called to return all humans found in the latest image within LiveTest folder
    # Generates a csv file with the human skeleton keypoint data
    # Does not plot skeletons onto images, this is as basic and optimized as possible
    def liveData(self):
        directory_in_str = sys.path[0] + r"/../images/LiveTest/"

        try:
            os.remove(outputfile)
            os.remove(cleanedOutputfile)
        except OSError:
            pass

        for file in os.listdir(directory_in_str):
            filename = os.fsdecode(file)
            if filename.endswith(".jpg") or filename.endswith(".png"):
                fullpath = directory_in_str + filename

                # estimate human poses from a single image !
                image = common.read_imgfile(fullpath, None, None)

                humans = self.e.inference(image, scales=self.scales)

                image = TfPoseEstimator.draw_humans(image,
                                                    humans,
                                                    imgcopy=False)
                # cv2.imshow('tf-pose-estimation result', image)
                # cv2.waitKey()

                myFile = open(outputfile, 'a')
                # myFile.write(str(filename) + ',')
                # print(filename)
                myFile.write('\n')
                # break
                myFile.close()
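
# Note: as written, the loop above only writes newline separators to the output file; the
# statement that actually serializes the keypoints appears to have been trimmed from this
# excerpt. Below is a hedged sketch of what that step could look like, assuming one CSV row
# per image with an x/y pair per COCO part and -1,-1 for missing parts (the layout that
# dataCleanup below expects). write_keypoint_row and its signature are illustrative, not part
# of the original code.
def write_keypoint_row(out_path, humans, image_w, image_h):
    # with this layout, part 8 (first hip) starts at column 16, matching dataCleanup
    with open(out_path, 'a') as f:
        for human in humans:
            row = []
            for part_idx in range(18):
                if part_idx in human.body_parts:
                    bp = human.body_parts[part_idx]
                    row += [int(bp.x * image_w + 0.5), int(bp.y * image_h + 0.5)]
                else:
                    row += [-1, -1]
            # trailing comma mirrors the row[:len(row) - 1] trim in dataCleanup
            f.write(','.join(str(v) for v in row) + ',\n')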

    # Cleans the generated CSV file, dropping rows where four or more of the hip, knee and ankle keypoints are missing
    def dataCleanup(self):
        keypointData = open(outputfile, 'r')
        keypointReader = csv.reader(keypointData)

        cleanKeypointData = open(cleanedOutputfile, 'w', newline='')
        cleanKeypointWriter = csv.writer(cleanKeypointData)

        for row in keypointReader:
            badKeypointCount = 0

            # 16 is the start of our x y pairs for L and R hip, knee and ankle keypoints
            for keypointIndex in range(16, 28, 2):

                if (row[keypointIndex] == "-1"):
                    badKeypointCount += 1

            if (badKeypointCount < 4):
                # Good data, lets write it to our new clean file
                cleanKeypointWriter.writerow(row[:len(row) - 1])

        keypointData.close()
        cleanKeypointData.close()
    try:
        while 1:
            start = time.time()  # used to compute frame-rate information
            length = recvall(conn, 16)  # receive the length of the image data; the first 16 bytes encode the length
            stringData = recvall(conn, int(length))  # receive the image data according to that length
            data = numpy.frombuffer(stringData,
                                    numpy.uint8)  # convert the received byte stream into a 1-D array
            image = cv2.imdecode(data, cv2.IMREAD_COLOR)  # decode the array into an image
            print(image)

            width = int(image.shape[1])
            height = int(image.shape[0])
            # print(width,height)

            ## 2D pose estimation
            scales = ast.literal_eval(node_or_string='[None]')
            humans = model.inference(image, scales=scales)
            if not args.showBG:
                image = np.zeros(image.shape)
            image = model.draw_humans(image, humans, imgcopy=False)
            # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                        (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0),
                        2)
            # write the 2D pose frame to the output video
            video.write(image)

            ## 2D --> 3D
            poseLifting = Prob3dPose('./lifting/models/prob_model_params.mat')
            image_h, image_w = image.shape[:2]
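
The snippet breaks off right after constructing Prob3dPose and reading the image size. Based on the lifting pattern shown earlier in this collection (MPIIPart conversion, transform_joints, compute_3d, plot_pose), the continuation would look roughly like the sketch below; the 640x480 standard size is carried over from that earlier example rather than taken from this snippet:

            # sketch of the 2D -> 3D continuation, following the pattern used earlier
            standard_w, standard_h = 640, 480
            pose_2d_mpiis, visibilities = [], []
            for human in humans:
                pose_2d_mpii, visibility = common.MPIIPart.from_coco(human)
                pose_2d_mpiis.append([(int(x * standard_w + 0.5), int(y * standard_h + 0.5))
                                      for x, y in pose_2d_mpii])
                visibilities.append(visibility)
            pose_2d_mpiis = np.array(pose_2d_mpiis)
            visibilities = np.array(visibilities)
            transformed_pose2d, weights = poseLifting.transform_joints(pose_2d_mpiis, visibilities)
            pose_3d = poseLifting.compute_3d(transformed_pose2d, weights)
            for single_3d in pose_3d:
                plot_pose(single_3d)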
    def post(self):
        global fallen
        if ((self.request.headers['Content-Type'] == 'imagebin')):
            print('Received image')
            image = self.request.body
            fh = open('static/image1.jpg', 'wb')
            fh.write(image)
            fh.close()
            #fh = open('static/image1.jpg','ab')
            #fh.write(bytes([0xD9]))
            #fh.close()
            print('0')
            #image = cv2.imread('static/image1.jpg')
            print('1')
            print('2')

            parser = argparse.ArgumentParser(
                description='tf-pose-estimation run')
            parser.add_argument(
                '--resolution',
                type=str,
                default='432x368',
                help='network input resolution. default=432x368')
            parser.add_argument('--model',
                                type=str,
                                default='mobilenet_thin',
                                help='cmu / mobilenet_thin')
            parser.add_argument(
                '--scales',
                type=str,
                default='[None]',
                help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
            args = parser.parse_args()
            scales = ast.literal_eval(args.scales)

            w, h = model_wh(args.resolution)
            e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

            # estimate human poses from a single image !
            image = common.read_imgfile('static/image1.jpg', None, None)
            # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
            t = time.time()
            humans = e.inference(image, scales=scales)
            elapsed = time.time() - t

            logger.info('inference image: static/image1.jpg in %.4f seconds.' %
                        (elapsed))

            image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
            fallen = 'OKAY'
            for i, h in enumerate(humans):
                TORSO_INDEX = 1
                LEFT_HIP_INDEX = 8
                RIGHT_HIP_INDEX = 11
                RIGHT_HAND_INDEX = 4
                RIGHT_FOOT_INDEX = 12
                LEFT_FOOT_INDEX = 9

                # and RIGHT_HAND_INDEX in parts and RIGHT_FOOT_INDEX in parts and LEFT_FOOT_INDEX in parts:

                parts = h.body_parts
                if TORSO_INDEX in parts and LEFT_HIP_INDEX in parts and RIGHT_HIP_INDEX in parts:

                    torso = parts[TORSO_INDEX]
                    left_hip = parts[LEFT_HIP_INDEX]
                    right_hip = parts[RIGHT_HIP_INDEX]

                    tx, ty = torso.x, torso.y
                    lhx, lhy = left_hip.x, left_hip.y
                    rhx, rhy = right_hip.x, right_hip.y

                    if tx < lhx or tx > rhx:
                        fallen = 'FALL'

                    if abs(lhy - ty) < 0.1 or abs(rhy - ty) < 0.1:
                        fallen = 'FALL'
                    if RIGHT_HAND_INDEX in parts and RIGHT_FOOT_INDEX in parts and LEFT_FOOT_INDEX in parts:
                        right_foot = parts[RIGHT_FOOT_INDEX]
                        left_foot = parts[LEFT_FOOT_INDEX]
                        right_hand = parts[RIGHT_HAND_INDEX]
                        righax, righay = right_hand.x, right_hand.y
                        rfx, rfy = right_foot.x, right_foot.y
                        lfx, lfy = left_foot.x, left_foot.y
                        if abs(lfy - lhy) < 0.1 or abs(rhy - ty) < 0.1:
                            fallen = 'FALL'
                        if (lfy - lhy) > (lhy - ty):
                            fallen = 'FALL'
                        print(lfy - lhy, lhy - ty)
                        lowermag = math.sqrt((lfy - lhy) * (lfy - lhy) +
                                             (lhx - lfx) * (lhx - lfx))
                        uppermag = math.sqrt((lhy - ty) * (lhy - ty) + (tx - lhx) *
                                             (tx - lhx))
                        if lowermag > uppermag:
                            fallen = 'FALL'
            #cv2.putText(image,
            #               f"Fallen: False",
            #              (60, 60),  cv2.FONT_HERSHEY_SIMPLEX, 2,
            #             (0, 255, 0), 5)

            cv2.putText(image, f"Fallen: {fallen}", (60, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 5)

            cv2.imwrite('static/image3.jpg', image)
            for client in clients:
                update_clients(client)
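
The fall heuristic above is interleaved with the request handling. Extracted into a standalone helper, the same geometric tests (torso drifting outside the hip span, torso and hips nearly level, lower body longer than the upper body) read as the sketch below; the function name and return convention are illustrative, not part of the original code:

import math

def looks_fallen(parts):
    """Return True if the keypoint geometry of one human suggests a fall (sketch)."""
    TORSO, LHIP, RHIP, RHAND, RFOOT, LFOOT = 1, 8, 11, 4, 12, 9
    if not (TORSO in parts and LHIP in parts and RHIP in parts):
        return False
    tx, ty = parts[TORSO].x, parts[TORSO].y
    lhx, lhy = parts[LHIP].x, parts[LHIP].y
    rhx, rhy = parts[RHIP].x, parts[RHIP].y
    if tx < lhx or tx > rhx:                        # torso outside the hip span
        return True
    if abs(lhy - ty) < 0.1 or abs(rhy - ty) < 0.1:  # torso and hips at nearly the same height
        return True
    if RHAND in parts and RFOOT in parts and LFOOT in parts:
        lfx, lfy = parts[LFOOT].x, parts[LFOOT].y
        if (lfy - lhy) > (lhy - ty):                # lower body "taller" than upper body
            return True
        lowermag = math.hypot(lfy - lhy, lhx - lfx)
        uppermag = math.hypot(lhy - ty, tx - lhx)
        if lowermag > uppermag:
            return True
    return False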
Beispiel #22
0
            userToken = q["userToken"]
            taskList = int(q["taskList"])

            db.hset(imageID, "userToken", userToken)
            db.hset(imageID, "image", image)  # 此处的image是 base64格式的 raw picture

            img_np = data_uri_to_cv2_img(image)

            print("face recognition %d" % i)

            # 2.  openpose processes the images:
            #     # (1) get the result picture
            #     humans = e.inference(img_np, scales=scales)
            #     imageRuselt = TfPoseEstimator.draw_humans(img_np, humans, imgcopy=False)  # mat format
            # (2) get result json
            humans = e.inference(img_np, scales=scales)
            # imageRuseltDic = TfPoseEstimator.draw_humans(img_np, humans, imgcopy=False)

            frozen = jsonpickle.encode(
                humans, unpicklable=False
            )  # serialize the human objects; humans is a list of human objects, each containing BodyParts with the x, y coordinates of every keypoint

            # # convert mat to base64, without the 'data:image/jpeg;base64,' header
            # imageRuselt1 = cv2.imencode('.jpg', imageRuselt)[1]
            # base64_data = base64.b64encode(imageRuselt1)
            # db.hset(imageID, "image", base64_data)  # image here is in base64 format

            db.hset(imageID, "poseResult", frozen
                    )  # 将识别的结果poseResult (不是图片是json数据,后面连图片一起取出来标注)存入 hashset

            taskDone = db.hget(
Beispiel #23
0
            dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
            dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
            canvas[dy:dy + img_scaled.shape[0],
                   dx:dx + img_scaled.shape[1]] = img_scaled
            image_rgb = canvas

        elif args.zoom > 1.0:
            img_scaled = cv2.resize(image_rgb,
                                    None,
                                    fx=args.zoom,
                                    fy=args.zoom,
                                    interpolation=cv2.INTER_LINEAR)
            dx = (img_scaled.shape[1] - image_rgb.shape[1]) // 2
            dy = (img_scaled.shape[0] - image_rgb.shape[0]) // 2
            image_rgb = img_scaled[dy:image_rgb.shape[0], dx:image_rgb.shape[1]]
        humans = e.inference(image_rgb)
        image_rgb = TfPoseEstimator.draw_humans(image_rgb,
                                                humans,
                                                imgcopy=False)

        if humans:
            # distance values call (humans list length = people).
            # Nose = 0
            # Neck = 1
            # RShoulder = 2
            # RElbow = 3
            # RWrist = 4
            # LShoulder = 5
            # LElbow = 6
            # LWrist = 7
            # RHip = 8
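
The comment above lists the COCO part indices these examples use to address individual joints of a Human object. A small hedged sketch of reading one keypoint and converting it to pixel coordinates, assuming the body_parts API used throughout this collection (keypoint_px itself is illustrative):

def keypoint_px(human, part_idx, image_w, image_h):
    """Return the pixel (x, y) of a COCO part, or None if it was not detected (sketch)."""
    if part_idx not in human.body_parts:
        return None
    bp = human.body_parts[part_idx]
    return int(bp.x * image_w + 0.5), int(bp.y * image_h + 0.5)

# e.g. neck position (index 1) of the first detected person:
# neck = keypoint_px(humans[0], 1, image_w, image_h)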
Beispiel #24
0
def pose_comparison():
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    parser.add_argument('--image', type=str, default='./images/p1.jpg')
    parser.add_argument('--resolution',
                        type=str,
                        default='432x368',
                        help='network input resolution. default=432x368')
    parser.add_argument('--model',
                        type=str,
                        default='mobilenet_thin',
                        help='cmu / mobilenet_thin')
    parser.add_argument('--scales',
                        type=str,
                        default='[None]',
                        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    ref_image = common.read_imgfile(REF_POSE_PATH, None, None)
    ref_image = cv2.resize(ref_image, (640, 480))
    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    image = cv2.resize(image, (640, 480))
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    #t = time.time()

    ref_humans = e.inference(ref_image, scales=scales)
    humans = e.inference(image, scales=scales)

    #elapsed = time.time() - t

    #logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    _, ref_centers = TfPoseEstimator.draw_humans_mine(ref_image,
                                                      ref_humans,
                                                      imgcopy=False)
    _, centers = TfPoseEstimator.draw_humans_mine(image, humans, imgcopy=False)

    ref_centers = list(ref_centers.values())
    centers = list(centers.values())

    ref_centers = list(sum(ref_centers, ()))
    centers = list(sum(centers, ()))

    ref_centers = np.array(ref_centers, dtype=int)
    centers = np.array(centers, dtype=int)

    shapes = []
    shapes.append(ref_centers)
    shapes.append(centers)

    #create canvas on which the triangles will be visualized
    canvas = np.full([640, 480], 255).astype('uint8')

    #convert to 3 channel RGB for fun colors!
    canvas = cv2.cvtColor(canvas, cv2.COLOR_GRAY2RGB)
    #im = draw_shapes(canvas,shapes)

    x, y = get_translation(shapes[0])
    new_shapes = []
    new_shapes.append(shapes[0])

    for i in range(1, len(shapes)):
        new_shape = procrustes_analysis(shapes[0], shapes[i])
        new_shape[::2] = new_shape[::2] + x
        new_shape[1::2] = new_shape[1::2] + y
        new_shape = new_shape.astype(int)
        new_shapes.append(new_shape)

    pts_list = []

    for lst in new_shapes:
        temp = lst.reshape(-1, 1, 2)
        pts = list(map(tuple, temp))
        pts_list.append(pts)

    for i in range(18):
        cv2.circle(ref_image,
                   tuple(pts_list[0][i][0]),
                   3, (255, 0, 0),
                   thickness=3,
                   lineType=8,
                   shift=0)
        cv2.circle(ref_image,
                   tuple(pts_list[1][i][0]),
                   3, (255, 255, 0),
                   thickness=3,
                   lineType=8,
                   shift=0)

    cv2.imshow('tf-pose-estimation result', ref_image)
    cv2.waitKey(0)

    variations = []
    for i in range(len(new_shapes)):
        dist = procrustes_distance(shapes[0], new_shapes[i])
        variations.append(dist)

    print(variations)
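
pose_comparison relies on get_translation, procrustes_analysis and procrustes_distance, which are not shown in this excerpt. The sketch below is a standard Procrustes alignment for flattened [x1, y1, x2, y2, ...] keypoint vectors that matches how those helpers are called above; it is an assumption, not necessarily the author's exact implementation:

import numpy as np

def get_translation(shape):
    # mean x and mean y of a flattened [x1, y1, x2, y2, ...] shape vector
    return np.mean(shape[::2]), np.mean(shape[1::2])

def procrustes_analysis(ref_shape, shape):
    # center both shapes, scale the target to the reference's norm,
    # and rotate it to minimise the squared distance to the reference
    ref = ref_shape.astype(float).copy()
    cur = shape.astype(float).copy()
    rx, ry = get_translation(ref)
    cx, cy = get_translation(cur)
    ref[::2] -= rx
    ref[1::2] -= ry
    cur[::2] -= cx
    cur[1::2] -= cy
    cur *= np.linalg.norm(ref) / (np.linalg.norm(cur) + 1e-8)
    ref_pts = ref.reshape(-1, 2)
    cur_pts = cur.reshape(-1, 2)
    u, _, vt = np.linalg.svd(cur_pts.T @ ref_pts)
    rotation = u @ vt
    aligned = cur_pts @ rotation
    return aligned.reshape(-1)

def procrustes_distance(ref_shape, shape):
    # root-mean-square distance between corresponding points of two shapes
    ref_pts = ref_shape.reshape(-1, 2).astype(float)
    pts = shape.reshape(-1, 2).astype(float)
    return float(np.sqrt(np.mean(np.sum((ref_pts - pts) ** 2, axis=1))))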
Beispiel #25
0
    parser.add_argument('--showBG',
                        type=bool,
                        default=True,
                        help='False to show skeleton only.')
    args = parser.parse_args()

    w, h = 432, 368
    e = TfPoseEstimator('graph/{}/graph_freeze.pb'.format(args.model),
                        target_size=(w, h))
    cap = cv2.VideoCapture(args.video)

    if cap.isOpened() is False:
        print("Error opening video stream or file")
    while cap.isOpened():
        ret_val, image = cap.read()
        if not ret_val:
            break
        tic = time.time()
        humans = e.inference(image, resize_to_default=True, upsample_size=4.0)
        if not args.showBG:
            image = np.zeros(image.shape)
        res = TfPoseEstimator.draw_humans(image, humans, imgcopy=True)
        cv2.putText(res, "FPS: %f" % (1.0 / (time.time() - tic)), (10, 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.imshow('rr', res)
        toc = time.time()
        logger.info('inference %.4f seconds.' % (toc - tic))

        if cv2.waitKey(1) == 27:
            break
    cv2.destroyAllWindows()
logger.debug('finished+')
Beispiel #26
0
def pose_dd():
    fps_time = 0
    global humans
    global BREAK 
    global image 
    parser = argparse.ArgumentParser(description='tf-pose-estimation realtime webcam')
    parser.add_argument('--camera', type=int, default=0)
    parser.add_argument('--zoom', type=float, default=1.0)
    parser.add_argument('--model', type=str, default='mobilenet_thin_432x368', help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
    parser.add_argument('--show-process', type=bool, default=False,
                        help='for debug purpose, if enabled, speed for inference is dropped.')
    args = parser.parse_args()

    ##logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.model)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    #logger.debug('cam read+')
    cam = cv2.VideoCapture(args.camera)
    ret_val, image = cam.read()
    ##logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))

    while True:
        ret_val, image = cam.read()

        #logger.debug('image preprocess+')
        if args.zoom < 1.0:
            canvas = np.zeros_like(image)
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
            dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
            dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
            canvas[dy:dy + img_scaled.shape[0], dx:dx + img_scaled.shape[1]] = img_scaled
            image = canvas
        elif args.zoom > 1.0:
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
            dx = (img_scaled.shape[1] - image.shape[1]) // 2
            dy = (img_scaled.shape[0] - image.shape[0]) // 2
            image = img_scaled[dy:image.shape[0], dx:image.shape[1]]

        #logger.debug('image process+')
        humans = e.inference(image)

        #logger.debug('postprocess+')
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

        #logger.debug('show+')
        cv2.putText(image,
                    "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10),  cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    (0, 255, 0), 2)
        cv2.imshow('tf-pose-estimation result', image)
        fps_time = time.time()
        #out.write(image)
        if cv2.waitKey(1) == 27:
            #out.release()
            cv2.destroyAllWindows()
            BREAK = True
            clientsocket.send("off".encode('utf-8'))
            print("off sent")
            import sys
            sys.exit(0)
            break
Beispiel #27
0
    # parser.add_argument('--image', type=str, default='/Users/ildoonet/Downloads/me.jpg')
    parser.add_argument('--image', type=str, default='./images/apink2.jpg')
    # parser.add_argument('--model', type=str, default='mobilenet_320x240', help='cmu / mobilenet_320x240')
    parser.add_argument('--model', type=str, default='mobilenet_thin_432x368', help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
    parser.add_argument('--scales', type=str, default='[None]', help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.model)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=[None])
    # humans = e.inference(image, scales=[None, (0.7, 0.5, 1.8)])
    # humans = e.inference(image, scales=[(1.2, 0.05)])
    # humans = e.inference(image, scales=[(0.2, 0.2, 1.4)])
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    image = cv2.imread(args.image, cv2.IMREAD_COLOR)
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    cv2.imshow('tf-pose-estimation result', image)
    cv2.waitKey()

    import sys
    sys.exit(0)
Beispiel #28
0
        default='mobilenet_thin_432x368',
        help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
    parser.add_argument('--scales',
                        type=str,
                        default='[None]',
                        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()

    w, h = model_wh(args.model)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=[None])
    # humans = e.inference(image, scales=[None, (0.7, 0.5, 1.8)])
    # humans = e.inference(image, scales=[(1.2, 0.05)])
    # humans = e.inference(image, scales=[(0.2, 0.2, 1.4)])
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    image = cv2.imread(args.image, cv2.IMREAD_COLOR)
    image, b = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    cv2.imshow('tf-pose-estimation result', image)
    cv2.waitKey()

    logger.info('3d lifting initialization.')
    poseLifting = Prob3dPose('./src/lifting/models/prob_model_params.mat')
Beispiel #29
0
            dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
            canvas[dy:dy + img_scaled.shape[0],
                   dx:dx + img_scaled.shape[1]] = img_scaled
            image = canvas
        elif args.zoom > 1.0:
            img_scaled = cv2.resize(image,
                                    None,
                                    fx=args.zoom,
                                    fy=args.zoom,
                                    interpolation=cv2.INTER_LINEAR)
            dx = (img_scaled.shape[1] - image.shape[1]) // 2
            dy = (img_scaled.shape[0] - image.shape[0]) // 2
            image = img_scaled[dy:image.shape[0], dx:image.shape[1]]

        #logger.debug('image process+')
        humans = e.inference(image)

        #logger.debug('postprocess+')
        #image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

        #logger.debug('show+')

        cv2.putText(image, text_display, (100, 400), cv2.FONT_HERSHEY_SIMPLEX,
                    3, (255, 255, 255), 3)
        cv2.putText(image, "|", (250, 400), cv2.FONT_HERSHEY_SIMPLEX, 3,
                    (255, 255, 255), 3)

        image = cv2.resize(image, (800, 600))
        cv2.imshow('tf-pose-estimation result', image)
        fps_time = time.time()
        if cv2.waitKey(1) == 27:
Beispiel #30
0
def main():

    global fps_time

    if len(sys.argv) != 2:
        print("Please specify path to .svo file.")
        exit()

    filepath = sys.argv[1]
    ite = loadall("pickle.dat")
    print("Reading SVO file: {0}".format(filepath))
    t = time.time()
    init = zcam.PyInitParameters(svo_input_filename=filepath,
                                 svo_real_time_mode=False)
    init.depth_mode = sl.PyDEPTH_MODE.PyDEPTH_MODE_QUALITY
    cam = zcam.PyZEDCamera()
    status = cam.open(init)
    if status != tp.PyERROR_CODE.PySUCCESS:
        print(repr(status))
        exit()

    runtime = zcam.PyRuntimeParameters()
    mat = core.PyMat()
    depth = core.PyMat()
    print('Initialisation of svo took ', time.time() - t, ' seconds')
    key = ''
    graph_path = "./models/graph/mobilenet_thin/graph_opt.pb"  #"./models/graph/cmu/graph_opt.pb"
    target = (432, 368)  #(656,368) (432,368) (1312,736)
    e = TfPoseEstimator(graph_path, target)
    nbf = 0
    nbp = 0
    print("  Save the current image:     s")
    print("  Quit the video reading:     q\n")
    while key != 113:  # for 'q' key
        t = time.time()
        err = cam.grab(runtime)
        if err == tp.PyERROR_CODE.PySUCCESS:
            retrieve_start = time.time()
            cam.retrieve_image(mat)
            cam.retrieve_measure(depth, sl.PyMEASURE.PyMEASURE_XYZRGBA)
            image = mat.get_data()
            retrieve_end = time.time()
            print('Retrieving of svo data took ',
                  retrieve_end - retrieve_start, ' seconds')
            pt = np.array(depth.get_data()[:, :, 0:3], dtype=np.float64)
            print(pt.shape, image.shape)
            pt[np.logical_not(np.isfinite(pt))] = 0.0
            nbf += 1
            pose_start = time.time()
            humans = e.inference(np.array(image[:, :, 0:3]))
            image_h, image_w = image.shape[:2]
            pose_end = time.time()
            print("Inference took", pose_end - pose_start, 'seconds')

            for pid, human in enumerate(humans):
                for kid, bdp in enumerate(human.body_parts.values()):
                    #print(bdp)
                    #print(kid, bdp.x, bdp.y, bdp.score)
                    if (bdp.score > 5.0):
                        print((int(bdp.x * image_w + 0.5),
                               int(bdp.y * image_h + 0.5)))
                        print(pt[int(bdp.y * image_h + 0.5),
                                 int(bdp.x * image_w + 0.5)])
                        nbp += 1
                        cv2.circle(image, (int(bdp.x * image_w + 0.5),
                                           int(bdp.y * image_h + 0.5)),
                                   5, (255, 255, 255),
                                   thickness=5,
                                   lineType=8,
                                   shift=0)
                        #coord_uv[pid,kid,:]=np.array([int(bdp.x*image_w+0.5),int(bdp.y*image_h+0/5)])
                        #coord_vis[pid,kid]=bdp.score/10

            image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
            cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                        (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0),
                        2)

            #print(time.time()-t)
            cv2.imshow('tf-pose-estimation result', image)
            fps_time = time.time()
            key = cv2.waitKey(1)
            saving_image(key, mat)
            #time.sleep(0.033)
        else:
            key = cv2.waitKey(1)
    cv2.destroyAllWindows()

    print(nbp / nbf, (nbp / nbf) / 18)
    #saving_depth(cam)
    #saving_point_cloud(cam)

    cam.close()
    print("\nFINISH")
Beispiel #31
0
    def post(self):
        global finished_post
        if ((self.request.headers['Content-Type'] == 'imagebin')
                and (finished_post == 1)):
            print(finished_post)
            finished_post = 0
            print('Received image')
            image = self.request.body
            fh = open('static/image1.jpg', 'wb')
            fh.write(image)
            fh.close()
            #fh = open('static/image1.jpg','ab')
            #fh.write(bytes([0xD9]))
            #fh.close()
            print('0')
            #image = cv2.imread('static/image1.jpg')
            print('1')
            print('2')

            parser = argparse.ArgumentParser(
                description='tf-pose-estimation run')
            parser.add_argument(
                '--resolution',
                type=str,
                default='432x368',
                help='network input resolution. default=432x368')
            parser.add_argument('--model',
                                type=str,
                                default='mobilenet_thin',
                                help='cmu / mobilenet_thin')
            parser.add_argument(
                '--scales',
                type=str,
                default='[None]',
                help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
            args = parser.parse_args()
            scales = ast.literal_eval(args.scales)

            w, h = model_wh(args.resolution)
            e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

            # estimate human poses from a single image !
            image = common.read_imgfile('static/image1.jpg', None, None)
            # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
            t = time.time()
            humans = e.inference(image, scales=scales)
            elapsed = time.time() - t

            logger.info('inference image: static/image1.jpg in %.4f seconds.' %
                        (elapsed))

            image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

            #cv2.putText(image,
            #               f"Fallen: False",
            #              (60, 60),  cv2.FONT_HERSHEY_SIMPLEX, 2,
            #             (0, 255, 0), 5)

            cv2.imwrite('static/image3.jpg', image)

            for client in clients:
                update_clients(client)

            print(finished_post)
            finished_post = 1
Beispiel #32
0
def doCNNTracking(args):    
    global is_tracking,logger

    fps_time = 0

    logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    logger.debug('cam read+')
    pipeline = rs.pipeline()
    pipeline.start()
    
    frames = pipeline.wait_for_frames()
    # get the depth image
    depth = frames.get_depth_frame() 
    depth_image_data = depth.as_frame().get_data()
    depth_image = np.asanyarray(depth_image_data)
    
    
    
    logger.info('cam depth image=%dx%d' % (depth_image.shape[1], depth_image.shape[0])) 
    logger.info('camera ready') 
    
    
    # compute the clip box that maps the depth image onto the rgb image
    clip_box = [100,-100,290,-300]
    

    while (True):
        if(is_tracking):
            fps_time = time.time()
            frames = pipeline.wait_for_frames()
            # get the rgb image
            image_frame = frames.get_color_frame()
            image_data = image_frame.as_frame().get_data()
            image = np.asanyarray(image_data)
            
            # change bgr to rgb
            image = np.array(image[...,::-1])
            origen_image = image

            # get the depth image
            depth = frames.get_depth_frame() 
            depth_image_data = depth.as_frame().get_data()
            depth_image = np.asanyarray(depth_image_data)
            depth_image = depth_image[(int)(clip_box[0]):(int)(clip_box[1]),(int)(clip_box[2]):(int)(clip_box[3])]
            depth_image = cv2.resize(depth_image, (640, 480), interpolation=cv2.INTER_CUBIC)
            depth_image = depth_image / 30
            depth_image = depth_image.astype(np.uint8)
            
            # mask for depth-based background removal
            thresh=cv2.inRange(depth_image,20,200)

            # morphological clean-up of the background-removal mask
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(1, 1))  
            eroded = cv2.erode(thresh,kernel)
            kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT,(5, 5))  
            dilated = cv2.dilate(eroded,kernel2)
            
            # brightness mask
            bright_mask = np.zeros(image.shape)
            bright_mask.fill(200)
            bright_mask = bright_mask.astype(np.uint8)
            bright_mask = cv2.bitwise_and(bright_mask, bright_mask, mask=dilated)
            
            # brightness processing of the rgb image
#             image = cv2.bitwise_and(image, image, mask=dilated)

            image = image.astype(int) + 200 - bright_mask.astype(int)
            image = np.clip(image, 0, 255)
            image = image.astype(np.uint8)
            

            
            
            
            # tfpose image scaling
            if args.zoom < 1.0:
                canvas = np.zeros_like(image)
                img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
                dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
                dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
                canvas[dy:dy + img_scaled.shape[0], dx:dx + img_scaled.shape[1]] = img_scaled
                image = canvas
            elif args.zoom > 1.0:
                img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
                dx = (img_scaled.shape[1] - image.shape[1]) // 2
                dy = (img_scaled.shape[0] - image.shape[0]) // 2
                image = img_scaled[dy:image.shape[0], dx:image.shape[1]]

            # tfpose inference
            humans = e.inference(image)
            
#             # get the joints
#             jdata = TfPoseEstimator.get_json_data(image.shape[0],image.shape[1],humans)
#             if(len(jdata)>2):
#                 try:
#                     # send the position data to the SERVER
#                     chating_room.sendTrackingData(jdata,'track')
#                 except:
#                     print("Cannot send data to server.")
           
            
            # depth image after background removal
            depth_masked = cv2.bitwise_and(depth_image, depth_image, mask=dilated)
            
            human_json_datas = []
            for human in humans:
                # compute the depth data
                depthDatas=[]
                image_h, image_w = image.shape[:2]
                # get point
                for i in range(common.CocoPart.Background.value):
                    if i not in human.body_parts.keys():
                        continue
                    body_part = human.body_parts[i]
                    y = int(body_part.y * image_h + 0.5)
                    x = int(body_part.x * image_w + 0.5)
                    s = 5
                    mat = depth_masked[y-s if(y-s>=0) else 0:y+s if(y+s<=479) else 479,x-s if(x-s>=0) else 0:x+s if (x+s<=639) else 639]

                    count = 0
                    sum_depth = 0

                    for j in range (mat.shape[0]):
                        for k in range (mat.shape[1]):     
                            if mat[j,k]!=0:
                                sum_depth+=mat[j,k]
                                count+=1
                                
                    if count > 0:
                        depth = sum_depth / count
                    else:
                        depth = 0
                        
                    try:
                        depthDatas.append(JointDepthData(i,x,y,depth).__dict__)
                    except:
                        print("err:"+str(x)+" "+str(y)+" "+str(body_part.x )+" "+str(body_part.y ))
                    

                human_json_datas.append(json.dumps(depthDatas))
            json_data = json.dumps(human_json_datas)
            if(len(json_data)>2):
                try:
                    # send the depth data to the SERVER
                    chating_room.sendTrackingData(json_data,'track_depth')
                except:
                    print("Cannot send depth data to server.")
                        
            depth_image =  cv2.applyColorMap(cv2.convertScaleAbs(depth_image/25), cv2.COLORMAP_JET)
            cv2.circle(image,(320,240), 5, (255,255,255), -1)
            cv2.circle(image,(304,480-98), 5, (0,0,255), -1)
            cv2.circle(image,(377,480-197), 5, (0,160,255), -1)
            cv2.circle(image,(106,480-49), 5, (0,255,255), -1)
            cv2.circle(image,(460,480-136), 5, (0,255,0), -1)
            cv2.circle(image,(481,480-134), 5, (255,0,0), -1)
            cv2.circle(image,(85,480-143), 5, (255,160,0), -1)
            image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

            cv2.putText(image,"FPS: %f" % (1.0 / (time.time() - fps_time)),(10, 10),  cv2.FONT_HERSHEY_SIMPLEX, 0.5,(0, 255, 0), 2)
            
            cv2.imshow('tf-pose-estimation result', image)
            
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break


    cv2.destroyAllWindows()
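
doCNNTracking serializes each joint through a JointDepthData object that is not defined in this excerpt. Judging from how it is constructed and consumed (instantiated with the part index, pixel coordinates and averaged depth, then turned into a dict and JSON-encoded), a minimal compatible sketch could be:

class JointDepthData:
    """Hypothetical container matching JointDepthData(i, x, y, depth).__dict__ as used above."""
    def __init__(self, joint_id, x, y, depth):
        self.joint_id = joint_id   # COCO part index
        self.x = x                 # pixel x in the 640x480 frame
        self.y = y                 # pixel y in the 640x480 frame
        self.depth = depth         # averaged depth value around the keypoint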