def __init__(self,
             zoom=1.0,
             resolution='656x368',
             model='cmu',
             show_process=False):
    self.zoom = zoom
    self.resolution = resolution
    self.model = model
    self.show_process = show_process
    logger.debug('initialization %s : %s' % (model, get_graph_path(model)))
    self.w, self.h = model_wh(resolution)
    self.e = TfPoseEstimator(get_graph_path(model),
                             target_size=(self.w, self.h))
Example n. 2
def PoseEstimatorPredict(image_path, plot=False, resolution='432x368', scales='[None]', model='mobilenet_thin'):
    '''
    input:
        image_path: path to the image (jpg)
        plot = False: whether to draw; if True, returns two items: keypoint info + annotated image matrix
        resolution = '432x368': network input resolution
        scales = '[None]'
        model = 'mobilenet_thin': model choice

    output:
        if plot is False, returns one item: the keypoint info
        if plot is True, returns two items: the keypoint info + the annotated image matrix
    '''
    w, h = model_wh(resolution)
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    image = common.read_imgfile(image_path, None, None)
    t = time.time()
    humans = e.inference(image, scales=scales)  # main inference call
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (image_path, elapsed))
    centers = get_keypoint(image, humans)  # extract the keypoint info

    if plot:
        # plotting case:
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)  # 画图函数
        fig = plt.figure()
        a = fig.add_subplot(2, 2, 1)
        a.set_title('Result')
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        return centers,image
    else:
        # non-plotting case:
        return centers
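A minimal usage sketch for the function above; the image path is only a placeholder, and the call assumes the example's imports are already in place.

# hypothetical usage; './images/p1.jpg' is a placeholder path
centers, annotated = PoseEstimatorPredict('./images/p1.jpg', plot=True)
print(centers)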
Example n. 3
def main(input_img, model, e):
  '''
  Query the model given an image
  '''
  if(input_img):
    image = stringToImage(input_img[input_img.find(",")+1:])
    image = toRGB(image)

    if(model == None):
      model = 'mobilenet_thin'

    humans = e.inference(image)
    coords = []
    for human in humans:
      coords.append([[HUMAN_COCO_PART[k], b.x, b.y] for k, b in human.body_parts.items()])

    outdata = {
      'humans': coords
    }
    return outdata

  else:
    # Test with a sample image
    image = common.read_imgfile('./images/p1.jpg', None, None)
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(432, 368))
    humans = e.inference(image)
    coords = []
    for human in humans:
      coords.append([[HUMAN_COCO_PART[k], b.x, b.y] for k, b in human.body_parts.items()])

    outdata = {
      'humans': coords
    }
    return outdata
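stringToImage and toRGB are not shown in this example; below is a minimal sketch of what such helpers might look like, assuming input_img carries a base64-encoded image payload.

import base64
import io

import numpy as np
from PIL import Image


def stringToImage(base64_string):
    # hypothetical helper: decode a base64 payload into a PIL Image
    return Image.open(io.BytesIO(base64.b64decode(base64_string)))


def toRGB(pil_image):
    # hypothetical helper: convert a PIL Image to an RGB numpy array for inference
    return np.asarray(pil_image.convert('RGB'))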
Example n. 4
    def __init__(self, input_width=800, input_height=640):
        import sys
        sys.path.append('./src/detector/tf-pose-estimation/src')

        self.model = 'cmu'
        import common
        self.enum_coco_parts = common.CocoPart
        self.enum_coco_colors = common.CocoColors
        self.enum_coco_pairs_render = common.CocoPairsRender

        from estimator import TfPoseEstimator
        from networks import get_graph_path
        self.image_h, self.image_w = input_height, input_width
        if self.image_w % 16 != 0 or self.image_h % 16 != 0:
            raise Exception(
                'Width and height should be multiples of 16. w=%d, h=%d' %
                (self.image_w, self.image_h))
        print('Warming up detector ConvPoseMachine....')
        import time
        s = time.time()
        self.estimator = TfPoseEstimator(get_graph_path(self.model),
                                         target_size=(self.image_w,
                                                      self.image_h))
        e = time.time()
        print('ConvPoseMachine Warmed, Time: {}'.format(e - s))
Example n. 5
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """

        # setup the view window
        self.app = QtGui.QApplication(sys.argv)
        self.window = gl.GLViewWidget()
        self.window.setWindowTitle('Terrain')
        self.window.setGeometry(0, 110, 1920, 1080)
        self.window.setCameraPosition(distance=30, elevation=12)
        self.window.show()

        gx = gl.GLGridItem()
        gy = gl.GLGridItem()
        gz = gl.GLGridItem()
        gx.rotate(90, 0, 1, 0)
        gy.rotate(90, 1, 0, 0)
        gx.translate(-10, 0, 0)
        gy.translate(0, -10, 0)
        gz.translate(0, 0, -10)
        self.window.addItem(gx)
        self.window.addItem(gy)
        self.window.addItem(gz)

        model = 'mobilenet_thin'
        #model = '432x368'
        camera = 0
        self.lines = {}
        self.connection = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6],
                           [0, 7], [7, 8], [8, 9], [9, 10], [8, 11], [11, 12],
                           [12, 13], [8, 14], [14, 15], [15, 16]]

        resolution = '432x368'
        w, h = model_wh(resolution)
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.cam = cv2.VideoCapture(camera)
        ret_val, image = self.cam.read()
        #print('ret_val', ret_val)
        self.poseLifting = Prob3dPose(
            './src/lifting/models/prob_model_params.mat')
        keypoints = self.mesh(image)
        print('keypoints', keypoints)

        self.points = gl.GLScatterPlotItem(pos=keypoints,
                                           color=pg.glColor((0, 255, 0)),
                                           size=15)
        #print(keypoints)
        self.window.addItem(self.points)

        for n, pts in enumerate(self.connection):
            self.lines[n] = gl.GLLinePlotItem(pos=np.array(
                [keypoints[p] for p in pts]),
                                              color=pg.glColor((0, 0, 255)),
                                              width=3,
                                              antialias=True)
            self.window.addItem(self.lines[n])
Example n. 6
def index():
    try:
        data = request.data
        with open('/tmp/temp.jpg', 'wb') as f:
            f.write(data)
        img = common.read_imgfile('/tmp/temp.jpg', 432, 368)
        scales = ast.literal_eval(args.scales)
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
        humans = e.inference(img, scales=scales)
        return jsonify({"humans": list(map(lambda x: x.to_dict(), humans))})
    except Exception as e:
        return jsonify({"error": str(traceback.format_exc())})
Example n. 7
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """
        self.bitFalling = 0
        # Initialize plot.
        plt.ion()
        f2 = plt.figure(figsize=(6, 5))
        self.windowNeck = f2.add_subplot(1, 1, 1)
        self.windowNeck.set_title('Speed')
        self.windowNeck.set_xlabel('Time')
        self.windowNeck.set_ylabel('Speed')

        # plt.show()
        self.times = []
        self.recordVelocity = []
        self.recordNeck = []
        self.recordYTopRectangle = []
        self.recordHIP = []
        self.recordTimeList = []
        self.globalTime = 0
        self.highestNeck = 0
        # self.highestNeckTime = 0
        self.highestHIP = 0
        self.saveTimesStartFalling = -1

        self.surpriseMovingTime = -1
        self.detectedHIP_Y = 0
        self.detectedNECK_Y = 0
        self.extraDistance = 0

        self.fgbg = cv2.createBackgroundSubtractorMOG2(history=1,
                                                       varThreshold=500,
                                                       detectShadows=False)
        self.secondNeck = 0
        self.human_in_frame = False
        self.lastTimesFoundNeck = -1
        self.width = 300
        self.height = 200
        self.quotaVirtureNeck = 3
        self.used_quotaVirtureNeck = 0
        self.quoutaFalling = 0
        model = 'mobilenet_thin_432x368'
        w, h = model_wh(model)
        camera = 0  # 1 means external camera, 0 means internal camera
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.cam = cv2.VideoCapture(camera)
        # self.cam = cv2.VideoCapture('C:/outpy2.avi')
        self.cam.set(cv2.CAP_PROP_AUTOFOCUS, 0)  # turn the autofocus off
        self.recordAcceleration = []
Example n. 8
def main():
    parser = argparse.ArgumentParser(
        description='tf-pose-estimation run by folder')
    parser.add_argument('--folder', type=str, default='./images/')
    parser.add_argument('--resolution',
                        type=str,
                        default='432x368',
                        help='network input resolution. default=432x368')
    parser.add_argument('--model',
                        type=str,
                        default='mobilenet_thin',
                        help='cmu / mobilenet_thin')
    parser.add_argument('--scales',
                        type=str,
                        default='[None]',
                        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    types = ('*.png', '*.jpg')
    files_grabbed = []
    for files in types:
        files_grabbed.extend(glob.glob(os.path.join(args.folder, files)))
    all_humans = dict()
    if not os.path.exists('output'):
        os.mkdir('output')
    for i, file in enumerate(files_grabbed):
        # estimate human poses from a single image !
        image = common.read_imgfile(file, None, None)
        t = time.time()
        humans = e.inference(image, scales=scales)
        elapsed = time.time() - t

        logger.info('inference image #%d: %s in %.4f seconds.' %
                    (i, file, elapsed))

        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        # cv2.imshow('tf-pose-estimation result', image)
        filename = 'pose_{}.png'.format(i)
        output_filepath = os.path.join('output', filename)
        cv2.imwrite(output_filepath, image)
        logger.info('image saved: {}'.format(output_filepath))
        # cv2.waitKey(5000)

        all_humans[file.replace(args.folder, '')] = humans

    with open(os.path.join(args.folder, 'pose.dil'), 'wb') as f:
        dill.dump(all_humans, f, protocol=dill.HIGHEST_PROTOCOL)
Example n. 9
def joint():
    try:
        data = request.data
        with open('/tmp/temp.jpg', 'wb') as f:
            f.write(data)
        img = common.read_imgfile('/tmp/temp.jpg', 432, 368)
        scales = ast.literal_eval(args.scales)
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
        humans = e.inference(img, scales=scales)
        image = TfPoseEstimator.draw_humans(img, humans, imgcopy=False)
        b_str = base64.b64encode(img2bytes(
            Image.fromarray(image))).decode('utf-8')
        return jsonify({"image": b_str})
    except Exception as e:
        return jsonify({"error": str(traceback.format_exc())})
Example n. 10
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """
        # Initialize plot.
        plt.ion()
        f = plt.figure(figsize=(5, 5))
        f2 = plt.figure(figsize=(6, 5))

        self.window3DBody = f.gca(projection='3d')
        self.window3DBody.set_title('3D_Body')
        self.windowStable = f2.add_subplot(1, 1, 1)
        self.windowStable.set_title('Stable')
        self.windowStable.set_xlabel('Time')
        self.windowStable.set_ylabel('Distant')
        self.windowStable.set_ylim([0, 1500])

        #plt.show()
        self.times = [0]
        self.stable = [0]
        self.recordHead = []
        self.fps_time = 0

        model = 'mobilenet_thin_432x368'
        w, h = model_wh(model)
        #model = 'cmu'
        #w,h = 656,368
        camera = 1  # 1 means external camera, 0 means internal camera

        self.lines = {}
        self.connection = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6],
                           [0, 7], [7, 8], [8, 9], [9, 10], [8, 11], [11, 12],
                           [12, 13], [8, 14], [14, 15], [15, 16]]
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.cam = cv2.VideoCapture(camera)
        ret_val, image = self.cam.read()

        self.poseLifting = Prob3dPose(
            './src/lifting/models/prob_model_params.mat')
        self.statusBodyWindow = 0
        try:
            keypoints = self.mesh(image)

        except:
            pass
Example n. 11
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """

        # setup the view window
        self.app = QtGui.QApplication(sys.argv)
        self.window = gl.GLViewWidget()
        self.window.setWindowTitle('Terrain')
        self.window.setGeometry(0, 110, 1920, 1080)
        self.window.setCameraPosition(distance=30, elevation=12)
        self.window.show()

        gx = gl.GLGridItem()
        gy = gl.GLGridItem()
        gz = gl.GLGridItem()
        gx.rotate(90, 0, 1, 0)
        gy.rotate(90, 1, 0, 0)
        gx.translate(-10, 0, 0)
        gy.translate(0, -10, 0)
        gz.translate(0, 0, -10)
        self.window.addItem(gx)
        self.window.addItem(gy)
        self.window.addItem(gz)

        model = 'mobilenet_thin'
        camera = 0
        w, h = model_wh("432x368")
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.cam = cv2.VideoCapture(0)
        ret_val, image = self.cam.read()
        self.poseLifting = Prob3dPose('lifting/models/prob_model_params.mat')
        keypoints = self.mesh(image)
        
        self.points = gl.GLScatterPlotItem(
            pos=keypoints,
            color=pg.glColor((255, 255, 0)),
            size=15
        )
        # self.lines = gl.GLLinePlotItem(
        #     pos=lines,
        #     color=pg.glColor((255, 255, 255)),
        #     width=2
        # )
        self.window.addItem(self.points)
Example n. 12
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        add some grids in the 3 point... that are just filling up the background
        """

        # setup the view window
        self.app = QtGui.QApplication(sys.argv)
        self.window = gl.GLViewWidget()
        self.window.setWindowTitle('Terrain')
        self.window.setGeometry(0, 110, 1920, 1080)
        self.window.setCameraPosition(distance=30, elevation=12)
        self.window.show()

        gx = gl.GLGridItem()
        gy = gl.GLGridItem()
        gz = gl.GLGridItem()
        gx.rotate(90, 0, 1, 0)
        gy.rotate(90, 1, 0, 0)
        gx.translate(-10, 0, 0)
        gy.translate(0, -10, 0)  # we translate all of them
        gz.translate(0, 0, -10)  # so they're pushed out to the background
        self.window.addItem(gx)
        self.window.addItem(gy)  # add them all to the window
        self.window.addItem(gz)

        model = "mobilenet_thin_432x368"
        camera = 0
        w, h = model_wh(model)
        # create the TfPoseEstimator object directly
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        # this mirrors run.py, except the model name comes from a local variable
        # rather than args.model
        self.cam = cv2.VideoCapture(camera)
        ret_val, image = self.cam.read()
        self.poseLifting = Prob3dPose(
            './src/lifting/models/prob_model_params.mat')
        # this object will perform the 2D-to-3D pose lifting
        keypoints = self.mesh(image)

        self.points = gl.GLScatterPlotItem(  # plot dots
            pos=keypoints,
            color=pg.glColor((0, 255, 0)),
            size=15)
        self.window.addItem(self.points)
Example n. 13
def cnvt(img, name):
    os.chdir(read_path)
    image = cv2.imread(img)

    os.chdir(old_path)
    model = 'mobilenet_thin'
    resolution = '432x368'
    w, h = model_wh(resolution)

    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    humans = e.inference(image)
    blank_image = np.zeros((h,w,3), np.uint8)
    image = TfPoseEstimator.draw_humans(blank_image, humans, imgcopy=False)

    os.chdir(save_path)
    cv2.imwrite(name, image)
    print("Saved - %s As - %s" % (img, name))

    os.chdir(old_path)
Example n. 14
 def __init__(self, calibration_file):
     """
     This method initializes an OpenPose model
     """
     args = type('', (), {})
     # args.resolution = '1312x736'
     args.resolution = '432x368'
     args.model = 'mobilenet_thin'
     args.scales = '[None]'
     scales = ast.literal_eval(args.scales)
     w, h = model_wh(args.resolution)
     e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
     self.model = {'e': e, 'scales': scales}
     # start new matlab session
     self.matlab = matlab.engine.start_matlab()
     # or...
     # in matlab run: matlab.engine.shareEngine
     # and uncomment the following line
     # self.matlab = matlab.engine.connect_matlab()
     self.calibration = calibration_file
Example n. 15
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """
        self.bitFalling = 0
        # Initialize plot.
        self.times = []
        self.recordVelocity = []
        self.recordNeck = []
        self.recordYTopRectangle = []
        self.recordHIP = []
        self.recordTimeList = []
        self.globalTime = 0
        self.highestNeck = 0
        # self.hightestNeckTime = 0
        self.highestHIP = 0
        self.saveTimesStartFalling = -1

        self.quoutaFalling = 0
        self.surpriseMovingTime = -1
        self.detectedHIP_Y = 0
        self.detectedNECK_Y = 0
        self.extraDistance = 0

        self.fgbg = cv2.createBackgroundSubtractorMOG2(history=1,
                                                       varThreshold=500,
                                                       detectShadows=False)
        self.secondNeck = 0
        self.human_in_frame = False
        self.lastTimesFoundNeck = -1
        self.width = 300
        self.height = 200
        self.quotaVirtureNeck = 3
        self.used_quotaVirtureNeck = 0
        model = 'mobilenet_thin_432x368'
        w, h = model_wh(model)
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.recordAcceleration = []
Example n. 16
def generate_skeletonize_video():
    """
    The method takes images, saves a skeleton per image, and creates a video output
    :return:
    """
    input_folder = "./videos/demo/"
    output_folder = "./videos"
    results_folder = "./images/demo/"
    input_video = "./videos/demo.mp4"
    images = video_utils.load_images_from_folder(input_folder)
    w = 432
    h = 368
    estimator = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                                target_size=(w, h))
    count = 1
    for i in images:
        image_parts = estimator.inference(i, scales=None)
        image_skeleton = TfPoseEstimator.draw_humans(i,
                                                     image_parts,
                                                     imgcopy=True)
        cv2.imwrite(r".\images\demo\{}.png".format(count), image_skeleton)
        count = count + 1
    video_utils.create_video(input_video, results_folder, output_folder)
Example n. 17
    def __init__(self):
        self.parser = argparse.ArgumentParser(
            description='tf-pose-estimation run')
        self.parser.add_argument(
            '--resolution',
            type=str,
            default='432x368',
            help='network input resolution. default=432x368')
        self.parser.add_argument('--model',
                                 type=str,
                                 default='mobilenet_thin',
                                 help='cmu / mobilenet_thin')
        self.parser.add_argument(
            '--scales',
            type=str,
            default='[None]',
            help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
        self.args = self.parser.parse_args()
        self.scales = ast.literal_eval(self.args.scales)

        self.w, self.h = model_wh(self.args.resolution)
        self.e = TfPoseEstimator(get_graph_path(self.args.model),
                                 target_size=(self.w, self.h))
Example n. 18
def init_hook(**params):
    PARAMS.update(params)
    PARAMS['resize_out_ratio'] = float(PARAMS['resize_out_ratio'])
    PARAMS['intersection_threshold'] = float(PARAMS['intersection_threshold'])
    PARAMS['target_size'] = _parse_resolution(PARAMS['target_size'])
    PARAMS['poses'] = helpers.boolean_string(PARAMS['poses'])
    PARAMS['crop_persons'] = helpers.boolean_string(PARAMS['crop_persons'])
    PARAMS['one_person'] = helpers.boolean_string(PARAMS['one_person'])
    PARAMS['draw_vectors'] = helpers.boolean_string(PARAMS['draw_vectors'])
    PARAMS['draw_boxes'] = helpers.boolean_string(PARAMS['draw_boxes'])
    PARAMS['skeleton_vectors'] = helpers.boolean_string(
        PARAMS['skeleton_vectors'])
    global e

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    if PARAMS['poses']:
        e = estimator.TfPoseEstimator(
            networks.get_graph_path(PARAMS['model'],
                                    models_dir=PARAMS['models_dir']),
            target_size=PARAMS['target_size'],
            tf_config=config,
        )
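_parse_resolution is referenced above but not shown; a minimal sketch, assuming PARAMS['target_size'] arrives as a 'WxH' string such as '432x368'.

def _parse_resolution(resolution):
    # hypothetical sketch: turn a 'WxH' string into an (int, int) tuple
    width, height = map(int, resolution.lower().split('x'))
    return width, height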
Example n. 19
def main():
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    parser.add_argument('--image', type=str, default='./images/p3.jpg')
    parser.add_argument(
        '--resolution',
        type=str,
        default='432x368',
        help='network input resolution. default=432x368')
    parser.add_argument(
        '--model',
        type=str,
        default='mobilenet_thin',
        help='cmu / mobilenet_thin')
    parser.add_argument(
        '--scales',
        type=str,
        default='[None]',
        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    parser.add_argument(
        '--stick_only',
        action='store_true',
        help='save output without other analysis')
    parser.add_argument('--plot_3d', action='store_true', help='save 3d poses')

    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=scales)
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    # cv2.imshow('tf-pose-estimation result', image)
    # cv2.waitKey()

    import matplotlib.pyplot as plt

    fig = plt.figure()
    a = fig.add_subplot(2, 2, 1)
    a.set_title('Result')
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    if args.stick_only:
        cv2.imwrite('output/test.png', image)
        logger.info('image saved: {}'.format('output/test.png'))
        import sys
        sys.exit(0)
    plt.imshow(rgb_image)

    bgimg = cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_BGR2RGB)
    bgimg = cv2.resize(
        bgimg, (e.heatMat.shape[1], e.heatMat.shape[0]),
        interpolation=cv2.INTER_AREA)

    # show network output
    a = fig.add_subplot(2, 2, 2)
    plt.imshow(bgimg, alpha=0.5)
    tmp = np.amax(e.heatMat[:, :, :-1], axis=2)
    plt.imshow(tmp, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()

    tmp2 = e.pafMat.transpose((2, 0, 1))
    tmp2_odd = np.amax(np.absolute(tmp2[::2, :, :]), axis=0)
    tmp2_even = np.amax(np.absolute(tmp2[1::2, :, :]), axis=0)

    a = fig.add_subplot(2, 2, 3)
    a.set_title('Vectormap-x')
    # plt.imshow(CocoPose.get_bgimg(inp, target_size=(vectmap.shape[1], vectmap.shape[0])), alpha=0.5)
    plt.imshow(tmp2_odd, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()

    a = fig.add_subplot(2, 2, 4)
    a.set_title('Vectormap-y')
    # plt.imshow(CocoPose.get_bgimg(inp, target_size=(vectmap.shape[1], vectmap.shape[0])), alpha=0.5)
    plt.imshow(tmp2_even, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()
    if not os.path.exists('output'):
        os.mkdir('output')
    plt.savefig('output/test.png')
    logger.info('image saved: {}'.format('output/test.png'))

    if not args.plot_3d:
        import sys
        sys.exit(0)
    #For plotting in 3d
    logger.info('3d lifting initialization.')
    poseLifting = Prob3dPose('./src/lifting/models/prob_model_params.mat')

    image_h, image_w = image.shape[:2]
    standard_w = 640
    standard_h = 480

    pose_2d_mpiis = []
    visibilities = []
    for human in humans:
        pose_2d_mpii, visibility = common.MPIIPart.from_coco(human)
        pose_2d_mpiis.append([(int(x * standard_w + 0.5),
                               int(y * standard_h + 0.5))
                              for x, y in pose_2d_mpii])
        visibilities.append(visibility)

    pose_2d_mpiis = np.array(pose_2d_mpiis)
    visibilities = np.array(visibilities)
    transformed_pose2d, weights = poseLifting.transform_joints(
        pose_2d_mpiis, visibilities)
    pose_3d = poseLifting.compute_3d(transformed_pose2d, weights)

    for i, single_3d in enumerate(pose_3d):
        plot_pose(single_3d)
    plt.draw()
    plt.savefig('output/pose_3d_test.png')
    logger.info('3d plot saved: {}'.format('output/pose_3d_test.png'))
    #alltime= time.time() - t0
    #logger.info('estimate all time:%.4f seconds.' % (alltime))
    return pose_3d, image


Example n. 20
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    parser.add_argument('--movie', type=str, default='../cai.mp4')
    parser.add_argument('--dataname', type=str, default='')
    parser.add_argument('--datas', type=str, default='data/')
    args = parser.parse_args()
    movie = cv2.VideoCapture(args.movie)

    #w, h = model_wh('432x368')
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                        target_size=(656, 368))
    ast_l = ast.literal_eval('[None]')
    frame_count = int(movie.get(7))

    for i in range(frame_count):
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect((TCP_IP, TCP_PORT))

        array = []
        _, frame = movie.read()
        data, image = Estimate_3Ddata(frame, e, ast_l)

        x = data[0][0]
        y = data[0][1]
        z = data[0][2]
Example n. 21
    parser.add_argument('--video', type=str, default='')
    parser.add_argument('--camera', type=int, default=0)
    parser.add_argument('--zoom', type=float, default=1.0)
    parser.add_argument(
        '--model',
        type=str,
        default='cmu_640x480',
        help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
    parser.add_argument(
        '--show-process',
        type=bool,
        default=False,
        help='for debug purpose, if enabled, speed for inference is dropped.')
    args = parser.parse_args()
    logger.debug('initialization %s : %s' %
                 (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.model)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    logger.debug('cam read+')
    cam = cv2.VideoCapture(args.camera)
    ret_val, image = cam.read()
    logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))

    while True:
        ret_val, image = cam.read()

        #logger.debug('image preprocess+')
        if args.zoom < 1.0:
            canvas = np.zeros_like(image)
            img_scaled = cv2.resize(image,
                                    None,
Example n. 22
ch.setFormatter(formatter)
logger.addHandler(ch)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    # parser.add_argument('--image', type=str, default='/Users/ildoonet/Downloads/me.jpg')
    parser.add_argument('--image', type=str, default='./images/apink2.jpg')
    # parser.add_argument('--model', type=str, default='mobilenet_320x240', help='cmu / mobilenet_320x240')
    parser.add_argument('--model', type=str, default='mobilenet_thin_432x368', help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
    parser.add_argument('--scales', type=str, default='[None]', help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.model)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=[None])
    # humans = e.inference(image, scales=[None, (0.7, 0.5, 1.8)])
    # humans = e.inference(image, scales=[(1.2, 0.05)])
    # humans = e.inference(image, scales=[(0.2, 0.2, 1.4)])
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    image = cv2.imread(args.image, cv2.IMREAD_COLOR)
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
Example n. 23
fps_time = 0


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation realtime webcam')
    parser.add_argument('--camera', type=int, default=0)
    parser.add_argument('--zoom', type=float, default=1.0)
    parser.add_argument('--resolution', type=str, default='432x368', help='network input resolution. default=432x368')
    parser.add_argument('--model', type=str, default='mobilenet_thin', help='cmu / mobilenet_thin')
    parser.add_argument('--show-process', type=bool, default=False,
                        help='for debug purpose, if enabled, speed for inference is dropped.')
    parser.add_argument('--output_json', type=str, default='3d-pose-baseline/src3d/json/', help='writing output json dir')
    args = parser.parse_args()

    logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    logger.debug('cam read+')
    cam = cv2.VideoCapture(args.camera)
    ret_val, image = cam.read()
    logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))
    frame=0
    while True:
        ret_val, image = cam.read()
        #image2 = cv2.threshold(image,0,255,cv2.THRESH_BINARY)
        image2 = cv2.imread('../images/123.jpg')
        logger.debug('image preprocess+')
        if args.zoom < 1.0:
            canvas = np.zeros_like(image)
            canvas2 = np.zeros_like(image2)
Example n. 24
formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation run') 						# Adding arguments to the programs
    parser.add_argument('--image', type=str, default='../images/p1.jpg')							# Adding images name else it will take the default image
    parser.add_argument('--resolution', type=str, default='432x368', help='network input resolution. default=432x368')	# Specify resolution 
    parser.add_argument('--model', type=str, default='mobilenet_thin', help='cmu / mobilenet_thin')			# Specify Model
    parser.add_argument('--scales', type=str, default='[None]', help='for multiple scales, eg. [1.0, (1.1, 0.05)]')	# Scales - Reason Unknown 
    args = parser.parse_args()												# Argument contain all the parse
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)  # returns width and height into w, h after checking that each is a multiple of 16
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))		# Model + width and height

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=scales)
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    # cv2.imshow('tf-pose-estimation result', image)
    # cv2.waitKey()

    import matplotlib.pyplot as plt
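The comment above describes model_wh; a rough sketch of such a parser, assuming the 'WxH' string format and the multiple-of-16 check mentioned in the comment (the actual implementation in tf-pose-estimation may differ).

def model_wh_sketch(resolution_str):
    # hypothetical sketch: parse '432x368' into (432, 368) and enforce multiples of 16
    width, height = map(int, resolution_str.split('x'))
    if width % 16 != 0 or height % 16 != 0:
        raise ValueError('width and height should be multiples of 16: %dx%d' % (width, height))
    return width, height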
Example n. 25
ch.setFormatter(formatter)
logger.addHandler(ch)

fps_time = 0


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation realtime webcam')
    parser.add_argument('--camera', type=int, default=0)
    parser.add_argument('--zoom', type=float, default=1.0)
    parser.add_argument('--model', type=str, default='mobilenet_thin_432x368', help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
    parser.add_argument('--show-process', type=bool, default=False,
                        help='for debug purpose, if enabled, speed for inference is dropped.')
    args = parser.parse_args()

    logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.model)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    logger.debug('cam read+')
    cam = cv2.VideoCapture(args.camera)
    ret_val, image = cam.read()
    logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))

    while True:
        ret_val, image = cam.read()

        logger.debug('image preprocess+')
        if args.zoom < 1.0:
            canvas = np.zeros_like(image)
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
            dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
Example n. 26
    parser.add_argument(
        '--conf',
        type=str,
        default='configs/mgh.yaml',
        help=
        'Path to the YAML config file containing the parameters helpful for Openpose inference'
    )
    args = parser.parse_args()

    with open(args.conf, 'r') as f:
        conf_vals = yaml.load(f, Loader=yaml.FullLoader)
    conf_vals = conf_vals['estimate_pose']

    logger.debug(
        'initialization %s : %s' %
        (conf_vals['model_name'], get_graph_path(conf_vals['model_name'])))
    w, h = model_wh(conf_vals['resolution'])
    e = TfPoseEstimator(get_graph_path(conf_vals['model_name']),
                        target_size=(w, h))

    subdirs = os.listdir(conf_vals['data_dir'])

    def process_vids(subs):
        for i in trange(len(subs), desc='Subject ID', position=0):
            sub = subs[i]
            video_root = os.path.join(conf_vals['data_dir'], sub)
            l_vids = []
            for root, dirs, fnames in os.walk(video_root):
                for fname in fnmatch.filter(fnames,
                                            '*.' + conf_vals['data_ext']):
                    l_vids.append(os.path.join(root, fname))
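A hedged sketch of the YAML config this script expects, showing only the keys the snippet actually reads (model_name, resolution, data_dir, data_ext); the values are placeholders.

import yaml

sample_conf = """
estimate_pose:
  model_name: mobilenet_thin
  resolution: 432x368
  data_dir: ./videos/subjects
  data_ext: mp4
"""
conf_vals = yaml.load(sample_conf, Loader=yaml.FullLoader)['estimate_pose']
print(conf_vals['model_name'], conf_vals['resolution'])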
Example n. 27
def pose_dd():
    fps_time = 0
    global humans
    global BREAK 
    global image 
    parser = argparse.ArgumentParser(description='tf-pose-estimation realtime webcam')
    parser.add_argument('--camera', type=int, default=0)
    parser.add_argument('--zoom', type=float, default=1.0)
    parser.add_argument('--model', type=str, default='mobilenet_thin_432x368', help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
    parser.add_argument('--show-process', type=bool, default=False,
                        help='for debug purpose, if enabled, speed for inference is dropped.')
    args = parser.parse_args()

    ##logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.model)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    #logger.debug('cam read+')
    cam = cv2.VideoCapture(args.camera)
    ret_val, image = cam.read()
    ##logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))

    while True:
        ret_val, image = cam.read()

        #logger.debug('image preprocess+')
        if args.zoom < 1.0:
            canvas = np.zeros_like(image)
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
            dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
            dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
            canvas[dy:dy + img_scaled.shape[0], dx:dx + img_scaled.shape[1]] = img_scaled
            image = canvas
        elif args.zoom > 1.0:
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom, interpolation=cv2.INTER_LINEAR)
            dx = (img_scaled.shape[1] - image.shape[1]) // 2
            dy = (img_scaled.shape[0] - image.shape[0]) // 2
            image = img_scaled[dy:image.shape[0], dx:image.shape[1]]

        #logger.debug('image process+')
        humans = e.inference(image)

        #logger.debug('postprocess+')
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

        #logger.debug('show+')
        cv2.putText(image,
                    "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10),  cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    (0, 255, 0), 2)
        cv2.imshow('tf-pose-estimation result', image)
        fps_time = time.time()
        #out.write(image)
        if cv2.waitKey(1) == 27:
            #out.release()
            cv2.destroyAllWindows()
            BREAK = True
            clientsocket.send("off".encode('utf-8'))
            print("off sent")
            import sys
            sys.exit(0)
            break
Example n. 28
import cv2
import numpy as np

from estimator import TfPoseEstimator
from networks import get_graph_path, model_wh

fps_time = 0

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation Video')
    parser.add_argument('--video', type=str, default='')

    args = parser.parse_args()
    model = 'mobilenet_thin'
    w, h = model_wh('432x368')
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    cap = cv2.VideoCapture(args.video)

    if not cap.isOpened():
        print("Error opening video stream or file")
    while cap.isOpened():
        ret_val, image = cap.read()

        humans = e.inference(image)
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

        cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.imshow('tf-pose-estimation result', image)
        fps_time = time.time()
        if cv2.waitKey(1) == 27:
Example n. 29
    parser.add_argument('--model',
                        type=str,
                        default='mobilenet_thin',
                        help='cmu / mobilenet_thin')  # Specify Model
    parser.add_argument('--scales',
                        type=str,
                        default='[None]',
                        help='for multiple scales, eg. [1.0, (1.1, 0.05)]'
                        )  # Scales - Reason Unknown
    args = parser.parse_args()  # Argument contain all the parse
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(
        args.resolution
    )  # returns width and height into w, h after checking that each is a multiple of 16
    e = TfPoseEstimator(get_graph_path(args.model),
                        target_size=(w, h))  # Model + width and height

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=scales)
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))
    print(humans)
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    # cv2.imshow('tf-pose-estimation result', image)
    # cv2.waitKey()
Example n. 30
    def post(self):
        global finished_post
        if ((self.request.headers['Content-Type'] == 'imagebin')
                and (finished_post == 1)):
            print(finished_post)
            finished_post = 0
            print('Received image')
            image = self.request.body
            fh = open('static/image1.jpg', 'wb')
            fh.write(image)
            fh.close()
            #fh = open('static/image1.jpg','ab')
            #fh.write(bytes([0xD9]))
            #fh.close()
            print('0')
            #image = cv2.imread('static/image1.jpg')
            print('1')
            print('2')

            parser = argparse.ArgumentParser(
                description='tf-pose-estimation run')
            parser.add_argument(
                '--resolution',
                type=str,
                default='432x368',
                help='network input resolution. default=432x368')
            parser.add_argument('--model',
                                type=str,
                                default='mobilenet_thin',
                                help='cmu / mobilenet_thin')
            parser.add_argument(
                '--scales',
                type=str,
                default='[None]',
                help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
            args = parser.parse_args()
            scales = ast.literal_eval(args.scales)

            w, h = model_wh(args.resolution)
            e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

            # estimate human poses from a single image !
            image = common.read_imgfile('static/image1.jpg', None, None)
            # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
            t = time.time()
            humans = e.inference(image, scales=scales)
            elapsed = time.time() - t

            logger.info('inference image: image3.jpg in %.4f seconds.' %
                        (elapsed))

            image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

            #cv2.putText(image,
            #               f"Fallen: False",
            #              (60, 60),  cv2.FONT_HERSHEY_SIMPLEX, 2,
            #             (0, 255, 0), 5)

            cv2.imwrite('static/image3.jpg', image)

            for client in clients:
                update_clients(client)

            print(finished_post)
            finished_post = 1
Example n. 31
    # parameters
    image_topic = rospy.get_param('~camera', '')
    model = rospy.get_param('~model', 'cmu_640x480')
    scales = rospy.get_param('~scales', '[None]')
    scales = ast.literal_eval(scales)
    tf_lock = Lock()

    rospy.loginfo('[TfPoseEstimatorROS] scales(%d)=%s' % (len(scales), str(scales)))

    if not image_topic:
        rospy.logerr('Parameter \'camera\' is not provided.')
        sys.exit(-1)

    try:
        w, h = model_wh(model)
        graph_path = get_graph_path(model)

        rospack = rospkg.RosPack()
        graph_path = os.path.join(rospack.get_path('tfpose_ros'), graph_path)
    except Exception as e:
        rospy.logerr('invalid model: %s, e=%s' % (model, e))
        sys.exit(-1)

    pose_estimator = TfPoseEstimator(graph_path, target_size=(w, h))
    cv_bridge = CvBridge()

    rospy.Subscriber(image_topic, Image, callback_image, queue_size=1, buff_size=2**24)
    pub_pose = rospy.Publisher('~pose', Persons, queue_size=1)

    rospy.loginfo('start+')
    rospy.spin()
Example n. 32
if __name__ == '__main__':
    rospy.loginfo('initialization+')
    rospy.init_node('TfPoseEstimatorROS', anonymous=True)

    # parameters
    image_topic = rospy.get_param('~camera', '')
    model = rospy.get_param('~model', 'cmu_640x480')

    if not image_topic:
        rospy.logerr('Parameter \'camera\' is not provided.')
        sys.exit(-1)

    try:
        w, h = model_wh(model)
        graph_path = get_graph_path(model)

        rospack = rospkg.RosPack()
        graph_path = os.path.join(rospack.get_path('tfpose_ros'), graph_path)
    except Exception as e:
        rospy.logerr('invalid model: %s, e=%s' % (model, e))
        sys.exit(-1)

    pose_estimator = TfPoseEstimator(graph_path, target_size=(w, h))
    cv_bridge = CvBridge()

    rospy.Subscriber(image_topic, Image, callback_image, queue_size=1)
    pub_pose = rospy.Publisher('~pose', Persons, queue_size=1)

    rospy.loginfo('start+')
    rospy.spin()
Example n. 33
def pose_comparison():
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    parser.add_argument('--image', type=str, default='./images/p1.jpg')
    parser.add_argument('--resolution',
                        type=str,
                        default='432x368',
                        help='network input resolution. default=432x368')
    parser.add_argument('--model',
                        type=str,
                        default='mobilenet_thin',
                        help='cmu / mobilenet_thin')
    parser.add_argument('--scales',
                        type=str,
                        default='[None]',
                        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    ref_image = common.read_imgfile(REF_POSE_PATH, None, None)
    ref_image = cv2.resize(ref_image, (640, 480))
    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    image = cv2.resize(image, (640, 480))
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    #t = time.time()

    ref_humans = e.inference(ref_image, scales=scales)
    humans = e.inference(image, scales=scales)

    #elapsed = time.time() - t

    #logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    _, ref_centers = TfPoseEstimator.draw_humans_mine(ref_image,
                                                      ref_humans,
                                                      imgcopy=False)
    _, centers = TfPoseEstimator.draw_humans_mine(image, humans, imgcopy=False)

    ref_centers = list(ref_centers.values())
    centers = list(centers.values())

    ref_centers = list(sum(ref_centers, ()))
    centers = list(sum(centers, ()))

    ref_centers = np.array(ref_centers, dtype=int)
    centers = np.array(centers, dtype=int)

    shapes = []
    shapes.append(ref_centers)
    shapes.append(centers)

    # create a canvas on which the aligned shapes will be visualized
    canvas = np.full([640, 480], 255).astype('uint8')

    #convert to 3 channel RGB for fun colors!
    canvas = cv2.cvtColor(canvas, cv2.COLOR_GRAY2RGB)
    #im = draw_shapes(canvas,shapes)

    x, y = get_translation(shapes[0])
    new_shapes = []
    new_shapes.append(shapes[0])

    for i in range(1, len(shapes)):
        new_shape = procrustes_analysis(shapes[0], shapes[i])
        new_shape[::2] = new_shape[::2] + x
        new_shape[1::2] = new_shape[1::2] + y
        new_shape = new_shape.astype(int)
        new_shapes.append(new_shape)

    pts_list = []

    for lst in new_shapes:
        temp = lst.reshape(-1, 1, 2)
        pts = list(map(tuple, temp))
        pts_list.append(pts)

    for i in range(18):
        cv2.circle(ref_image,
                   tuple(pts_list[0][i][0]),
                   3, (255, 0, 0),
                   thickness=3,
                   lineType=8,
                   shift=0)
        cv2.circle(ref_image,
                   tuple(pts_list[1][i][0]),
                   3, (255, 255, 0),
                   thickness=3,
                   lineType=8,
                   shift=0)

    cv2.imshow('tf-pose-estimation result', ref_image)
    cv2.waitKey(0)

    variations = []
    for i in range(len(new_shapes)):
        dist = procrustes_distance(shapes[0], new_shapes[i])
        variations.append(dist)

    print(variations)
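get_translation, procrustes_analysis, and procrustes_distance are not shown above; a minimal sketch of procrustes_distance, assuming it is simply the root of the summed squared differences between the flattened reference shape and the compared shape.

import numpy as np

def procrustes_distance(reference_shape, shape):
    # hypothetical sketch: Euclidean distance between corresponding flattened coordinates
    ref = np.asarray(reference_shape, dtype=float)
    sh = np.asarray(shape, dtype=float)
    return float(np.sqrt(np.sum((ref - sh) ** 2)))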