def PoseEstimatorPredict(image_path, plot=False, resolution='432x368', scales='[None]', model='mobilenet_thin'):
    '''
    input:
        image_path: path to the input image (jpg)
        plot: if True, also return the annotated image matrix
        resolution: network input resolution
        scales: inference scales, same string form as on the CLI
        model: model selection, e.g. 'mobilenet_thin'
    output:
        plot False: returns one value, the keypoint info
        plot True:  returns two values, the keypoint info and the annotated image matrix
    '''
    if isinstance(scales, str):
        scales = ast.literal_eval(scales)  # accept the CLI string form, e.g. '[None]' (requires the ast import)
    w, h = model_wh(resolution)
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    image = common.read_imgfile(image_path, None, None)
    t = time.time()
    humans = e.inference(image, scales=scales)  # main prediction call
    elapsed = time.time() - t
    logger.info('inference image: %s in %.4f seconds.' % (image_path, elapsed))
    centers = get_keypoint(image, humans)  # collect the keypoint info
    if plot:
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)  # draw the skeletons
        fig = plt.figure()
        a = fig.add_subplot(2, 2, 1)
        a.set_title('Result')
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        return centers, image
    else:
        return centers
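# A minimal usage sketch for PoseEstimatorPredict (the image path is only an
# example; it assumes the same tf-pose imports the function itself relies on):
centers, annotated = PoseEstimatorPredict('./images/p1.jpg', plot=True)
print(centers)                          # keypoint info from get_keypoint()
cv2.imwrite('annotated.jpg', annotated)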
def __init__(self):
    """
    Initialize the graphics window and mesh surface
    """
    # set up the view window
    self.app = QtGui.QApplication(sys.argv)
    self.window = gl.GLViewWidget()
    self.window.setWindowTitle('Terrain')
    self.window.setGeometry(0, 110, 1920, 1080)
    self.window.setCameraPosition(distance=30, elevation=12)
    self.window.show()

    gx = gl.GLGridItem()
    gy = gl.GLGridItem()
    gz = gl.GLGridItem()
    gx.rotate(90, 0, 1, 0)
    gy.rotate(90, 1, 0, 0)
    gx.translate(-10, 0, 0)
    gy.translate(0, -10, 0)
    gz.translate(0, 0, -10)
    self.window.addItem(gx)
    self.window.addItem(gy)
    self.window.addItem(gz)

    model = 'mobilenet_thin'
    camera = 0
    self.lines = {}
    self.connection = [
        [0, 1], [1, 2], [2, 3], [0, 4],
        [4, 5], [5, 6], [0, 7], [7, 8],
        [8, 9], [9, 10], [8, 11], [11, 12],
        [12, 13], [8, 14], [14, 15], [15, 16]
    ]

    resolution = '432x368'
    w, h = model_wh(resolution)
    self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    self.cam = cv2.VideoCapture(camera)
    ret_val, image = self.cam.read()
    self.poseLifting = Prob3dPose('./src/lifting/models/prob_model_params.mat')
    keypoints = self.mesh(image)
    print('keypoints', keypoints)

    self.points = gl.GLScatterPlotItem(pos=keypoints, color=pg.glColor((0, 255, 0)), size=15)
    self.window.addItem(self.points)
    for n, pts in enumerate(self.connection):
        self.lines[n] = gl.GLLinePlotItem(
            pos=np.array([keypoints[p] for p in pts]),
            color=pg.glColor((0, 0, 255)),
            width=3,
            antialias=True)
        self.window.addItem(self.lines[n])
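# These Terrain-style classes typically pair __init__ with a Qt timer loop.
# A minimal sketch of the customary plumbing (method names and the QtCore
# import are assumptions; mesh() is this class's own 2d-to-3d keypoint step):
def update(self):
    ret_val, image = self.cam.read()
    try:
        keypoints = self.mesh(image)
    except Exception:
        return  # skip frames where no human could be lifted to 3d
    self.points.setData(pos=keypoints)

def start(self):
    # hand control to the Qt event loop
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()

def animation(self):
    timer = QtCore.QTimer()
    timer.timeout.connect(self.update)
    timer.start(10)  # refresh every 10 ms
    self.start()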
def __init__(self, zoom=1.0, resolution='656x368', model='cmu', show_process=False):
    self.zoom = zoom
    self.resolution = resolution
    self.model = model
    self.show_process = show_process

    logger.debug('initialization %s : %s' % (model, get_graph_path(model)))
    self.w, self.h = model_wh(resolution)
    self.e = TfPoseEstimator(get_graph_path(model), target_size=(self.w, self.h))
def __init__(self):
    """
    Initialize the graphics window and mesh surface
    """
    self.bitFalling = 0

    # Initialize plot.
    plt.ion()
    f2 = plt.figure(figsize=(6, 5))
    self.windowNeck = f2.add_subplot(1, 1, 1)
    self.windowNeck.set_title('Speed')
    self.windowNeck.set_xlabel('Time')
    self.windowNeck.set_ylabel('Speed')
    # plt.show()

    self.times = []
    self.recordVelocity = []
    self.recordNeck = []
    self.recordYTopRectangle = []
    self.recordHIP = []
    self.recordTimeList = []
    self.globalTime = 0
    self.highestNeck = 0
    # self.highestNeckTime = 0
    self.highestHIP = 0
    self.saveTimesStartFalling = -1
    self.surpriseMovingTime = -1
    self.detectedHIP_Y = 0
    self.detectedNECK_Y = 0
    self.extraDistance = 0
    self.fgbg = cv2.createBackgroundSubtractorMOG2(history=1, varThreshold=500, detectShadows=False)
    self.secondNeck = 0
    self.human_in_frame = False
    self.lastTimesFoundNeck = -1
    self.width = 300
    self.height = 200
    self.quotaVirtureNeck = 3
    self.used_quotaVirtureNeck = 0
    self.quoutaFalling = 0

    model = 'mobilenet_thin_432x368'
    w, h = model_wh(model)
    camera = 0  # 1 means external camera, 0 means internal camera
    self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    self.cam = cv2.VideoCapture(camera)
    # self.cam = cv2.VideoCapture('C:/outpy2.avi')
    self.cam.set(cv2.CAP_PROP_AUTOFOCUS, 0)  # turn the autofocus off
    self.recordAcceleration = []
def main():
    parser = argparse.ArgumentParser(description='tf-pose-estimation run by folder')
    parser.add_argument('--folder', type=str, default='./images/')
    parser.add_argument('--resolution', type=str, default='432x368',
                        help='network input resolution. default=432x368')
    parser.add_argument('--model', type=str, default='mobilenet_thin',
                        help='cmu / mobilenet_thin')
    parser.add_argument('--scales', type=str, default='[None]',
                        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    types = ('*.png', '*.jpg')
    files_grabbed = []
    for files in types:
        files_grabbed.extend(glob.glob(os.path.join(args.folder, files)))
    all_humans = dict()
    if not os.path.exists('output'):
        os.mkdir('output')
    for i, file in enumerate(files_grabbed):
        # estimate human poses from a single image !
        image = common.read_imgfile(file, None, None)
        t = time.time()
        humans = e.inference(image, scales=scales)
        elapsed = time.time() - t
        logger.info('inference image #%d: %s in %.4f seconds.' % (i, file, elapsed))
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        # cv2.imshow('tf-pose-estimation result', image)
        filename = 'pose_{}.png'.format(i)
        output_filepath = os.path.join('output', filename)
        cv2.imwrite(output_filepath, image)
        logger.info('image saved: {}'.format(output_filepath))
        # cv2.waitKey(5000)
        all_humans[file.replace(args.folder, '')] = humans

    with open(os.path.join(args.folder, 'pose.dil'), 'wb') as f:
        dill.dump(all_humans, f, protocol=dill.HIGHEST_PROTOCOL)
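# Invocation sketch for the folder runner above (the script name is an assumption):
#   python run_folder.py --folder ./images/ --model mobilenet_thin --resolution 432x368
# The pickled poses can be reloaded later with dill:
#   import dill
#   with open('./images/pose.dil', 'rb') as f:
#       all_humans = dill.load(f)   # dict: relative filename -> humans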
def __init__(self):
    """
    Initialize the graphics window and mesh surface
    """
    # set up the view window
    self.app = QtGui.QApplication(sys.argv)
    self.window = gl.GLViewWidget()
    self.window.setWindowTitle('Terrain')
    self.window.setGeometry(0, 110, 1920, 1080)
    self.window.setCameraPosition(distance=30, elevation=12)
    self.window.show()

    gx = gl.GLGridItem()
    gy = gl.GLGridItem()
    gz = gl.GLGridItem()
    gx.rotate(90, 0, 1, 0)
    gy.rotate(90, 1, 0, 0)
    gx.translate(-10, 0, 0)
    gy.translate(0, -10, 0)
    gz.translate(0, 0, -10)
    self.window.addItem(gx)
    self.window.addItem(gy)
    self.window.addItem(gz)

    model = 'mobilenet_thin'
    camera = 0
    w, h = model_wh("432x368")
    self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    self.cam = cv2.VideoCapture(camera)
    ret_val, image = self.cam.read()
    self.poseLifting = Prob3dPose('lifting/models/prob_model_params.mat')
    keypoints = self.mesh(image)

    self.points = gl.GLScatterPlotItem(
        pos=keypoints,
        color=pg.glColor((255, 255, 0)),
        size=15
    )
    # self.lines = gl.GLLinePlotItem(
    #     pos=lines,
    #     color=pg.glColor((255, 255, 255)),
    #     width=2
    # )
    self.window.addItem(self.points)
def __init__(self):
    """
    Initialize the graphics window and mesh surface
    """
    # Initialize plot.
    plt.ion()
    f = plt.figure(figsize=(5, 5))
    f2 = plt.figure(figsize=(6, 5))
    self.window3DBody = f.gca(projection='3d')
    self.window3DBody.set_title('3D_Body')
    self.windowStable = f2.add_subplot(1, 1, 1)
    self.windowStable.set_title('Stable')
    self.windowStable.set_xlabel('Time')
    self.windowStable.set_ylabel('Distant')
    self.windowStable.set_ylim([0, 1500])
    # plt.show()

    self.times = [0]
    self.stable = [0]
    self.recordHead = []
    self.fps_time = 0

    model = 'mobilenet_thin_432x368'
    w, h = model_wh(model)
    # model = 'cmu'
    # w, h = 656, 368
    camera = 1  # 1 means external camera, 0 means internal camera
    self.lines = {}
    self.connection = [
        [0, 1], [1, 2], [2, 3], [0, 4],
        [4, 5], [5, 6], [0, 7], [7, 8],
        [8, 9], [9, 10], [8, 11], [11, 12],
        [12, 13], [8, 14], [14, 15], [15, 16]
    ]
    self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    self.cam = cv2.VideoCapture(camera)
    # note: VideoCapture.read() takes no imread flag; the original passed
    # cv2.IMREAD_COLOR here, which read() would misinterpret as an output array
    ret_val, image = self.cam.read()
    self.poseLifting = Prob3dPose('./src/lifting/models/prob_model_params.mat')
    self.statusBodyWindow = 0
    try:
        keypoints = self.mesh(image)
    except Exception:
        pass
def __init__(self):
    """
    Initialize the graphics window and mesh surface,
    and add grids that fill up the background.
    """
    # set up the view window
    self.app = QtGui.QApplication(sys.argv)
    self.window = gl.GLViewWidget()
    self.window.setWindowTitle('Terrain')
    self.window.setGeometry(0, 110, 1920, 1080)
    self.window.setCameraPosition(distance=30, elevation=12)
    self.window.show()

    gx = gl.GLGridItem()
    gy = gl.GLGridItem()
    gz = gl.GLGridItem()
    gx.rotate(90, 0, 1, 0)
    gy.rotate(90, 1, 0, 0)
    gx.translate(-10, 0, 0)
    gy.translate(0, -10, 0)  # translate all of the grids
    gz.translate(0, 0, -10)  # so they're pushed out to the background
    self.window.addItem(gx)
    self.window.addItem(gy)  # add them all to the window
    self.window.addItem(gz)

    model = "mobilenet_thin_432x368"
    camera = 0
    w, h = model_wh(model)
    # build our own estimator object, mirroring what run.py does with args.model
    self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    self.cam = cv2.VideoCapture(camera)
    ret_val, image = self.cam.read()
    # 2d-to-3d pose lifting model
    self.poseLifting = Prob3dPose('./src/lifting/models/prob_model_params.mat')
    keypoints = self.mesh(image)

    self.points = gl.GLScatterPlotItem(  # plot dots
        pos=keypoints,
        color=pg.glColor((0, 255, 0)),
        size=15)
    self.window.addItem(self.points)
def cnvt(img, name):
    os.chdir(read_path)
    image = cv2.imread(img)
    os.chdir(old_path)
    model = 'mobilenet_thin'
    resolution = '432x368'
    w, h = model_wh(resolution)
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    humans = e.inference(image)
    blank_image = np.zeros((h, w, 3), np.uint8)
    image = TfPoseEstimator.draw_humans(blank_image, humans, imgcopy=False)
    os.chdir(save_path)
    cv2.imwrite(name, image)
    print("Saved - %s As - %s" % (img, name))
    os.chdir(old_path)
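# Hedged batch sketch for cnvt() above: read_path, save_path and old_path are
# module globals the function already assumes; the filename pattern is an example.
for idx, fname in enumerate(sorted(os.listdir(read_path))):
    if fname.lower().endswith(('.jpg', '.png')):
        cnvt(fname, 'skeleton_%04d.png' % idx)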
def __init__(self, calibration_file):
    """ This method inits an OpenPose model """
    args = type('', (), {})
    # args.resolution = '1312x736'
    args.resolution = '432x368'
    args.model = 'mobilenet_thin'
    args.scales = '[None]'
    scales = ast.literal_eval(args.scales)
    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    self.model = {'e': e, 'scales': scales}

    # start a new matlab session
    self.matlab = matlab.engine.start_matlab()
    # or...
    # in matlab run: matlab.engine.shareEngine
    # and uncomment the following line
    # self.matlab = matlab.engine.connect_matlab()

    self.calibration = calibration_file
def __init__(self):
    """
    Initialize the graphics window and mesh surface
    """
    self.bitFalling = 0

    # Initialize plot.
    self.times = []
    self.recordVelocity = []
    self.recordNeck = []
    self.recordYTopRectangle = []
    self.recordHIP = []
    self.recordTimeList = []
    self.globalTime = 0
    self.highestNeck = 0
    # self.highestNeckTime = 0
    self.highestHIP = 0
    self.saveTimesStartFalling = -1
    self.quoutaFalling = 0
    self.surpriseMovingTime = -1
    self.detectedHIP_Y = 0
    self.detectedNECK_Y = 0
    self.extraDistance = 0
    self.fgbg = cv2.createBackgroundSubtractorMOG2(history=1, varThreshold=500, detectShadows=False)
    self.secondNeck = 0
    self.human_in_frame = False
    self.lastTimesFoundNeck = -1
    self.width = 300
    self.height = 200
    self.quotaVirtureNeck = 3
    self.used_quotaVirtureNeck = 0

    model = 'mobilenet_thin_432x368'
    w, h = model_wh(model)
    self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    self.recordAcceleration = []
def __init__(self):
    self.parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    self.parser.add_argument('--resolution', type=str, default='432x368',
                             help='network input resolution. default=432x368')
    self.parser.add_argument('--model', type=str, default='mobilenet_thin',
                             help='cmu / mobilenet_thin')
    self.parser.add_argument('--scales', type=str, default='[None]',
                             help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    self.args = self.parser.parse_args()
    self.scales = ast.literal_eval(self.args.scales)
    self.w, self.h = model_wh(self.args.resolution)
    self.e = TfPoseEstimator(get_graph_path(self.args.model), target_size=(self.w, self.h))
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    # image to process; falls back to the default image if not given
    parser.add_argument('--image', type=str, default='../images/p1.jpg')
    # network input resolution
    parser.add_argument('--resolution', type=str, default='432x368',
                        help='network input resolution. default=432x368')
    # model selection
    parser.add_argument('--model', type=str, default='mobilenet_thin',
                        help='cmu / mobilenet_thin')
    # optional multi-scale inference
    parser.add_argument('--scales', type=str, default='[None]',
                        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    # width and height, validated as multiples of 16
    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=scales)
    elapsed = time.time() - t
    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    # cv2.imshow('tf-pose-estimation result', image)
    # cv2.waitKey()
# parameters
image_topic = rospy.get_param('~camera', '')
model = rospy.get_param('~model', 'cmu_640x480')
scales = rospy.get_param('~scales', '[None]')
scales = ast.literal_eval(scales)
tf_lock = Lock()

rospy.loginfo('[TfPoseEstimatorROS] scales(%d)=%s' % (len(scales), str(scales)))

if not image_topic:
    rospy.logerr('Parameter \'camera\' is not provided.')
    sys.exit(-1)

try:
    w, h = model_wh(model)
    graph_path = get_graph_path(model)

    rospack = rospkg.RosPack()
    graph_path = os.path.join(rospack.get_path('tfpose_ros'), graph_path)
except Exception as e:
    rospy.logerr('invalid model: %s, e=%s' % (model, e))
    sys.exit(-1)

pose_estimator = TfPoseEstimator(graph_path, target_size=(w, h))
cv_bridge = CvBridge()

rospy.Subscriber(image_topic, Image, callback_image, queue_size=1, buff_size=2**24)
pub_pose = rospy.Publisher('~pose', Persons, queue_size=1)

rospy.loginfo('start+')
cocoGt = COCO(coco_json_file)
catIds = cocoGt.getCatIds(catNms=['person'])
keys = cocoGt.getImgIds(catIds=catIds)
if args.data_idx < 0:
    if eval_size > 0:
        keys = keys[:eval_size]  # only use the first eval_size elements
else:
    keys = [keys[args.data_idx]]
logger.info('validation %s set size=%d' % (coco_json_file, len(keys)))
write_json = 'etcs/%s_%s_%f.json' % (args.model, args.resize, args.resize_out_ratio)

logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
w, h = model_wh(args.resize)
if w == 0 or h == 0:
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368))
else:
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

result = []
for i, k in enumerate(tqdm(keys)):
    img_meta = cocoGt.loadImgs(k)[0]
    img_idx = img_meta['id']

    img_name = os.path.join(image_dir, img_meta['file_name'])
    image = read_imgfile(img_name, None, None)
    if image is None:
        logger.error('image not found, path=%s' % img_name)
        sys.exit(-1)
rospy.init_node('TfPoseEstimatorROS', anonymous=True, log_level=rospy.INFO)

# parameters
image_topic = rospy.get_param('~camera', '')
model = rospy.get_param('~model', 'cmu')
resolution = rospy.get_param('~resolution', '432x368')
resize_out_ratio = float(rospy.get_param('~resize_out_ratio', '4.0'))
tf_lock = Lock()

if not image_topic:
    rospy.logerr('Parameter \'camera\' is not provided.')
    sys.exit(-1)

try:
    w, h = model_wh(resolution)
    graph_path = get_graph_path(model)

    rospack = rospkg.RosPack()
    graph_path = os.path.join(rospack.get_path('tfpose_ros'), graph_path)
except Exception as e:
    rospy.logerr('invalid model: %s, e=%s' % (model, e))
    sys.exit(-1)

pose_estimator = TfPoseEstimator(graph_path, target_size=(w, h))
cv_bridge = CvBridge()

rospy.Subscriber(image_topic, Image, callback_image, queue_size=1, buff_size=2**24)
pub_pose = rospy.Publisher('~pose', Persons, queue_size=1)

rospy.loginfo('start+')
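# Hedged usage sketch: the private params above are normally set from a launch
# file or rosrun remappings, e.g. (the node script name is an assumption):
#   rosrun tfpose_ros pose_estimator_node.py _camera:=/camera/image_raw \
#       _model:=mobilenet_thin _resolution:=432x368 _resize_out_ratio:=4.0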
parser.add_argument(
    '--conf',
    type=str,
    default='configs/mgh.yaml',
    help='Path to the YAML config file containing the parameters helpful for Openpose inference')
args = parser.parse_args()

with open(args.conf, 'r') as f:
    conf_vals = yaml.load(f, Loader=yaml.FullLoader)
conf_vals = conf_vals['estimate_pose']

logger.debug('initialization %s : %s' %
             (conf_vals['model_name'], get_graph_path(conf_vals['model_name'])))
w, h = model_wh(conf_vals['resolution'])
e = TfPoseEstimator(get_graph_path(conf_vals['model_name']), target_size=(w, h))
subdirs = os.listdir(conf_vals['data_dir'])

def process_vids(subs):
    for i in trange(len(subs), desc='Subject ID', position=0):
        sub = subs[i]
        video_root = os.path.join(conf_vals['data_dir'], sub)
        l_vids = []
        for root, dirs, fnames in os.walk(video_root):
            for fname in fnmatch.filter(fnames, '*.' + conf_vals['data_ext']):
                l_vids.append(os.path.join(root, fname))
        l_vids = sorted(l_vids)
def pose_dd():
    fps_time = 0
    global humans
    global BREAK
    global image
    parser = argparse.ArgumentParser(description='tf-pose-estimation realtime webcam')
    parser.add_argument('--camera', type=int, default=0)
    parser.add_argument('--zoom', type=float, default=1.0)
    parser.add_argument('--model', type=str, default='mobilenet_thin_432x368',
                        help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
    parser.add_argument('--show-process', type=bool, default=False,
                        help='for debug purpose, if enabled, speed for inference is dropped.')
    args = parser.parse_args()

    # logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.model)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    # logger.debug('cam read+')
    cam = cv2.VideoCapture(args.camera)
    ret_val, image = cam.read()
    # logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))

    while True:
        ret_val, image = cam.read()
        # logger.debug('image preprocess+')
        if args.zoom < 1.0:
            canvas = np.zeros_like(image)
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom,
                                    interpolation=cv2.INTER_LINEAR)
            dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
            dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
            canvas[dy:dy + img_scaled.shape[0], dx:dx + img_scaled.shape[1]] = img_scaled
            image = canvas
        elif args.zoom > 1.0:
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom,
                                    interpolation=cv2.INTER_LINEAR)
            dx = (img_scaled.shape[1] - image.shape[1]) // 2
            dy = (img_scaled.shape[0] - image.shape[0]) // 2
            # crop a centered window of the original size
            # (the original sliced dy:image.shape[0], which is off-center)
            image = img_scaled[dy:dy + image.shape[0], dx:dx + image.shape[1]]

        # logger.debug('image process+')
        humans = e.inference(image)
        # logger.debug('postprocess+')
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        # logger.debug('show+')
        cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.imshow('tf-pose-estimation result', image)
        fps_time = time.time()
        # out.write(image)
        if cv2.waitKey(1) == 27:
            # out.release()
            cv2.destroyAllWindows()
            BREAK = True
            clientsocket.send("off".encode('utf-8'))
            print("off sent")
            import sys
            sys.exit(0)
            break
def doCNNTracking(args):
    global is_tracking, logger
    fps_time = 0

    logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    logger.debug('cam read+')
    pipeline = rs.pipeline()
    pipeline.start()
    frames = pipeline.wait_for_frames()
    # grab the depth frame
    depth = frames.get_depth_frame()
    depth_image_data = depth.as_frame().get_data()
    depth_image = np.asanyarray(depth_image_data)
    logger.info('cam depth image=%dx%d' % (depth_image.shape[1], depth_image.shape[0]))
    logger.info('camera ready')

    # clip box that maps the depth image onto the rgb image
    clip_box = [100, -100, 290, -300]
    human_list = []
    while True:
        if is_tracking:
            fps_time = time.time()
            frames = pipeline.wait_for_frames()

            # grab the rgb frame
            image_frame = frames.get_color_frame()
            image_data = image_frame.as_frame().get_data()
            image = np.asanyarray(image_data)
            # change bgr to rgb
            image = np.array(image[..., ::-1])
            origin_image = image

            # grab the depth frame
            depth = frames.get_depth_frame()
            depth_image_data = depth.as_frame().get_data()
            depth_image = np.asanyarray(depth_image_data)
            depth_image = depth_image[int(clip_box[0]):int(clip_box[1]),
                                      int(clip_box[2]):int(clip_box[3])]
            depth_image = cv2.resize(depth_image, (640, 480), interpolation=cv2.INTER_CUBIC)
            depth_image = depth_image / 30
            depth_image.astype(np.uint8)

            # depth-based background-removal mask
            thresh = cv2.inRange(depth_image, 20, 160)
            # morphological clean-up of the mask
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
            eroded = cv2.erode(thresh, kernel)
            kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
            dilated = cv2.dilate(eroded, kernel2)

            # brightness mask
            bright_mask = np.zeros(image.shape)
            bright_mask.fill(200)
            bright_mask = bright_mask.astype(np.uint8)
            bright_mask = cv2.bitwise_and(bright_mask, bright_mask, mask=dilated)

            # brightness adjustment of the rgb image
            # image = cv2.bitwise_and(image, image, mask=dilated)
            image = image.astype(int) + 200 - bright_mask.astype(int)
            image = np.clip(image, 0, 255)
            image = image.astype(np.uint8)
            image = origin_image

            # edge detection
            image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            blurred = cv2.GaussianBlur(image_gray, (3, 3), 0)
            canny = cv2.Canny(blurred, 50, 50)
            canny_blurred = cv2.GaussianBlur(canny, (13, 13), 0)
            cv2.imshow('test', canny_blurred)

            # tf-pose input scaling
            if args.zoom < 1.0:
                canvas = np.zeros_like(image)
                img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom,
                                        interpolation=cv2.INTER_LINEAR)
                dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
                dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
                canvas[dy:dy + img_scaled.shape[0], dx:dx + img_scaled.shape[1]] = img_scaled
                image = canvas
            elif args.zoom > 1.0:
                img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom,
                                        interpolation=cv2.INTER_LINEAR)
                dx = (img_scaled.shape[1] - image.shape[1]) // 2
                dy = (img_scaled.shape[0] - image.shape[0]) // 2
                image = img_scaled[dy:dy + image.shape[0], dx:dx + image.shape[1]]

            # tf-pose inference
            humans = e.inference(image)
            # # get the joints
            # jdata = TfPoseEstimator.get_json_data(image.shape[0], image.shape[1], humans)
            # if len(jdata) > 2:
            #     try:
            #         # send position data to the server
            #         chating_room.sendTrackingData(jdata, 'track')
            #     except:
            #         print("Cannot send data to server.")

            # depth image after background removal
            depth_masked = cv2.bitwise_and(depth_image, depth_image, mask=dilated)

            new_human_list = []
            for human in humans:
                # per-joint depth data
                depthDatas = [None] * 20
                image_h, image_w = image.shape[:2]
                # get joint data with depth
                for i in range(common.CocoPart.Background.value):
                    if i not in human.body_parts.keys():
                        continue
                    body_part = human.body_parts[i]
                    y = int(body_part.y * image_h + 0.5)
                    x = int(body_part.x * image_w + 0.5)
                    s = 5
                    mat = depth_masked[y - s if y - s >= 0 else 0:y + s if y + s <= 479 else 479,
                                       x - s if x - s >= 0 else 0:x + s if x + s <= 639 else 639]
                    count = 0
                    sum_depth = 0
                    for j in range(mat.shape[0]):
                        for k in range(mat.shape[1]):
                            if mat[j, k] != 0:
                                sum_depth += mat[j, k]
                                count += 1
                    depth = sum_depth / count if count > 0 else 0
                    try:
                        depthDatas[i] = JointDepthData(i, x, y, depth)
                    except Exception:
                        print("err:" + str(x) + " " + str(y) + " " +
                              str(body_part.x) + " " + str(body_part.y))

                # post-processing
                jn0 = np.zeros((20, 3))
                for j in depthDatas:
                    if j is not None:
                        jn0[j.jn] = np.array([1, j.x, j.y])
                jn0 = jn0.astype(int)
                jn00 = jn0.copy()
                old_images = None
                if len(human_list) > 0:
                    # find the most similar previously tracked human
                    most_simular_value = 9999999
                    most_simular_human = None
                    jn1 = np.zeros((20, 3))
                    for human_data in human_list:
                        for j in human_data.joint_list:
                            if j is not None:
                                jn1[j.jn] = np.array([1, j.x, j.y])
                        jn1 = jn1.astype(int)
                        different_value = 0
                        match_count = 0
                        for i in range(18):
                            if jn0[i, 0] * jn1[i, 0] != 0:
                                different_value += np.linalg.norm(jn0[i] - jn1[i])
                                match_count += 1
                        # guard against zero matches, and remember the best score
                        # (the original never updated most_simular_value)
                        if match_count > 0 and different_value / match_count < most_simular_value:
                            most_simular_value = different_value / match_count
                            most_simular_human = human_data
                            old_images = most_simular_human.image_list
                first_seen = old_images is None
                if first_seen:
                    old_images = [np.zeros((80, 80))] * 18  # original had np.zeos (typo)

                w = 160
                # (commented-out experiment kept from the original: per-joint
                #  refinement by searching around each keypoint)
                # for joint_i in range(18):
                #     smallest_diff = 9999999
                #     if (jn0[joint_i, 0] == int(1) and jn1[joint_i, 0] == int(1)
                #             and old_images[joint_i].shape[0] > 0 and old_images[joint_i].shape[1] > 0):
                #         new_center = np.zeros(2)
                #         for i in range(-20, 20, 10):
                #             for j in range(-20, 20, 10):
                #                 center = np.array([jn0[joint_i, 1] + i, jn0[joint_i, 2] + j])
                #                 mat0 = canny_blurred[center[0] - w:center[0] + w, center[1] - w:center[1] + w]
                #                 if (mat0.shape[0] != 2 * w or mat0.shape[1] != 2 * w
                #                         or old_images[joint_i].shape[0] != 2 * w
                #                         or old_images[joint_i].shape[1] != 2 * w):
                #                     continue
                #                 try:
                #                     mat1 = mat0 - old_images[joint_i]
                #                 except:
                #                     print(mat0.shape)
                #                 mat2 = np.exp2(mat1)
                #                 diff = np.sum(mat2)
                #                 if diff < smallest_diff:
                #                     smallest_diff = diff
                #                     new_images[joint_i] = canny_blurred[center[0] - w:center[0] + w,
                #                                                         center[1] - w:center[1] + w]
                #                     new_center = center
                #         if smallest_diff < 9999999:
                #             jn0[joint_i, 1] = int(new_center[0] + jn0[joint_i, 1]) / 2
                #             jn0[joint_i, 2] = int(new_center[1] + jn1[joint_i, 1]) / 2

                temp = np.copy(image)
                for joint_i in range(18):
                    center = np.array([jn0[joint_i, 1], jn0[joint_i, 2]])
                    mat0 = image_gray[center[1] - w if center[1] - w >= 0 else 0:
                                      center[1] + w if center[1] + w < 480 else 479,
                                      center[0] - w if center[0] - w >= 0 else 0:
                                      center[0] + w if center[0] + w < 640 else 639]
                    mat1 = old_images[joint_i]
                    if (mat0.shape[0] > mat1.shape[0] and mat0.shape[1] > mat1.shape[1]
                            and mat0.shape[0] > 0 and mat0.shape[1] > 0
                            and mat1.shape[0] > 0 and mat1.shape[1] > 0):
                        loc = match_img(mat0, mat1)
                        lt = (center[0] - w if center[0] - w >= 0 else 0,
                              center[1] - w if center[1] - w >= 0 else 0)
                        if len(loc[1]) > 0:
                            c = (int(sum(loc[1]) / len(loc[1])), int(sum(loc[0]) / len(loc[0])))
                            cv2.rectangle(temp, (c[0] + lt[0], c[1] + lt[1]),
                                          (c[0] + lt[0] + 40, c[1] + lt[1] + 40),
                                          common.CocoColors[joint_i], 2)
                    # for pt in loc:
                    #     c = pt - np.array((center[1] - w if center[1] - w >= 0 else 0,
                    #                        center[0] - w if center[0] - w >= 0 else 0)) + np.array((center[1], center[0]))
                    #     c2 = (c[0], c[1])
                    #     cv2.rectangle(temp, c2, (c[0] + 40, c[1] + 40), (255, 255, 255), 2)
                cv2.imshow('test3', temp)

                w = 40
                new_depthDatas = [None] * 20
                new_images = [np.zeros((80, 80))] * 20
                for joint_i in range(18):
                    if jn0[joint_i, 0] == 1:
                        new_depthDatas[joint_i] = JointDepthData(joint_i, jn0[joint_i, 1],
                                                                 jn0[joint_i, 2],
                                                                 depthDatas[joint_i].dp)
                        m = np.copy(image_gray[jn0[joint_i, 2] - w:jn0[joint_i, 2] + w,
                                               jn0[joint_i, 1] - w:jn0[joint_i, 1] + w])
                        # first_seen replaces the original's dead `old_images == None`
                        # checks (old_images is never None at this point)
                        if first_seen and m.shape[0] == 2 * w and m.shape[1] == 2 * w:
                            cv2.circle(image, (jn0[joint_i, 1], jn0[joint_i, 2]), w, (0, 0, 255), -1)
                            new_images[joint_i] = m
                        elif first_seen and (m.shape[0] < 2 * w or m.shape[1] < 2 * w):
                            pass
                        elif m.shape[0] == 2 * w and m.shape[1] == 2 * w:
                            cv2.circle(image, (jn0[joint_i, 1], jn0[joint_i, 2]), w, (0, 255, 0), -1)
                            new_images[joint_i] = old_images[joint_i] * 0.9 + m * 0.1
                        else:
                            cv2.circle(image, (jn0[joint_i, 1], jn0[joint_i, 2]), w, (255, 0, 0), -1)
                            new_images[joint_i] = old_images[joint_i]
                        if joint_i == 1 and m.shape[0] == 2 * w and m.shape[1] == 2 * w:
                            cv2.imshow('test2', m)
                            cv2.circle(image, (jn00[joint_i, 1], jn00[joint_i, 2]), w, (255, 255, 0), -1)
                        # new_images.append(depth_masked[jn0[joint_i, 1] - 5:jn0[joint_i, 1] + 5,
                        #                                jn0[joint_i, 2] - 5:jn0[joint_i, 2] + 5])
                new_human = HumanData(new_depthDatas, new_images)
                new_human_list.append(new_human)
                # depth_jdata = json.dumps(new_depthDatas)
                # if len(depth_jdata) > 2:
                #     try:
                #         # send depth data to the server
                #         print(depth_jdata)
                #         chating_room.sendTrackingData(depth_jdata, 'track_depth')
                #     except:
                #         print("Cannot send depth data to server.")
            human_list = new_human_list

            depth_image = cv2.applyColorMap(cv2.convertScaleAbs(depth_image / 25), cv2.COLORMAP_JET)
            # cv2.circle(image, (320, 240), 5, (255, 0, 0), -1)
            image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
            cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                        (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.imshow('tf-pose-estimation result', image)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break
    cv2.destroyAllWindows()
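# match_img() is called above but not defined in this snippet. A minimal sketch
# consistent with how its result is used (loc[0] = row indices, loc[1] = column
# indices, like np.where output) could be OpenCV template matching; the 0.8
# threshold is an assumption, not the author's value:
def match_img(search_img, template, threshold=0.8):
    res = cv2.matchTemplate(search_img.astype(np.uint8),
                            template.astype(np.uint8), cv2.TM_CCOEFF_NORMED)
    return np.where(res >= threshold)  # (rows, cols) of strong matches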
parser.add_argument('--camera', type=int, default=0)
parser.add_argument('--zoom', type=float, default=1.0)
parser.add_argument('--model', type=str, default='cmu_640x480',
                    help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
parser.add_argument('--show-process', type=bool, default=False,
                    help='for debug purpose, if enabled, speed for inference is dropped.')
args = parser.parse_args()

logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
w, h = model_wh(args.model)
e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
logger.debug('cam read+')
cam = cv2.VideoCapture(args.camera)
ret_val, image = cam.read()
logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))

while True:
    ret_val, image = cam.read()
    # logger.debug('image preprocess+')
    if args.zoom < 1.0:
        canvas = np.zeros_like(image)
        img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom,
                                interpolation=cv2.INTER_LINEAR)  # call completed from the identical block elsewhere in this collection
def startt():
    fps_time = 0

    logger.debug('initialization %s : %s' % ('mobilenet_thin', get_graph_path('mobilenet_thin')))
    w, h = model_wh('432x368')
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h))
    logger.debug('cam read+')
    cam = cv2.VideoCapture(0, cv2.CAP_DSHOW)  # "http://127.0.0.1:8000/"
    ret_val, image = cam.read()
    logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))
    frame = 0
    while True:
        ret_val, image = cam.read()
        # image2 = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY)
        image2 = cv2.imread('../images/123.jpg')
        logger.debug('image preprocess+')
        """if args.zoom < 1.0:
            canvas = np.zeros_like(image)
            canvas2 = np.zeros_like(image2)
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom,
                                    interpolation=cv2.INTER_LINEAR)
            img_scaled2 = cv2.resize(image2, None, fx=args.zoom, fy=args.zoom,
                                     interpolation=cv2.INTER_LINEAR)
            dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
            dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
            canvas[dy:dy + img_scaled.shape[0], dx:dx + img_scaled.shape[1]] = img_scaled
            canvas2[dy:dy + img_scaled2.shape[0], dx:dx + img_scaled2.shape[1]] = img_scaled2
            image = canvas
            image2 = canvas2
        elif args.zoom > 1.0:
            img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom,
                                    interpolation=cv2.INTER_LINEAR)
            dx = (img_scaled.shape[1] - image.shape[1]) // 2
            dy = (img_scaled.shape[0] - image.shape[0]) // 2
            image = img_scaled[dy:image.shape[0], dx:image.shape[1]]"""
        # image2 = np.array(image2)
        logger.debug('image process+')
        humans = e.inference(image)
        n = len(humans)
        print(humans)
        """body = {}
        body["key_points"] = []
        for human in humans:
            for parts in human.body_parts.items():
                print("id", parts[0])
                print("x", parts[1].x)
                print("y", parts[1].y)
                print("score = ", parts[1].score)
                body["key_points"].append({"ID": parts[0], "X": parts[1].x, "Y": parts[1].y})
        with open("json/{0}.json".format(str(i)), 'w') as file:
            json.dump(body, file)"""
        output_json = '3d-pose-baseline/src3d/json/'
        logger.debug('postprocess+')
        image2 = TfPoseEstimator.draw_humans(image2, humans, imgcopy=False, frame=frame, dir=output_json)
        logger.debug('show+')
        cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.putText(image, "persons = %f" % n, (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 3)
        cv2.imshow('orignal', image)
        # cv2.imshow('tf-pose-estimation result', image2)
        if n == 1:
            frame = frame + 1
        fps_time = time.time()
        logger.debug('finished+')
        keyboard = cv2.waitKey(30)
        # waitKey returns an int, so compare against ord('q'), not the string 'q'
        if keyboard == ord('q') or keyboard == 27:
            break
    cam.release()
    cv2.destroyAllWindows()
    return image2
def post(self):
    global finished_post
    if self.request.headers['Content-Type'] == 'imagebin' and finished_post == 1:
        print(finished_post)
        finished_post = 0
        print('Received image')
        image = self.request.body
        fh = open('static/image1.jpg', 'wb')
        fh.write(image)
        fh.close()
        # fh = open('static/image1.jpg', 'ab')
        # fh.write(bytes([0xD9]))
        # fh.close()
        print('0')
        # image = cv2.imread('static/image1.jpg')
        print('1')
        print('2')
        parser = argparse.ArgumentParser(description='tf-pose-estimation run')
        parser.add_argument('--resolution', type=str, default='432x368',
                            help='network input resolution. default=432x368')
        parser.add_argument('--model', type=str, default='mobilenet_thin',
                            help='cmu / mobilenet_thin')
        parser.add_argument('--scales', type=str, default='[None]',
                            help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
        args = parser.parse_args()
        scales = ast.literal_eval(args.scales)

        w, h = model_wh(args.resolution)
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

        # estimate human poses from a single image !
        image = common.read_imgfile('static/image1.jpg', None, None)
        # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
        t = time.time()
        humans = e.inference(image, scales=scales)
        elapsed = time.time() - t
        logger.info('inference image: image3.jpg in %.4f seconds.' % elapsed)

        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        # cv2.putText(image,
        #             f"Fallen: False",
        #             (60, 60), cv2.FONT_HERSHEY_SIMPLEX, 2,
        #             (0, 255, 0), 5)
        cv2.imwrite('static/image3.jpg', image)
        for client in clients:
            update_clients(client)
        print(finished_post)
        finished_post = 1
def doCNNTracking(args):
    global is_tracking, logger
    fps_time = 0

    logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    logger.debug('cam read+')
    pipeline = rs.pipeline()
    pipeline.start()
    frames = pipeline.wait_for_frames()
    # grab the depth frame
    depth = frames.get_depth_frame()
    depth_image_data = depth.as_frame().get_data()
    depth_image = np.asanyarray(depth_image_data)
    logger.info('cam depth image=%dx%d' % (depth_image.shape[1], depth_image.shape[0]))
    logger.info('camera ready')

    # clip box that maps the depth image onto the rgb image
    clip_box = [100, -100, 290, -300]
    while True:
        if is_tracking:
            fps_time = time.time()
            frames = pipeline.wait_for_frames()

            # grab the rgb frame
            image_frame = frames.get_color_frame()
            image_data = image_frame.as_frame().get_data()
            image = np.asanyarray(image_data)
            # change bgr to rgb
            image = np.array(image[..., ::-1])
            origin_image = image

            # grab the depth frame
            depth = frames.get_depth_frame()
            depth_image_data = depth.as_frame().get_data()
            depth_image = np.asanyarray(depth_image_data)
            depth_image = depth_image[int(clip_box[0]):int(clip_box[1]),
                                      int(clip_box[2]):int(clip_box[3])]
            depth_image = cv2.resize(depth_image, (640, 480), interpolation=cv2.INTER_CUBIC)
            depth_image = depth_image / 30
            depth_image.astype(np.uint8)

            # depth-based background-removal mask
            thresh = cv2.inRange(depth_image, 20, 200)
            # morphological clean-up of the mask
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
            eroded = cv2.erode(thresh, kernel)
            kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
            dilated = cv2.dilate(eroded, kernel2)

            # brightness mask
            bright_mask = np.zeros(image.shape)
            bright_mask.fill(200)
            bright_mask = bright_mask.astype(np.uint8)
            bright_mask = cv2.bitwise_and(bright_mask, bright_mask, mask=dilated)

            # brightness adjustment of the rgb image
            # image = cv2.bitwise_and(image, image, mask=dilated)
            image = image.astype(int) + 200 - bright_mask.astype(int)
            image = np.clip(image, 0, 255)
            image = image.astype(np.uint8)

            # tf-pose input scaling
            if args.zoom < 1.0:
                canvas = np.zeros_like(image)
                img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom,
                                        interpolation=cv2.INTER_LINEAR)
                dx = (canvas.shape[1] - img_scaled.shape[1]) // 2
                dy = (canvas.shape[0] - img_scaled.shape[0]) // 2
                canvas[dy:dy + img_scaled.shape[0], dx:dx + img_scaled.shape[1]] = img_scaled
                image = canvas
            elif args.zoom > 1.0:
                img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom,
                                        interpolation=cv2.INTER_LINEAR)
                dx = (img_scaled.shape[1] - image.shape[1]) // 2
                dy = (img_scaled.shape[0] - image.shape[0]) // 2
                image = img_scaled[dy:dy + image.shape[0], dx:dx + image.shape[1]]

            # tf-pose inference
            humans = e.inference(image)
            # # get the joints
            # jdata = TfPoseEstimator.get_json_data(image.shape[0], image.shape[1], humans)
            # if len(jdata) > 2:
            #     try:
            #         # send position data to the server
            #         chating_room.sendTrackingData(jdata, 'track')
            #     except:
            #         print("Cannot send data to server.")

            # depth image after background removal
            depth_masked = cv2.bitwise_and(depth_image, depth_image, mask=dilated)

            human_json_datas = []
            for human in humans:
                # per-joint depth data
                depthDatas = []
                image_h, image_w = image.shape[:2]
                # get each joint's depth from a small window around it
                for i in range(common.CocoPart.Background.value):
                    if i not in human.body_parts.keys():
                        continue
                    body_part = human.body_parts[i]
                    y = int(body_part.y * image_h + 0.5)
                    x = int(body_part.x * image_w + 0.5)
                    s = 5
                    mat = depth_masked[y - s if y - s >= 0 else 0:y + s if y + s <= 479 else 479,
                                       x - s if x - s >= 0 else 0:x + s if x + s <= 639 else 639]
                    count = 0
                    sum_depth = 0
                    for j in range(mat.shape[0]):
                        for k in range(mat.shape[1]):
                            if mat[j, k] != 0:
                                sum_depth += mat[j, k]
                                count += 1
                    depth = sum_depth / count if count > 0 else 0
                    try:
                        depthDatas.append(JointDepthData(i, x, y, depth).__dict__)
                    except Exception:
                        print("err:" + str(x) + " " + str(y) + " " +
                              str(body_part.x) + " " + str(body_part.y))
                human_json_datas.append(json.dumps(depthDatas))

            json_data = json.dumps(human_json_datas)
            if len(json_data) > 2:
                try:
                    # send depth data to the server
                    chating_room.sendTrackingData(json_data, 'track_depth')
                except Exception:
                    print("Cannot send depth data to server.")

            depth_image = cv2.applyColorMap(cv2.convertScaleAbs(depth_image / 25), cv2.COLORMAP_JET)
            cv2.circle(image, (320, 240), 5, (255, 255, 255), -1)
            cv2.circle(image, (304, 480 - 98), 5, (0, 0, 255), -1)
            cv2.circle(image, (377, 480 - 197), 5, (0, 160, 255), -1)
            cv2.circle(image, (106, 480 - 49), 5, (0, 255, 255), -1)
            cv2.circle(image, (460, 480 - 136), 5, (0, 255, 0), -1)
            cv2.circle(image, (481, 480 - 134), 5, (255, 0, 0), -1)
            cv2.circle(image, (85, 480 - 143), 5, (255, 160, 0), -1)
            image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
            cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                        (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.imshow('tf-pose-estimation result', image)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break
    cv2.destroyAllWindows()
def __init__(self):
    """
    Initialize the graphics window and mesh surface,
    and add grids that fill up the background.
    """
    # set up the view window
    self.app = QtGui.QApplication(sys.argv)
    self.window = gl.GLViewWidget()
    self.window.setFixedSize(1920, 1080)
    self.window.showMaximized()
    # self.window.setMaximumSize(1920, 1080)
    # self.win = pg.GraphicsWindow()
    # self.vb = self.win.addViewBox(col=0, row=0)
    # self.t = pg.TextItem("zeynep", (255, 255, 255), anchor=(0, 0))
    # self.vb.addItem(self.t)
    self.window.setWindowTitle('Terrain')
    self.window.setGeometry(0, 0, 1920, 1080)
    self.window.setCameraPosition(distance=30, elevation=12)
    self.window.show()
    # self.root = Tk()
    self.angle = ""
    # self.root = Tk()
    # self.root.geometry("100x50+%d+%d" % (0, 0))  # top left
    # self.root.overrideredirect(True)             # frameless tkinter window
    # self.root.resizable(False, False)
    # self.root.columnconfigure(0, weight=1)
    # self.root.rowconfigure(0, weight=1)
    # TEXT = ""
    # self.lbl = tk.Label(root, text=TEXT, bg="#e61c1c", font=("bold", 15), border=0, width=35)
    # self.lbl.grid(column=0, row=0, sticky="nsew")

    gx = gl.GLGridItem()
    gy = gl.GLGridItem()
    gz = gl.GLGridItem()
    gx.rotate(90, 0, 1, 0)
    gy.rotate(90, 1, 0, 0)
    gx.translate(-10, 0, 0)
    gy.translate(0, -10, 0)  # translate all of the grids
    gz.translate(0, 0, -10)  # so they're pushed out to the background
    self.window.addItem(gx)
    self.window.addItem(gy)  # add them all to the window
    self.window.addItem(gz)

    model = "mobilenet_thin_432x368"
    camera = 0
    self.lines = {}
    self.connections = [  # lines connecting the key points
        [0, 1], [1, 2], [2, 3], [0, 4],
        [4, 5], [5, 6], [0, 7], [7, 8],
        [8, 9], [9, 10], [8, 11], [11, 12],
        [12, 13], [8, 14], [14, 15], [15, 16]
    ]  # 5.35

    w, h = model_wh(model)
    # build our own estimator object, mirroring what run.py does with args.model
    self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    self.cam = cv2.VideoCapture(camera)
    ret_val, image = self.cam.read()
    # 2d-to-3d pose lifting model
    self.poseLifting = Prob3dPose('./src/lifting/models/prob_model_params.mat')
    keypoints = self.mesh(image)

    # rightPointList = keypoints[2:5]
    self.rightPointList = keypoints[11:14]
    self.leftPointList = keypoints[14:]
    self.a = gl.GLScatterPlotItem(  # plot dots
        pos=keypoints[:11],
        color=pg.glColor((0, 255, 0)),
        size=15)
    self.window.addItem(self.a)
    self.right = gl.GLScatterPlotItem(  # plot dots
        pos=self.rightPointList,
        color=pg.glColor((255, 0, 0)),
        size=15)
    self.window.addItem(self.right)
    self.left = gl.GLScatterPlotItem(  # plot dots
        pos=self.leftPointList,
        color=pg.glColor((0, 0, 255)),
        size=15)
    self.window.addItem(self.left)
    for n, pts in enumerate(self.connections):
        self.lines[n] = gl.GLLinePlotItem(  # one line per connection
            pos=np.array([keypoints[p] for p in pts]),
            color=pg.glColor((0, 0, 255)),
            width=3,
            antialias=True)
        self.window.addItem(self.lines[n])  # add them to our window
formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    # parser.add_argument('--image', type=str, default='/Users/ildoonet/Downloads/me.jpg')
    parser.add_argument('--image', type=str, default='./images/apink2.jpg')
    # parser.add_argument('--model', type=str, default='mobilenet_320x240', help='cmu / mobilenet_320x240')
    parser.add_argument('--model', type=str, default='mobilenet_thin_432x368',
                        help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
    parser.add_argument('--scales', type=str, default='[None]',
                        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)  # original evaluated the undefined name `scales`

    w, h = model_wh(args.model)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=[None])
    # humans = e.inference(image, scales=[None, (0.7, 0.5, 1.8)])
    # humans = e.inference(image, scales=[(1.2, 0.05)])
    # humans = e.inference(image, scales=[(0.2, 0.2, 1.4)])
    elapsed = time.time() - t
    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    image = cv2.imread(args.image, cv2.IMREAD_COLOR)
import argparse
import time

import cv2
import numpy as np

from estimator import TfPoseEstimator
from networks import get_graph_path, model_wh

fps_time = 0

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation Video')
    parser.add_argument('--video', type=str, default='')
    args = parser.parse_args()

    model = 'mobilenet_thin'
    w, h = model_wh('432x368')
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    cap = cv2.VideoCapture(args.video)

    if cap.isOpened() is False:
        print("Error opening video stream or file")
    while cap.isOpened():
        ret_val, image = cap.read()
        if not ret_val:
            break  # end of stream; read() returns (False, None)

        humans = e.inference(image)
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

        cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.imshow('tf-pose-estimation result', image)
        fps_time = time.time()
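# Invocation sketch for the video runner above (the script name is an assumption):
#   python run_video.py --video ./videos/sample.mp4
# The ret_val guard added in the loop matters because cap.read() returns
# (False, None) once the file is exhausted.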
parser.add_argument('--resolution', type=str, default='432x368',
                    help='network input resolution. default=432x368')
parser.add_argument('--model', type=str, default='mobilenet_thin',
                    help='cmu / mobilenet_thin')
parser.add_argument('--show-process', type=bool, default=False,
                    help='for debug purpose, if enabled, speed for inference is dropped.')
args = parser.parse_args()

logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
w, h = model_wh(args.resolution)
e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
logger.debug('cam read+')
cam = cv2.VideoCapture(args.camera)
ret_val, image = cam.read()
logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))

while True:
    ret_val, image = cam.read()
    logger.debug('image preprocess+')
    if args.zoom < 1.0:
        canvas = np.zeros_like(image)
        img_scaled = cv2.resize(image, None, fx=args.zoom, fy=args.zoom,
                                interpolation=cv2.INTER_LINEAR)  # call completed from the identical block elsewhere in this collection
def post(self):
    global fallen
    if self.request.headers['Content-Type'] == 'imagebin':
        print('Received image')
        image = self.request.body
        fh = open('static/image1.jpg', 'wb')
        fh.write(image)
        fh.close()
        # fh = open('static/image1.jpg', 'ab')
        # fh.write(bytes([0xD9]))
        # fh.close()
        print('0')
        # image = cv2.imread('static/image1.jpg')
        print('1')
        print('2')
        parser = argparse.ArgumentParser(description='tf-pose-estimation run')
        parser.add_argument('--resolution', type=str, default='432x368',
                            help='network input resolution. default=432x368')
        parser.add_argument('--model', type=str, default='mobilenet_thin',
                            help='cmu / mobilenet_thin')
        parser.add_argument('--scales', type=str, default='[None]',
                            help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
        args = parser.parse_args()
        scales = ast.literal_eval(args.scales)

        w, h = model_wh(args.resolution)
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

        # estimate human poses from a single image !
        image = common.read_imgfile('static/image1.jpg', None, None)
        # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
        t = time.time()
        humans = e.inference(image, scales=scales)
        elapsed = time.time() - t
        logger.info('inference image: image3.jpg in %.4f seconds.' % elapsed)

        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

        fallen = 'OKAY'
        for i, h in enumerate(humans):
            # COCO part indices used by the heuristic below
            TORSO_INDEX = 1
            LEFT_HIP_INDEX = 8
            RIGHT_HIP_INDEX = 11
            RIGHT_HAND_INDEX = 4
            RIGHT_FOOT_INDEX = 12
            LEFT_FOOT_INDEX = 9

            parts = h.body_parts
            if TORSO_INDEX in parts and LEFT_HIP_INDEX in parts and RIGHT_HIP_INDEX in parts:
                torso = parts[TORSO_INDEX]
                left_hip = parts[LEFT_HIP_INDEX]
                right_hip = parts[RIGHT_HIP_INDEX]

                tx, ty = torso.x, torso.y
                lhx, lhy = left_hip.x, left_hip.y
                rhx, rhy = right_hip.x, right_hip.y

                if tx < lhx or tx > rhx:
                    fallen = 'FALL'
                if abs(lhy - ty) < 0.1 or abs(rhy - ty) < 0.1:
                    fallen = 'FALL'
                if RIGHT_HAND_INDEX in parts and RIGHT_FOOT_INDEX in parts and LEFT_FOOT_INDEX in parts:
                    right_foot = parts[RIGHT_FOOT_INDEX]
                    left_foot = parts[LEFT_FOOT_INDEX]
                    right_hand = parts[RIGHT_HAND_INDEX]
                    righax, righay = right_hand.x, right_hand.y
                    rfx, rfy = right_foot.x, right_foot.y
                    lfx, lfy = left_foot.x, left_foot.y

                    if abs(lfy - lhy) < 0.1 or abs(rhy - ty) < 0.1:
                        fallen = 'FALL'
                    if (lfy - lhy) > (lhy - ty):
                        fallen = 'FALL'
                    print(lfy - lhy, lhy - ty)
                    lowermag = math.sqrt((lfy - lhy) * (lfy - lhy) + (lhx - lfx) * (lhx - lfx))
                    uppermag = math.sqrt((lhy - ty) * (lhy - ty) + (tx - lhx) * (tx - lhx))
                    if lowermag > uppermag:
                        fallen = 'FALL'

        # cv2.putText(image,
        #             f"Fallen: False",
        #             (60, 60), cv2.FONT_HERSHEY_SIMPLEX, 2,
        #             (0, 255, 0), 5)
        cv2.putText(image, f"Fallen: {fallen}",
                    (60, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 5)
        cv2.imwrite('static/image3.jpg', image)
        for client in clients:
            update_clients(client)
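# The fall heuristic above is inlined in the request handler; a minimal
# standalone sketch of the same torso/hip geometry (same COCO indices and
# thresholds; extracted here only for readability, not the author's API):
def looks_fallen(parts, TORSO_INDEX=1, LEFT_HIP_INDEX=8, RIGHT_HIP_INDEX=11):
    if not all(i in parts for i in (TORSO_INDEX, LEFT_HIP_INDEX, RIGHT_HIP_INDEX)):
        return False
    torso, lh, rh = parts[TORSO_INDEX], parts[LEFT_HIP_INDEX], parts[RIGHT_HIP_INDEX]
    # torso drifting outside the hip span, or either hip level with the torso
    if torso.x < lh.x or torso.x > rh.x:
        return True
    return abs(lh.y - torso.y) < 0.1 or abs(rh.y - torso.y) < 0.1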
def main():
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    parser.add_argument('--image', type=str, default='./images/p3.jpg')
    parser.add_argument('--resolution', type=str, default='432x368',
                        help='network input resolution. default=432x368')
    parser.add_argument('--model', type=str, default='mobilenet_thin',
                        help='cmu / mobilenet_thin')
    parser.add_argument('--scales', type=str, default='[None]',
                        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    parser.add_argument('--stick_only', action='store_true',
                        help='save output without other analysis')
    parser.add_argument('--plot_3d', action='store_true', help='save 3d poses')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=scales)
    elapsed = time.time() - t
    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    # cv2.imshow('tf-pose-estimation result', image)
    # cv2.waitKey()

    import matplotlib.pyplot as plt
    fig = plt.figure()
    a = fig.add_subplot(2, 2, 1)
    a.set_title('Result')
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    if args.stick_only:
        cv2.imwrite('output/test.png', image)
        logger.info('image saved: {}'.format('output/test.png'))
        import sys
        sys.exit(0)
    plt.imshow(rgb_image)

    bgimg = cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_BGR2RGB)
    bgimg = cv2.resize(bgimg, (e.heatMat.shape[1], e.heatMat.shape[0]),
                       interpolation=cv2.INTER_AREA)

    # show network output
    a = fig.add_subplot(2, 2, 2)
    plt.imshow(bgimg, alpha=0.5)
    tmp = np.amax(e.heatMat[:, :, :-1], axis=2)
    plt.imshow(tmp, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()

    tmp2 = e.pafMat.transpose((2, 0, 1))
    tmp2_odd = np.amax(np.absolute(tmp2[::2, :, :]), axis=0)
    tmp2_even = np.amax(np.absolute(tmp2[1::2, :, :]), axis=0)

    a = fig.add_subplot(2, 2, 3)
    a.set_title('Vectormap-x')
    # plt.imshow(CocoPose.get_bgimg(inp, target_size=(vectmap.shape[1], vectmap.shape[0])), alpha=0.5)
    plt.imshow(tmp2_odd, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()

    a = fig.add_subplot(2, 2, 4)
    a.set_title('Vectormap-y')
    # plt.imshow(CocoPose.get_bgimg(inp, target_size=(vectmap.shape[1], vectmap.shape[0])), alpha=0.5)
    plt.imshow(tmp2_even, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()

    if not os.path.exists('output'):
        os.mkdir('output')
    plt.savefig('output/test.png')
    logger.info('image saved: {}'.format('output/test.png'))

    if not args.plot_3d:
        import sys
        sys.exit(0)

    # for plotting in 3d
    logger.info('3d lifting initialization.')
    poseLifting = Prob3dPose('./src/lifting/models/prob_model_params.mat')

    image_h, image_w = image.shape[:2]
    standard_w = 640
    standard_h = 480

    pose_2d_mpiis = []
    visibilities = []
    for human in humans:
        pose_2d_mpii, visibility = common.MPIIPart.from_coco(human)
        pose_2d_mpiis.append([(int(x * standard_w + 0.5), int(y * standard_h + 0.5))
                              for x, y in pose_2d_mpii])
        visibilities.append(visibility)

    pose_2d_mpiis = np.array(pose_2d_mpiis)
    visibilities = np.array(visibilities)
    transformed_pose2d, weights = poseLifting.transform_joints(pose_2d_mpiis, visibilities)
    pose_3d = poseLifting.compute_3d(transformed_pose2d, weights)

    for i, single_3d in enumerate(pose_3d):
        plot_pose(single_3d)
    plt.draw()
    plt.savefig('output/pose_3d_test.png')
    logger.info('3d plot saved: {}'.format('output/pose_3d_test.png'))
def __init__(self):
    # set up the view window
    self.app = QtGui.QApplication(sys.argv)
    self.window = gl.GLViewWidget()
    self.window.setFixedSize(1920, 1080)
    # self.window.showMaximized()
    self.window.setWindowTitle('Terrain')
    self.window.setGeometry(0, 0, 1920, 1080)
    self.window.setCameraPosition(distance=30, elevation=12)
    self.window.show()
    self.angle = ""

    gx = gl.GLGridItem()
    gy = gl.GLGridItem()
    gz = gl.GLGridItem()
    gx.rotate(90, 0, 1, 0)
    gy.rotate(90, 1, 0, 0)
    gx.translate(-10, 0, 0)
    gy.translate(0, -10, 0)  # translate all of the grids
    gz.translate(0, 0, -10)  # so they're pushed out to the background
    self.window.addItem(gx)
    self.window.addItem(gy)  # add them all to the window
    self.window.addItem(gz)

    model = "mobilenet_thin_432x368"
    camera = 0
    self.lines = {}
    self.connections = [  # lines connecting the key points
        [0, 1], [1, 2], [2, 3], [0, 4],
        [4, 5], [5, 6], [0, 7], [7, 8],
        [8, 9], [9, 10], [8, 11], [11, 12],
        [12, 13], [8, 14], [14, 15], [15, 16]
    ]  # 5.35

    w, h = model_wh(model)
    # build our own estimator object, mirroring what run.py does with args.model
    print("model graph path: ", get_graph_path(model))
    self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
    print("self.e : ", self.e)
    self.cam = cv2.VideoCapture(camera)
    ret_val, image = self.cam.read()
    # 2d-to-3d pose lifting model
    self.poseLifting = Prob3dPose('./src/lifting/models/prob_model_params.mat')
    keypoints = self.mesh(image)

    self.rightPointList = keypoints[11:14]
    self.leftPointList = keypoints[14:]
    self.a = gl.GLScatterPlotItem(  # plot dots
        pos=keypoints[:11],
        color=pg.glColor((0, 255, 0)),
        size=15)
    self.window.addItem(self.a)
    self.right = gl.GLScatterPlotItem(  # plot dots
        pos=self.rightPointList,
        color=pg.glColor((255, 0, 0)),
        size=15)
    self.window.addItem(self.right)
    self.left = gl.GLScatterPlotItem(  # plot dots
        pos=self.leftPointList,
        color=pg.glColor((0, 0, 255)),
        size=15)
    self.window.addItem(self.left)
    for n, pts in enumerate(self.connections):
        self.lines[n] = gl.GLLinePlotItem(  # one line per connection
            pos=np.array([keypoints[p] for p in pts]),
            color=pg.glColor((0, 0, 255)),
            width=3,
            antialias=True)
        self.window.addItem(self.lines[n])  # add them to our window
if __name__ == '__main__':
    rospy.loginfo('initialization+')
    rospy.init_node('TfPoseEstimatorROS', anonymous=True)

    # parameters
    image_topic = rospy.get_param('~camera', '')
    model = rospy.get_param('~model', 'cmu_640x480')

    if not image_topic:
        rospy.logerr('Parameter \'camera\' is not provided.')
        sys.exit(-1)

    try:
        w, h = model_wh(model)
        graph_path = get_graph_path(model)

        rospack = rospkg.RosPack()
        graph_path = os.path.join(rospack.get_path('tfpose_ros'), graph_path)
    except Exception as e:
        rospy.logerr('invalid model: %s, e=%s' % (model, e))
        sys.exit(-1)

    pose_estimator = TfPoseEstimator(graph_path, target_size=(w, h))
    cv_bridge = CvBridge()

    rospy.Subscriber(image_topic, Image, callback_image, queue_size=1)
    pub_pose = rospy.Publisher('~pose', Persons, queue_size=1)

    rospy.loginfo('start+')
def pose_comparison():
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    parser.add_argument('--image', type=str, default='./images/p1.jpg')
    parser.add_argument('--resolution', type=str, default='432x368',
                        help='network input resolution. default=432x368')
    parser.add_argument('--model', type=str, default='mobilenet_thin',
                        help='cmu / mobilenet_thin')
    parser.add_argument('--scales', type=str, default='[None]',
                        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    ref_image = common.read_imgfile(REF_POSE_PATH, None, None)
    ref_image = cv2.resize(ref_image, (640, 480))
    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    image = cv2.resize(image, (640, 480))
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    # t = time.time()
    ref_humans = e.inference(ref_image, scales=scales)
    humans = e.inference(image, scales=scales)
    # elapsed = time.time() - t
    # logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    _, ref_centers = TfPoseEstimator.draw_humans_mine(ref_image, ref_humans, imgcopy=False)
    _, centers = TfPoseEstimator.draw_humans_mine(image, humans, imgcopy=False)
    ref_centers = list(ref_centers.values())
    centers = list(centers.values())
    ref_centers = list(sum(ref_centers, ()))
    centers = list(sum(centers, ()))
    ref_centers = np.array(ref_centers, dtype=int)
    centers = np.array(centers, dtype=int)

    shapes = []
    shapes.append(ref_centers)
    shapes.append(centers)

    # create canvas on which the triangles will be visualized
    canvas = np.full([640, 480], 255).astype('uint8')
    # convert to 3 channel RGB for fun colors!
    canvas = cv2.cvtColor(canvas, cv2.COLOR_GRAY2RGB)
    # im = draw_shapes(canvas, shapes)

    x, y = get_translation(shapes[0])
    new_shapes = []
    new_shapes.append(shapes[0])
    for i in range(1, len(shapes)):
        new_shape = procrustes_analysis(shapes[0], shapes[i])
        new_shape[::2] = new_shape[::2] + x
        new_shape[1::2] = new_shape[1::2] + y
        new_shape = new_shape.astype(int)
        new_shapes.append(new_shape)

    pts_list = []
    for lst in new_shapes:
        temp = lst.reshape(-1, 1, 2)
        pts = list(map(tuple, temp))
        pts_list.append(pts)

    for i in range(18):
        cv2.circle(ref_image, tuple(pts_list[0][i][0]), 3, (255, 0, 0),
                   thickness=3, lineType=8, shift=0)
        cv2.circle(ref_image, tuple(pts_list[1][i][0]), 3, (255, 255, 0),
                   thickness=3, lineType=8, shift=0)
    cv2.imshow('tf-pose-estimation result', ref_image)
    cv2.waitKey(0)

    variations = []
    for i in range(len(new_shapes)):
        dist = procrustes_distance(shapes[0], new_shapes[i])
        variations.append(dist)
    print(variations)
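# get_translation / procrustes_analysis / procrustes_distance are used above but
# defined elsewhere. A minimal sketch of conventional implementations, operating
# on flattened [x0, y0, x1, y1, ...] arrays as the call sites imply; this is an
# assumption, not the author's code:
import numpy as np

def get_translation(shape):
    # mean x and mean y of a flattened shape
    return np.mean(shape[::2]), np.mean(shape[1::2])

def procrustes_analysis(ref_shape, shape):
    # translate both shapes to the origin, normalize scale,
    # then rotate `shape` onto `ref_shape` (orthogonal Procrustes)
    ref = ref_shape.astype(float).copy()
    s = shape.astype(float).copy()
    rx, ry = get_translation(ref)
    sx, sy = get_translation(s)
    ref[::2] -= rx
    ref[1::2] -= ry
    s[::2] -= sx
    s[1::2] -= sy
    ref_pts = ref.reshape(-1, 2)
    s_pts = s.reshape(-1, 2)
    s_pts *= np.linalg.norm(ref_pts) / np.linalg.norm(s_pts)
    u, _, vt = np.linalg.svd(s_pts.T @ ref_pts)
    return (s_pts @ (u @ vt)).reshape(-1)

def procrustes_distance(ref_shape, shape):
    # mean point-to-point distance between two aligned flattened shapes
    d = (ref_shape.astype(float) - shape.astype(float)).reshape(-1, 2)
    return float(np.mean(np.linalg.norm(d, axis=1)))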