def __init__(self, arguments):
    self.arguments = arguments
    # Frame window dim
    self.winWidth = 640
    self.winHeight = 480
    actionPredictor_params.__init__(self)
    self.fps_time = 0
    #self.step = 15
    self.mode = {
        'Pose Estimation': 'estimation',
        'Tracking': 'tracking',
        'Action Recognition': 'recognition'
    }

    w, h = model_wh(self.arguments.resize)
    if w > 0 and h > 0:
        self.estimator = TfPoseEstimator(get_graph_path(self.arguments.model),
                                         target_size=(w, h))
    else:
        self.estimator = TfPoseEstimator(get_graph_path(self.arguments.model),
                                         target_size=(432, 368))
    self.cam = cv2.VideoCapture(self.arguments.camera)

    # Tracker based on Sort
    self.sort_max_age = 20
    self.sort_min_hit = 3
    self.tracker = Sort(self.sort_max_age, self.sort_min_hit)
def _perform_tracking_(self):
    self.humans = self.estimator.inference(self.image)
    self.image, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
        self.image, self.humans, imgcopy=False)
    height = self.image.shape[0]
    width = self.image.shape[1]

    if bboxes:
        result = np.array(bboxes)
        det = result[:, 0:5]
        det[:, 0] = det[:, 0] * width
        det[:, 1] = det[:, 1] * height
        det[:, 2] = det[:, 2] * width
        det[:, 3] = det[:, 3] * height
        trackers = self.tracker.update(det)

        for d in trackers:
            xmin = int(d[0])
            ymin = int(d[1])
            xmax = int(d[2])
            ymax = int(d[3])
            label = int(d[4])
            cv2.rectangle(
                self.image, (xmin, ymin), (xmax, ymax),
                (int(self.c[label % 32, 0]), int(self.c[label % 32, 1]),
                 int(self.c[label % 32, 2])), 4)
def load_model():
    global poseEstimator
    global sk_cnn_actionPredicter
    poseEstimator = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                                    target_size=(432, 368))
    sk_cnn_actionPredicter = sk_cnn.SkelCNN()
    sk_cnn_actionPredicter.load_weights(
        './action_pre_sk_cnn/models/skel_cnn_model/sk-cnn.hdf5')
def load_model():
    global poseEstimator
    poseEstimator = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                                    target_size=(432, 368))
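# A minimal launch sketch, assuming a PyQt window class (the name
# Ui_MainWindow is a placeholder) exists elsewhere in this project. The key
# point is that load_model() must run once, before any timer starts calling
# show_camera(), so the global poseEstimator (and, in the sk-cnn variant, the
# action classifier) is initialized before the first inference call.
if __name__ == '__main__':
    import sys
    from PyQt5 import QtWidgets

    load_model()
    app = QtWidgets.QApplication(sys.argv)
    ui = Ui_MainWindow()  # placeholder window class name
    ui.show()
    sys.exit(app.exec_())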
def show_camera(self):
    start = time.time()
    ret, frame = self.cap.read()
    # if not ret:
    #     print('this should save the video')
    #     self.timer_camera.stop()
    #     self.out.release()
    if ret:
        show_s = cv2.resize(frame, (settings.winWidth, settings.winHeight))
        show = cv2.cvtColor(show_s, cv2.COLOR_BGR2RGB)

        if self.__flag_mode == 1:
            self.infoBox.setText(u'Current mode: human pose estimation')
            humans = poseEstimator.inference(show)
            show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)

        elif self.__flag_mode == 2:
            self.infoBox.setText(u'Current mode: multi-person tracking')
            humans = poseEstimator.inference(show)
            show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                show, humans, imgcopy=False)
            show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)
            height = show.shape[0]
            width = show.shape[1]
            if bboxes:
                result = np.array(bboxes)
                det = result[:, 0:5]
                det[:, 0] = det[:, 0] * width
                det[:, 1] = det[:, 1] * height
                det[:, 2] = det[:, 2] * width
                det[:, 3] = det[:, 3] * height
                trackers = self.tracker.update(det)
                for d in trackers:
                    xmin = int(d[0])
                    ymin = int(d[1])
                    xmax = int(d[2])
                    ymax = int(d[3])
                    label = int(d[4])
                    cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                  (int(settings.c[label % 32, 0]),
                                   int(settings.c[label % 32, 1]),
                                   int(settings.c[label % 32, 2])), 4)

        # This is the part to get the keypoints
        elif self.__flag_mode == 3:
            self.infoBox.setText(u'Current mode: human action recognition')
            humans = poseEstimator.inference(show)
            ori = np.copy(show)
            show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                show, humans, imgcopy=False)
            show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)
            height = show.shape[0]
            width = show.shape[1]
            if bboxes:
                result = np.array(bboxes)
                det = result[:, 0:5]
                det[:, 0] = det[:, 0] * width
                det[:, 1] = det[:, 1] * height
                det[:, 2] = det[:, 2] * width
                det[:, 3] = det[:, 3] * height
                trackers = self.tracker.update(det)
                self.current = [i[-1] for i in trackers]

                if len(self.previous) > 0:
                    for item in self.previous:
                        if item not in self.current and item in self.data:
                            del self.data[item]
                        if item not in self.current and item in self.memory:
                            del self.memory[item]
                self.previous = self.current

                for d in trackers:
                    xmin = int(d[0])
                    ymin = int(d[1])
                    xmax = int(d[2])
                    ymax = int(d[3])
                    label = int(d[4])
                    try:
                        j = np.argmin(
                            np.array([abs(i - (xmax + xmin) / 2.) for i in xcenter]))
                    except:
                        j = 0
                    if joint_filter(joints[j]):
                        joints[j] = joint_completion(joint_completion(joints[j]))
                        if label not in self.data:
                            self.data[label] = [joints[j]]
                            self.memory[label] = 0
                        else:
                            self.data[label].append(joints[j])

                        if len(self.data[label]) == settings.L:
                            pred = actionPredictor().move_status(self.data[label])
                            if pred == 0:
                                pred = self.memory[label]
                            else:
                                self.memory[label] = pred
                            self.data[label].pop(0)

                            location = self.data[label][-1][1]
                            if location[0] <= 30:
                                location = (51, location[1])
                            if location[1] <= 10:
                                location = (location[0], 31)

                            cv2.putText(show, settings.move_status[pred],
                                        (location[0] - 30, location[1] - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                        (0, 255, 0), 2)

                    cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                  (int(settings.c[label % 32, 0]),
                                   int(settings.c[label % 32, 1]),
                                   int(settings.c[label % 32, 2])), 4)

        self.out.write(show_s)  # Write out frame to video

        end = time.time()
        self.fps = 1. / (end - start)
        cv2.putText(show, 'FPS: %.2f' % self.fps, (30, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
        showImage = QtGui.QImage(show.data, show.shape[1], show.shape[0],
                                 QtGui.QImage.Format_RGB888)
        self.label_show_camera.setPixmap(QtGui.QPixmap.fromImage(showImage))
    else:
        print('this should save the video')
        self.timer_camera.stop()
        self.out.release()
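# Sketch (an assumption, not part of the original snippet) of the wiring that
# show_camera() above relies on: self.cap, self.out and self.timer_camera are
# typically created when the user starts the camera, with a QTimer driving
# show_camera() once per tick. Codec, FPS, file name and method name are
# placeholders; QtCore would need to be imported alongside QtGui.
def start_camera(self):
    self.cap = cv2.VideoCapture(0)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    self.out = cv2.VideoWriter('output.avi', fourcc, 20.0,
                               (settings.winWidth, settings.winHeight))
    self.timer_camera = QtCore.QTimer()
    self.timer_camera.timeout.connect(self.show_camera)
    self.timer_camera.start(10)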
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 3 20:41:35 2019

@author: ASUS
"""
import cv2
import numpy as np

import settings
from pose.estimator import TfPoseEstimator
from pose.networks import get_graph_path

# The estimator has to be constructed before use; leaving poseEstimator as
# None and calling inference() on it would raise an AttributeError.
poseEstimator = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                                target_size=(432, 368))

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    show = cv2.resize(frame, (settings.winWidth, settings.winHeight))
    show = cv2.cvtColor(show, cv2.COLOR_BGR2RGB)
    humans = poseEstimator.inference(show)
    print(humans)
    show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 3 20:41:35 2019

@author: ASUS
"""
import cv2
import numpy as np

import settings
from pose.estimator import TfPoseEstimator
from pose.networks import get_graph_path
from imutils.video import VideoStream

poseEstimator = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                                target_size=(432, 368))

cap = cv2.VideoCapture(0)
#cap = VideoStream(src='rtsp://*****:*****@192.168.51.162/PSIA/streaming/channels/102').start()
while True:
    ret, frame = cap.read()
    if ret:
        show = cv2.resize(frame, (settings.winWidth, settings.winHeight))
        humans = poseEstimator.inference(show)
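        # A minimal continuation sketch (not in the original snippet): draw the
        # detected skeletons, display them, and quit on 'q'. The window name is
        # arbitrary.
        show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)
        cv2.imshow('pose preview', show)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()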
def show_camera(self):
    start = time.time()
    ret, frame = self.cap.read()
    if ret:
        show = cv2.resize(frame, (settings.winWidth, settings.winHeight))
        show = cv2.cvtColor(show, cv2.COLOR_BGR2RGB)

        if self.__flag_mode == 1:
            self.infoBox.setText(u'Current mode: human pose estimation')
            humans = poseEstimator.inference(show)
            show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)

        elif self.__flag_mode == 2:
            self.infoBox.setText(u'Current mode: multi-person tracking')
            humans = poseEstimator.inference(show)
            show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                show, humans, imgcopy=False)
            height = show.shape[0]
            width = show.shape[1]
            if bboxes:
                result = np.array(bboxes)
                det = result[:, 0:5]
                det[:, 0] = det[:, 0] * width
                det[:, 1] = det[:, 1] * height
                det[:, 2] = det[:, 2] * width
                det[:, 3] = det[:, 3] * height
                trackers = self.tracker.update(det)
                for d in trackers:
                    xmin = int(d[0])
                    ymin = int(d[1])
                    xmax = int(d[2])
                    ymax = int(d[3])
                    label = int(d[4])
                    cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                  (int(settings.c[label % 32, 0]),
                                   int(settings.c[label % 32, 1]),
                                   int(settings.c[label % 32, 2])), 4)

        elif self.__flag_mode == 3:
            self.infoBox.setText(u'Current mode: human action recognition')
            humans = poseEstimator.inference(show)
            ori = np.copy(show)
            show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                show, humans, imgcopy=False)
            height = show.shape[0]
            width = show.shape[1]
            if bboxes:
                result = np.array(bboxes)
                det = result[:, 0:5]
                det[:, 0] = det[:, 0] * width
                det[:, 1] = det[:, 1] * height
                det[:, 2] = det[:, 2] * width
                det[:, 3] = det[:, 3] * height
                trackers = self.tracker.update(det)  # one tracker per detected box
                self.current = [i[-1] for i in trackers]

                if len(self.previous) > 0:
                    for item in self.previous:
                        if item not in self.current and item in self.data:
                            del self.data[item]
                        if item not in self.current and item in self.memory:
                            del self.memory[item]
                self.previous = self.current

                for d in trackers:
                    xmin = int(d[0])
                    ymin = int(d[1])
                    xmax = int(d[2])
                    ymax = int(d[3])
                    label = int(d[4])
                    try:
                        j = np.argmin(
                            np.array([abs(i - (xmax + xmin) / 2.) for i in xcenter]))
                    except:
                        j = 0
                    if joint_filter(joints[j]):
                        pred = 0
                        joints[j] = joint_completion(joint_completion(joints[j]))
                        if len(joints[j]) == 18:
                            if label not in self.data:
                                # self.data[label]: [{0: (xj1, yj1), 1: (xj2, yj2), ..., n: (xjn, yjn)}, {}, {}]
                                self.data[label] = [joints[j]]
                                self.memory[label] = 0
                            else:
                                self.data[label].append(joints[j])
                                #print('len(self.data[label]):{0}'.format(len(self.data[label])))

                            if len(self.data[label]) == settings.L:
                                # self.data[label]: [16, {18, key: (x, y)}]
                                for i in range(settings.L):
                                    keys = self.data[label][i].keys()
                                    keys = sorted(keys)
                                    for key in keys:
                                        self.one_frame.append(
                                            np.array(self.data[label][i][key]))
                                    if label not in self.pred_data:
                                        self.pred_data[label] = [np.array(self.one_frame)]
                                    else:
                                        # [[[x0 y0], .. [x17 y17]], .. []]
                                        self.pred_data[label].append(np.array(self.one_frame))
                                    self.one_frame = []
                                self.pred_data[label] = np.array(self.pred_data[label])
                                pred = sk_cnn_actionPredicter.predict(self.pred_data[label])
                                #print('Predicted action index: {0}'.format(pred))
                                self.pred_data[label] = []
                                self.data[label].pop(0)

                            location = self.data[label][-1][1]
                            if location[0] <= 30:
                                location = (51, location[1])
                            if location[1] <= 10:
                                location = (location[0], 31)
                            #print('Predicted action class: {0}'.format(settings.move_status[pred]))
                            cv2.putText(show, settings.move_status[pred],
                                        (location[0] - 30, location[1] - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                        (0, 255, 0), 2)

                    cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                  (int(settings.c[label % 32, 0]),
                                   int(settings.c[label % 32, 1]),
                                   int(settings.c[label % 32, 2])), 4)

        end = time.time()
        self.fps = 1. / (end - start)
        cv2.putText(show, 'FPS: %.2f' % self.fps, (30, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
        showImage = QtGui.QImage(show.data, show.shape[1], show.shape[0],
                                 QtGui.QImage.Format_RGB888)
        self.label_show_camera.setPixmap(QtGui.QPixmap.fromImage(showImage))
def show_camera(self, frame):
    start = time.time()
    # ret, frame = self.cap.read()
    if frame is not None:
        show = cv2.resize(frame, (settings.winWidth, settings.winHeight))
        show = cv2.cvtColor(show, cv2.COLOR_BGR2RGB)

        if self.__flag_mode == 1:
            humans = poseEstimator.inference(show)
            show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)

        elif self.__flag_mode == 2:
            humans = poseEstimator.inference(show)
            show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                show, humans, imgcopy=False)
            height = show.shape[0]
            width = show.shape[1]
            if bboxes:
                result = np.array(bboxes)
                det = result[:, 0:5]
                det[:, 0] = det[:, 0] * width
                det[:, 1] = det[:, 1] * height
                det[:, 2] = det[:, 2] * width
                det[:, 3] = det[:, 3] * height
                trackers = self.tracker.update(det)
                for d in trackers:
                    xmin = int(d[0])
                    ymin = int(d[1])
                    xmax = int(d[2])
                    ymax = int(d[3])
                    label = int(d[4])
                    cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                  (int(settings.c[label % 32, 0]),
                                   int(settings.c[label % 32, 1]),
                                   int(settings.c[label % 32, 2])), 4)

        # This is the part to get the keypoints
        elif self.__flag_mode == 3:
            humans = poseEstimator.inference(show)
            ori = np.copy(show)
            show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                show, humans, imgcopy=False)
            height = show.shape[0]
            width = show.shape[1]
            if bboxes:
                result = np.array(bboxes)
                det = result[:, 0:5]
                det[:, 0] = det[:, 0] * width
                det[:, 1] = det[:, 1] * height
                det[:, 2] = det[:, 2] * width
                det[:, 3] = det[:, 3] * height
                trackers = self.tracker.update(det)
                self.current = [i[-1] for i in trackers]

                if len(self.previous) > 0:
                    for item in self.previous:
                        if item not in self.current and item in self.data:
                            del self.data[item]
                        if item not in self.current and item in self.memory:
                            del self.memory[item]
                self.previous = self.current

                for d in trackers:
                    xmin = int(d[0])
                    ymin = int(d[1])
                    xmax = int(d[2])
                    ymax = int(d[3])
                    label = int(d[4])
                    try:
                        j = np.argmin(
                            np.array([abs(i - (xmax + xmin) / 2.) for i in xcenter]))
                    except:
                        j = 0
                    if joint_filter(joints[j]):
                        joints[j] = joint_completion(joint_completion(joints[j]))
                        if label not in self.data:
                            self.data[label] = [joints[j]]
                            self.memory[label] = 0
                        else:
                            self.data[label].append(joints[j])

                        if len(self.data[label]) == settings.L:
                            pred = actionPredictor().move_status(self.data[label])
                            if pred == 0:
                                pred = self.memory[label]
                            else:
                                self.memory[label] = pred
                            self.data[label].pop(0)

                            location = self.data[label][-1][1]
                            if location[0] <= 30:
                                location = (51, location[1])
                            if location[1] <= 10:
                                location = (location[0], 31)

                            cv2.putText(show, settings.move_status[pred],
                                        (location[0] - 30, location[1] - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                        (0, 255, 0), 2)

                    cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                  (int(settings.c[label % 32, 0]),
                                   int(settings.c[label % 32, 1]),
                                   int(settings.c[label % 32, 2])), 4)

        cv2.imshow('window', cv2.resize(show, (432, 368)))
        self.out.write(show)  # Write out frame to video
        end = time.time()
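# Sketch (an assumption) of how this frame-based variant might be driven,
# since it receives the frame as an argument instead of reading self.cap
# itself: an external capture loop grabs frames and hands each one to
# show_camera(). The method name run() is a placeholder.
def run(self):
    cap = cv2.VideoCapture(0)
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        self.show_camera(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    self.out.release()
    cv2.destroyAllWindows()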
def _perform_recognition_(self):
    self.predictor = actionPredictor()
    self.humans = self.estimator.inference(self.image)
    self.image, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
        self.image, self.humans, imgcopy=False)
    height = self.image.shape[0]
    width = self.image.shape[1]

    if bboxes:
        result = np.array(bboxes)
        det = result[:, 0:5]
        det[:, 0] = det[:, 0] * width
        det[:, 1] = det[:, 1] * height
        det[:, 2] = det[:, 2] * width
        det[:, 3] = det[:, 3] * height
        trackers = self.tracker.update(det)
        self.current = [i[-1] for i in trackers]

        if len(self.previous) > 0:
            for item in self.previous:
                if item not in self.current and item in self.data:
                    del self.data[item]
                if item not in self.current and item in self.memory:
                    del self.memory[item]
        self.previous = self.current

        for d in trackers:
            xmin = int(d[0])
            ymin = int(d[1])
            xmax = int(d[2])
            ymax = int(d[3])
            label = int(d[4])
            #logger.debug('label is: %d' % (label))

            # Locate the current person object in current frame
            # Iterated thru xcenter for finding minimum distance between object center coord
            try:
                j = np.argmin(
                    np.array([abs(i - (xmax + xmin) / 2.) for i in xcenter]))
            except:
                j = 0

            # Check if major skeleton points are existing
            if self._joint_filter_(joints[j]):
                joints[j] = self._joint_complete_(self._joint_complete_(joints[j]))

                if label not in self.data:
                    #logger.debug('label is: %d' % (label))
                    self.data[label] = [joints[j]]
                    self.memory[label] = 0
                else:
                    self.data[label].append(joints[j])

                if len(self.data[label]) == self.step:
                    pred = self.predictor.move_status(self.data[label])
                    #logger.debug(len(self.data[label]))
                    if pred == 0:
                        pred = self.memory[label]
                    else:
                        self.memory[label] = pred
                    self.data[label].pop(0)

                    location = self.data[label][-1][1]
                    #location = functools.reduce(lambda x, y: x + y, self.data[label][:][1]) / len(self.data[label][:][1])
                    #location = sum(self.data[label][:][1]) / float(len(self.data[label][:][1]))
                    if location[0] <= 30:
                        location = (51, location[1])
                    if location[1] <= 10:
                        location = (location[0], 31)

                    cv2.putText(self.image, self.move_status[pred],
                                (location[0] - 30, location[1] - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                (255, 255, 255), 2)

            cv2.rectangle(
                self.image, (xmin, ymin), (xmax, ymax),
                (int(self.c[label % 32, 0]), int(self.c[label % 32, 1]),
                 int(self.c[label % 32, 2])), 4)
def _perform_estimation_(self):
    self.humans = self.estimator.inference(self.image)
    self.image = TfPoseEstimator.draw_humans(self.image, self.humans,
                                             imgcopy=False)
class actions(object):
    def __init__(self, arguments):
        self.arguments = arguments
        # Frame window dim
        self.winWidth = 640
        self.winHeight = 480
        actionPredictor_params.__init__(self)
        self.fps_time = 0
        #self.step = 15
        self.mode = {
            'Pose Estimation': 'estimation',
            'Tracking': 'tracking',
            'Action Recognition': 'recognition'
        }

        w, h = model_wh(self.arguments.resize)
        if w > 0 and h > 0:
            self.estimator = TfPoseEstimator(get_graph_path(self.arguments.model),
                                             target_size=(w, h))
        else:
            self.estimator = TfPoseEstimator(get_graph_path(self.arguments.model),
                                             target_size=(432, 368))
        self.cam = cv2.VideoCapture(self.arguments.camera)

        # Tracker based on Sort
        self.sort_max_age = 20
        self.sort_min_hit = 3
        self.tracker = Sort(self.sort_max_age, self.sort_min_hit)

    def proceed(self):
        self._read_frame_()
        if self.ret_val and self.arguments.mode == self.mode['Pose Estimation']:
            self._perform_estimation_()
        elif self.ret_val and self.arguments.mode == self.mode['Tracking']:
            self._perform_tracking_()
        elif self.ret_val and self.arguments.mode == self.mode['Action Recognition']:
            self._perform_recognition_()
        else:
            sys.exit(
                'Abort...please choose correct action mode from "estimation" "tracking" "recognition"'
            )
        self._output_()

    def _joint_filter_(self, joint):
        if 1 not in joint:
            return False
        # Check exist of hip
        if 8 not in joint and 11 not in joint:
            return False
        # Check exist of knee
        if 9 not in joint and 12 not in joint:
            return False
        return True

    def _joint_complete_(self, joint):
        if 8 in joint and 11 not in joint:
            joint[11] = joint[8]
        elif 8 not in joint and 11 in joint:
            joint[8] = joint[11]
        if 9 in joint and 12 not in joint:
            joint[12] = joint[9]
        elif 9 not in joint and 12 in joint:
            joint[9] = joint[12]
        return joint

    def _perform_estimation_(self):
        self.humans = self.estimator.inference(self.image)
        self.image = TfPoseEstimator.draw_humans(self.image, self.humans,
                                                 imgcopy=False)

    def _perform_tracking_(self):
        self.humans = self.estimator.inference(self.image)
        self.image, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
            self.image, self.humans, imgcopy=False)
        height = self.image.shape[0]
        width = self.image.shape[1]

        if bboxes:
            result = np.array(bboxes)
            det = result[:, 0:5]
            det[:, 0] = det[:, 0] * width
            det[:, 1] = det[:, 1] * height
            det[:, 2] = det[:, 2] * width
            det[:, 3] = det[:, 3] * height
            trackers = self.tracker.update(det)

            for d in trackers:
                xmin = int(d[0])
                ymin = int(d[1])
                xmax = int(d[2])
                ymax = int(d[3])
                label = int(d[4])
                cv2.rectangle(
                    self.image, (xmin, ymin), (xmax, ymax),
                    (int(self.c[label % 32, 0]), int(self.c[label % 32, 1]),
                     int(self.c[label % 32, 2])), 4)

    def _perform_recognition_(self):
        self.predictor = actionPredictor()
        self.humans = self.estimator.inference(self.image)
        self.image, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
            self.image, self.humans, imgcopy=False)
        height = self.image.shape[0]
        width = self.image.shape[1]

        if bboxes:
            result = np.array(bboxes)
            det = result[:, 0:5]
            det[:, 0] = det[:, 0] * width
            det[:, 1] = det[:, 1] * height
            det[:, 2] = det[:, 2] * width
            det[:, 3] = det[:, 3] * height
            trackers = self.tracker.update(det)
            self.current = [i[-1] for i in trackers]

            if len(self.previous) > 0:
                for item in self.previous:
                    if item not in self.current and item in self.data:
                        del self.data[item]
                    if item not in self.current and item in self.memory:
                        del self.memory[item]
            self.previous = self.current

            for d in trackers:
                xmin = int(d[0])
                ymin = int(d[1])
                xmax = int(d[2])
                ymax = int(d[3])
                label = int(d[4])
                #logger.debug('label is: %d' % (label))

                # Locate the current person object in current frame
                # Iterated thru xcenter for finding minimum distance between object center coord
                try:
                    j = np.argmin(
                        np.array([abs(i - (xmax + xmin) / 2.) for i in xcenter]))
                except:
                    j = 0

                # Check if major skeleton points are existing
                if self._joint_filter_(joints[j]):
                    joints[j] = self._joint_complete_(self._joint_complete_(joints[j]))

                    if label not in self.data:
                        #logger.debug('label is: %d' % (label))
                        self.data[label] = [joints[j]]
                        self.memory[label] = 0
                    else:
                        self.data[label].append(joints[j])

                    if len(self.data[label]) == self.step:
                        pred = self.predictor.move_status(self.data[label])
                        #logger.debug(len(self.data[label]))
                        if pred == 0:
                            pred = self.memory[label]
                        else:
                            self.memory[label] = pred
                        self.data[label].pop(0)

                        location = self.data[label][-1][1]
                        #location = functools.reduce(lambda x, y: x + y, self.data[label][:][1]) / len(self.data[label][:][1])
                        #location = sum(self.data[label][:][1]) / float(len(self.data[label][:][1]))
                        if location[0] <= 30:
                            location = (51, location[1])
                        if location[1] <= 10:
                            location = (location[0], 31)

                        cv2.putText(self.image, self.move_status[pred],
                                    (location[0] - 30, location[1] - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                    (255, 255, 255), 2)

                cv2.rectangle(
                    self.image, (xmin, ymin), (xmax, ymax),
                    (int(self.c[label % 32, 0]), int(self.c[label % 32, 1]),
                     int(self.c[label % 32, 2])), 4)

    def _read_frame_(self):
        self.ret_val, self.image = self.cam.read()
        self.image = cv2.resize(self.image, (self.winWidth, self.winHeight))

    def _output_(self):
        # Calculate frame averaging step
        FPS = float(1.0 / (time.time() - self.fps_time))
        #logger.debug('FPS: %f' % FPS)
        #self.step = int(0.7 * FPS)
        #logger.debug('step: %d' % self.step)
        cv2.putText(self.image,
                    "FPS: %f" % (1.0 / (time.time() - self.fps_time)), (10, 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
        cv2.imshow('tf-pose-estimation result', self.image)
        self.fps_time = time.time()
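# A minimal driver sketch for the actions class above. The argument names
# (--camera, --model, --resize, --mode) are inferred from how self.arguments
# is used in __init__ and proceed(); treat them as assumptions rather than the
# project's actual CLI.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='tf-pose action demo')
    parser.add_argument('--camera', type=int, default=0)
    parser.add_argument('--model', type=str, default='mobilenet_thin')
    parser.add_argument('--resize', type=str, default='432x368')
    parser.add_argument('--mode', type=str, default='estimation',
                        help='estimation | tracking | recognition')
    args = parser.parse_args()

    app = actions(args)
    while True:
        app.proceed()  # read, process, and display one frame
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    app.cam.release()
    cv2.destroyAllWindows()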
def show_video(self):
    self.video_event = True
    cap = cv2.VideoCapture('22_360p.mp4')
    while cap.isOpened():
        start = time.time()
        ret, frame = cap.read()
        if not ret:
            break
        show = cv2.resize(frame, (settings.winWidth, settings.winHeight))
        show = cv2.cvtColor(show, cv2.COLOR_BGR2RGB)

        if self.__flag_mode == 1:
            self.infoBox.setText(u'Current mode: human pose estimation')
            humans = poseEstimator.inference(show)
            show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)

        elif self.__flag_mode == 2:
            self.infoBox.setText(u'Current mode: multi-person pose estimation')
            humans = poseEstimator.inference(show)
            show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                show, humans, imgcopy=False)
            height = show.shape[0]
            width = show.shape[1]
            if bboxes:
                result = np.array(bboxes)
                det = result[:, 0:5]
                det[:, 0] = det[:, 0] * width
                det[:, 1] = det[:, 1] * height
                det[:, 2] = det[:, 2] * width
                det[:, 3] = det[:, 3] * height
                trackers = self.tracker.update(det)
                for d in trackers:
                    xmin = int(d[0])
                    ymin = int(d[1])
                    xmax = int(d[2])
                    ymax = int(d[3])
                    label = int(d[4])
                    cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                  (int(settings.c[label % 32, 0]),
                                   int(settings.c[label % 32, 1]),
                                   int(settings.c[label % 32, 2])), 4)

        elif self.__flag_mode == 3:
            self.infoBox.setText(u'Current mode: human action recognition')
            humans = poseEstimator.inference(show)
            ori = np.copy(show)
            show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                show, humans, imgcopy=False)
            height = show.shape[0]
            width = show.shape[1]
            if bboxes:
                result = np.array(bboxes)
                det = result[:, 0:5]
                det[:, 0] = det[:, 0] * width
                det[:, 1] = det[:, 1] * height
                det[:, 2] = det[:, 2] * width
                det[:, 3] = det[:, 3] * height
                trackers = self.tracker.update(det)
                self.current = [i[-1] for i in trackers]

                if len(self.previous) > 0:
                    for item in self.previous:
                        if item not in self.current and item in self.data:
                            del self.data[item]
                        if item not in self.current and item in self.memory:
                            del self.memory[item]
                self.previous = self.current

                for d in trackers:
                    xmin = int(d[0])
                    ymin = int(d[1])
                    xmax = int(d[2])
                    ymax = int(d[3])
                    label = int(d[4])
                    try:
                        j = np.argmin(
                            np.array([abs(i - (xmax + xmin) / 2.) for i in xcenter]))
                    except:
                        j = 0
                    if joint_filter(joints[j]):
                        joints[j] = joint_completion(joint_completion(joints[j]))
                        if label not in self.data:
                            self.data[label] = [joints[j]]
                            self.memory[label] = 0
                        else:
                            self.data[label].append(joints[j])

                        if len(self.data[label]) == settings.L:
                            pred = actionPredictor().move_status(self.data[label])
                            if pred == 0:
                                pred = self.memory[label]
                            else:
                                self.memory[label] = pred
                            self.data[label].pop(0)

                            location = self.data[label][-1][1]
                            if location[0] <= 30:
                                location = (51, location[1])
                            if location[1] <= 10:
                                location = (location[0], 31)

                            cv2.putText(show, settings.move_status[pred],
                                        (location[0] - 30, location[1] - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                        (0, 255, 0), 2)

                    cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                  (int(settings.c[label % 32, 0]),
                                   int(settings.c[label % 32, 1]),
                                   int(settings.c[label % 32, 2])), 4)

        end = time.time()
        self.fps = 1. / (end - start)
        cv2.putText(show, 'FPS: %.2f' % self.fps, (30, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
        showImage = QtGui.QImage(show.data, show.shape[1], show.shape[0],
                                 QtGui.QImage.Format_RGB888)
        self.label_show_camera.setPixmap(QtGui.QPixmap.fromImage(showImage))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
def show_camera(self):
    start = time.time()
    ret, frame = self.cap.read()
    if ret:
        show = cv2.resize(frame, (settings.winWidth, settings.winHeight))
        show = cv2.cvtColor(show, cv2.COLOR_BGR2RGB)

        if self.__flag_mode == 1:
            self.infoBox.setText(u'Current mode: human pose estimation')
            humans = poseEstimator.inference(show)
            show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)

        elif self.__flag_mode == 2:
            self.infoBox.setText(u'Current mode: multi-person tracking')
            humans = poseEstimator.inference(show)
            show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                show, humans, imgcopy=False)
            height = show.shape[0]
            width = show.shape[1]
            if bboxes:
                result = np.array(bboxes)
                det = result[:, 0:5]
                det[:, 0] = det[:, 0] * width
                det[:, 1] = det[:, 1] * height
                det[:, 2] = det[:, 2] * width
                det[:, 3] = det[:, 3] * height
                trackers = self.tracker.update(det)
                for d in trackers:
                    xmin = int(d[0])
                    ymin = int(d[1])
                    xmax = int(d[2])
                    ymax = int(d[3])
                    label = int(d[4])
                    cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                  (int(settings.c[label % 32, 0]),
                                   int(settings.c[label % 32, 1]),
                                   int(settings.c[label % 32, 2])), 4)

        elif self.__flag_mode == 3:
            self.infoBox.setText(u'Current mode: human action recognition')
            humans = poseEstimator.inference(show)
            show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)
            ori = np.copy(show)
            show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                show, humans, imgcopy=False)
            height = show.shape[0]
            width = show.shape[1]
            if bboxes:
                result = np.array(bboxes)
                det = result[:, 0:5]
                det[:, 0] = det[:, 0] * width
                det[:, 1] = det[:, 1] * height
                det[:, 2] = det[:, 2] * width
                det[:, 3] = det[:, 3] * height
                trackers = self.tracker.update(det)
                self.current = [i[-1] for i in trackers]

                if len(self.previous) > 0:
                    for item in self.previous:
                        if item not in self.current and item in self.data:
                            del self.data[item]
                        if item not in self.current and item in self.memory:
                            del self.memory[item]
                self.previous = self.current

                for d in trackers:
                    xmin = int(d[0])
                    ymin = int(d[1])
                    xmax = int(d[2])
                    ymax = int(d[3])
                    label = int(d[4])
                    try:
                        j = np.argmin(
                            np.array([abs(i - (xmax + xmin) / 2.) for i in xcenter]))
                    except:
                        j = 0

                    alert = False
                    if joint_filter(joints[j]):
                        joints[j] = joint_completion(joint_completion(joints[j]))
                        if label not in self.data:
                            self.data[label] = [joints[j]]
                            self.memory[label] = 0
                        else:
                            self.data[label].append(joints[j])

                        if len(self.data[label]) == settings.L:
                            pred = actionPredictor().move_status(self.data[label])
                            if pred == 0:
                                pred = self.memory[label]
                            else:
                                self.memory[label] = pred
                            self.data[label].pop(0)

                            location = self.data[label][-1][1]
                            if location[0] <= 30:
                                location = (51, location[1])
                            if location[1] <= 10:
                                location = (location[0], 31)

                            ## Use simsun.ttc to write Chinese.
                            b, g, r, a = 0, 255, 0, 0
                            img_pil = Image.fromarray(show)
                            draw = ImageDraw.Draw(img_pil)
                            # print('pred', pred)
                            if pred == 8:
                                draw.text((location[0] - 30, location[1] - 10),
                                          settings.move_status[pred],
                                          font=self.font, fill=(255, 0, 0, a))
                                alert = True
                            else:
                                draw.text((location[0] - 30, location[1] - 10),
                                          settings.move_status[pred],
                                          font=self.font, fill=(b, g, r, a))
                            show = np.array(img_pil)
                            # cv2.putText(show, settings.move_status[pred],
                            #             (location[0] - 30, location[1] - 10),
                            #             cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                            #             (0, 255, 0), 2)

                    if alert:
                        colors = (255, 0, 0)
                    else:
                        colors = (int(settings.c[label % 32, 0]),
                                  int(settings.c[label % 32, 1]),
                                  int(settings.c[label % 32, 2]))
                    cv2.rectangle(show, (xmin, ymin), (xmax, ymax), colors, 4)
                    if alert:
                        cv2.imwrite('files/alerts/fall_' + str(int(time.time())) + '.png', show)

        end = time.time()
        self.fps = 1. / (end - start)
        cv2.putText(show, 'FPS: %.2f' % self.fps, (30, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
        showImage = QtGui.QImage(show.data, show.shape[1], show.shape[0],
                                 QtGui.QImage.Format_RGB888)
        self.label_show_camera.setPixmap(QtGui.QPixmap.fromImage(showImage))
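# Sketch (an assumption, not taken from the original file) of the font setup
# that self.font and the PIL drawing above rely on. It would typically be done
# once in __init__, since cv2.putText cannot render Chinese glyphs; the
# simsun.ttc path and size are placeholders.
def _load_label_font():
    from PIL import ImageFont
    return ImageFont.truetype('simsun.ttc', 20)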
fps_time = 0

if __name__ == '__main__':
    # Action class label and the length of the video clip to save
    action = 'stand'
    clip_length = 90

    root_path = '/home/dl1/datasets/actions/'
    if not os.path.exists(root_path + action):
        os.mkdir(root_path + action)
    if not os.path.exists(root_path + action + '/txt/'):
        os.mkdir(root_path + action + '/txt/')
        os.mkdir(root_path + action + '/imgs/')
    samples = len(os.listdir(root_path + action + '/txt/'))
    sample_count = 1000 if samples == 0 else 1000 + samples

    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(432, 368))
    cam = cv2.VideoCapture(0)
    ret_val, image = cam.read()

    joints = []
    joints_imgs = []
    while True:
        ret_val, image = cam.read()
        if ret_val:
            humans = e.inference(image)
            image, joint, *_, sk = TfPoseEstimator.get_humans(image, humans,
                                                              imgcopy=False)
            if joint:
                if len(joints) < clip_length:
                    joints.append(joint[0])
                    joints_imgs.append(sk)
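                # Possible continuation (an assumption, not in the original
                # snippet): once clip_length frames are buffered, dump the
                # joints to the txt/ folder and the skeleton images to imgs/,
                # then reset for the next sample. File naming is a placeholder.
                else:
                    txt_path = os.path.join(root_path, action, 'txt',
                                            '%s_%d.txt' % (action, sample_count))
                    with open(txt_path, 'w') as f:
                        for frame_joint in joints:
                            f.write(str(frame_joint) + '\n')
                    for idx, sk_img in enumerate(joints_imgs):
                        img_path = os.path.join(root_path, action, 'imgs',
                                                '%s_%d_%03d.png' % (action, sample_count, idx))
                        cv2.imwrite(img_path, sk_img)
                    joints, joints_imgs = [], []
                    sample_count += 1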