                    help='False to show skeleton only.')
parser.add_argument('--savefile', type=str, required=True,
                    help='Output file name to save')
args = parser.parse_args()

logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
w, h = model_wh(args.resolution)
e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
cap = cv2.VideoCapture("/home/educon/data2/video/" + args.video)

if cap.isOpened() is False:
    print("Error opening video stream or file")
while cap.isOpened():
    tmp = [[] for _ in range(18 * 3)]
    ret_val, image = cap.read()
    if ret_val:
        humans = e.inference(image, upsample_size=4.0)
        if humans:
            # save the data generated by tf-pose: 18 body parts, (x, y, score) each
            for i in range(0, 18):
                if i not in humans[0].body_parts.keys():
                    tmp[3 * i] = 0
                    tmp[3 * i + 1] = 0
                    tmp[3 * i + 2] = 0
                else:  # body part detected
                    tmp[3 * i] = humans[0].body_parts[i].x
                    tmp[3 * i + 1] = humans[0].body_parts[i].y
                    tmp[3 * i + 2] = humans[0].body_parts[i].score
            data.append(tmp)
        if not args.showBG:
boundary_frames = 0
try:
    i = 0
    cap = cv2.VideoCapture(0)
    current_clip = np.zeros((frame_per_clip, required_landmarks_count, 2))
    while True:
        ret, frame = cap.read()
        if not ret:
            cap.release()
            break
        # frame = convert_landscape_potrait(frame)
        start_time = time.time()
        # frame = cv2.resize(frame, (640, 480))
        frame_counter += 1
        # check whether the boundary frames have been skipped and this is the
        # desired frame (e.g. every 3rd or 5th frame)
        humans = e.inference(frame, upsample_size=4.0)
        max_pro_human = 0
        if len(humans) == 0:
            print("No Human Detected")
        else:
            if len(humans) > 1:
                # more than one person: keep only the one with the highest
                # average body-part score
                max_avg_score = 0
                for h in range(len(humans)):
                    human = humans[h]
                    avg = 0
                    for hb in human.body_parts:
                        avg += human.body_parts[hb].score
                    avg = avg / len(human.body_parts)
                    print("Score%d\t%f" % (h, avg))
                    if avg > max_avg_score:
# parser.add_argument('--image', type=str, default='/Users/ildoonet/Downloads/me.jpg')
parser.add_argument('--image', type=str, default='./images/apink2.jpg')
# parser.add_argument('--model', type=str, default='mobilenet_320x240', help='cmu / mobilenet_320x240')
parser.add_argument('--model', type=str, default='mobilenet_thin_432x368',
                    help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
parser.add_argument('--scales', type=str, default='[None]',
                    help='for multiple scales, e.g. [1.0, (1.1, 0.05)]')
args = parser.parse_args()
# scales = ast.literal_eval(scales)

w, h = model_wh(args.model)
e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

# estimate human poses from a single image!
image = common.read_imgfile(args.image, None, None)
# image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
t = time.time()
humans = e.inference(image, scales=[None])
# humans = e.inference(image, scales=[None, (0.7, 0.5, 1.8)])
# humans = e.inference(image, scales=[(1.2, 0.05)])
# humans = e.inference(image, scales=[(0.2, 0.2, 1.4)])
elapsed = time.time() - t
logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

image = cv2.imread(args.image, cv2.IMREAD_COLOR)
image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
cv2.imshow('tf-pose-estimation result', image)
cv2.waitKey()
# import sys
# sys.exit(0)
parser.add_argument('--resolution', type=str, default='432x368',
                    help='network input resolution. default=432x368')
parser.add_argument('--model', type=str, default='mobilenet_thin',
                    help='cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small')
parser.add_argument('--show-process', type=bool, default=False,
                    help='for debug purpose, if enabled, speed for inference is dropped.')
parser.add_argument('--showBG', type=bool, default=True,
                    help='False to show skeleton only.')
args = parser.parse_args()

logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
w, h = model_wh(args.resolution)
e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
cap = cv2.VideoCapture(args.video)
fps_time = 0

if cap.isOpened() is False:
    print("Error opening video stream or file")
while cap.isOpened():
    ret_val, image = cap.read()
    if not ret_val:  # end of stream
        break
    humans = e.inference(image)
    if not args.showBG:
        image = np.zeros(image.shape)
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.imshow('tf-pose-estimation result', image)
    fps_time = time.time()
    if cv2.waitKey(1) == 27:
        break
cv2.destroyAllWindows()
logger.debug('finished+')
            count = count + 1
        else:
            pose_reinit = True
    else:
        pose_reinit = True

    if frames_skiped >= 5:
        pose_reinit = True
    prevgray = gray_opt
    ch = 0xFF & cv.waitKey(1)
    if ch == 27:
        break
    if ch == ord('1') or pose_reinit:
        humans = e.inference(gray_pose, True, 2)
        # run_in_separate_process(e.inference, [e, gray_pose, False, 2])
        tr_points_temp = tr_points
        tr_points = get_pose_points(humans)
        if tr_points:
            for tr_idx in tr_points:
                # convert normalized coords to pixel coords: shape[:2] is
                # (height, width), so reversed gives (width, height)
                tr_points[tr_idx] = np.array(tr_points[tr_idx]) * tuple(reversed(gray_opt.shape[:2]))
            # p0 = tr_points
        else:
            tr_points = tr_points_temp
        frames_skiped = 0
        pose_reinit = False
def single(video_path, model='mobilenet_thin', verbose=10, model_instance=None):
    if model not in ['cmu', 'mobilenet_thin', 'mobilenet_v2_large', 'mobilenet_v2_small', None]:
        raise Exception(
            'Incompatible model chosen! Available models: cmu, mobilenet_thin, '
            'mobilenet_v2_large, mobilenet_v2_small')

    cap = cv2.VideoCapture(video_path)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    fps = cap.get(cv2.CAP_PROP_FPS)

    # future support for batch processing of videos using the same instance of TfPoseEstimator
    if model_instance is None:
        model_instance = TfPoseEstimator(get_graph_path(model),
                                         target_size=(result_width, result_height))

    points_per_frame = np.array([])
    frameN = 1
    if cap.isOpened() is False:
        raise Exception('Error opening file!')
    while cap.isOpened():
        ret, image = cap.read()
        if image is None:
            break
        image = resize_frame(image)
        humans = model_instance.inference(image, upsample_size=4.0)
        points = TfPoseEstimator.get_points(image, humans)
        # prepend the frame number to every keypoint row
        _zeros = np.full((points.shape[0], 1), frameN)
        _points = np.c_[_zeros, points]
        if _points.size != 0:
            if points_per_frame.size == 0:
                points_per_frame = _points
            else:
                points_per_frame = np.vstack((points_per_frame, _points))
        # else:
        #     _zeros = np.array([[0, 0, 0, 0, 'NA', 0]])
        #     _zeros[0, 0] = int(_zeros[0, 0]) + frameN
        #     print(_zeros)
        #     points_per_frame = np.vstack((points_per_frame, _zeros))

        # level of verbosity: 0 logs every frame, otherwise every `verbose` frames
        if verbose == 0:
            print(f"Frame number {frameN} processed")
        elif frameN % verbose == 0:
            print(f"Frame number {frameN} processed")
        frameN += 1
    cap.release()
    return points_per_frame
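# A minimal usage sketch for single(), assuming a local clip 'dance.mp4'
# (placeholder path) and that resize_frame, result_width and result_height are
# defined elsewhere in this module. Each returned row is
# [frame_number, <columns returned by TfPoseEstimator.get_points>].
import numpy as np

points = single('dance.mp4', model='mobilenet_thin', verbose=30)
np.savetxt('dance_keypoints.csv', points, delimiter=',', fmt='%s')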
class BodyDetect():
    def __init__(self, video_path=None, model='mobilenet_thin',
                 resolution='432x368', showBG=''):
        # model : 'cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small'
        # resolution : network input resolution. default=432x368
        # showBG : pass any non-empty string to show the skeleton only.
        self.camera = VideoCamera(video_path)
        # Initialize some variables
        self.w, self.h = model_wh(resolution)
        self.estimator = TfPoseEstimator(get_graph_path(model), target_size=(self.w, self.h))
        self.showBG = showBG
        self.time_checker = 0.0
        self.best_score = float("inf")  # lower is better, always positive
        self.reference = None  # human object
        self.last_frame = None

    def __del__(self):
        del self.camera

    def process_frame(self, frame):
        frame = frame.copy()
        humans = self.estimator.inference(frame, resize_to_default=True, upsample_size=4.0)
        if self.showBG:
            frame = np.zeros(frame.shape)
        # print("before", frame.shape[1], "x", frame.shape[0])
        frame = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False)
        # print("after", frame.shape[1], "x", frame.shape[0])
        return frame

    def eval_frame_with_reference(self, target_frame, reference_frame, time):
        humans = self.estimator.inference(target_frame, resize_to_default=True, upsample_size=4.0)
        current_score = TfPoseEstimator.get_pose_fit_score(humans, reference_frame)
        # print("current_score", current_score, "best_score", self.best_score, "check time", time)
        if current_score < self.best_score:
            self.best_score = current_score
            self.time_checker = time
        return time

    def get_jpg_bytes(self):
        # Grab a single frame of video
        frame = self.camera.get_frame()
        if frame is None:
            return
        frame = self.process_frame(frame)
        # We are using Motion JPEG, but OpenCV defaults to capturing raw images,
        # so we must encode the frame as JPEG in order to correctly display the
        # video stream.
        _, jpg = cv2.imencode('.jpg', frame)
        return jpg.tobytes()

    # Webcam version (pose estimation only)
    def bodyDetectVideo(self, clip):
        def fl(get_frame, t):
            frame = get_frame(t)
            return self.process_frame(frame)
        return clip.fl(fl)

    # Server processing version (for cut and merge).
    # clip: second video; reference: reference image for the cut.
    # Checks the first 10 seconds of the second video.
    def bodyDetectVideo_for_merge(self, clip):
        def fl(get_frame, t):
            frame = get_frame(t)
            if t < 10:
                self.eval_frame_with_reference(frame, self.reference, t)
            return frame
        return clip.fl(fl)

    # One frame to one human. Only one human, because we modified estimate_paf
    # in estimator.py.
    def frame_to_human(self, frame):
        humans = self.estimator.inference(frame, resize_to_default=True, upsample_size=4.0)
        if len(humans) != 1:  # we only handle the single-person case
            raise NotImplementedError
        return humans[0]

    # Helper: get the last frame of a clip
    def get_last_frame(self, clip):
        def fl(get_frame, t):
            frame = get_frame(t)
            self.last_frame = frame
            return frame
        return clip.fl(fl)
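# Since get_jpg_bytes() yields Motion-JPEG frames, one common way to serve them
# is a multipart HTTP response. A hedged Flask sketch (Flask, the route name,
# and VideoCamera(None) opening the webcam are assumptions, not part of the
# original code):
from flask import Flask, Response

app = Flask(__name__)
detector = BodyDetect()  # video_path=None: assumed to open the webcam


def mjpeg_generator():
    # yield one multipart chunk per processed frame
    while True:
        jpg = detector.get_jpg_bytes()
        if jpg is None:
            break
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + jpg + b'\r\n')


@app.route('/video_feed')
def video_feed():
    return Response(mjpeg_generator(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')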
def run_processing(camera=0, resize='0x0', resize_out_ratio=4.0,
                   model="mobilenet_thin", show_process=False, remote_server=''):
    #################################
    # DECLARE VARIABLES #############
    #################################
    frames_sent_per_sec = time.time()
    fps_time = 0
    pose = "None"
    logger.debug('initialization %s : %s' % (model, get_graph_path(model)))
    w, h = model_wh(resize)
    t = threading.currentThread()
    global tello
    try:
        tello = Tello("192.168.10.3", 8888, imperial=False, command_timeout=0.3)
    except Exception as e:
        print("Exception Occurred: {}".format(traceback.format_exc()))
        if isinstance(tello, Tello):
            del tello

    if remote_server != '':
        # 2 threads in total: 1 for sending, 1 for receiving
        clientsocket = None
        while clientsocket is None:
            try:
                serverip = remote_server.split(":")[0]
                port = remote_server.split(":")[1]
                clientsocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                clientsocket.connect((serverip, int(port)))
                print("Connected to {}:{}".format(serverip, port))
                time.sleep(2)
            except socket.error:
                # keep retrying until the server accepts the connection
                print("Connection error with server, trying again...")
                clientsocket = None
                time.sleep(1)
        try:
            # start the receiving thread
            th = threading.Thread(target=recv_thread, args=(clientsocket, ))
            th.do_run = True
            th.daemon = True  # let the main thread exit even if this thread is running
            th.start()
        except Exception:
            raise RuntimeError("Error starting thread, trying again...")
    else:
        if w > 0 and h > 0:
            e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        else:
            e = TfPoseEstimator(get_graph_path(model), target_size=(432, 368))

    ###########################
    # START CAMERA ############
    ###########################
    cam = cv2.VideoCapture(camera)
    # cam.set(cv2.CAP_PROP_FPS, 3)
    ret_val, image = cam.read()
    logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))

    while getattr(t, "do_run", True):
        ####################################################
        # START CAMERA STREAM AND DRAW THE SKELETONS #######
        ####################################################
        ret_val, image = cam.read()
        image = cv2.flip(image, 1)
        # raw_image = copy.deepcopy(image)
        if remote_server != '':
            # send the frame to the remote server
            try:
                data = pickle.dumps(image)
                clientsocket.sendall(struct.pack("<L", len(data)) + data)
                processed_fifo.put(image, block=True)
                print("frames sent per second %s" % (1.0 / (time.time() - frames_sent_per_sec)))
            except ZeroDivisionError:
                continue
            except Exception as e:
                print("Exception occurred: {}".format(traceback.format_exc()))
                clientsocket.shutdown(2)
                clientsocket.close()
                with processed_fifo.mutex:
                    processed_fifo.queue.clear()
                break
            delay_time = 1000 / (1.0 / (time.time() - frames_sent_per_sec))  # 1000 / delay_time = fps
            frames_sent_per_sec = time.time()
            cv2.waitKey(int(math.ceil(delay_time)))
            continue

        # todo: send image to cpu instance on aws
        logger.debug('image process+')
        humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                             upsample_size=resize_out_ratio)
        logger.debug('postprocess+')
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        # image = cv2.resize(image, (2 * w, 2 * h), interpolation=cv2.INTER_LINEAR)
        if len(humans) > 0:
            recognise_poses(image, humans)
        cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        fps_time = time.time()
        cv2.imshow('tf-pose-estimation result', image)

        if cv2.waitKey(1) == 115:  # 's': start the recognition
            pass
        if cv2.waitKey(1) == 27:  # Esc: quit
            break
    logger.debug('finished+')

    # exit the receiving thread (only exists in remote mode)
    if remote_server != '':
        if th.do_run:
            th.do_run = False
            th.join()
        with processed_fifo.mutex:
            processed_fifo.queue.clear()
        clientsocket.shutdown(2)
        clientsocket.close()
    cv2.destroyAllWindows()
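# The sender above length-prefixes each pickled frame with
# struct.pack("<L", len(data)). A matching server-side receive loop might look
# like the hedged sketch below (the socket setup and port are assumptions):
import pickle
import socket
import struct

HEADER = struct.calcsize("<L")  # 4-byte little-endian length prefix

server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind(('0.0.0.0', 5000))  # placeholder port
server.listen(1)
conn, addr = server.accept()

buf = b''
while True:
    # read the length prefix
    while len(buf) < HEADER:
        chunk = conn.recv(4096)
        if not chunk:
            raise ConnectionError('sender closed the connection')
        buf += chunk
    frame_len = struct.unpack("<L", buf[:HEADER])[0]
    buf = buf[HEADER:]
    # read the frame payload
    while len(buf) < frame_len:
        buf += conn.recv(4096)
    frame = pickle.loads(buf[:frame_len])  # the image the client sent
    buf = buf[frame_len:]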
class Detector():
    def __init__(self, target_ip):
        self.CWD_PATH = os.getcwd()
        self.CWD_PATH = os.path.abspath(os.path.join(self.CWD_PATH, os.pardir))
        self.CWD_PATH = os.path.join(self.CWD_PATH, '3_BRobot')

        # Path to frozen detection graph. This is the actual model used for object detection.
        MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
        PATH_TO_CKPT = os.path.join(self.CWD_PATH, 'object_detection', MODEL_NAME,
                                    'frozen_inference_graph.pb')
        # List of strings used to add the correct label for each box.
        PATH_TO_LABELS = os.path.join(self.CWD_PATH, 'object_detection', 'data',
                                      'mscoco_label_map.pbtxt')
        NUM_CLASSES = 90

        # Loading label map
        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        self.category_index = label_map_util.create_category_index(categories)

        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        self.right_clicks = []
        # self.right_clicks = [[375, 41], [1000, 709]]

        # mouse callback function
        def mouse_callback(event, x, y, flags, params):
            # right-click event value is 2
            if event == 2:
                if len(self.right_clicks) < 2:
                    self.right_clicks.append([x, y])
                else:
                    self.right_clicks = [[x, y]]
                print(self.right_clicks)

        CAM_ID = 1
        self.cam = cv2.VideoCapture(int(CAM_ID))
        self.window_name = 'Cam' + str(CAM_ID)
        cv2.namedWindow(self.window_name)
        cv2.setMouseCallback(self.window_name, mouse_callback)
        self.prevTime = 0
        self.window_size = (1312, 736)
        if self.cam.isOpened() == False:
            print('Can\'t open the CAM(%d)' % (CAM_ID))
            exit()

        self.face_queue = Queue()
        self.gender_queue = Queue()
        self.age_queue = Queue()
        self.process_gender = Process(target=gender_estimate,
                                      args=(self.face_queue, self.gender_queue))
        self.process_gender.start()
        self.process_age = Process(target=age_estimate,
                                   args=(self.face_queue, self.age_queue))
        self.process_age.start()

        self.w = self.window_size[0]
        self.h = self.window_size[1]
        self.e = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                                 target_size=(self.w, self.h))

    def detect_objects(self, image_np, sess, detection_graph, mot_tracker,
                       img_to_color, face_detect, face_queue, gender_queue, age_queue):
        # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
        image_np_expanded = np.expand_dims(image_np, axis=0)
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # The score is shown on the result image, together with the class label.
        scores = detection_graph.get_tensor_by_name('detection_scores:0')
        classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        # Actual detection.
        (boxes, scores, classes, num_detections) = sess.run(
            [boxes, scores, classes, num_detections],
            feed_dict={image_tensor: image_np_expanded})
        trackers = mot_tracker.update(boxes[0])

        person_ids = [i for i, e in enumerate(classes[0]) if e == 1]
        person_attr = {'age': 'NA', 'gender': 'NA', 'color': 'NA'}
        if len(person_ids) > 0:
            selected_person_id = person_ids[0]
            person_box = boxes[0][selected_person_id]
            person_score = scores[0][selected_person_id]
            person_tracker = trackers[selected_person_id]
            if person_score > 0.6:

                def get_color(q, img):
                    try:
                        start_time = time.monotonic()
                        c = img_to_color.get(img)
                        q.put({"flag": "color", "value": c})
                        elapsed_time = time.monotonic() - start_time
                        print("Color", elapsed_time)
                    except:
                        q.put({"flag": "color", "value": False})

                def detect_face(q, img, face_detect, face_queue, gender_queue, age_queue):
                    start_time = time.monotonic()
                    files = []
                    faces, face_files, rectangles, tgtdir = face_detect.run(img)
                    # put the faces twice: the gender process and the age process
                    # each consume one item from face_queue
                    face_queue.put([face_files, img, tgtdir])
                    face_queue.put([face_files, img, tgtdir])
                    person_gender = gender_queue.get()
                    person_age = age_queue.get()
                    print("gender rcvd", person_gender)
                    print("Age rcvd", person_age)
                    q.put({"flag": "gender", "value": person_gender})
                    q.put({"flag": "age", "value": person_age})
                    elapsed_time = time.monotonic() - start_time
                    print("Age/Gender", elapsed_time)

                person_img = crop_img(image_np, person_box)
                q = Queue()
                procs = []
                process_color = Process(target=get_color, args=(q, person_img))
                procs.append(process_color)
                process_face = Process(target=detect_face,
                                       args=(q, person_img, face_detect, face_queue,
                                             gender_queue, age_queue))
                procs.append(process_face)
                for proc in procs:
                    proc.start()
                results = []
                # three results arrive on q: one color, one gender and one age
                for _ in range(3):
                    results.append(q.get())
                for proc in procs:
                    proc.join()
                for result in results:
                    person_attr[result['flag']] = result['value']
                # print(person_attr)

                # override boxes so only the selected person is visualized
                boxes = np.expand_dims(person_box, axis=0)
                classes = [1]
                scores = np.expand_dims(person_score, axis=0)
                trackers = np.expand_dims(person_tracker, axis=0)
                person_attr = [person_attr]

        # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np, boxes, classes, scores, trackers, person_attr,
            self.category_index, use_normalized_coordinates=True, line_thickness=3)
        return image_np, person_attr

    def detect_start(self):
        with self.detection_graph.as_default():
            with tf.Session(graph=self.detection_graph) as sess:
                # Load modules
                mot_tracker = Sort()
                npz = np.load('./bin/color_extractor/color_names.npz')
                img_to_color = ImageToColor(npz['samples'], npz['labels'])
                face_detect = face_detection_model(
                    'dlib', './bin/age_gender/Model/shape_predictor_68_face_landmarks.dat')
                person_attr = False
                while True:
                    ret, frame = self.cam.read()
                    # Detection runs only once a crop region has been selected
                    # with two right clicks
                    if len(self.right_clicks) == 2:
                        print(self.right_clicks)
                        _y, _x, _d = frame.shape
                        [_c1, _c2] = self.right_clicks
                        crop_box = [
                            _c1[0] / _x,
                            _c1[1] / _y,
                            _c2[0] / _x,
                            _c2[1] / _y,
                        ]
                        cropped_img = crop_img(frame, crop_box)
                        try:
                            image_process, person_attr = self.detect_objects(
                                cropped_img, sess, self.detection_graph, mot_tracker,
                                img_to_color, face_detect, self.face_queue,
                                self.gender_queue, self.age_queue)
                            print("####", person_attr)
                            if isinstance(person_attr, list):
                                if (person_attr[0]['gender'] != 'NA'
                                        and person_attr[0]['gender'] != False):
                                    break
                            else:
                                if (person_attr['gender'] != 'NA'
                                        and person_attr['gender'] != False):
                                    break
                        except Exception as e:
                            print(e)

                    curTime = time.time()
                    sec = curTime - self.prevTime
                    self.prevTime = curTime
                    fps = 1 / sec
                    str1 = "FPS : %0.1f" % fps
                    str2 = "Testing . . ."
                    cv2.putText(frame, str1, (5, 20), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0))
                    cv2.putText(frame, str2, (100, 20), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0))
                    cv2.imshow(self.window_name, frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        self.detect_stop()
                        break
                # plt.figure(figsize=IMAGE_SIZE)
                # plt.imshow(image_process)
                # plt.show()
        if person_attr:
            return person_attr
        else:
            return False

    def detect_stop(self):
        self.cam.release()
        # cv2.destroyWindow(self.window_name)
        cv2.destroyAllWindows()
        # self.process_gender.join()
        # self.process_age.join()
        print("Detect Stop")
        return True

    def pose_start(self):
        print("Pose Start")
        result = False
        while True:
            ret, frame = self.cam.read()
            cropped_img = None
            if len(self.right_clicks) == 2:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # print(self.right_clicks)
                _y, _x, _d = frame.shape
                [_c1, _c2] = self.right_clicks
                crop_box = [
                    _c1[0] / _x,
                    _c1[1] / _y,
                    _c2[0] / _x,
                    _c2[1] / _y,
                ]
                cropped_img = crop_img(frame, crop_box)
                humans = self.e.inference(frame,
                                          resize_to_default=(self.w > 0 and self.h > 0),
                                          upsample_size=4.0)
                if len(humans) > 0:
                    # COCO part 4 is the right wrist, 7 the left wrist
                    if 7 in humans[0].body_parts or 4 in humans[0].body_parts:
                        print("Hands Detected")
                        result = 1
                        break
                image = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False)
                cv2.imshow('tf-pose-estimation result', image)
            cv2.imshow(self.window_name, cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        return result

    def hands_detect_start(self):
        print("Hands detection start")
        result = False
        im_width = 320
        im_height = 180
        self.cam.set(cv2.CAP_PROP_FRAME_WIDTH, im_width)
        self.cam.set(cv2.CAP_PROP_FRAME_HEIGHT, im_height)
        im_width, im_height = (self.cam.get(3), self.cam.get(4))
        score_thresh = 0.2
        # max number of hands we want to detect/track
        num_hands_detect = 2
        while True:
            ret, frame = self.cam.read()
            cropped_img = None
            if len(self.right_clicks) == 2:
                # resized_frame = cv2.resize(frame, (im_width, im_height))
                # print(self.right_clicks)
                _y, _x, _d = frame.shape
                [_c1, _c2] = self.right_clicks
                crop_box = [
                    _c1[0] / _x,
                    _c1[1] / _y,
                    _c2[0] / _x,
                    _c2[1] / _y,
                ]
                cropped_img = crop_img(frame, crop_box)
            # actual detection
            boxes, scores = detector_utils.detect_objects(
                frame, self.hands_detection_graph, self.hands_detection_sess)
            # Check the hand positions:
            # if the hands enter the steering-wheel area, return and start gameplay
            # break
            # draw bounding boxes
            detector_utils.draw_box_on_image(num_hands_detect, score_thresh, scores,
                                             boxes, im_width, im_height, frame)
            cv2.imshow('Hands Detection', frame)
            cv2.imshow(self.window_name, frame)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
        return result
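# A hedged usage sketch for the Detector class above; the call flow is inferred
# from the method names, and target_ip (unused in the shown code) is a
# placeholder:
detector = Detector(target_ip='192.168.0.10')  # placeholder IP
attributes = detector.detect_start()  # blocks until a person is attributed
print(attributes)  # e.g. {'age': ..., 'gender': ..., 'color': ...}
if attributes:
    detector.pose_start()  # returns 1 once a wrist keypoint (4 or 7) appears
detector.detect_stop()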
def get_keypoints(image, model='mobilenet_thin', display=False):
    cocopart_dict = {
        'Nose': 0, 'Neck': 1, 'RShoulder': 2, 'RElbow': 3, 'RWrist': 4,
        'LShoulder': 5, 'LElbow': 6, 'LWrist': 7, 'RHip': 8, 'RKnee': 9,
        'RAnkle': 10, 'LHip': 11, 'LKnee': 12, 'LAnkle': 13, 'REye': 14,
        'LEye': 15, 'REar': 16, 'LEar': 17
    }
    key_points = []

    # logger setup (kept for reference)
    '''
    logger = logging.getLogger('TfPoseEstimatorRun')
    logger.handlers.clear()
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    logger.propagate = False
    '''

    # needs image, model, resize, resize-out-ratio
    w, h = 432, 368  # image size fixed at 432x368
    upsample_size = 4.0  # default
    # not needed per-frame if you use video capture
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))

    # estimate human poses from a single image!
    image_path = image
    image = common.read_imgfile(image_path, None, None)
    if image is None:
        # logger.error('Image can not be read, path=%s' % args.image)
        print(f'Image {image_path} can not be read')
        sys.exit(-1)

    t = time.time()
    # upsample_size defaults to 4.0
    humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                         upsample_size=upsample_size)
    elapsed = time.time() - t

    # for key, value in cocopart_dict.items():
    #     key_points[key] = (humans[0].body_parts[value].x, humans[0].body_parts[value].y)
    for key, value in cocopart_dict.items():  # JS
        # If a keypoint was not tracked, its key is missing from body_parts,
        # so use try/except to substitute a sentinel value instead.
        try:
            key_points.append([
                humans[0].body_parts[value].x,
                humans[0].body_parts[value].y,
                humans[0].body_parts[value].score
            ])
        except:
            key_points.append([-1, -1, 0])
    # pprint.pprint(key_points)

    # display pose_estimation result
    if display == True:
        display_image(image, e)
    return key_points
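# A short usage sketch (the image path is a placeholder). The returned list is
# ordered by COCO part index, so it can be zipped back to the part names:
COCO_PARTS = ['Nose', 'Neck', 'RShoulder', 'RElbow', 'RWrist', 'LShoulder',
              'LElbow', 'LWrist', 'RHip', 'RKnee', 'RAnkle', 'LHip', 'LKnee',
              'LAnkle', 'REye', 'LEye', 'REar', 'LEar']

keypoints = get_keypoints('./images/apink2.jpg', model='mobilenet_thin')
for name, (x, y, score) in zip(COCO_PARTS, keypoints):
    if score > 0:  # [-1, -1, 0] marks an untracked part
        print(f'{name}: x={x:.3f} y={y:.3f} score={score:.2f}')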
empty_body_file_list = []
extra_body_file_list = []
for action_index, action in enumerate(input_action_type):
    direction_path = r"./input/" + action + r'/'
    files_grabbed = glob.glob(os.path.join(direction_path, '*.' + images_format))
    files_count = len(files_grabbed)
    all_file_count += files_count
    body_count = 0
    # if files_count == 0:
    #     logger.error("The directory has no image in type" + images_format + '!')
    #     sys.exit(-1)
    for i, file in enumerate(files_grabbed):
        image = common.read_imgfile(file, None, None)
        time_file_start = time.time()
        humans = e.inference(image, True, 4.0)
        time_file_elapsed = time.time() - time_file_start
        # logger.info('Processed image #%d/%d: %s in %.4f seconds.' % (i, i, file, time_file_elapsed))
        # A tripod in the image background may be recognized by openpose as a body
        for human in humans:
            body_dict = dict()
            human_split = str(human).split('BodyPart:')
            for bodypart in human_split:
                left_parenthesis = bodypart.find('(')
                if left_parenthesis != -1:
                    if left_parenthesis == 2:
                        bodypart_index = int(bodypart[left_parenthesis - 2:left_parenthesis - 1])
                    else:
                        bodypart_index = int(
    img = img[:290, :, :]
    if img.shape[1] > 300:
        img = img[:, :300, :]
    if img.shape[0] < 290:
        pad_0 = 290 - img.shape[0]
        img = np.pad(img, [(0, pad_0), (0, 0), (0, 0)], 'constant', constant_values=0)
    if img.shape[1] < 300:
        pad_1 = 300 - img.shape[1]
        img = np.pad(img, [(0, 0), (0, pad_1), (0, 0)], 'constant', constant_values=0)

    h = estimator.inference(img, resize_to_default=True, upsample_size=4.0)
    if len(h) == 0:
        mask.append(False)
        human = np.zeros([14, 2], dtype=np.float32)
        visibility = np.zeros([14], dtype=np.bool)
    else:
        if len(h) > 1:
            mask.append(False)
        else:
            mask.append(True)
        human, visibility = tf_pose.common.MPIIPart.from_coco(h[0])
        human = np.array(human)
        visibility = np.array(visibility)
        avg_location = np.mean(human[visibility], axis=0)
def main(args):
    if args.input is None:
        device = cv2.CAP_OPENNI2
    else:
        device = os.path.abspath(args.input)
    print(device)
    capture = cv2.VideoCapture(device)
    if args.input is None and not capture.isOpened():
        print("Capture device not opened")
        capture.release()
        return 1
    elif args.input is not None and not capture.isOpened():
        print("File not found")
        capture.release()
        return 1

    fig = plt.figure(figsize=(12, 6))

    # create pose estimator
    retval = capture.grab()
    _, color_im = capture.retrieve(0, cv2.CAP_OPENNI_BGR_IMAGE)
    image_size = color_im.shape
    if args.mode == 'openpose':
        pose_estimator2D = OpPoseEstimator(get_graph_path('cmu'),
                                           target_size=(image_size[1], image_size[0]))
        pose_lifter3D = Prob3dPose(PROB_MODEL_PATH)
    else:
        pose_estimator = LftdPoseEstimator(image_size, SESSION_PATH, PROB_MODEL_PATH)
        pose_estimator.initialise()

    n = 0
    while True:
        n += 1
        retval = capture.grab()
        if not retval:
            break
        retval, color_im = capture.retrieve(0, cv2.CAP_OPENNI_BGR_IMAGE)
        color_im = cv2.cvtColor(color_im, cv2.COLOR_BGR2RGB)

        if args.mode == 'openpose':
            # 2D estimation by OpenPose
            start_time_2D = time.perf_counter()
            estimated_pose_2d = pose_estimator2D.inference(
                color_im, resize_to_default=True, upsample_size=2.0)
            end_time_2D = time.perf_counter()
        else:
            # estimation by LFTD
            estimated_pose_2d, visibility, pose_3d = pose_estimator.estimate(color_im)

        if len(estimated_pose_2d) == 0:
            plt.subplot(1, 1, 1)
            plt.imshow(color_im)
            plt.pause(0.01)
            continue

        if not args.do_3d:
            pose_3d = []
            start_time_3D, end_time_3D = 0, 0
        elif args.mode == 'openpose':
            # lift the 2D joints to 3D
            pose_2d_mpii, visibility = to_mpii_pose_2d(estimated_pose_2d)
            start_time_3D = time.perf_counter()
            transformed_pose_2d, weights = pose_lifter3D.transform_joints(
                np.array(pose_2d_mpii), visibility)
            pose_3d = pose_lifter3D.compute_3d(transformed_pose_2d, weights)
            end_time_3D = time.perf_counter()

        if args.track_one:
            # follow the person whose mean 2D position is closest (L1 norm)
            # to the person tracked in the previous frame
            if 'prev' not in locals():
                prev = np.zeros(2)
            if args.mode == 'openpose':
                means = np.mean(transformed_pose_2d, axis=1)
            else:
                means = np.mean(estimated_pose_2d, axis=1)
            argmin = np.argmin(np.linalg.norm(means - prev, ord=1, axis=1))
            pose_3d = np.expand_dims(pose_3d[argmin], axis=0)
            prev = means[argmin]

        # Show 2D and 3D poses
        display_results(args.mode, color_im, estimated_pose_2d, visibility, pose_3d)
        if args.output:
            fig.savefig(os.path.join(os.path.abspath(args.output),
                                     'out{:09d}.png'.format(n)))
        if args.mode == 'openpose':
            print("OP - 2D: {}, 3D: {}".format(end_time_2D - start_time_2D,
                                               end_time_3D - start_time_3D))
        plt.pause(0.01)
        fig.clf()
    return retval
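# The --track_one branch keeps whichever person's mean joint position is
# nearest (L1 distance) to the person tracked in the previous frame. The same
# selection in isolation, as a standalone sketch with toy data:
import numpy as np

poses_2d = np.random.rand(3, 16, 2)  # toy data: 3 people, 16 joints, (x, y)
prev = np.array([0.5, 0.5])          # tracked person's mean position last frame

means = np.mean(poses_2d, axis=1)    # (3, 2): one mean position per person
argmin = np.argmin(np.linalg.norm(means - prev, ord=1, axis=1))
tracked = poses_2d[argmin]           # the person to keep tracking
prev = means[argmin]                 # carry forward to the next frame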
def extract_frames(csv_paths, video_paths, output_path):
    fps_time = 0
    BODY_PARTS = {
        "Nose": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
        "LShoulder": 5, "LElbow": 6, "LWrist": 7, "RHip": 8, "RKnee": 9,
        "RAnkle": 10, "LHip": 11, "LKnee": 12, "LAnkle": 13, "REye": 14,
        "LEye": 15, "REar": 16, "LEar": 17, "Background": 18
    }
    BODY_PARTS_INTEREST = {
        "Nose": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
        "LShoulder": 5, "LElbow": 6, "LWrist": 7, "REye": 14, "LEye": 15
    }

    # Set up the pose model
    w = 243
    h = 432
    showBG = True
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h))

    # Set up the output features table
    out_data = pd.DataFrame(columns=[
        'nose_x', 'nose_y', 'neck_x', 'neck_y', 'rshoulder_x', 'rshoulder_y',
        'relbow_x', 'relbow_y', 'rwrist_x', 'rwrist_y', 'lshoulder_x',
        'lshoulder_y', 'lelbow_x', 'lelbow_y', 'lwrist_x', 'lwrist_y',
        'reye_x', 'reye_y', 'leye_x', 'leye_y', 'label'
    ])

    # Open an output display window
    cv2.namedWindow('output', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('output', 576, 1024)

    # Extract features from every video
    for idx, (csv_path, video_path) in enumerate(zip(csv_paths, video_paths)):
        # Read label time ranges
        rawdata = pd.read_csv(csv_path, header=0)
        rawdata.columns = ['start_time', 'end_time', 'label']
        cap = cv2.VideoCapture(video_path)
        # cap.set(cv2.CAP_PROP_POS_MSEC, 350*1000)

        # Save a pose to csv every n frames
        counter = 0
        n = 5
        last_label = ''

        # Moving average filter settings; used for filtering out jitter of the pose net
        ma_length = 5                        # Length of the moving average
        part_weight = 1.0 / ma_length        # Weight of a part in the moving average
        ma_counter = 0
        bodypart_array_new = []              # Current set of body parts
        bodypart_array_ma = []               # Moving-average set of body parts
        ma_bodyparts_array = []              # Sets of body parts for computing the moving average
        bodypart_to_del_idx = ma_length - 1  # Index of the set of body parts to overwrite in ma_bodyparts_array

        # Initialize arrays
        for i in range(0, 10):
            bodypart_array_new.insert(i, BodyPart(0, 0, 0.5, 0.5, 0))
            bodypart_array_ma.insert(i, BodyPart(0, 0, 0, 0, 0))  # Must be all zeros
        for i in range(0, ma_length):
            ma_bodyparts_array.insert(i, bodypart_array_ma)
        bodypart_locations = np.zeros(2 * 10)

        if cap.isOpened() is False:
            print("Error opening video stream or file")
        while cap.isOpened():
            ret_val, image = cap.read()
            timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
            # if timestamp > 575:
            #     break
            label = rawdata['label'][(timestamp > rawdata['start_time'])
                                     & (timestamp < rawdata['end_time'])]
            try:
                humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                                     upsample_size=2)
            except:
                break
            if len(humans) > 0:
                humans = [humans[0]]
                # Extract the body parts of interest.
                # If a part is not detected, use the part's last (averaged) location.
                for idx, key in enumerate(BODY_PARTS_INTEREST):
                    try:
                        bodypart_array_new[idx] = humans[0].body_parts[BODY_PARTS[key]]
                    except:
                        bodypart_array_new[idx] = bodypart_array_ma[idx]

                # Perform the moving average
                if ma_counter == ma_length:
                    bodypart_array_delete = ma_bodyparts_array[bodypart_to_del_idx]
                    for idx, part in enumerate(bodypart_array_ma):
                        part.x = part.x + bodypart_array_new[idx].x * part_weight - \
                            bodypart_array_delete[idx].x * part_weight
                        part.y = part.y + bodypart_array_new[idx].y * part_weight - \
                            bodypart_array_delete[idx].y * part_weight
                        bodypart_array_ma[idx] = part
                    ma_bodyparts_array[bodypart_to_del_idx] = copy.deepcopy(bodypart_array_new)
                    bodypart_to_del_idx = (bodypart_to_del_idx - 1) % ma_length
                    bodypart_array = bodypart_array_ma
                else:
                    # We have not seen enough samples yet for the moving average;
                    # just use the current sample as the output of the filter
                    bodypart_array = bodypart_array_new
                    ma_counter = ma_counter + 1
                    ma_bodyparts_array[ma_length - ma_counter] = copy.deepcopy(bodypart_array_new)
                    for idx, part in enumerate(bodypart_array_ma):
                        part.x += bodypart_array_new[idx].x * part_weight
                        part.y += bodypart_array_new[idx].y * part_weight
                        bodypart_array_ma[idx] = part

                # Update the humans array with the moving-average data
                humans[0].body_parts = {}
                for idx, (key, part) in enumerate(zip(BODY_PARTS_INTEREST, bodypart_array)):
                    humans[0].body_parts[BODY_PARTS[key]] = part

                # Draw the skeleton (only the parts of interest)
                if not showBG:
                    image = np.zeros(image.shape)
                image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

                # Generate the feature vector: part locations relative to the neck
                neck = bodypart_array[BODY_PARTS['Neck']]
                for idx, part in enumerate(bodypart_array):
                    bodypart_locations[idx * 2] = part.x - neck.x
                    bodypart_locations[idx * 2 + 1] = part.y - neck.y

                # Add the label and append to the feature matrix
                if label.size != 0:
                    cv2.putText(image, "Label: " + str(label.values[0]), (10, 70),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                    # Check whether the current label has persisted over the past n frames
                    if last_label != label.values[0]:
                        counter = 0
                    else:
                        if counter == n:
                            print(label.values[0])
                            out_data.loc[len(out_data), 'nose_x':'leye_y'] = bodypart_locations
                            out_data.loc[len(out_data) - 1, 'label'] = label.values[0]
                            counter = 0
                        else:
                            if label.values[0] == 'steering':
                                # We have about twice as many steering labels,
                                # so count them at half weight
                                counter += 0.5
                            else:
                                counter += 1
                    last_label = label.values[0]

            cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)), (10, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('output', image)
            fps_time = time.time()
            if cv2.waitKey(1) == 27:
                break
        cap.release()

    # Save features to the output csv
    out_data.to_csv(output_path, index=False)
    cv2.destroyAllWindows()
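# The incremental add/subtract bookkeeping above can be hard to follow. The
# same moving-average smoothing, written as a hedged standalone sketch over
# plain (x, y) arrays with a deque, for comparison (not the code used above):
from collections import deque

import numpy as np


class KeypointSmoother:
    """Moving average over the last `length` keypoint sets."""

    def __init__(self, length=5):
        self.window = deque(maxlen=length)

    def update(self, keypoints):
        # keypoints: array of shape (n_parts, 2) holding (x, y) per part
        self.window.append(np.asarray(keypoints, dtype=float))
        return np.mean(self.window, axis=0)  # smoothed (n_parts, 2)


smoother = KeypointSmoother(length=5)
for _ in range(8):
    noisy = np.random.rand(10, 2)  # stand-in for one frame of pose output
    smooth = smoother.update(noisy)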
def start_game(config, params):
    # Initialize the variables needed before entering the game
    cam = cv2.VideoCapture(0)
    ret, named_window = cam.read()

    # Silhouette matching: turn on the camera, (show an image, wait until the
    # 4 points come within range) x 3 loops, then exit
    # sil = ["1.png", "2.png", "3.png"]  # something like this

    # Game start: ends with one of clear_menu, pause_menu, death_menu
    pause_img = cv2.imread('images/pause.png')
    score_img = cv2.imread('images/score.png')
    gameover_img = cv2.imread('images/gameover.png')

    # HP (lives) related variables
    hp_x = config.imWidth // 2 + 400
    hp_y = config.imHeight // 2 - 345
    hp_yy = config.imHeight // 2 - 300
    hp_w = 50
    hp_h = 42
    hp_image = cv2.imread('images/heart.png')

    w = 432
    h = 368
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h),
                        trt_bool=str2bool("False"))
    global score

    while True:  # a restart comes back here (no need to redo the silhouette)
        params["restart"] = False
        hp = 10       # lives until death (miss 10 times -> death_menu)
        cur_order = 0
        score = 0
        game_patterns = []  # list rebuilt from the spreadsheet values

        # values loaded from the spreadsheet
        for i in params["patterns"]:  # e.g. i = [4.0, 0, 0, 3, 0, 0, 12, 0, 0, 0, ...]
            pattern = []
            if i[10]:
                # i[10] truthy -> this row has a pose, so give the player time
                # to take the pose
                time1 = i[0] - 6.6
                time2 = i[0]
            else:
                # no pose -> give a time window for touching the circle with a
                # body point
                time1 = i[0] - 3
                time2 = i[0] + 1
            # instead of the single time i[0], allow the range time1..time2
            pattern.extend([i[0], time1, time2, False, i[10]])
            # for each of the 9 zones, append a (zone, body part) tuple
            for j in range(1, 10):  # j = 1 .. 9
                if i[j]:  # non-zero -> a circle must appear
                    # the zone number is off by one relative to the spreadsheet
                    # (column 0 holds the time), hence j - 1; i[j] - 1 corrects
                    # values that were written off by one in the spreadsheet
                    pattern.append(tuple([j - 1, i[j] - 1]))
            game_patterns.append(pattern)
        # params["patterns"][0] = [4, 0, 0, 0, 3, 0, 0, 12, 0, 0, 0]
        # -> game_patterns[0] = [4.0, 3.5, 4.2, False, (2, 2), (5, 11)]  (zone, part)

        match_list = []  # rules to try matching within the allowed time window
        # a = input('Press...')
        start_time = time.time()
        resume_time = 0.0
        resume_start = 0.0
        play_music(params["song"], 0)

        while True:  # game play
            ret, named_window = cam.read()
            config.named_window = cv2.resize(named_window, dsize=(1312, 736),
                                             interpolation=cv2.INTER_AREA)
            config.named_window = cv2.flip(config.named_window, 1)
            print(named_window.shape)
            humans = e.inference(named_window, resize_to_default=(w > 0 and h > 0),
                                 upsample_size=4.0)
            if not humans:
                continue
            human = humans[0]
            image_h, image_w = config.named_window.shape[:2]

            # Use the key points found by the Human class's add_pair function
            # (estimator.py), convert them to pixel coordinates and store them
            # in the centers list, so the key points from head to toe can be
            # shown on screen.
            centers = []
            for i in range(common.CocoPart.Background.value):  # 18 parts
                if i not in human.body_parts.keys():
                    centers.append((0, 0))
                else:
                    body_part = human.body_parts[i]
                    center = (image_w - int(body_part.x * image_w + 0.5),
                              int(body_part.y * image_h + 0.5))
                    centers.append(center)

            play_time = time.time() - start_time  # measure play time

            pattern = game_patterns[cur_order]
            # Once a rule passes its time1 and is not yet in match_list (the
            # second check prevents duplicate appends), add it.
            # game_patterns[cur_order][1] is the start of the allowed window.
            if game_patterns[cur_order][1] < play_time and \
                    game_patterns[cur_order] not in match_list:
                match_list.append(game_patterns[cur_order])
                # cur_pattern = Pattern()
                cur_order += 1
                if cur_order > len(game_patterns) - 1:
                    # past the last pattern: pin cur_order; the game is over
                    cur_order = len(game_patterns) - 1

            if match_list:
                # if match_list has any element, hand these arguments to match()
                # i = [4.0, 3.5, 4.2, F, 0 or PATH, (2, 3), (5, 12)]
                match_list = match(config, match_list, centers, hp, play_time)

            if match_list and match_list[0][2] < play_time:
                # the touch window has passed -> lose one life
                hp -= 1
                # the expired rule is always at index 0, since finished rules
                # are removed from match_list
                del match_list[0]
                # match_list.remove(game_patterns[cur_order]) would also work

            cv2.putText(config.named_window, 'score:',
                        (int(config.imWidth / 2 - 600), int(config.imHeight / 2 - 300)),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7, cv2.LINE_8)
            # show the score in real time
            cv2.putText(config.named_window, '%d' % score,
                        (int(config.imWidth / 2 - 600), int(config.imHeight / 2 - 250)),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7, cv2.LINE_8)

            if cur_order == len(game_patterns):
                # the (rebuilt) pattern list is finished -> show clear_menu
                config.named_window = score_img
                clear_menu(params, score)
            if cv2.waitKey(1) & 0xFF == ord('p'):
                params["exit"] = True
            if hp <= 0 or play_time > game_patterns[len(game_patterns) - 1][2] + 5:
                # 5 seconds after the touch window of the last pattern has passed
                mixer.music.stop()
                death_menu(params)  # death
            if params["exit"] == True:
                break
            if params["restart"] == True:  # restart the same game
                break
            if params["menu"] == True:
                break

            for i in range(hp):
                # composite the HP hearts onto the live window, two rows of five
                if i < 5:
                    show_hp(config.named_window, hp_image, hp_x + i * hp_w,
                            hp_y, hp_w, hp_h)
                if i >= 5:
                    show_hp(config.named_window, hp_image, hp_x + (i - 5) * hp_w,
                            hp_yy, hp_w, hp_h)
            cv2.imshow('McgBcg', config.named_window)  # image_h, image_w

        if params["exit"] == True:
            break
        if params["menu"] == True:
            break
live_y_arr = []
orig_x_dict = {}
orig_y_dict = {}
orig_x_arr = []
orig_y_arr = []

e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(432, 368),
                    trt_bool=False)
logger.debug('Image read 1 +')

############################################## LIVE ########################################
image1 = cv2.imread('INPUTS/input1.jpg')
humans1 = e.inference(image1, resize_to_default=(w > 0 and h > 0), upsample_size=4.0)
for human in humans1:
    body_parts = human.body_parts
    for k, v in body_parts.items():
        live_x_dict[v.part_idx] = round(v.x, 2)
        live_y_dict[v.part_idx] = round(v.y, 2)

live_x_arr = form_arr(live_x_dict)
live_y_arr = form_arr(live_y_dict)
print('\n\n******XXXXXXXX LIVE X ARR XXXXXXXXXX************')
print(live_x_arr)
print('******YYYYYYYY LIVE Y ARR YYYYYYYYYY************')
print(live_y_arr)
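# The orig_* dictionaries suggest the live keypoints are compared against a
# reference image. One plausible comparison is a mean absolute difference per
# axis; this is a hedged sketch (the 0.1 threshold is a guess, not from the
# original code):
import numpy as np


def pose_distance(live_x, live_y, orig_x, orig_y):
    """Mean absolute difference between live and reference keypoints."""
    dx = np.abs(np.asarray(live_x) - np.asarray(orig_x))
    dy = np.abs(np.asarray(live_y) - np.asarray(orig_y))
    return float(np.mean(dx + dy))

# Hypothetical usage once both array pairs have been filled via form_arr():
# score = pose_distance(live_x_arr, live_y_arr, orig_x_arr, orig_y_arr)
# print('Poses match' if score < 0.1 else 'Poses differ')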
fig = plt.figure(num='real-time plotting')
sf = fig.add_subplot(111)
plt.title('stream data')
plt.xticks([0, 1500000, 3000000, 4500000, 6000000])
plt.yticks([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
plt.axhline(y=threshold, color='r', linestyle='--', linewidth=2)
line1, = sf.plot([0, 6000000], [0, 1], 'b-')

while cap2.isOpened():  # loop while the video is being captured
    try:
        ret_val1, image1 = cap1.read()
        ret_val2, image2 = cap2.read()

        logger1.debug('image process+')
        humans1 = e1.inference(image1, resize_to_default=(w1 > 0 and h1 > 0),
                               upsample_size=args1.resize_out_ratio)
        logger2.debug('image process+')
        humans2 = e2.inference(image2, resize_to_default=(w2 > 0 and h2 > 0),
                               upsample_size=args2.resize_out_ratio)

        # Video 2: if --showBG=False, render the skeleton only
        if not args2.showBG:
            image2 = np.zeros(image2.shape)

        logger1.debug('postprocess+')
        image1 = TfPoseEstimator.draw_humans(image1, humans1, imgcopy=False)
        logger2.debug('postprocess+')
        image2 = TfPoseEstimator.draw_humans(image2,
class Terrain(object):
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """
        # set up the view window
        self.app = QtGui.QApplication(sys.argv)
        self.window = gl.GLViewWidget()
        self.window.setWindowTitle('Terrain')
        self.window.setGeometry(0, 110, 1920, 1080)
        self.window.setCameraPosition(distance=30, elevation=12)
        self.window.show()

        gx = gl.GLGridItem()
        gy = gl.GLGridItem()
        gz = gl.GLGridItem()
        gx.rotate(90, 0, 1, 0)
        gy.rotate(90, 1, 0, 0)
        gx.translate(-10, 0, 0)
        gy.translate(0, -10, 0)
        gz.translate(0, 0, -10)
        self.window.addItem(gx)
        self.window.addItem(gy)
        self.window.addItem(gz)

        model = 'mobilenet_thin'
        camera = 0

        self.lines = {}
        self.connection = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6],
                           [0, 7], [7, 8], [8, 9], [9, 10], [8, 11], [11, 12],
                           [12, 13], [8, 14], [14, 15], [15, 16]]

        w, h = model_wh('432x368')
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.cam = cv2.VideoCapture(camera)
        ret_val, image = self.cam.read()

        matlabfile = os.path.join(os.getcwd(), 'tf-pose-estimation',
                                  'prob_model_params.mat')
        self.poseLifting = Prob3dPose(matlabfile)
        keypoints = self.mesh(image)

        self.points = gl.GLScatterPlotItem(pos=keypoints,
                                           color=pg.glColor((0, 255, 0)),
                                           size=15)
        self.window.addItem(self.points)
        for n, pts in enumerate(self.connection):
            self.lines[n] = gl.GLLinePlotItem(pos=np.array([keypoints[p] for p in pts]),
                                              color=pg.glColor((0, 0, 255)),
                                              width=3, antialias=True)
            self.window.addItem(self.lines[n])

    def mesh(self, image):
        image_h, image_w = image.shape[:2]
        width = 640
        height = 480
        pose_2d_mpiis = []
        visibilities = []

        humans = self.e.inference(image, upsample_size=4.0)
        for human in humans:
            pose_2d_mpii, visibility = common.MPIIPart.from_coco(human)
            pose_2d_mpiis.append([(int(x * width + 0.5), int(y * height + 0.5))
                                  for x, y in pose_2d_mpii])
            visibilities.append(visibility)

        pose_2d_mpiis = np.array(pose_2d_mpiis)
        visibilities = np.array(visibilities)
        transformed_pose2d, weights = self.poseLifting.transform_joints(
            pose_2d_mpiis, visibilities)
        pose_3d = self.poseLifting.compute_3d(transformed_pose2d, weights)
        keypoints = pose_3d[0].transpose()
        return keypoints / 80

    def update(self):
        """
        update the mesh and shift the noise each time
        """
        ret_val, image = self.cam.read()
        try:
            keypoints = self.mesh(image)
        except AssertionError:
            print('body not in image')
        else:
            self.points.setData(pos=keypoints)
            for n, pts in enumerate(self.connection):
                self.lines[n].setData(pos=np.array([keypoints[p] for p in pts]))

    def start(self):
        """
        get the graphics window open and setup
        """
        if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
            QtGui.QApplication.instance().exec_()

    def animation(self, frametime=10):
        """
        calls the update method to run in a loop
        """
        timer = QtCore.QTimer()
        timer.timeout.connect(self.update)
        timer.start(frametime)
        self.start()
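# A minimal driver for the Terrain class, inferred from its animation()/start()
# pattern (the 10 ms frame time is the method's own default):
if __name__ == '__main__':
    # opens the GLViewWidget, lifts webcam poses to 3D, redraws every 10 ms
    t = Terrain()
    t.animation(frametime=10)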
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
if args.output_video != '':
    output_video = cv2.VideoWriter(args.output_video,
                                   cv2.VideoWriter_fourcc(*'DIVX'),
                                   fps, (width, height))

while cap.isOpened():
    ret_val, image = cap.read()
    try:
        humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                             upsample_size=4.0)
    except Exception:
        break
    print(args.showBG)
    if not args.showBG:
        image = np.zeros(image.shape)
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    if args.output_video != '':
        output_video.write(image)
    if cv2.waitKey(1) == 27:
        break
def abn():
    print("abn")
    speak.Speak('action detection started')

    # Name of the directory containing the object detection module we're using
    MODEL_NAME = 'inference_graph'

    # Grab the path to the current working directory
    CWD_PATH = os.getcwd()

    # Path to the frozen detection graph .pb file, which contains the model
    # that is used for object detection.
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, 'frozen_inference_graph.pb')

    # Path to the label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, 'training', 'labelmap.pbtxt')

    # Number of classes the object detector can identify
    NUM_CLASSES = 10

    # Load the label map.
    # Label maps map indices to category names, so that when our convolutional
    # network predicts `5`, we know that this corresponds to `king`.
    # Here we use internal utility functions, but anything that returns a
    # dictionary mapping integers to appropriate string labels would be fine.
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    sess = tf.Session(graph=detection_graph)

    # Define input and output tensors (i.e. data) for the object detection classifier.
    # The input tensor is the image.
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # The output tensors are the detection boxes, scores, and classes.
    # Each box represents a part of the image where a particular object was detected.
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Initialize the webcam feed
    video = cv2.VideoCapture(0)
    ret = video.set(3, 1280)
    ret = video.set(4, 720)
    start_time = time.time()
    fps_time = time.time()

    w, h = model_wh(args.resize)
    if w > 0 and h > 0:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    else:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(1280, 720))

    while True:
        # Acquire a frame and expand its dimensions to shape [1, None, None, 3],
        # i.e. a single-column array where each item holds the pixel's RGB value
        ret, frame = video.read()
        frame_expanded = np.expand_dims(frame, axis=0)

        # Perform the actual detection by running the model with the image as input
        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: frame_expanded})

        # Draw the results of the detection (aka 'visualize the results')
        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.60)
        # print(np.squeeze(classes), np.squeeze(boxes))

        # pose
        humans = e.inference(frame, resize_to_default=(w > 0 and h > 0),
                             upsample_size=args.resize_out_ratio)
        image = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False)
        cv2.putText(frame, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        if (time.time() - start_time) > 10:
            break

        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('ProjectInt', frame)
        fps_time = time.time()

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break

    # Clean up
    video.release()
    cv2.destroyAllWindows()
        )  # 'D:/Dataset/Action/my_dataset/crop/1/1_0001/'
        img_name = img_dir + img_path.split('/')[-1]
        # 'D:/Dataset/Action/my_dataset/crop/1/1_0001/img_00001.jpg'
        # print(img_name)
        if int(img_dir.split('_')[-1].replace('/', '')) <= old_file_num[arr]:
            # skip the old clips; only crop the new ones
            break
        if not os.path.exists(img_dir):
            os.mkdir(img_dir)
        img = cv2.imread(img_path)
        image = img[:, 30:410, :].copy()
        humans = e.inference(image, resize_to_default=True, upsample_size=4.0)
        npimg, key_points = TfPoseEstimator.draw_one_human(image, humans,
                                                           imgcopy=False, score=0.8)
        cv2.putText(npimg, "FPS: %f" % (1.0 / (time.time() - fps_time)), (10, 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.imshow('tf-pose-estimation result', npimg)
        fps_time = time.time()
        # print(temp_class.Neck_x, temp_class.Neck_y)
        if key_points[1][0] == 0 or key_points[1][1] == 0:
            err += 1
class PoseEstimator(object):
    """
    The PoseEstimator class manages all operations related to pose estimation.
    It acts as a wrapper on top of the TfPoseEstimator implemented inside the
    openpose model. The class supplies human pose coordinates to requestor objects.
    """
    resize_out_ratio = 4.0  # of no real relevance, kept for the sake of completeness

    def __init__(self, resize='0x0', model='mobilenet_thin'):
        self.humans = None  # list of humans with pose info
        self.image = None
        self.bboxes = []  # list of bbox [x1, y1, x2, y2]
        """
        The two available models are cmu & mobilenet_thin.
        If running in CPU-only mode, mobilenet_thin is recommended.
        Defaults to mobilenet_thin.
        """
        self.model = model
        """
        If a resize value is provided, images will be resized before they are
        processed. default=0x0. Recommended: 432x368, 656x368 or 1312x736.
        """
        self.w, self.h = model_wh(resize)
        self.loadModel()

    def loadModel(self):
        """
        Loads the cmu or mobilenet model into memory
        """
        try:
            if self.w == 0 or self.h == 0:
                self.e = TfPoseEstimator(get_graph_path(self.model),
                                         target_size=(432, 368))
            else:
                self.e = TfPoseEstimator(get_graph_path(self.model),
                                         target_size=(self.w, self.h))
        except MemoryError:
            print("couldn't load model into memory...")

    def infer(self, image):
        """
        Calls the inference API inside tf_pose (openpose), returning the poses
        of humans and drawing the skeletons on the image frame
        """
        self.image = image
        if self.image is None:
            raise Exception('The image is not valid. Check your image.')
        self.humans = self.e.inference(self.image,
                                       resize_to_default=(self.w > 0 and self.h > 0),
                                       upsample_size=self.resize_out_ratio)
        self.image = TfPoseEstimator.draw_humans(self.image, self.humans, imgcopy=False)
        return self.image

    def getHumans(self):
        return self.humans

    def getImage(self):
        return self.image

    def _normalize_values(self, width, height):
        # scale normalized coordinates to the network input size
        if self.w == 0:
            width = width * 432
        else:
            width = width * self.w
        if self.h == 0:
            height = height * 368
        else:
            height = height * self.h
        return width, height

    def getBboxes(self):
        return self.bboxes

    def getKeypoints(self):
        """
        Returns a list of keypoints of all the persons in a frame
        keypt_list = [keypts1, keypts2]
        keypts = [x1, y1, score, ...]
        """
        keypt_list = []
        for human in self.humans:
            keypts = []
            for key, values in human.body_parts.items():
                # print(key, 'x val %.2f' % values.x, 'y val %.2f' % values.y)
                # print('values.part_idx, values.uidx ', values.part_idx, values.uidx)
                x, y = self._normalize_values(values.x, values.y)
                keypts.extend([x, y, values.score])
            keypt_list.append(keypts)
        # print(keypt_list)
        return keypt_list

    def _updateBboxes(self):
        self.bboxes = []
        for human in self.humans:
            min_x, min_y = math.inf, math.inf
            max_x, max_y = -1, -1
            bbox = [min_x, min_y, max_x, max_y]
            for key, values in human.body_parts.items():
                if values.x < min_x:
                    min_x = values.x
                if values.y < min_y:
                    min_y = values.y
                if values.x > max_x:
                    max_x = values.x
                if values.y > max_y:
                    max_y = values.y
            bbox = [min_x, min_y, max_x, max_y]
            self.bboxes.append(bbox)

    def showResults(self):
        """
        Utility method for debug purposes, not called from anywhere
        """
        # print(self.humans)
        cv2.imshow('tf-pose-estimation result', self.image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
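# A hedged usage sketch for the wrapper above (the image paths are placeholders):
import cv2

pose_est = PoseEstimator(resize='432x368', model='mobilenet_thin')
frame = cv2.imread('person.jpg')  # placeholder input
rendered = pose_est.infer(frame)  # draws skeletons onto the frame
for keypts in pose_est.getKeypoints():
    print(len(keypts) // 3, 'keypoints found')
cv2.imwrite('person_pose.jpg', rendered)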
e = TfPoseEstimator(get_graph_path(model_name), target_size=(432, 368))
iteration_list = [10]
for iteration in iteration_list:
    start = time.time()
    for i in range(iteration):
        # estimate human poses from a single image!
        # image = common.read_imgfile(image_path, None, None)
        image = cv2.imread(image_path)
        if image is None:
            sys.exit(-1)
        print("image shape = %s" % str(image.shape))
        t = time.time()
        humans = e.inference(image, resize_to_default=False,
                             upsample_size=resize_out_ratio)
        elapsed = time.time() - t
    end = time.time()
    print("It takes %s sec to run %d images for tf-openpose" % (str(end - start), iteration))

print(image.shape)
print(humans)
image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

import matplotlib.pyplot as plt
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
class Tracker:
    """
    attributes:
        people = dict of people, key is their uuid
        names = dict of names, key is uuid
        frame_count = count of frames tracked
        scan_every_n_frames = # of frames between scans of a person's face
        maximum_difference_to_match = the maximum average difference of points
            at which two poses are still considered the same person
        estimator = TfPoseEstimator
        poses = [] of poses from TfPoseEstimator from the current frame
        faces = [] positions of faces from the current frame
        save_faces_to = None if disabled, a string for an existing dir if enabled
    """

    def __init__(self):
        self.frame_count = 0
        self.scan_every_n_frames = 120
        self.max_face_scans = 5
        self.maximum_difference_to_match = 0.08
        self.names = {}
        self.people = {}
        self.estimator = TfPoseEstimator(get_graph_path("mobilenet_thin"),
                                         target_size=(432, 368))
        self.encodings = {}
        self.save_faces_to = None

    def load_encodings(self, filepath):
        self.encodings = pickle.loads(open(filepath, "rb").read())

    def save_encodings(self, filepath):
        encoding_file = open(filepath, "wb")
        encoding_file.write(pickle.dumps(self.encodings))
        encoding_file.close()

    def create_encodings(self, faces_directory):
        facedirs = [filename for filename in os.listdir(faces_directory)
                    if not isfile(join(faces_directory, filename))]
        faces = dict.fromkeys(facedirs, [])
        encodings = {}
        for name in facedirs:
            faces[name] = []
            for filepath in [filename for filename in os.listdir(join(faces_directory, name))
                             if isfile(join(faces_directory, name, filename))]:
                faces[name].append(join(faces_directory, name, filepath))
        for name in faces:
            encodings[name] = []
            for filepath in faces[name]:
                try:
                    image = cv2.imread(filepath)
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    encoding = face_recognition.face_encodings(
                        image, [(0, 0, image.shape[0], image.shape[1])])
                    encodings[name].append(encoding[0])
                except:
                    continue
        self.encodings = encodings

    def get_pose(self, image):
        w = 432
        h = 368
        return self.estimator.inference(image,
                                        resize_to_default=(w > 0 and h > 0),
                                        upsample_size=4.0)

    def draw_output(self, image, draw_body=True, draw_face=True, draw_label=True):
        if draw_body:
            poses = []
            for person in self.people:
                if person.is_visible:
                    poses.append(person.pose)
            TfPoseEstimator.draw_humans(image, poses, imgcopy=False)
        for person in self.people:
            if not self.people[person].is_visible:
                continue
            if draw_face:
                top, left, bottom, right = self.people[person].face
                top = math.floor(top * image.shape[0])
                bottom = math.floor(bottom * image.shape[0])
                left = math.floor(left * image.shape[1])
                right = math.floor(right * image.shape[1])
                cv2.rectangle(image, (left, top), (right, bottom), (0, 0, 255), 2, 0)
                if draw_label:
                    cv2.putText(image, self.people[person].id, (left, top),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255))
        return image

    def scan_face(self, image, person):
        if not person.is_visible:
            return
        top, left, bottom, right = person.face
        top = math.floor(top * image.shape[0])
        left = math.floor(left * image.shape[1])
        bottom = math.floor(bottom * image.shape[0])
        right = math.floor(right * image.shape[1])
        encoding = face_recognition.face_encodings(image, [(top, right, bottom, left)])
        if len(encoding) <= 0:
            return
        encoding = encoding[0]
        person.set_encoding(encoding)

    def compare_known_faces(self, person):
        if len(self.encodings) == 0:
            return
        name_key = []
        encodings = []
        counts = {}
        for name in self.encodings:
            results = face_recognition.compare_faces(self.encodings[name],
                                                     person.encodings[-1])
            match_count = results.count(True)
            counts[name] = match_count
            if match_count / len(results) >= 0.75:
                break
        biggest_match = max(counts, key=counts.get)
        if counts[biggest_match] <= 3:
            return None
        else:
            return biggest_match

    # Handed a frame to process for tracking
    def process_frame(self, image):
        self.frame_count += 1

        # 1 - Generate all the poses
        self.poses = self.get_pose(image)

        # 2 - Tick each person
        for person in self.people:
            self.people[person].tick()

        # 3 - See whether each pose belongs to someone already in our people,
        #     or to someone new who needs a new Person object
        new_people = []
        for pose in self.poses:
            handled = False
            for person in self.people:
                difference = self.people[person].distance_from_pose(pose)
                if difference < self.maximum_difference_to_match:
                    self.people[person].update(pose)
                    handled = True
                    break
            if handled:
                continue
            else:
                # Create a new person
                person = Person()
                person.update(pose)
                new_people.append(person)
        for person in new_people:
            self.people[person] = person

        # 4 - Now that we've generated the people, "tock" through all of them
        #     so their decay occurs
        for person in self.people:
            # scan the faces of all new people
            if person in new_people:
                self.scan_face(image, person)
            # scan everyone else's face at most every self.scan_every_n_frames
            if person.is_visible and person.last_face_scan % self.scan_every_n_frames == 0 \
                    and len(person.encodings) < self.max_face_scans:
                self.scan_face(image, person)
            if person.last_face_scan == 0:
                id = self.compare_known_faces(person)
                older_person = self.people.get(id)
                if older_person is not None:
                    self.people[id] = person  # re-key the existing entry to this person
                if id is not None:
                    person.id = id
                elif self.save_faces_to:
                    person.save_face(image, self.save_faces_to)
            self.people[person].tock()
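# A hedged driver loop for the tracker (the webcam index and the encodings
# path are placeholders; Person comes from the same project):
import cv2

tracker = Tracker()
tracker.load_encodings('./encodings.p')  # pre-built with create_encodings()

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    tracker.process_frame(frame)
    output = tracker.draw_output(frame.copy())
    cv2.imshow('tracker', output)
    if cv2.waitKey(1) == 27:  # Esc
        break
cap.release()
cv2.destroyAllWindows()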
logger.debug('cam read+')
cam = cv2.VideoCapture(args.camera)
frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))
# MJPG frames don't belong in an .mp4 container; use the mp4v fourcc instead
out = cv2.VideoWriter('video_out.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 30,
                      (frame_width, frame_height))
ret_val, image = cam.read()
logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))
while True:
    ret_val, image = cam.read()

    logger.debug('image process+')
    humans = e.inference(image,
                         resize_to_default=(w > 0 and h > 0),
                         upsample_size=args.resize_out_ratio)

    logger.debug('postprocess+')
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

    logger.debug('show+')
    no_people = len(humans)
    print("no. of people: ", no_people)
    cv2.putText(image, "People: %d" % no_people, (10, 50),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
    cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)), (10, 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.imshow('tf-pose-estimation result', image)
    out.write(image)
    fps_time = time.time()
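# OpenCV silently produces an empty file when the fourcc and container don't
# match, so a sanity check right after constructing the writer is worth it.
# A minimal sketch; the filename, codec, and frame size are illustrative.
import cv2

out = cv2.VideoWriter('video_out.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 30,
                      (640, 480))
if not out.isOpened():
    raise RuntimeError('VideoWriter failed to open; try a different '
                       'fourcc/container pair, e.g. MJPG with .avi')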
else:
    e = TfPoseEstimator(get_graph_path("mobilenet_thin"),
                        target_size=(432, 368))

for is_variable, data_type in enumerate(data_types):
    files = os.listdir(path + data_type)
    files.sort()
    for image_name in files:
        t = time.time()
        split_image_name = image_name.split(".")
        if split_image_name[-1] != "jpg":
            continue
        index = int(split_image_name[0])
        img = common.read_imgfile(path + data_type + "/" + image_name, None,
                                  None)
        humans = e.inference(img, resize_to_default=False, upsample_size=4.0)
        for id, human in enumerate(humans):
            if id > 0:
                print("multiple humans in image: " + image_name)
            output_file.write(str(index) + ", " + str(is_variable))
            for value, body_part_name in enumerate(CocoPart):
                if display_images:
                    if value in human.body_parts:
                        body_part = human.body_parts[value]
                        print(str(value) + " " + body_part_name.name +
                              "- x: " + str(body_part.x) +
                              " y: " + str(body_part.y) +
                              " score: " + str(body_part.score))
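# The loop above relies on enumerate(CocoPart) lining up with the integer
# keys of human.body_parts. A minimal sketch that prints this COCO-18
# index-to-name mapping, assuming the tf_pose package is importable as in
# the scripts above.
from tf_pose.common import CocoPart

# body_parts dicts are keyed by these integer indices (0 = Nose, ...,
# 18 = Background), which is why enumerate(CocoPart) matches them up.
for value, part in enumerate(CocoPart):
    print(value, part.name)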
class Dance:
    def __init__(self, camera_num=0, w=432, h=368, model='mobilenet_thin',
                 resize_out_ratio=4.0):
        # SSH with drone
        self.ssh = None
        self.host_keys = '/Users/rshmyrev/.ssh/known_hosts'
        self.drone_ip = '192.168.1.31'
        self.drone_username = '******'
        self.drone_pass = '******'
        self.ssh_stdin = None
        self.ssh_stdout = None
        self.ssh_stderr = None
        self.init_ssh()

        # Stabilize
        self._send_dima_command('stab')
        logger.info('Drone stabilized')

        # Camera
        self.camera_num = camera_num
        self.cam = None
        self._init_cam()

        # Model
        self.model = model
        self.resize_out_ratio = resize_out_ratio
        self.w = w
        self.h = h
        self.e = None
        self._init_estimator()

        # Image, humans, body parts
        self.image = None
        self.humans = []
        self.human = None
        self.pose = None
        self.hand = {'right': None, 'left': None}
        self.elbow_angle = {'right': None, 'left': None}
        self.wrist_position = {'right': None, 'left': None}
        self.hand_direction = {'right': None, 'left': None}

        # Draw params
        self.time = time.time()
        self.text_params = (cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        self.draw_resize = 1

        # Cmd
        self.cmd = None
        self.prev_cmd = None
        self.stop = False

        # Poses dict
        self.poses = {
            1: {'desc': 'both hands up', 'cmd': 'forward'},
            2: {'desc': 'both hands down', 'cmd': 'land', 'stop': True},
            3: {'desc': 'both hands side', 'cmd': 'backward'},
            4: {'desc': 'right hand up, left hand side', 'cmd': 'left'},
            5: {'desc': 'left hand up, right hand side', 'cmd': 'right'},
            6: {'desc': 'right hand down, left hand side', 'cmd': 'go2',
                'stop': True},
            7: {'desc': 'left hand down, right hand side', 'cmd': 'up'},
        }

    def _init_cam(self):
        self.cam = cv2.VideoCapture(self.camera_num)
        self.cam.read()

    def _init_estimator(self):
        logger.debug('initialization %s : %s' %
                     (self.model, get_graph_path(self.model)))
        if self.w > 0 and self.h > 0:
            self.e = TfPoseEstimator(get_graph_path(self.model),
                                     target_size=(self.w, self.h))
        else:
            self.e = TfPoseEstimator(get_graph_path(self.model),
                                     target_size=(432, 368))

    def init_ssh(self):
        self.ssh = paramiko.SSHClient()
        self.ssh.load_host_keys(self.host_keys)
        self.ssh.connect(self.drone_ip, username=self.drone_username,
                         password=self.drone_pass)

    def reset_params(self):
        self.image = None
        self.humans = []
        self.human = None
        self.pose = None
        for hand_name in ('right', 'left'):
            self.hand[hand_name] = None
            self.elbow_angle[hand_name] = None
            self.wrist_position[hand_name] = None
            self.hand_direction[hand_name] = None
        self.ssh_stdin = None
        self.ssh_stdout = None
        self.ssh_stderr = None

    def get_humans(self):
        _, self.image = self.cam.read()
        logger.info('cam image={:d}x{:d}'.format(self.image.shape[1],
                                                 self.image.shape[0]))
        logger.debug('image process+')
        self.humans = self.e.inference(
            self.image,
            resize_to_default=(self.w > 0 and self.h > 0),
            upsample_size=self.resize_out_ratio)
        # logger.debug('Session: {}'.format(self.e.persistent_sess))
        logger.info('Count of humans: {}'.format(len(self.humans)))
        # logger.debug('Humans parts: {}'.format(self.humans[0]))

    def choose_best_human(self):
        """It's you"""
        if self.humans:
            self.human = self.humans[0]  # by Dima

    def destroy_all_humans(self):
        # TODO
        pass

    def get_hands(self):
        if not self.human:
            return
        # COCO indices 2-4 are one arm's shoulder/elbow/wrist, 5-7 the other
        self.hand['right'] = [(self.human.body_parts[i].x,
                               self.human.body_parts[i].y)
                              for i in range(2, 5)
                              if i in self.human.body_parts]
        self.hand['left'] = [(self.human.body_parts[i].x,
                              self.human.body_parts[i].y)
                             for i in range(5, 8)
                             if i in self.human.body_parts]
        if len(self.hand['right']) != 3:
            self.hand['right'] = None
        if len(self.hand['left']) != 3:
            self.hand['left'] = None

    @staticmethod
    def _elbow_angle(hand, vertical=True):
        """
        :param vertical:
        :param hand: 3 points (x1, x2, x3): x1 = shoulder, x2 = elbow,
            x3 = wrist
        :return: degrees for the x1-x2-x3 angle
        """
        x1, x2, x3 = hand
        if vertical:
            # measure relative to the vertical rather than the shoulder:
            # take the elbow point and shift it along the vertical axis
            x1 = (x2[0], x2[1] + 1)
        v1 = vector(x1, x2)  # upper arm
        v2 = vector(x2, x3)  # forearm
        angle = angle_between(v1, v2)
        # logger.debug('Angle in rads: %f' % angle)
        return np.degrees(angle)

    @staticmethod
    def _wrist_position(hand):
        x1, x2, x3 = hand
        if x3[1] < x1[1] and x3[1] < x2[1]:
            return 'up'
        elif x3[1] > x1[1] and x3[1] > x2[1]:
            return 'down'
        else:
            return ''

    def _hand_direction(self, hand_name):
        angle_gap = 25  # degrees
        angle = self.elbow_angle[hand_name]
        if angle <= angle_gap:
            return '90'
        elif 90 - angle_gap <= angle <= 90 + angle_gap:
            return '180'
        elif 180 - angle_gap <= angle:
            return '270'
        else:
            return ''

    def calculate_hands_direction(self):
        for hand_name in ('right', 'left'):
            if not self.hand[hand_name]:
                continue
            self.elbow_angle[hand_name] = self._elbow_angle(
                self.hand[hand_name])
            self.wrist_position[hand_name] = self._wrist_position(
                self.hand[hand_name])
            self.hand_direction[hand_name] = self._hand_direction(hand_name)
            logger.info(
                '{} hand. Angle: {}, wrist_direction: {}, hand_direction: {}'.
                format(hand_name, int(self.elbow_angle[hand_name]),
                       self.wrist_position[hand_name],
                       self.hand_direction[hand_name]))

    def calculate_pose(self):
        if (not self.hand_direction['right']
                or not self.hand_direction['left']):
            self.pose = None
            return
        right = self.hand_direction['right']
        left = self.hand_direction['left']
        if right == '90' and left == '90':
            self.pose = 1
        elif right == '270' and left == '270':
            self.pose = 2
        elif right == '180' and left == '180':
            self.pose = 3
        elif right == '90' and left == '180':
            self.pose = 4
        elif left == '90' and right == '180':
            self.pose = 5
        elif right == '270' and left == '180':
            self.pose = 6
        elif left == '270' and right == '180':
            self.pose = 7

    def send_pose2drone(self):
        if self.stop:  # the stop flag is raised
            return
        pose = self.poses[self.pose]
        cmd = pose['cmd']
        self.stop = pose.get('stop', False)
        if cmd == self.prev_cmd:  # don't resend the same command
            return
        self._send_dima_command(cmd)
        self.prev_cmd = cmd

    def infinite_loop(self):
        while True:
            self.reset_params()
            self.get_humans()
            if not self.humans:
                continue
            self.choose_best_human()
            if not self.human:
                continue
            self.get_hands()
            self.calculate_hands_direction()
            self.calculate_pose()

            # Draw
            image = TfPoseEstimator.draw_humans(self.image, [self.human],
                                                imgcopy=False)
            # resize
            image = cv2.resize(image, None, fx=self.draw_resize,
                               fy=self.draw_resize)
            # Draw angles and pose
            image = self._draw_angle(image)
            # image = self._draw_hand_direction(image)
            # image = self._draw_wrist_position(image)
            image = self._draw_pose(image)
            image = self._draw_cmd(image)
            image = self._draw_prev_cmd(image)
            # image = self._draw_fps(image)
            cv2.imshow('Result', image)

            if self.pose:
                logger.info('Pose {}, {} '.format(
                    self.pose, self.poses[self.pose]['desc']))
                self.send_pose2drone()

            if cv2.waitKey(1) == 27:
                break
        cv2.destroyAllWindows()

    def _draw_angle(self, npimg):
        image_h, image_w = npimg.shape[:2]
        for hand_name in ('right', 'left'):
            if not self.elbow_angle[hand_name]:
                continue
            center_point = self.hand[hand_name][1]
            center_on_image = (int(center_point[0] * image_w + 3.5),
                               int(center_point[1] * image_h + 0.5))
            cv2.putText(npimg,
                        "Angle: {}".format(int(self.elbow_angle[hand_name])),
                        center_on_image, *self.text_params)
        return npimg

    def _draw_hand_direction(self, npimg):
        image_h, image_w = npimg.shape[:2]
        for hand_name in ('right', 'left'):
            if not self.hand_direction[hand_name]:
                continue
            center_point = self.hand[hand_name][0]
            center_on_image = (int(center_point[0] * image_w + 3.5),
                               int(center_point[1] * image_h + 0.5))
            cv2.putText(
                npimg,
                "Direction: {}".format(self.hand_direction[hand_name]),
                center_on_image, *self.text_params)
        return npimg

    def _draw_wrist_position(self, npimg):
        image_h, image_w = npimg.shape[:2]
        for hand_name in ('right', 'left'):
            if not self.wrist_position[hand_name]:
                continue
            center_point = self.hand[hand_name][2]
            center_on_image = (int(center_point[0] * image_w + 3.5),
                               int(center_point[1] * image_h + 0.5))
            cv2.putText(
                npimg,
                "Wrist position: {}".format(self.wrist_position[hand_name]),
                center_on_image, *self.text_params)
        return npimg

    def _draw_pose(self, npimg):
        if self.pose:
            pose = self.poses[self.pose]
            cv2.putText(npimg, "Pose: %s" % pose['desc'], (10, 10),
                        *self.text_params)
        return npimg

    def _draw_cmd(self, npimg):
        if self.pose:
            pose = self.poses[self.pose]
            cv2.putText(npimg, "Command: %s" % pose['cmd'], (10, 30),
                        *self.text_params)
        return npimg

    def _draw_prev_cmd(self, npimg):
        if self.prev_cmd:
            cv2.putText(npimg, "Prev sent cmd: %s" % self.prev_cmd, (10, 50),
                        *self.text_params)
        return npimg

    def _draw_fps(self, npimg):
        cv2.putText(npimg, "FPS: %f" % (1.0 / (time.time() - self.time)),
                    (10, 10), *self.text_params)
        self.time = time.time()
        return npimg

    def _send_command(self, command):
        # 'source /opt/ros/kinetic/setup.bash'
        # 'source /home/pi/catkin_ws/devel/setup.bash'
        cmd = ('source /opt/ros/kinetic/setup.bash; '
               'source /home/pi/catkin_ws/devel/setup.bash; {}').format(
                   command)
        # self.ssh_stdin, self.ssh_stdout, self.ssh_stderr = self.ssh.exec_command(cmd)
        # if self.ssh_stdout:
        #     logger.info(self.ssh_stdout.read())
        # try not reading the remote std streams
        self.ssh.exec_command(cmd)

    def _send_ros_command(self, command, params):
        cmd = 'rosservice call /{} "{}"'.format(command, json.dumps(params))
        self._send_command(cmd)

    def _send_dima_command(self, filename):
        cmd = 'bash /home/pi/show/{}.sh'.format(filename)
        self._send_command(cmd)

    def get_telemetry(self, frame_id=''):
        command = "get_telemetry"
        params = {'frame_id': frame_id}
        self._send_ros_command(command, params)

    def navigate(self, x=0, y=0, z=0, speed=0.5, frame_id='aruco_map',
                 update_frame=True, auto_arm=True):
        command = "navigate"
        params = {
            'x': x,
            'y': y,
            'z': z,
            'speed': speed,
            'frame_id': frame_id,
            'update_frame': update_frame,
            'auto_arm': auto_arm,
        }
        self._send_ros_command(command, params)

    def square(self, z=1, speed=1, sleep_time=1, update_frame=False):
        # fly the four corners of a square and return to the start
        for x, y in ((1, 1), (1, 2), (2, 2), (2, 1), (1, 1)):
            self.navigate(x=x, y=y, z=z, speed=speed, frame_id='aruco_map',
                          update_frame=update_frame)
            sleep(sleep_time)
        self.land()

    def up(self, z=1, tolerance=0.2):
        """
        Go up by z metres
        :param z: height
        :param tolerance: accuracy of the height check (m)
        """
        start = self.get_telemetry()  # remember the starting point
        self.navigate(z=z, speed=0.5, frame_id='aruco_map',
                      auto_arm=True)  # climb z metres
        while True:  # wait for the climb to finish
            # check the current altitude against the target
            if abs(self.get_telemetry().z - (start.z + z)) < tolerance:
                break
            sleep(0.2)
        # ??? how to implement hovering

    def land(self):
        self._send_ros_command('land', params={})

    def cmd_test(self):
        self._send_dima_command('test')
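# _elbow_angle above leans on vector() and angle_between() helpers that are
# not shown in this file. A minimal sketch of what they could look like under
# that assumption (only the two names and call signatures come from the code
# above; the bodies are illustrative), followed by the obvious way to drive
# the class.
import numpy as np

def vector(p1, p2):
    # direction from point p1 to point p2, as a 2-D numpy vector
    return np.array([p2[0] - p1[0], p2[1] - p1[1]])

def angle_between(v1, v2):
    # unsigned angle between two vectors, in radians
    cos = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
    return np.arccos(np.clip(cos, -1.0, 1.0))

if __name__ == '__main__':
    dance = Dance(camera_num=0)
    try:
        dance.infinite_loop()
    except KeyboardInterrupt:
        dance.land()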
def start_game(config, params):
    cam = cv2.VideoCapture(0)
    ret, named_window = cam.read()

    # hp (health point) attributes
    hp_x = config.imWidth // 2 + 400
    hp_y = config.imHeight // 2 - 345
    hp_yy = config.imHeight // 2 - 300
    hp_w = 50
    hp_h = 42
    hp_image = cv2.imread('images/heart.png')
    score_img = cv2.imread('images/score.png')

    w = 432
    h = 368
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h),
                        trt_bool=str2bool("False"))

    while True:
        params["restart"] = False
        hp = 10
        cur_order = 0
        score = 0
        game_patterns = []
        for i in params["patterns"]:
            # [hit time, earliest match time, latest match time,
            #  matched flag, extra data, (joint, target) pairs...]
            entry = []
            time1 = i[0] - 3
            time2 = i[0] + 1
            entry.extend([i[0], time1, time2, False, i[10]])
            for j in range(1, 10):  # j = 1 ~ 9
                if i[j]:
                    entry.append((j - 1, i[j] - 1))
            game_patterns.append(entry)
        match_list = []  # sets to be checked for scoring; reset each frame
        start_time = time.time()
        play_music(params["song"], 0)

        while True:  # game play
            ret, named_window = cam.read()
            config.named_window = cv2.resize(named_window, dsize=(1312, 736),
                                             interpolation=cv2.INTER_AREA)
            config.named_window = cv2.flip(config.named_window, 1)
            print(named_window.shape)
            humans = e.inference(named_window,
                                 resize_to_default=(w > 0 and h > 0),
                                 upsample_size=4.0)
            if not humans:
                continue
            human = humans[0]

            image_h, image_w = config.named_window.shape[:2]
            centers = []
            for i in range(common.CocoPart.Background.value):
                if i not in human.body_parts.keys():
                    centers.append((0, 0))
                else:
                    body_part = human.body_parts[i]
                    # mirror x because the displayed frame is flipped
                    center = (image_w - int(body_part.x * image_w + 0.5),
                              int(body_part.y * image_h + 0.5))
                    centers.append(center)

            play_time = time.time() - start_time
            pattern = game_patterns[cur_order]
            if pattern[1] < play_time and pattern not in match_list:
                match_list.append(pattern)
                cur_order += 1
                if cur_order > len(game_patterns) - 1:
                    cur_order = len(game_patterns) - 1
            if match_list:
                match_list = match(config, match_list, centers, hp, play_time,
                                   score)
            if match_list and match_list[0][2] < play_time:
                # the pattern is still there, i.e. it expired unmatched
                hp -= 1
                del match_list[0]

            cv2.putText(config.named_window, 'score:',
                        (int(config.imWidth / 2 - 600),
                         int(config.imHeight / 2 - 300)),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7,
                        cv2.LINE_8)
            cv2.putText(config.named_window, '%d' % score,
                        (int(config.imWidth / 2 - 600),
                         int(config.imHeight / 2 - 250)),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7,
                        cv2.LINE_8)

            if cur_order == len(game_patterns):
                config.named_window = score_img
                clear_menu(params, score)
            if cv2.waitKey(1) & 0xFF == ord('p'):
                params["exit"] = True
            if hp <= 0 or play_time > game_patterns[-1][2] + 5:
                mixer.music.stop()
                death_menu(params)
            if params["exit"]:
                break
            if params["restart"]:
                break
            if params["menu"]:
                break

            for i in range(hp):
                if i < 5:
                    show_hp(config.named_window, hp_image, hp_x + i * hp_w,
                            hp_y, hp_w, hp_h)
                else:
                    show_hp(config.named_window, hp_image,
                            hp_x + (i - 5) * hp_w, hp_yy, hp_w, hp_h)
            cv2.imshow('McgBcg', config.named_window)

        if params["exit"]:
            break
        if params["menu"]:
            break
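# Each row of params["patterns"] is unpacked into a game_patterns entry as
# done above. A worked example of that transformation; the input row here is
# made up for illustration.
row = [10, 0, 2, 0, 0, 5, 0, 0, 0, 0, 'extra']  # hit at t=10s, joints 1 and 4

entry = [row[0], row[0] - 3, row[0] + 1, False, row[10]]
for j in range(1, 10):
    if row[j]:
        entry.append((j - 1, row[j] - 1))

# entry == [10, 7, 11, False, 'extra', (1, 1), (4, 4)]
print(entry)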
class SkeletonDetector(object):
    # This class is mainly copied from https://github.com/ildoonet/tf-pose-estimation

    def __init__(self, model="cmu", image_size="432x368"):
        ''' Arguments:
            model {str}: "cmu" or "mobilenet_thin".
            image_size {str}: resize input images before they are processed.
                Recommended: 432x368, 336x288, 304x240, 656x368
        '''
        # -- Check input
        assert (model in ["mobilenet_thin", "cmu"])
        self._w, self._h = _get_input_img_size_from_string(image_size)

        # -- Set up openpose model
        self._model = model
        # Resize heatmaps before they are post-processed. If image_size is
        # small, this should be large.
        self._resize_out_ratio = 4.0
        self._config = _set_config()
        self._tf_pose_estimator = TfPoseEstimator(
            get_graph_path(self._model),
            target_size=(self._w, self._h),
            tf_config=self._config)
        self._prev_t = time.time()
        self._cnt_image = 0

        # -- Set logger
        self._logger = _set_logger()

    def detect(self, image):
        ''' Detect human skeletons from an image.
        Arguments:
            image: RGB image with arbitrary size. It will be resized to
                (self._w, self._h).
        Returns:
            humans {list of class Human}:
                `class Human` is defined in
                "src/githubs/tf-pose-estimation/tf_pose/estimator.py"

                The variable `humans` is returned by the function
                `TfPoseEstimator.inference` which is defined in
                `src/githubs/tf-pose-estimation/tf_pose/estimator.py`.

                I've written a function `self.humans_to_skels_list` to
                extract the skeleton from this `class Human`.
        '''
        self._cnt_image += 1
        if self._cnt_image == 1:
            self._image_h = image.shape[0]
            self._image_w = image.shape[1]
            self._scale_h = 1.0 * self._image_h / self._image_w
        t = time.time()

        # Do inference
        humans = self._tf_pose_estimator.inference(
            image,
            resize_to_default=(self._w > 0 and self._h > 0),
            upsample_size=self._resize_out_ratio)

        # Print result and time cost
        elapsed = time.time() - t
        self._logger.info('inference image in %.4f seconds.' % (elapsed))

        return humans

    def draw(self, img_disp, humans):
        img_disp = TfPoseEstimator.draw_humans(img_disp, humans,
                                               imgcopy=False)
        if IS_DRAW_FPS:
            cv2.putText(img_disp,
                        "fps = {:.1f}".format(
                            (1.0 / (time.time() - self._prev_t))),
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            self._prev_t = time.time()

    def humans_to_skels_list(self, humans, scale_h=None):
        ''' Get skeleton data of (x, y * scale_h) from humans.
        Arguments:
            humans {a class returned by self.detect}
            scale_h {float}: scale each skeleton's y coordinate (height)
                value. Default: (image_height / image_width).
        Returns:
            skeletons {list of list}: a list of skeletons. Each skeleton is
                also a list with a length of 36 (18 joints * 2 coord values).
            scale_h {float}: the resultant height (y coordinate) range.
                The x coordinate is between [0, 1].
                The y coordinate is between [0, scale_h].
        '''
        if scale_h is None:
            scale_h = self._scale_h
        skeletons = []
        NaN = 0  # fill value for joints that were not detected
        for human in humans:
            skeleton = [NaN] * (18 * 2)
            for i, body_part in human.body_parts.items():  # iterate dict
                idx = body_part.part_idx
                skeleton[2 * idx] = body_part.x
                skeleton[2 * idx + 1] = body_part.y * scale_h
            skeletons.append(skeleton)
        return skeletons, scale_h
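# A minimal usage sketch for SkeletonDetector above. The image path is an
# illustrative assumption; everything else is the class's own API.
import cv2

detector = SkeletonDetector(model="mobilenet_thin", image_size="432x368")

image = cv2.imread("test.jpg")  # hypothetical input image
humans = detector.detect(image)
skeletons, scale_h = detector.humans_to_skels_list(humans)

# Each skeleton packs 18 joints as [x0, y0, x1, y1, ...]; undetected joints
# stay at 0.
for skeleton in skeletons:
    print(skeleton[:4])

detector.draw(image, humans)
cv2.imshow("skeletons", image)
cv2.waitKey()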
def get_front_sizes(human_height=HUMAN_HEIGHT):
    dlab = models.segmentation.deeplabv3_resnet101(pretrained=True).eval()
    sizeX, sizeY, minW, minH, maxW, maxH, rgb = segment(dlab,
                                                        '../data/6.jpeg',
                                                        show_orig=True)
    default_width = 432
    default_height = 368
    default_model = 'cmu'
    default_resize = 4.0
    img = common.read_imgfile('../data/new.png')
    e = TfPoseEstimator(get_graph_path(default_model),
                        target_size=(default_width, default_height))
    humans = e.inference(img, resize_to_default=False,
                         upsample_size=default_resize)
    body_parts = humans[0].body_parts
    parts_coordinates = {}

    hand_size_left = arm_size([2, 3, 4], minW, body_parts, sizeX, sizeY)
    hand_size_right = arm_size([5, 6, 7], maxW, body_parts, sizeX, sizeY)

    height_on_image = maxH - minH
    # metres of real height per pixel of silhouette height
    human_in_pix = human_height / height_on_image
    shoulders = height_in_pic((body_parts[2].y + body_parts[5].y) / 2, sizeY)
    hips = int(height_in_pic((body_parts[8].y + body_parts[11].y) / 2, sizeY))

    # waist select
    waist = int(shoulders + (hips - shoulders) / 3 * 2)
    print(waist)
    waist_left_bound = maxW
    waist_right_bound = 0
    waist_left_bound, waist_right_bound = get_size(rgb, waist,
                                                   waist_left_bound,
                                                   waist_right_bound)
    parts_coordinates[20] = Point(waist_left_bound, waist)
    parts_coordinates[21] = Point(waist_right_bound, waist)
    waist_level = human_in_pix * (waist - minH)
    waist = human_in_pix * (waist_right_bound - waist_left_bound)

    # chest select
    chest = int(shoulders + (hand_size_left + hand_size_right) / 10)
    chest_left_bound = maxW
    chest_right_bound = 0
    print(chest)
    chest_left_bound, chest_right_bound = get_size(rgb, chest,
                                                   chest_left_bound,
                                                   chest_right_bound)
    parts_coordinates[18] = Point(chest_left_bound, chest)
    parts_coordinates[19] = Point(chest_right_bound, chest)
    chest_level = human_in_pix * (chest - minH)
    chest = human_in_pix * (chest_right_bound - chest_left_bound)

    # hips select
    hips_left_bound = maxW
    hips_right_bound = 0
    hips_left_bound, hips_right_bound = get_size(rgb, hips, hips_left_bound,
                                                 hips_right_bound)
    parts_coordinates[23] = Point(hips_left_bound, hips)
    parts_coordinates[24] = Point(hips_right_bound, hips)
    hips_level = human_in_pix * (hips - minH)
    hips = human_in_pix * (hips_right_bound - hips_left_bound)

    # leg length
    leg_length = (height_in_pic(body_parts[10].y, sizeY) -
                  height_in_pic(body_parts[9].y, sizeY) +
                  height_in_pic(body_parts[13].y, sizeY) -
                  height_in_pic(body_parts[12].y, sizeY))
    leg = human_in_pix * leg_length

    return chest, hips, waist, leg, chest_level, hips_level, waist_level
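# All the conversions above hinge on the metres-per-pixel factor computed
# from the silhouette height. A worked example with made-up numbers
# (HUMAN_HEIGHT and the pixel bounds here are illustrative).
HUMAN_HEIGHT = 1.75   # assumed real height of the person, in metres
minH, maxH = 40, 740  # hypothetical top/bottom of the silhouette, in pixels

height_on_image = maxH - minH                      # 700 px of silhouette
metres_per_pixel = HUMAN_HEIGHT / height_on_image  # 0.0025 m per pixel

# A waist measured as 160 px wide on the image then corresponds to:
waist_px = 160
print(metres_per_pixel * waist_px)  # 0.4 m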