import glob

import cv2
import numpy as np
import errno
from DNN import DNN
from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path
from functions import draw_landmarks, convert_landscape_potrait, write_text_image
import matplotlib.pyplot as plt

model = "mobilenet_thin"
landmark_color = [0, 255, 0]
pose_classifier = DNN()
pose_classifier.load('/home/hrishi/workspace/repo/PoseEstimation2/model/pose_classifier.h5')
width = 640
height = 480
e = TfPoseEstimator(get_graph_path(model), target_size=(width, height))
landmarks_count = 18
required_landmarks_count = 8  # We only need 8 landmarks for our model
frame_counter = 0
frame_per_clip = 10
significant_frame_counter = 0
files = glob.glob("Sample/*")
estimator = TfPoseEstimator(get_graph_path(model), target_size=(width, height))
expected_landmarks = 18
frame_counter = 0
boundary_frames_seperator = 15
frame_interval = 1

for i in range(len(files)):
    file = files[i]
    boundary_frames = 0
    try:
class SkeletonDetector(object): # This class is mainly copied from https://github.com/ildoonet/tf-pose-estimation def __init__(self, model="cmu", image_size="432x368"): ''' Arguments: model {str}: "cmu" or "mobilenet_thin". image_size {str}: resize input images before they are processed. Recommends : 432x368, 336x288, 304x240, 656x368, ''' # -- Check input assert(model in ["mobilenet_thin", "cmu"]) self._w, self._h = _get_input_img_size_from_string(image_size) # -- Set up openpose model self._model = model self._resize_out_ratio = 4.0 # Resize heatmaps before they are post-processed. If image_size is small, this should be large. self._config = _set_config() self._tf_pose_estimator = TfPoseEstimator( get_graph_path(self._model), target_size=(self._w, self._h), tf_config=self._config) self._prev_t = time.time() self._cnt_image = 0 # -- Set logger self._logger = _set_logger() def detect(self, image): ''' Detect human skeleton from image. Arguments: image: RGB image with arbitrary size. It will be resized to (self._w, self._h). Returns: humans {list of class Human}: `class Human` is defined in "src/githubs/tf-pose-estimation/tf_pose/estimator.py" The variable `humans` is returned by the function `TfPoseEstimator.inference` which is defined in `src/githubs/tf-pose-estimation/tf_pose/estimator.py`. I've written a function `self.humans_to_skels_list` to extract the skeleton from this `class Human`. ''' self._cnt_image += 1 if self._cnt_image == 1: self._image_h = image.shape[0] self._image_w = image.shape[1] self._scale_h = 1.0 * self._image_h / self._image_w t = time.time() # Do inference humans = self._tf_pose_estimator.inference( image, resize_to_default=(self._w > 0 and self._h > 0), upsample_size=self._resize_out_ratio) # Print result and time cost elapsed = time.time() - t self._logger.info('inference image in %.4f seconds.' % (elapsed)) return humans def draw(self, img_disp, humans): ''' Draw human skeleton on img_disp inplace. Argument: img_disp {RGB image} humans {a class returned by self.detect} ''' img_disp = TfPoseEstimator.draw_humans(img_disp, humans, imgcopy=False) if IS_DRAW_FPS: cv2.putText(img_disp, "fps = {:.1f}".format( (1.0 / (time.time() - self._prev_t) )), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) self._prev_t = time.time() def humans_to_skels_list(self, humans, scale_h = None): ''' Get skeleton data of (x, y * scale_h) from humans. Arguments: humans {a class returned by self.detect} scale_h {float}: scale each skeleton's y coordinate (height) value. Default: (image_height / image_widht). Returns: skeletons {list of list}: a list of skeleton. Each skeleton is also a list with a length of 36 (18 joints * 2 coord values). scale_h {float}: The resultant height(y coordinate) range. The x coordinate is between [0, 1]. The y coordinate is between [0, scale_h] ''' if scale_h is None: scale_h = self._scale_h skeletons = [] NaN = 0 for human in humans: skeleton = [NaN]*(18*2) for i, body_part in human.body_parts.items(): # iterate dict idx = body_part.part_idx skeleton[2*idx]=body_part.x skeleton[2*idx+1]=body_part.y * scale_h skeletons.append(skeleton) return skeletons, scale_h
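# Minimal usage sketch for the SkeletonDetector class above (not from the original
# source: the image path, window handling, and output file name are placeholders).
if __name__ == "__main__":
    import cv2

    detector = SkeletonDetector(model="mobilenet_thin", image_size="432x368")
    bgr = cv2.imread("test.jpg")
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)  # detect() expects an RGB image

    humans = detector.detect(rgb)
    skeletons, scale_h = detector.humans_to_skels_list(humans)
    print("Detected %d skeletons, scale_h=%.3f" % (len(skeletons), scale_h))

    detector.draw(bgr, humans)  # draws skeletons (and FPS) on the frame in place
    cv2.imwrite("test_skeleton.jpg", bgr)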
import cv2
#from tf_pose import common
from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path, model_wh
from tracking_3 import tracking_function
from search_weld import search_weld

# Path of the input video
movie_file = 'test.mp4'
output_file = 'test_result7.mp4'
output_csv1 = "id_list_add_weld7.csv"

# Set up tf-pose
model = 'cmu'
w, h = model_wh('432x368')
if w == 0 or h == 0:
    e = TfPoseEstimator(get_graph_path(model), target_size=(432, 368))
else:
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))

# Load the target video
vc = cv2.VideoCapture(movie_file)

# Prepare the output writer
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
fps = int(vc.get(cv2.CAP_PROP_FPS))
size = (
    int(vc.get(cv2.CAP_PROP_FRAME_WIDTH)),
    int(vc.get(cv2.CAP_PROP_FRAME_HEIGHT))
)
vw = cv2.VideoWriter(output_file, fourcc, fps, size)
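# Illustrative per-frame loop for the setup above (a sketch, not the original
# processing code): it only runs pose inference and writes annotated frames.
# The real pipeline presumably also feeds results into tracking_function / search_weld.
frame_idx = 0
while vc.isOpened():
    ret, frame = vc.read()
    if not ret:
        break
    # Run pose estimation on the frame; upsample_size=4.0 follows the repo's common default.
    humans = e.inference(frame, resize_to_default=(w > 0 and h > 0), upsample_size=4.0)
    # Draw the detected skeletons in place and write the frame to the output video.
    frame = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False)
    vw.write(frame)
    frame_idx += 1

vc.release()
vw.release()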
parser.add_argument('--frame', type=float, default=1,
                    help='the frame percentage of total frame count')
parser.add_argument('--frameterm', type=int, default=1, help='frame term')
args = parser.parse_args()

movie = cv2.VideoCapture(args.movie)
# get total frame count
count = movie.get(cv2.CAP_PROP_FRAME_COUNT)

w, h = model_wh(args.resize)
if w == 0 or h == 0:
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368))
else:
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

# get 2d estimation result
dfs = pd.DataFrame(index=[])
columns = ['flame', 'human', 'point', 'x', 'y']

# per frame
for i in range(0, int(args.frame * count)):
    _, frame = movie.read()
    # only process every `frameterm`-th frame
    if i % int(args.frameterm) != 0:
        continue
"Neck x, Neck y, Neck score, " + "RShoulder x, RShoulder y, RShoulder score, " + "RElbow x, RElbow y, RElbow score, " + "RWrist x, RWrist y, RWrist score, " + "LShoulder x, LShoulder y, LShoulder score, " + "LElbow x, LElbow y, LElbow score, " + "LWrist x, LWrist y, LWrist score, " + "RHip x, RHip y, RHip score, " + "RKnee x, RKnee y, RKnee score, " + "RAnkle x, RAnkle y, RAnkle score, " + "LHip x, LHip y, LHip score, " + "LKnee x, LKnee y, LKnee score, " + "LAnkle x, LAnkle y, LAnkle score, " + "REye x, REye y, REye score, " + "LEye x, LEye y, LEye score, " + "REar x, REar y, REar score, " + "LEar x, LEar y, LEar score, " + "Background x, Background y, Background score\n") if FLIP: e = TfPoseEstimator(get_graph_path("mobilenet_thin"), target_size=(368, 432)) else: e = TfPoseEstimator(get_graph_path("mobilenet_thin"), target_size=(432, 368)) for is_variable, data_type in enumerate(data_types): files = os.listdir(path + data_type) files.sort() for image_name in files: t = time.time() split_image_name = image_name.split(".") if split_image_name[-1] != "jpg": continue index = int(image_name.split(".")[0]) img = common.read_imgfile(path + data_type + "/" + image_name, None, None) humans = e.inference(img,
type=bool, default=False, help='for debug purpose, if enabled, speed for inference is dropped.') parser.add_argument('--tensorrt', type=str, default="False", help='for tensorrt process.') args = parser.parse_args() logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model))) w, h = model_wh(args.resize) if w > 0 and h > 0: e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h), trt_bool=str2bool(args.tensorrt)) else: e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368), trt_bool=str2bool(args.tensorrt)) logger.debug('cam read+') cam = cv2.VideoCapture(args.video) ret_val, image = cam.read() logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0])) while True: ret_val, image = cam.read() logger.debug('image process+') humans = e.inference(image,
logger = logging.getLogger('TfPoseEstimatorRun') logger.handlers.clear() logger.setLevel(logging.DEBUG) ch = logging.StreamHandler() ch.setLevel(logging.DEBUG) formatter = logging.Formatter( '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s') ch.setFormatter(formatter) logger.addHandler(ch) if __name__ == '__main__': w, h = model_wh('432x368') if w == 0 or h == 0: e = TfPoseEstimator(get_graph_path( '/content/myWorkspace/tf-pose-estimation/models/graph/cmu'), target_size=(432, 368)) else: e = TfPoseEstimator(get_graph_path('cmu'), target_size=(w, h)) # estimate human poses from a single image ! image = common.read_imgfile('images/handsup1.jpg') t = time.time() r = 4.0 humans = e.inference(image, resize_to_default=(w > 0 and h > 0), upsample_size=r) elapsed = time.time() - t #logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))
class Terrain(object): def __init__(self): """ Initialize the graphics window and mesh surface """ # setup the view window self.app = QtGui.QApplication(sys.argv) self.window = gl.GLViewWidget() self.window.setWindowTitle('Terrain') self.window.setGeometry(0, 110, 1920, 1080) self.window.setCameraPosition(distance=30, elevation=12) self.window.show() gx = gl.GLGridItem() gy = gl.GLGridItem() gz = gl.GLGridItem() gx.rotate(90, 0, 1, 0) gy.rotate(90, 1, 0, 0) gx.translate(-10, 0, 0) gy.translate(0, -10, 0) gz.translate(0, 0, -10) self.window.addItem(gx) self.window.addItem(gy) self.window.addItem(gz) model = 'mobilenet_thin' camera = 0 self.lines = {} self.connection = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6], [0, 7], [7, 8], [8, 9], [9, 10], [8, 11], [11, 12], [12, 13], [8, 14], [14, 15], [15, 16]] w, h = model_wh('432x368') self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h)) self.cam = cv2.VideoCapture(camera) ret_val, image = self.cam.read() matlabfile = os.path.join(os.getcwd(), 'tf-pose-estimation', 'prob_model_params.mat') self.poseLifting = Prob3dPose(matlabfile) keypoints = self.mesh(image) self.points = gl.GLScatterPlotItem(pos=keypoints, color=pg.glColor((0, 255, 0)), size=15) self.window.addItem(self.points) for n, pts in enumerate(self.connection): self.lines[n] = gl.GLLinePlotItem(pos=np.array( [keypoints[p] for p in pts]), color=pg.glColor((0, 0, 255)), width=3, antialias=True) self.window.addItem(self.lines[n]) def mesh(self, image): image_h, image_w = image.shape[:2] width = 640 height = 480 pose_2d_mpiis = [] visibilities = [] humans = self.e.inference(image, upsample_size=4.0) for human in humans: pose_2d_mpii, visibility = common.MPIIPart.from_coco(human) pose_2d_mpiis.append([(int(x * width + 0.5), int(y * height + 0.5)) for x, y in pose_2d_mpii]) visibilities.append(visibility) pose_2d_mpiis = np.array(pose_2d_mpiis) visibilities = np.array(visibilities) transformed_pose2d, weights = self.poseLifting.transform_joints( pose_2d_mpiis, visibilities) pose_3d = self.poseLifting.compute_3d(transformed_pose2d, weights) keypoints = pose_3d[0].transpose() return keypoints / 80 def update(self): """ update the mesh and shift the noise each time """ ret_val, image = self.cam.read() try: keypoints = self.mesh(image) except AssertionError: print('body not in image') else: self.points.setData(pos=keypoints) for n, pts in enumerate(self.connection): self.lines[n].setData(pos=np.array([keypoints[p] for p in pts])) def start(self): """ get the graphics window open and setup """ if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'): QtGui.QApplication.instance().exec_() def animation(self, frametime=10): """ calls the update method to run in a loop """ timer = QtCore.QTimer() timer.timeout.connect(self.update) timer.start(frametime) self.start()
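# Short usage sketch for the Terrain class above (illustrative; it assumes a webcam
# at index 0 and the prob_model_params.mat path used in __init__ are available).
if __name__ == '__main__':
    t = Terrain()
    t.animation(frametime=10)  # refreshes the 3D pose via update() in a QTimer loop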
default='0x0', help='if provided, resize images before they are processed. ' 'default=0x0, Recommends : 432x368 or 656x368 or 1312x736 ') parser.add_argument( '--resize-out-ratio', type=float, default=4.0, help= 'if provided, resize heatmaps before they are post-processed. default=1.0' ) args = parser.parse_args() w, h = model_wh(args.resize) if w == 0 or h == 0: e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368)) else: e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h)) # estimate human poses from a single image ! image = common.read_imgfile(args.image, None, None) if image is None: logger.error('Image can not be read, path=%s' % args.image) sys.exit(-1) t = time.time() humans = e.inference(image, resize_to_default=(w > 0 and h > 0), upsample_size=args.resize_out_ratio) elapsed = time.time() - t
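# Sketch (not part of the original script): convert the normalized BodyPart
# coordinates returned by inference into pixel coordinates of the input image.
image_h, image_w = image.shape[:2]
for human_idx, human in enumerate(humans):
    for part_idx, body_part in human.body_parts.items():
        px = int(body_part.x * image_w + 0.5)
        py = int(body_part.y * image_h + 0.5)
        print("human %d part %d: (%d, %d) score=%.2f"
              % (human_idx, part_idx, px, py, body_part.score))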
if __name__ == '__main__': w = 432 h = 368 live_x_dict = {} live_y_dict = {} live_x_arr = [] live_y_arr = [] orig_x_dict = {} orig_y_dict = {} orig_x_arr = [] orig_y_arr = [] e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(432, 368), trt_bool=False) logger.debug('Image read 1 +') ############################################## LIVE ######################################## image1 = cv2.imread('INPUTS/input1.jpg') humans1 = e.inference(image1, resize_to_default=(w > 0 and h > 0), upsample_size=4.0) for human in humans1: dict = human.body_parts for k, v in dict.items(): live_x_dict[v.part_idx] = round(v.x, 2) live_y_dict[v.part_idx] = round(v.y, 2)
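# Sketch: once orig_x_dict / orig_y_dict have been filled from a reference image in
# the same way as the LIVE block above, the two poses can be compared joint by joint.
# The 0.1 threshold and the printout are illustrative assumptions, not from the source.
common_parts = set(live_x_dict) & set(orig_x_dict)
for idx in sorted(common_parts):
    dx = live_x_dict[idx] - orig_x_dict[idx]
    dy = live_y_dict[idx] - orig_y_dict[idx]
    dist = (dx ** 2 + dy ** 2) ** 0.5
    status = "OK" if dist < 0.1 else "OFF"
    print("part %2d: distance %.3f -> %s" % (idx, dist, status))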
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation run by folder')
    parser.add_argument('--folder', type=str, default='.\\images')
    parser.add_argument('--resolution', type=str, default='432x368',
                        help='network input resolution. default=432x368')
    parser.add_argument('--model', type=str, default='cmu',
                        help='cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small')
    parser.add_argument('--scales', type=str, default='[None]',
                        help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    parser.add_argument('--resize-out-ratio', type=float, default=4.0,
                        help='if provided, resize heatmaps before they are post-processed. default=1.0')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    ## Dataset preparation
    data_folder = '..\\traffic_pose\\src\\'
    poses = ['go_straight', 'park_right', 'stop', 'turn_right']
    resize_data(data_folder)

    ## Pose estimation
    #data_folder = '..\\traffic_pose\\%s_new\\' %(pose)
    print("___Extracting figures from source folder: %s___" % data_folder)
    pose_estimate(data_folder, args)

    # extract keypoints' x, y coordinates
    src = "../traffic_pose/keypoint_data/"
    data_reshape(src, poses)
### Loading Person Detector ### person_image_tensor, person_tensor_dict = load_tf_ssd_detection_graph( PATH_TO_PERSON_DETECTION, input_graph=None) main_sess = tf.Session() ### Loading the SVM Classifier for Face ID classification ### with open(CLASSIFIER_PATH_SVM, 'rb') as infile: (model, class_names) = pickle.load(infile) ### Loading the KNN Classifier for Face Recognition Classifier ### with open(CLASSIFIER_PATH_KNN, 'rb') as infile: knn_model = pickle.load(infile) ### Loading the TF Pose Estimator ### w, h = model_wh('432x368') e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h)) with main_sess.as_default(): ### Creating and Loading MTCNN ### pnet, rnet, onet = create_mtcnn(main_sess, None) ### Creating and Loading the Facenet Graph ### images_placeholder, embeddings, phase_train_placeholder = load_tf_facenet_graph( FACENET_MODEL_PATH) ### 0 here means start streaming video from webcam cap = cv2.VideoCapture(0) if cap.isOpened() is False: print("Error opening video stream or file") while cap.isOpened(): _, image = cap.read()
# break # except ValueError as e: # print("Please input a number.") n_workers = 2 print("Using 2 workers for thread pool.") futures_q = Queue(maxsize=n_workers) worker_mgr = None th_signal = threading.Event() process_th = None send_th = None exc_info = None exc_thrown = False estimator = TfPoseEstimator( get_graph_path("mobilenet_thin"), target_size=(432, 368), tf_config=tf.ConfigProto(log_device_placement=True)) w, h = model_wh("432x368") s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) HOST = '0.0.0.0' PORT = 8089 conn = None addr = None connected = False def wait_for_connection():
help='cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small') parser.add_argument( '--show-process', type=bool, default=False, help='for debug purpose, if enabled, speed for inference is dropped.') parser.add_argument('--showBG', type=bool, default=True, help='False to show skeleton only.') args = parser.parse_args() logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model))) w, h = model_wh(args.resolution) e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h)) cap = cv2.VideoCapture(args.video) if cap.isOpened() is False: print("Error opening video stream or file") i = 0 while cap.isOpened(): ret_val, image = cap.read() #humans = e.inference(image) humans = e.inference(image, resize_to_default=True, upsample_size=4.0) if not args.showBG: image = np.zeros(image.shape) image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
def abn():
    print("abn")
    speak.Speak('action detection started')

    # Name of the directory containing the object detection module we're using
    MODEL_NAME = 'inference_graph'

    # Grab path to current working directory
    CWD_PATH = os.getcwd()

    # Path to frozen detection graph .pb file, which contains the model that is used
    # for object detection.
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, 'frozen_inference_graph.pb')

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, 'training', 'labelmap.pbtxt')

    # Number of classes the object detector can identify
    NUM_CLASSES = 10

    ## Load the label map.
    # Label maps map indices to category names, so that when our convolution
    # network predicts `5`, we know that this corresponds to `king`.
    # Here we use internal utility functions, but anything that returns a
    # dictionary mapping integers to appropriate string labels would be fine
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    # Define input and output tensors (i.e. data) for the object detection classifier
    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Output tensors are the detection boxes, scores, and classes
    # Each box represents a part of the image where a particular object was detected
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')

    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Initialize webcam feed
    video = cv2.VideoCapture(0)
    ret = video.set(3, 1280)
    ret = video.set(4, 720)

    start_time = time.time()
    fps_time = time.time()  # initialize before the loop so the first FPS overlay works
    w, h = model_wh(args.resize)
    if w > 0 and h > 0:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    else:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(1280, 720))

    while (True):
        # Acquire frame and expand frame dimensions to have shape: [1, None, None, 3],
        # i.e. a single-column array, where each item in the column has the pixel RGB value
        ret, frame = video.read()
        frame_expanded = np.expand_dims(frame, axis=0)

        # Perform the actual detection by running the model with the image as input
        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: frame_expanded})

        # Draw the results of the detection (aka 'visualize the results')
        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.60)
        #print(np.squeeze(classes),np.squeeze(boxes))

        # pose
        humans = e.inference(frame, resize_to_default=(w > 0 and h > 0),
                             upsample_size=args.resize_out_ratio)
        image = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False)
        cv2.putText(frame, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        if (time.time() - start_time) > 10:
            break

        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('ProjectInt', frame)
        fps_time = time.time()

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break

    # Clean up
    video.release()
    cv2.destroyAllWindows()
def draw_image(image, humans):
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    imgDebug = Image.fromarray(img)
    return imgDebug
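# Illustrative use of draw_image above (a sketch: the estimator setup and the
# file names are placeholders, not from the original module).
from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path
import cv2

e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(432, 368))
frame = cv2.imread('input.jpg')                      # BGR frame from OpenCV
humans = e.inference(frame, resize_to_default=True, upsample_size=4.0)
debug_img = draw_image(frame, humans)                # returns a PIL.Image in RGB order
debug_img.save('output_skeleton.jpg')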
def analyze(self): args = parserSetup.parserSetup() w, h = model_wh(args.resize) e = model(w, h, args) pos = position() gripCheckFailed = 0 elbowCheckFailed = 0 #cam = cv2.VideoCapture("C:/Users/Eamonn/Programming/2020-ca400-template-repo/src/GymVisionDesktop/Videos/OverheadGood.mp4") cam = cv2.VideoCapture(args.camera) ret_val, image = cam.read() orange_color = (0, 140, 255) while True: ret_val, image = cam.read() humans = e.inference(image, resize_to_default=(w > 0 and h > 0), upsample_size=args.resize_out_ratio) pose = humans image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False) if len(pose) > 0: # distance calculations for human in humans: for i in range(len(humans)): try: pos.getPositions(human, image) if gripCheckFailed < 5: if not self.gripWidthCheck(human, image, pos): gripCheckFailed += 1 if gripCheckFailed == 5: cv2.putText(image, "Grip is too wide/too close", (5, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) else: cv2.putText(image, "Good grip width!", (5, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) if elbowCheckFailed < 5: if not self.elbowCheck(human, image, pos): elbowCheckFailed += 1 if elbowCheckFailed == 5: cv2.putText( image, "Starting position forearms not vertical", (5, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) else: cv2.putText(image, "Elbows and Wrists aligned, Good", (5, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) except Exception as exs: print(exs) pass cv2.imshow('tf-pose-estimation result', image) if cv2.waitKey(1) == 27: break cv2.destroyAllWindows()
def start_game(config, params): cam = cv2.VideoCapture(0) ret, named_window = cam.read() # hp(health point) attributes hp_x = config.imWidth // 2 + 400 hp_y = config.imHeight // 2 - 345 hp_yy = config.imHeight // 2 - 300 hp_w = 50 hp_h = 42 hp_image = cv2.imread('images/heart.png') score_img = cv2.imread('images/score.png') w = 432 h = 368 e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h), trt_bool=str2bool("False")) while True: params["restart"] = False hp = 10 cur_order = 0 score = 0 game_patterns = [] for i in params["patterns"]: list = [] time1 = i[0] - 3 time2 = i[0] + 1 list.extend([i[0], time1, time2, False, i[10]]) for j in range(1, 10): # j = 1 ~ 9 if i[j]: list.append(tuple([j - 1, i[j] - 1])) game_patterns.append(list) match_list = [] # sets to be checked for scoring; reset each frame start_time = time.time() play_music(params["song"], 0) while True: # game play ret, named_window = cam.read() config.named_window = cv2.resize(named_window, dsize=(1312, 736), interpolation=cv2.INTER_AREA) config.named_window = cv2.flip(config.named_window, 1) print(named_window.shape) humans = e.inference(named_window, resize_to_default=(w > 0 and h > 0), upsample_size=4.0) if not humans: continue human = humans[0] image_h, image_w = config.named_window.shape[:2] centers = [] for i in range(common.CocoPart.Background.value): if i not in human.body_parts.keys(): centers.append((0, 0)) else: body_part = human.body_parts[i] center = (image_w - int(body_part.x * image_w + 0.5), int(body_part.y * image_h + 0.5)) centers.append(center) play_time = time.time() - start_time pattern = game_patterns[cur_order] if game_patterns[cur_order][1] < play_time and game_patterns[ cur_order] not in match_list: match_list.append(game_patterns[cur_order]) cur_order += 1 if cur_order > len(game_patterns) - 1: cur_order = len(game_patterns) - 1 if match_list: match_list = match(config, match_list, centers, hp, play_time, score) if match_list and match_list[0][2] < play_time: # and 아직 있으면 hp -= 1 del match_list[0] cv2.putText(config.named_window, 'score:', (int(config.imWidth / 2 - 600), int(config.imHeight / 2 - 300)), cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7, cv2.LINE_8) cv2.putText(config.named_window, '%d' % score, (int(config.imWidth / 2 - 600), int(config.imHeight / 2 - 250)), cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7, cv2.LINE_8) if cur_order == len(game_patterns): config.named_window = score_img clear_menu(params, score) if cv2.waitKey(1) & 0xFF == ord('p'): params["exit"] = True if hp <= 0 or play_time > game_patterns[len(game_patterns) - 1][2] + 5: mixer.music.stop() death_menu(params) if params["exit"] == True: break if params["restart"] == True: break if params["menu"] == True: break for i in range(hp): if i < 5: show_hp(config.named_window, hp_image, hp_x + i * hp_w, hp_y, hp_w, hp_h) if i >= 5: show_hp(config.named_window, hp_image, hp_x + (i - 5) * hp_w, hp_yy, hp_w, hp_h) cv2.imshow('McgBcg', config.named_window) if params["exit"] == True: break if params["menu"] == True: break
class PoseEstimator(object): """ The PoseEstimator class manages all operations related to Pose Estimation. It acts as a wrapper on top of TfPoseEstimator implement inside openpose model. The class supplies human pose coorinates to requestor objects. """ resize_out_ratio = 4.0 # no of relevance, kept for sake of completeness def __init__(self, resize='0x0', model='mobilenet_thin'): self.humans = None # list of humans with pose info self.image = None self.bboxes = [] # list of bbox [x1, y1, x2, y2] """ Two available models are cmu & mobilenet_thin if running in CPU mode only, then mobilenet_thin is recommended. Default fetches mobilenet_thin """ self.model = model """ if resize value is provided, it will resize images before they are processed. default=0x0, Recommends: 432x368 or 656x368 or 1312x736 """ self.w, self.h = model_wh(resize) self.loadModel() def loadModel(self): """ Loads the cmu or mobilenet model in memory """ try: if self.w == 0 or self.h == 0: self.e = TfPoseEstimator(get_graph_path(self.model), target_size=(432, 368)) else: self.e = TfPoseEstimator(get_graph_path(self.model), target_size=(self.w, self.h)) except MemoryError: print("couldn't load model into memory...") def infer(self, image): """ calls the inference API inside tf_pose (openpose) returning the poses of humans and drawing the skeleton on image frame """ self.image = image if self.image is None: raise Exception('The image is not valid. check your image') self.humans = self.e.inference(self.image, resize_to_default=(self.w > 0 and self.h > 0), upsample_size=self.resize_out_ratio) self.image = TfPoseEstimator.draw_humans(self.image, self.humans, imgcopy=False) return self.image def getHumans(self): return self.humans def getImage(self): return self.image def _normalize_values(self, width, height): if self.w == 0: width = width * 432 else: width = width * self.w if self.h == 0: height = height * 368 else: height = height * self.h return width, height def getBboxes(self): return self.bboxes def getKeypoints(self): """ Returns a list of keypoints of all the persons in a frame keypt_list = [keypts1, keypts2] keypts = [x1, y1, score, ...] """ keypt_list = [] for human in self.humans: keypts = [] for key, values in human.body_parts.items(): # print (key, 'x val %.2f' % values.x, 'y val %.2f' % values.y) # print ('values.part_idx, values.uidx ', # values.part_idx, values.uidx) x, y = self._normalize_values(values.x, values.y) keypts.extend([x, y, values.score]) keypt_list.append(keypts) #print (keypt_list) return keypt_list def _updateBboxes(self): self.bboxes = [] for human in self.humans: min_x, min_y = math.inf, math.inf max_x, max_y = -1, -1 bbox = [min_x, min_y, max_x, max_y] for key, values in human.body_parts.items(): if values.x < min_x: min_x = values.x if values.y < min_y: min_y = values.y if values.x > max_x: max_x = values.x if values.y > max_y: max_y = values.y bbox = [min_x, min_y, max_x, max_y] self.bboxes.append(bbox) def showResults(self): """ utility method for debug purposes, not called from anywhere """ #print (self.humans) cv2.imshow('tf-pose-estimation result', self.image) cv2.waitKey(0) cv2.destroyAllWindows()
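# Minimal usage sketch of the PoseEstimator wrapper above (the webcam index and
# the display loop are illustrative, not part of the original class).
import cv2

pose_est = PoseEstimator(resize='432x368', model='mobilenet_thin')
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    annotated = pose_est.infer(frame)          # runs inference and draws skeletons
    keypoints = pose_est.getKeypoints()        # [x, y, score, ...] per person
    print("people in frame: %d" % len(keypoints))
    cv2.imshow('pose', annotated)
    if cv2.waitKey(1) == 27:                   # Esc to quit
        break
cap.release()
cv2.destroyAllWindows()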
# import sys # sys.path.insert(0, './action') # from tf_pose import common from tf_pose.estimator import TfPoseEstimator from tf_pose.networks import get_graph_path, model_wh w, h = 432, 368 parts = ['nose', 'neck', 'r_shoulder', 'r_elbow', 'r_wrist', 'l_shoulder', 'l_elbow', 'l_wrist', 'r_hip', 'r_knee', 'r_ankle', 'l_hip', 'l_knee', 'l_ankle', 'r_eye', 'l_eye', 'r_ear', 'l_ear'] gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.75, allow_growth=True) tf_config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False) e = TfPoseEstimator(get_graph_path("mobilenet_thin"), target_size=(w, h), tf_config=tf_config) def action_classifie(img): img_shape = img.shape humans = e.inference(img, resize_to_default=(w > 0 and h > 0), upsample_size=4.0) bodys_pos = [] for human in humans: temp = {} for i in range(len(parts)): if i not in human.body_parts.keys(): continue body_part = human.body_parts[i] temp[parts[i]] = body_part
#Number of the frames in the video: length = int(cap.get(cv.CAP_PROP_FRAME_COUNT)) #Check for the right model, change to its directory and import the required files: print("CHOOSING THE MODEL") if args.model == 'mobilenet_thin' or args.model == 'cmu' or args.model == 'mobilenet_fast' or args.model == 'mobilenet_accurate': sys.path.insert(0, '../tf-openpose') from tf_pose.estimator import TfPoseEstimator from tf_pose.networks import get_graph_path, model_wh logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model))) w, h = model_wh(args.resolution) #e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h)) if w == 0 or h == 0: e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368)) else: e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h)) elif args.model == 'caffe': start_load = time.time() sys.path.append( '/home/y17bendo/stage2018/Realtime_Multi-Person_Pose_Estimation/testing/python' ) import pylab as plt from PIL import Image import processing_image from processing_image import import_param, processing #Importing the parameters of the net, and loading the net: param, model, net = import_param() print("loading the net took : %f" % (time.time() - start_load))
import logging
import time
import ast

from tf_pose import common
import cv2
import numpy as np
from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path, model_wh
from tf_pose.lifting.prob_model import Prob3dPose
from tf_pose.lifting.draw import plot_pose

# Dataset
#import scipy.io
#dataset = scipy.io.loadmat('joints.mat')

# model
e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(432, 368))

# images
image = common.read_imgfile('./dataset_images/im0001.jpg', None, None)

# inference
t = time.time()
w = 0
h = 0
humans = e.inference(image, resize_to_default=(w > 0 and h > 0), upsample_size=1.0)
elapsed = time.time() - t
print('inference image: %s in %.4f seconds.' % ('im0001', elapsed))
#print(humans)
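# Sketch (illustrative): list each detected body part by its COCO name, using the
# humans returned above and tf_pose.common.CocoPart (imported via `from tf_pose import common`).
for human_idx, human in enumerate(humans):
    print("human #%d" % human_idx)
    for part_idx, body_part in human.body_parts.items():
        part_name = common.CocoPart(part_idx).name
        print("  %-10s x=%.3f y=%.3f score=%.2f"
              % (part_name, body_part.x, body_part.y, body_part.score))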
class Tracker: """ attributes: people = dict of people, key is their uuid names = dict of names, key is uuid frame_count = count of frames tracked scan_every_n_frames = # of frames between person scanning their face maximum_difference_to_match = the maximum average differnece of points to mark it as not the same pose estimator = TfPoseEstimator poses = [] of poses from TfPoseEstimator from current frame faces = [] positions of faces from current frame save_faces_to - None if disabled, a string for an existing dir if enabled """ def __init__(self): self.frame_count = 0 self.scan_every_n_frames = 120 self.max_face_scans = 5 self.maximum_difference_to_match = 0.08 self.names = {} self.people = {} self.estimator = TfPoseEstimator(get_graph_path("mobilenet_thin"), target_size=(432, 368)) self.encodings = {} self.save_faces_to = None def load_encodings(self, filepath): self.encodings = pickle.loads(open("./encodings.p", "rb").read()) def save_encodings(self, filepath): encoding_file = open(filepath, "wb") encoding_file.write(pickle.dumps(self.encodings)) encoding_file.close() def create_encodings(self, faces_directory): facedirs = [ filename for filename in os.listdir(faces_directory) if not isfile(join(faces_directory, filename)) ] faces = dict.fromkeys(facedirs, []) encodings = {} for name in facedirs: faces[name] = [] for filepath in [ filename for filename in os.listdir(join(faces_directory, name)) if isfile(join(faces_directory, name, filename)) ]: faces[name].append(join(faces_directory, name, filepath)) for name in faces: encodings[name] = [] for filepath in faces[name]: try: image = cv2.imread(filepath) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) encoding = face_recognition.face_encodings( image, [(0, 0, image.shape[0], image.shape[1])]) encodings[name].append(encoding[0]) except: continue self.encodings = encodings def get_pose(self, image): w = 432 h = 368 return self.estimator.inference(image, resize_to_default=(w > 0 and h > 0), upsample_size=4.0) def draw_output(self, image, draw_body=True, draw_face=True, draw_label=True): if draw_body: poses = [] for person in self.people: if person.is_visible: poses.append(person.pose) TfPoseEstimator.draw_humans(image, poses, imgcopy=False) for person in self.people: if not self.people[person].is_visible: continue if draw_face: top, left, bottom, right = self.people[person].face top = math.floor(top * image.shape[0]) bottom = math.floor(bottom * image.shape[0]) left = math.floor(left * image.shape[1]) right = math.floor(right * image.shape[1]) cv2.rectangle(image, (left, top), (right, bottom), (0, 0, 255), 2, 0) if draw_label: cv2.putText(image, self.people[person].id, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255)) return image def scan_face(self, image, person): if not person.is_visible: return top, left, bottom, right = person.face top = math.floor(top * image.shape[0]) left = math.floor(left * image.shape[1]) bottom = math.floor(bottom * image.shape[0]) right = math.floor(right * image.shape[1]) encoding = face_recognition.face_encodings( image, [(top, right, bottom, left)]) if len(encoding) <= 0: return encoding = encoding[0] person.set_encoding(encoding) def compare_known_faces(self, person): if len(list(self.encodings.keys())) is 0: return name_key = [] encodings = [] counts = {} for name in self.encodings: results = face_recognition.compare_faces(self.encodings[name], person.encodings[-1]) match_count = results.count(True) counts[name] = match_count if match_count / len(results) >= 0.75: break biggest_match = max(counts, 
key=counts.get) if (counts[biggest_match] <= 3): return None else: return biggest_match # Handed a frame to process for tracking def process_frame(self, image): self.frame_count += 1 #1 - Generate all the poses self.poses = self.get_pose(image) #3 - Tick each person for person in self.people: self.people[person].tick() #2 - see if the pose is someone we've seen in our people, # or if it's someone new to create a new person object for new_people = [] for pose in self.poses: handled = False for person in self.people: difference = self.people[person].distance_from_pose(pose) if difference < self.maximum_difference_to_match: self.people[person].update(pose) handled = True break if handled: continue else: #Create a new person person = Person() person.update(pose) new_people.append(person) for person in new_people: self.people[person] = person #4 - Now that we've generated the people, "tock" through all people # in order to have their decay occur for person in self.people: #scan the face of all new people if person in new_people: self.scan_face(image, person) #Scan the face of everyone else that hasnt been scanned for #self.scan_every_n_frames if person.is_visible and person.last_face_scan % self.scan_every_n_frames == 0 and len( person.encodings) < self.max_face_scans: self.scan_face(image, person) if person.last_face_scan == 0: id = self.compare_known_faces(person) older_person = self.people.get(id) if older_person is not None: person[id] = person if id is not None: person.id = id elif self.save_faces_to: person.save_face(image, self.save_faces_to) self.people[person].tock()
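# Illustrative driver for the Tracker class above (the paths and webcam index are
# placeholders; the face-image directory layout follows create_encodings' expectations).
if __name__ == "__main__":
    tracker = Tracker()
    tracker.create_encodings("./faces")      # one sub-directory of face images per person
    tracker.save_encodings("./encodings.p")

    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        tracker.process_frame(frame)
        frame = tracker.draw_output(frame)
        cv2.imshow("tracking", frame)
        if cv2.waitKey(1) == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()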
parser.add_argument('--resize', type=str, default='0x0', help='if provided, resize images before they are processed. default=0x0, Recommends : 432x368 or 656x368 or 1312x736 ') parser.add_argument('--resize-out-ratio', type=float, default=4.0, help='if provided, resize heatmaps before they are post-processed. default=1.0') parser.add_argument('--model', type=str, default='mobilenet_thin', help='cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small') parser.add_argument('--show-process', type=bool, default=False, help='for debug purpose, if enabled, speed for inference is dropped.') parser.add_argument('--tensorrt', type=str, default="False", help='for tensorrt process.') args = parser.parse_args() #args= {'resize':'432x368', 'model':'mobilenet_thin','camera':0,'resize_out_ratio':4.0,'video':"output.avi"} w, h = model_wh(args.resize) if w > 0 and h > 0: e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h)) else: e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368)) out = False cam = cv2.VideoCapture(args.camera) ret_val, image = cam.read() cam_store = cv2.VideoCapture(args.video) ret_val_store, image_store = cam_store.read() while cam_store.isOpened(): dist = True ret_val_store, image_store = cam_store.read() print("-------Next Pose Applied-------") while dist: ret_val, image = cam.read()
help='for debug purpose, if enabled, speed for inference is dropped.') parser.add_argument('--showBG', type=bool, default=True, help='False to show skeleton only.') parser.add_argument('--tensorrt', type=str, default="False", help='for tensorrt process.') args = parser.parse_args() logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model))) w, h = model_wh(args.resolution) e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h), trt_bool=str2bool(args.tensorrt)) cap = cv2.VideoCapture(args.video) fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') out_video = cv2.VideoWriter('/tmp/output.mp4', fourcc, cap.get(cv2.CAP_PROP_FPS), (640, 480)) count = 0 t_netfps_time = 0 t_fps_time = 0 if cap.isOpened() is False: print("Error opening video stream or file") try: while cap.isOpened(): fps_time = time.time()
def start_game(config, params):
    # Initialize the variables needed before entering the game
    cam = cv2.VideoCapture(0)
    ret, named_window = cam.read()

    # Silhouette matching: turn the camera on, (show a picture, and once the 4 points
    # are inside the target area) repeat 3 times, then leave the loop
    # sil = ["1.png", "2.png", "3.png"]  # something like this

    # Game start: ends with one of clear_menu, pause_menu or death_menu
    pause_img = cv2.imread('images/pause.png')
    score_img = cv2.imread('images/score.png')
    gameover_img = cv2.imread('images/gameover.png')

    # hp (health point) related variables
    hp_x = config.imWidth // 2 + 400
    hp_y = config.imHeight // 2 - 345
    hp_yy = config.imHeight // 2 - 300
    hp_w = 50
    hp_h = 42
    hp_image = cv2.imread('images/heart.png')

    w = 432
    h = 368
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h), trt_bool=str2bool("False"))

    global score

    while True:  # a restart comes back here (the silhouette step is not repeated)
        params["restart"] = False
        hp = 10  # lives before death (miss 10 times -> death_menu)
        cur_order = 0  # params
        score = 0
        game_patterns = []  # the list rebuilt from the loaded patterns

        # values loaded from the Excel sheet
        for i in params["patterns"]:  # e.g. i = [4.0, 0, 0, 3, 0, 0, 12, 0, 0, 0]
            list = []
            if i[10]:
                # i[10] is truthy when the pattern has a pose, so allow extra time to take the pose
                time1 = i[0] - 6.6
                time2 = i[0]
            else:
                # no pose -> give a time window in which a body point must touch the circle
                time1 = i[0] - 3
                time2 = i[0] + 1
            # instead of the single timestamp i[0], allow the range time1 ~ time2
            list.extend([i[0], time1, time2, False, i[10]])
            # for each of the 9 screen regions, append a (region, body part) tuple
            for j in range(1, 10):  # j = 1 ~ 9
                if i[j]:  # non-zero means a circle must appear
                    # the Excel sheet is offset by one because of the time column, hence j - 1 and i[j] - 1
                    list.append(tuple([j - 1, i[j] - 1]))
            game_patterns.append(list)
        # params["patterns"][0] = [4, 0, 0, 0, 3, 0, 0, 12, 0, 0, 0]
        # -> game_patterns[0] = [4.0, 3.5, 4.2, False, (2, 2), (5, 11)]  (region number, body part number)

        match_list = []  # the rules that fall inside the current time window and must be matched
        #a = input('Press...')
        start_time = time.time()
        resume_time = 0.0
        resume_start = 0.0
        play_music(params["song"], 0)

        while True:  # game play
            ret, named_window = cam.read()
            config.named_window = cv2.resize(named_window, dsize=(1312, 736), interpolation=cv2.INTER_AREA)
            config.named_window = cv2.flip(config.named_window, 1)
            print(named_window.shape)

            humans = e.inference(named_window, resize_to_default=(w > 0 and h > 0), upsample_size=4.0)  # 4 / 1 ??
            if not humans:
                continue
            human = humans[0]
            image_h, image_w = config.named_window.shape[:2]

            # Using the key points found by Human.add_pair (estimator.py, line 62),
            # store the pixel coordinates of every body part in the centers list
            # -> the key points from head to toe are shown on screen.
            centers = []
            for i in range(common.CocoPart.Background.value):  # 18 parts
                if i not in human.body_parts.keys():
                    centers.append((0, 0))
                else:
                    body_part = human.body_parts[i]
                    center = (image_w - int(body_part.x * image_w + 0.5), int(body_part.y * image_h + 0.5))
                    centers.append(center)
            # the person's keypoints are received and drawn on screen

            # silhouette
            play_time = time.time() - start_time  # measure the play time
            pattern = game_patterns[cur_order]

            # once a rule passes time1 and is not yet in match_list
            # (the second condition prevents duplicate appends while the first keeps holding);
            # game_patterns[cur_order][1] is the lower bound of the time window
            if game_patterns[cur_order][1] < play_time and game_patterns[cur_order] not in match_list:
                match_list.append(game_patterns[cur_order])
                # cur_pattern = Pattern()
                cur_order += 1
                if cur_order > len(game_patterns) - 1:
                    # when this holds the game is over -> freeze cur_order -> end the game
                    cur_order = len(game_patterns) - 1

            if match_list:
                # if match_list has at least one element, pass these arguments to match()
                # centers are resized and flipped; i = [4.0, 3.5, 4.2, F, 0 or PATH, (2, 3), (5, 12)]
                match_list = match(config, match_list, centers, hp, play_time)  # => see the match() function above

            if match_list and match_list[0][2] < play_time:
                # the rule is still there and its touch window has passed -> lose one life
                hp -= 1
                # it is always index 0, since finished rules are removed from match_list;
                # match_list.remove(game_patterns[cur_order]) would also work
                del match_list[0]

            cv2.putText(config.named_window, 'score:',
                        (int(config.imWidth / 2 - 600), int(config.imHeight / 2 - 300)),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7, cv2.LINE_8)
            # show the score in real time
            cv2.putText(config.named_window, '%d' % score,
                        (int(config.imWidth / 2 - 600), int(config.imHeight / 2 - 250)),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7, cv2.LINE_8)

            if cur_order == len(game_patterns):
                # the rebuilt pattern list has been played through -> show clear_menu
                config.named_window = score_img
                clear_menu(params, score)

            if cv2.waitKey(1) & 0xFF == ord('p'):
                params["exit"] = True

            if hp <= 0 or play_time > game_patterns[len(game_patterns) - 1][2] + 5:
                # 5 seconds after the touch window of the last pattern has closed
                mixer.music.stop()
                death_menu(params)  # death

            if params["exit"] == True:
                break
            if params["restart"] == True:  # restart the same game
                break
            if params["menu"] == True:
                break

            for i in range(hp):
                if i < 5:
                    # composite the hp icons onto the live window
                    show_hp(config.named_window, hp_image, hp_x + i * hp_w, hp_y, hp_w, hp_h)
                if i >= 5:
                    # second row of hearts
                    show_hp(config.named_window, hp_image, hp_x + (i - 5) * hp_w, hp_yy, hp_w, hp_h)

            cv2.imshow('McgBcg', config.named_window)  # image_h, image_w

        if params["exit"] == True:
            break
        if params["menu"] == True:
            break
if not image_topic: rospy.logerr('Parameter \'camera\' is not provided.') sys.exit(-1) try: w, h = model_wh(resolution) graph_path = get_graph_path(model) rospack = rospkg.RosPack() graph_path = os.path.join(rospack.get_path('tfpose_ros'), graph_path) except Exception as e: rospy.logerr('invalid model: %s, e=%s' % (model, e)) sys.exit(-1) pose_estimator = TfPoseEstimator(graph_path, target_size=(w, h)) cv_bridge = CvBridge() rospy.Subscriber(image_topic, Image, callback_image, queue_size=1, buff_size=2**24) pubs = { j: rospy.Publisher("/fyp/pose/{}".format(j), Point2D, queue_size=1) for j in PARTS } rospy.loginfo('start+') rospy.spin() rospy.loginfo('finished')
'--resize-out-ratio', type=float, default=4.0, help= 'if provided, resize heatmaps before they are post-processed. default=1.0' ) args = parser.parse_args() (sess, accuracy, pred, optimizer) = initialize_variables() print(pred) logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model))) #w, h = model_wh(args.resolution) #e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h)) w, h = model_wh(args.resize) if w > 0 and h > 0: e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h)) else: e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368)) cap = cv2.VideoCapture(args.video) frame_number = 0 sequence_arr = [] if cap.isOpened() is False: print("Error opening video stream or file") while cap.isOpened(): ret_val, image = cap.read() humans = e.inference(image, resize_to_default=(w > 0 and h > 0), upsample_size=args.resize_out_ratio) #humans = e.inference(image) #print("@@@@", humans) if not args.showBG:
class Detector(): def __init__(self, target_ip): self.CWD_PATH = os.getcwd() self.CWD_PATH = os.path.abspath(os.path.join(self.CWD_PATH, os.pardir)) self.CWD_PATH = os.path.join(self.CWD_PATH, '3_BRobot') # Path to frozen detection graph. This is the actual model that is used for the object detection. MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17' PATH_TO_CKPT = os.path.join(self.CWD_PATH, 'object_detection', MODEL_NAME, 'frozen_inference_graph.pb') # List of the strings that is used to add correct label for each box. PATH_TO_LABELS = os.path.join(self.CWD_PATH, 'object_detection', 'data', 'mscoco_label_map.pbtxt') NUM_CLASSES = 90 # Loading label map label_map = label_map_util.load_labelmap(PATH_TO_LABELS) categories = label_map_util.convert_label_map_to_categories( label_map, max_num_classes=NUM_CLASSES, use_display_name=True) self.category_index = label_map_util.create_category_index(categories) self.detection_graph = tf.Graph() with self.detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') self.right_clicks = [] # self.right_clicks = [[375, 41], [1000, 709]] # mouse callback function def mouse_callback(event, x, y, flags, params): #right-click event value is 2 if event == 2: if len(self.right_clicks) < 2: self.right_clicks.append([x, y]) else: self.right_clicks = [[x, y]] print(self.right_clicks) CAM_ID = 1 self.cam = cv2.VideoCapture(int(CAM_ID)) self.window_name = 'Cam' + str(CAM_ID) cv2.namedWindow(self.window_name) cv2.setMouseCallback(self.window_name, mouse_callback) self.prevTime = 0 self.window_size = (1312, 736) if self.cam.isOpened() == False: print('Can\'t open the CAM(%d)' % (CAM_ID)) exit() self.face_queue = Queue() self.gender_queue = Queue() self.age_queue = Queue() self.process_gender = Process(target=gender_estimate, args=(self.face_queue, self.gender_queue)) self.process_gender.start() self.process_age = Process(target=age_estimate, args=(self.face_queue, self.age_queue)) self.process_age.start() self.w = self.window_size[0] self.h = self.window_size[1] self.e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(self.w, self.h)) def detect_objects(self, image_np, sess, detection_graph, mot_tracker, img_to_color, face_detect, face_queue, gender_queue, age_queue): # Expand dimensions since the model expects images to have shape: [1, None, None, 3] image_np_expanded = np.expand_dims(image_np, axis=0) image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') # Each box represents a part of the image where a particular object was detected. boxes = detection_graph.get_tensor_by_name('detection_boxes:0') # Each score represent how level of confidence for each of the objects. # Score is shown on the result image, together with the class label. scores = detection_graph.get_tensor_by_name('detection_scores:0') classes = detection_graph.get_tensor_by_name('detection_classes:0') num_detections = detection_graph.get_tensor_by_name('num_detections:0') # Actual detection. 
(boxes, scores, classes, num_detections) = sess.run( [boxes, scores, classes, num_detections], feed_dict={image_tensor: image_np_expanded}) trackers = mot_tracker.update(boxes[0]) person_ids = [i for i, e in enumerate(classes[0]) if e == 1] person_attr = {'age': 'NA', 'gender': 'NA', 'color': 'NA'} if len(person_ids) > 0: selected_person_id = person_ids[0] person_box = boxes[0][selected_person_id] person_score = scores[0][selected_person_id] person_tracker = trackers[selected_person_id] if person_score > 0.6: def get_color(q, img): try: start_time = time.monotonic() c = img_to_color.get(img) q.put({"flag": "color", "value": c}) elapsed_time = time.monotonic() - start_time print("Color", elapsed_time) except: q.put({"flag": "color", "value": False}) def detect_face(q, img, face_detect, face_queue, gender_queue, age_queue): start_time = time.monotonic() # your code files = [] faces, face_files, rectangles, tgtdir = face_detect.run( img) face_queue.put([face_files, img, tgtdir]) face_queue.put([face_files, img, tgtdir]) person_gender = gender_queue.get() person_age = age_queue.get() print("gender rcvd", person_gender) print("Age rcvd", person_age) q.put({"flag": "gender", "value": person_gender}) q.put({"flag": "age", "value": person_age}) elapsed_time = time.monotonic() - start_time print("Age/Gender", elapsed_time) person_img = crop_img(image_np, person_box) q = Queue() procs = [] process_color = Process(target=get_color, args=( q, person_img, )) procs.append(process_color) process_face = Process(target=detect_face, args=(q, person_img, face_detect, face_queue, gender_queue, age_queue)) procs.append(process_face) for proc in procs: proc.start() results = [] for proc in procs: results.append(q.get()) results.append(q.get()) for proc in procs: proc.join() for result in results: person_attr[result['flag']] = result['value'] # print(person_attr) # override boxes boxes = np.expand_dims(person_box, axis=0) classes = [1] scores = np.expand_dims(person_score, axis=0) trackers = np.expand_dims(person_tracker, axis=0) person_attr = [person_attr] # Visualization of the results of a detection. 
vis_util.visualize_boxes_and_labels_on_image_array( image_np, boxes, classes, scores, trackers, person_attr, self.category_index, use_normalized_coordinates=True, line_thickness=3) return image_np, person_attr def detect_start(self): with self.detection_graph.as_default(): with tf.Session(graph=self.detection_graph) as sess: # Load modules mot_tracker = Sort() npz = np.load('./bin/color_extractor/color_names.npz') img_to_color = ImageToColor(npz['samples'], npz['labels']) face_detect = face_detection_model( 'dlib', './bin/age_gender/Model/shape_predictor_68_face_landmarks.dat' ) person_attr = False while (True): ret, frame = self.cam.read() # Detection if len(self.right_clicks) == 2: print(self.right_clicks) _y, _x, _d = frame.shape [_c1, _c2] = self.right_clicks crop_box = [ _c1[0] / _x, _c1[1] / _y, _c2[0] / _x, _c2[1] / _y, ] cropped_img = crop_img(frame, crop_box) try: image_process, person_attr = self.detect_objects( cropped_img, sess, self.detection_graph, mot_tracker, img_to_color, face_detect, self.face_queue, self.gender_queue, self.age_queue) print("####", person_attr) if isinstance(person_attr, list): if person_attr[0][ 'gender'] != 'NA' and person_attr[0][ 'gender'] != False: break else: if person_attr[ 'gender'] != 'NA' and person_attr[ 'gender'] != False: break except Exception as e: print(e) pass curTime = time.time() sec = curTime - self.prevTime self.prevTime = curTime fps = 1 / (sec) str1 = "FPS : %0.1f" % fps str2 = "Testing . . ." cv2.putText(frame, str1, (5, 20), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0)) cv2.putText(frame, str2, (100, 20), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0)) cv2.imshow(self.window_name, frame) if cv2.waitKey(1) & 0xFF == ord('q'): self.detect_stop() break # plt.figure(figsize=IMAGE_SIZE) # plt.imshow(image_process) # plt.show() if person_attr: return person_attr else: return False def detect_stop(self): self.cam.release() # cv2.destroyWindow(self.window_name) cv2.destroyAllWindows() # self.process_gender.join() # self.process_age.join() print("Detect Stop") return True def pose_start(self): print("Pose Start") result = False while (True): ret, frame = self.cam.read() cropped_img = None if len(self.right_clicks) == 2: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # print(self.right_clicks) _y, _x, _d = frame.shape [_c1, _c2] = self.right_clicks crop_box = [ _c1[0] / _x, _c1[1] / _y, _c2[0] / _x, _c2[1] / _y, ] cropped_img = crop_img(frame, crop_box) humans = self.e.inference(frame, resize_to_default=(self.w > 0 and self.h > 0), upsample_size=4.0) if len(humans) > 0: if 7 in humans[0].body_parts or 4 in humans[0].body_parts: print("Hands Detected") result = 1 break image = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False) cv2.imshow('tf-pose-estimation result', image) cv2.imshow(self.window_name, cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)) if cv2.waitKey(1) & 0xFF == ord('q'): break return result def hands_detect_start(self): print("Hands detection start") result = False im_width = 320 im_height = 180 self.cam.set(cv2.CAP_PROP_FRAME_WIDTH, im_width) self.cam.set(cv2.CAP_PROP_FRAME_HEIGHT, im_height) im_width, im_height = (self.cam.get(3), self.cam.get(4)) score_thresh = 0.2 # max number of hands we want to detect/track num_hands_detect = 2 while True: ret, frame = self.cam.read() cropped_img = None if len(self.right_clicks) == 2: # resized_frame = cv2.resize(frame, (im_width, im_height)) # print(self.right_clicks) _y, _x, _d = frame.shape [_c1, _c2] = self.right_clicks crop_box = [ _c1[0] / _x, _c1[1] / _y, _c2[0] / _x, _c2[1] / _y, ] cropped_img 
= crop_img(frame, crop_box)

            # actual detection
            boxes, scores = detector_utils.detect_objects(frame, self.hands_detection_graph, self.hands_detection_sess)

            # Check the position of the hands:
            # if they enter the handle region, return and start playing the game
            # break

            # draw bounding boxes
            detector_utils.draw_box_on_image(num_hands_detect, score_thresh, scores, boxes, im_width, im_height, frame)
            cv2.imshow('Hands Detection', frame)
            cv2.imshow(self.window_name, frame)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
        return result
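# Illustrative driver for the Detector class above (the target_ip value is a placeholder;
# the two right-clicks that define the crop region must be made in the camera window first).
if __name__ == '__main__':
    detector = Detector(target_ip='127.0.0.1')
    person_attr = detector.detect_start()      # runs until age/gender are resolved or 'q' is pressed
    print("detected person attributes:", person_attr)
    if detector.pose_start():                  # returns 1 once a wrist keypoint is detected
        print("hands detected, starting game")
    detector.detect_stop()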