                    help='False to show skeleton only.')
parser.add_argument('--savefile', type=str, required=True,
                    help='Output file name to save')
args = parser.parse_args()

logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
w, h = model_wh(args.resolution)
e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
cap = cv2.VideoCapture("/home/educon/data2/video/" + args.video)

if cap.isOpened() is False:
    print("Error opening video stream or file")
while cap.isOpened():
    tmp = [[] for _ in range(18 * 3)]
    ret_val, image = cap.read()
    if ret_val:
        humans = e.inference(image, upsample_size=4.0)
        if humans:
            # save the data generated by tf-pose: 18 body parts, (x, y, score) each
            for i in range(0, 18):
                if i not in humans[0].body_parts.keys():
                    tmp[3 * i] = 0
                    tmp[3 * i + 1] = 0
                    tmp[3 * i + 2] = 0
                else:  # body part detected
                    tmp[3 * i] = humans[0].body_parts[i].x
                    tmp[3 * i + 1] = humans[0].body_parts[i].y
                    tmp[3 * i + 2] = humans[0].body_parts[i].score
            data.append(tmp)
        if not args.showBG:
boundary_frames = 0
try:
    i = 0
    cap = cv2.VideoCapture(0)
    current_clip = np.zeros((frame_per_clip, required_landmarks_count, 2))
    while True:
        ret, frame = cap.read()
        if not ret:
            cap.release()
            break
        # frame = convert_landscape_potrait(frame)
        start_time = time.time()
        # frame = cv2.resize(frame, (640, 480))
        frame_counter += 1
        # check whether the boundary frames have been skipped and this is the
        # desired frame (e.g. every 3rd or 5th frame)
        humans = e.inference(frame, upsample_size=4.0)
        max_pro_human = 0
        if len(humans) == 0:
            print("No Human Detected")
        else:
            if len(humans) > 1:
                # more than one person: keep only the one with the highest
                # average body-part score
                max_avg_score = 0
                for h in range(len(humans)):
                    human = humans[h]
                    avg = 0
                    for hb in human.body_parts:
                        avg += human.body_parts[hb].score
                    avg = avg / len(human.body_parts)
                    print("Score%d\t%f" % (h, avg))
                    if avg > max_avg_score:
# parser.add_argument('--image', type=str, default='/Users/ildoonet/Downloads/me.jpg')
parser.add_argument('--image', type=str, default='./images/apink2.jpg')
# parser.add_argument('--model', type=str, default='mobilenet_320x240', help='cmu / mobilenet_320x240')
parser.add_argument('--model', type=str, default='mobilenet_thin_432x368',
                    help='cmu_640x480 / cmu_640x360 / mobilenet_thin_432x368')
parser.add_argument('--scales', type=str, default='[None]',
                    help='for multiple scales, e.g. [1.0, (1.1, 0.05)]')
args = parser.parse_args()
# scales = ast.literal_eval(scales)

w, h = model_wh(args.model)
e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

# estimate human poses from a single image!
image = common.read_imgfile(args.image, None, None)
# image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
t = time.time()
humans = e.inference(image, scales=[None])
# humans = e.inference(image, scales=[None, (0.7, 0.5, 1.8)])
# humans = e.inference(image, scales=[(1.2, 0.05)])
# humans = e.inference(image, scales=[(0.2, 0.2, 1.4)])
elapsed = time.time() - t
logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

image = cv2.imread(args.image, cv2.IMREAD_COLOR)
image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
cv2.imshow('tf-pose-estimation result', image)
cv2.waitKey()
# import sys
# sys.exit(0)
parser.add_argument('--resolution', type=str, default='432x368',
                    help='network input resolution. default=432x368')
parser.add_argument('--model', type=str, default='mobilenet_thin',
                    help='cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small')
parser.add_argument('--show-process', type=bool, default=False,
                    help='for debug purpose, if enabled, speed for inference is dropped.')
parser.add_argument('--showBG', type=bool, default=True,
                    help='False to show skeleton only.')
args = parser.parse_args()

logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
w, h = model_wh(args.resolution)
e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
cap = cv2.VideoCapture(args.video)
fps_time = 0

if cap.isOpened() is False:
    print("Error opening video stream or file")
while cap.isOpened():
    ret_val, image = cap.read()
    if not ret_val:  # end of stream
        break
    humans = e.inference(image)
    if not args.showBG:
        image = np.zeros(image.shape)
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.imshow('tf-pose-estimation result', image)
    fps_time = time.time()
    if cv2.waitKey(1) == 27:
        break
cv2.destroyAllWindows()
logger.debug('finished+')
            count = count + 1
        else:
            pose_reinit = True
    else:
        pose_reinit = True

    if frames_skiped >= 5:
        pose_reinit = True
    prevgray = gray_opt
    ch = 0xFF & cv.waitKey(1)
    if ch == 27:
        break
    if ch == ord('1') or pose_reinit:
        humans = e.inference(gray_pose, True, 2)
        # run_in_separate_process(e.inference, [e, gray_pose, False, 2])
        tr_points_temp = tr_points
        tr_points = get_pose_points(humans)
        if tr_points:
            for tr_idx in tr_points:
                # convert normalized coords to pixel coords: shape[:2] is
                # (height, width), so reversed gives (width, height)
                tr_points[tr_idx] = np.array(tr_points[tr_idx]) * tuple(reversed(gray_opt.shape[:2]))
            # p0 = tr_points
        else:
            tr_points = tr_points_temp
        frames_skiped = 0
        pose_reinit = False
def single(video_path, model='mobilenet_thin', verbose=10, model_instance=None):
    if model not in ['cmu', 'mobilenet_thin', 'mobilenet_v2_large', 'mobilenet_v2_small', None]:
        raise Exception(
            'Incompatible model chosen! Available models: cmu, mobilenet_thin, '
            'mobilenet_v2_large, mobilenet_v2_small')

    cap = cv2.VideoCapture(video_path)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    fps = cap.get(cv2.CAP_PROP_FPS)

    # future support for batch processing of videos using the same instance of TfPoseEstimator
    if model_instance is None:
        model_instance = TfPoseEstimator(get_graph_path(model),
                                         target_size=(result_width, result_height))

    points_per_frame = np.array([])
    frameN = 1
    if cap.isOpened() is False:
        raise Exception('Error opening file!')
    while cap.isOpened():
        ret, image = cap.read()
        if image is None:
            break
        image = resize_frame(image)
        humans = model_instance.inference(image, upsample_size=4.0)
        points = TfPoseEstimator.get_points(image, humans)
        # prepend the frame number to every keypoint row
        _zeros = np.full((points.shape[0], 1), frameN)
        _points = np.c_[_zeros, points]
        if _points.size != 0:
            if points_per_frame.size == 0:
                points_per_frame = _points
            else:
                points_per_frame = np.vstack((points_per_frame, _points))
        # else:
        #     _zeros = np.array([[0, 0, 0, 0, 'NA', 0]])
        #     _zeros[0, 0] = int(_zeros[0, 0]) + frameN
        #     print(_zeros)
        #     points_per_frame = np.vstack((points_per_frame, _zeros))

        # level of verbosity: 0 logs every frame, otherwise every `verbose` frames
        if verbose == 0:
            print(f"Frame number {frameN} processed")
        elif frameN % verbose == 0:
            print(f"Frame number {frameN} processed")
        frameN += 1
    cap.release()
    return points_per_frame
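# A minimal usage sketch for single(), assuming a local clip 'dance.mp4'
# (placeholder path) and that resize_frame, result_width and result_height are
# defined elsewhere in this module. Each returned row is
# [frame_number, <columns returned by TfPoseEstimator.get_points>].
import numpy as np

points = single('dance.mp4', model='mobilenet_thin', verbose=30)
np.savetxt('dance_keypoints.csv', points, delimiter=',', fmt='%s')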
class BodyDetect():
    def __init__(self, video_path=None, model='mobilenet_thin',
                 resolution='432x368', showBG=''):
        # model : 'cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small'
        # resolution : network input resolution. default=432x368
        # showBG : pass any non-empty string to show the skeleton only.
        self.camera = VideoCamera(video_path)
        # Initialize some variables
        self.w, self.h = model_wh(resolution)
        self.estimator = TfPoseEstimator(get_graph_path(model), target_size=(self.w, self.h))
        self.showBG = showBG
        self.time_checker = 0.0
        self.best_score = float("inf")  # lower is better, always positive
        self.reference = None  # human object
        self.last_frame = None

    def __del__(self):
        del self.camera

    def process_frame(self, frame):
        frame = frame.copy()
        humans = self.estimator.inference(frame, resize_to_default=True, upsample_size=4.0)
        if self.showBG:
            frame = np.zeros(frame.shape)
        # print("before", frame.shape[1], "x", frame.shape[0])
        frame = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False)
        # print("after", frame.shape[1], "x", frame.shape[0])
        return frame

    def eval_frame_with_reference(self, target_frame, reference_frame, time):
        humans = self.estimator.inference(target_frame, resize_to_default=True, upsample_size=4.0)
        current_score = TfPoseEstimator.get_pose_fit_score(humans, reference_frame)
        # print("current_score", current_score, "best_score", self.best_score, "check time", time)
        if current_score < self.best_score:
            self.best_score = current_score
            self.time_checker = time
        return time

    def get_jpg_bytes(self):
        # Grab a single frame of video
        frame = self.camera.get_frame()
        if frame is None:
            return
        frame = self.process_frame(frame)
        # We are using Motion JPEG, but OpenCV defaults to capturing raw images,
        # so we must encode the frame as JPEG in order to correctly display the
        # video stream.
        _, jpg = cv2.imencode('.jpg', frame)
        return jpg.tobytes()

    # Webcam version (pose estimation only)
    def bodyDetectVideo(self, clip):
        def fl(get_frame, t):
            frame = get_frame(t)
            return self.process_frame(frame)
        return clip.fl(fl)

    # Server processing version (for cut and merge).
    # clip: second video; reference: reference image for the cut.
    # Checks the first 10 seconds of the second video.
    def bodyDetectVideo_for_merge(self, clip):
        def fl(get_frame, t):
            frame = get_frame(t)
            if t < 10:
                self.eval_frame_with_reference(frame, self.reference, t)
            return frame
        return clip.fl(fl)

    # One frame to one human. Only one human, because we modified estimate_paf
    # in estimator.py.
    def frame_to_human(self, frame):
        humans = self.estimator.inference(frame, resize_to_default=True, upsample_size=4.0)
        if len(humans) != 1:  # we only handle the single-person case
            raise NotImplementedError
        return humans[0]

    # Helper: get the last frame of a clip
    def get_last_frame(self, clip):
        def fl(get_frame, t):
            frame = get_frame(t)
            self.last_frame = frame
            return frame
        return clip.fl(fl)
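# Since get_jpg_bytes() yields Motion-JPEG frames, one common way to serve them
# is a multipart HTTP response. A hedged Flask sketch (Flask, the route name,
# and VideoCamera(None) opening the webcam are assumptions, not part of the
# original code):
from flask import Flask, Response

app = Flask(__name__)
detector = BodyDetect()  # video_path=None: assumed to open the webcam


def mjpeg_generator():
    # yield one multipart chunk per processed frame
    while True:
        jpg = detector.get_jpg_bytes()
        if jpg is None:
            break
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + jpg + b'\r\n')


@app.route('/video_feed')
def video_feed():
    return Response(mjpeg_generator(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')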
def run_processing(camera=0, resize='0x0', resize_out_ratio=4.0,
                   model="mobilenet_thin", show_process=False, remote_server=''):
    #################################
    # DECLARE VARIABLES #############
    #################################
    frames_sent_per_sec = time.time()
    fps_time = 0
    pose = "None"
    logger.debug('initialization %s : %s' % (model, get_graph_path(model)))
    w, h = model_wh(resize)
    t = threading.currentThread()
    global tello
    try:
        tello = Tello("192.168.10.3", 8888, imperial=False, command_timeout=0.3)
    except Exception as e:
        print("Exception Occurred: {}".format(traceback.format_exc()))
        if isinstance(tello, Tello):
            del tello

    if remote_server != '':
        # 2 threads in total: 1 for sending, 1 for receiving
        clientsocket = None
        while clientsocket is None:
            try:
                serverip = remote_server.split(":")[0]
                port = remote_server.split(":")[1]
                clientsocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                clientsocket.connect((serverip, int(port)))
                print("Connected to {}:{}".format(serverip, port))
                time.sleep(2)
            except socket.error:
                # keep retrying until the server accepts the connection
                print("Connection error with server, trying again...")
                clientsocket = None
                time.sleep(1)
        try:
            # start the receiving thread
            th = threading.Thread(target=recv_thread, args=(clientsocket, ))
            th.do_run = True
            th.daemon = True  # let the main thread exit even if this thread is running
            th.start()
        except Exception:
            raise RuntimeError("Error starting thread, trying again...")
    else:
        if w > 0 and h > 0:
            e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        else:
            e = TfPoseEstimator(get_graph_path(model), target_size=(432, 368))

    ###########################
    # START CAMERA ############
    ###########################
    cam = cv2.VideoCapture(camera)
    # cam.set(cv2.CAP_PROP_FPS, 3)
    ret_val, image = cam.read()
    logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))

    while getattr(t, "do_run", True):
        ####################################################
        # START CAMERA STREAM AND DRAW THE SKELETONS #######
        ####################################################
        ret_val, image = cam.read()
        image = cv2.flip(image, 1)
        # raw_image = copy.deepcopy(image)
        if remote_server != '':
            # send the frame to the remote server
            try:
                data = pickle.dumps(image)
                clientsocket.sendall(struct.pack("<L", len(data)) + data)
                processed_fifo.put(image, block=True)
                print("frames sent per second %s" % (1.0 / (time.time() - frames_sent_per_sec)))
            except ZeroDivisionError:
                continue
            except Exception as e:
                print("Exception occurred: {}".format(traceback.format_exc()))
                clientsocket.shutdown(2)
                clientsocket.close()
                with processed_fifo.mutex:
                    processed_fifo.queue.clear()
                break
            delay_time = 1000 / (1.0 / (time.time() - frames_sent_per_sec))  # 1000 / delay_time = fps
            frames_sent_per_sec = time.time()
            cv2.waitKey(int(math.ceil(delay_time)))
            continue

        # todo: send image to cpu instance on aws
        logger.debug('image process+')
        humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                             upsample_size=resize_out_ratio)
        logger.debug('postprocess+')
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
        # image = cv2.resize(image, (2 * w, 2 * h), interpolation=cv2.INTER_LINEAR)
        if len(humans) > 0:
            recognise_poses(image, humans)
        cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        fps_time = time.time()
        cv2.imshow('tf-pose-estimation result', image)

        if cv2.waitKey(1) == 115:  # 's': start the recognition
            pass
        if cv2.waitKey(1) == 27:  # Esc: quit
            break
    logger.debug('finished+')

    # exit the receiving thread (only exists in remote mode)
    if remote_server != '':
        if th.do_run:
            th.do_run = False
            th.join()
        with processed_fifo.mutex:
            processed_fifo.queue.clear()
        clientsocket.shutdown(2)
        clientsocket.close()
    cv2.destroyAllWindows()
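# The sender above length-prefixes each pickled frame with
# struct.pack("<L", len(data)). A matching server-side receive loop might look
# like the hedged sketch below (the socket setup and port are assumptions):
import pickle
import socket
import struct

HEADER = struct.calcsize("<L")  # 4-byte little-endian length prefix

server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind(('0.0.0.0', 5000))  # placeholder port
server.listen(1)
conn, addr = server.accept()

buf = b''
while True:
    # read the length prefix
    while len(buf) < HEADER:
        chunk = conn.recv(4096)
        if not chunk:
            raise ConnectionError('sender closed the connection')
        buf += chunk
    frame_len = struct.unpack("<L", buf[:HEADER])[0]
    buf = buf[HEADER:]
    # read the frame payload
    while len(buf) < frame_len:
        buf += conn.recv(4096)
    frame = pickle.loads(buf[:frame_len])  # the image the client sent
    buf = buf[frame_len:]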
class Detector():
    def __init__(self, target_ip):
        self.CWD_PATH = os.getcwd()
        self.CWD_PATH = os.path.abspath(os.path.join(self.CWD_PATH, os.pardir))
        self.CWD_PATH = os.path.join(self.CWD_PATH, '3_BRobot')

        # Path to frozen detection graph. This is the actual model used for object detection.
        MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
        PATH_TO_CKPT = os.path.join(self.CWD_PATH, 'object_detection', MODEL_NAME,
                                    'frozen_inference_graph.pb')
        # List of strings used to add the correct label for each box.
        PATH_TO_LABELS = os.path.join(self.CWD_PATH, 'object_detection', 'data',
                                      'mscoco_label_map.pbtxt')
        NUM_CLASSES = 90

        # Loading label map
        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        self.category_index = label_map_util.create_category_index(categories)

        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        self.right_clicks = []
        # self.right_clicks = [[375, 41], [1000, 709]]

        # mouse callback function
        def mouse_callback(event, x, y, flags, params):
            # right-click event value is 2
            if event == 2:
                if len(self.right_clicks) < 2:
                    self.right_clicks.append([x, y])
                else:
                    self.right_clicks = [[x, y]]
                print(self.right_clicks)

        CAM_ID = 1
        self.cam = cv2.VideoCapture(int(CAM_ID))
        self.window_name = 'Cam' + str(CAM_ID)
        cv2.namedWindow(self.window_name)
        cv2.setMouseCallback(self.window_name, mouse_callback)
        self.prevTime = 0
        self.window_size = (1312, 736)
        if self.cam.isOpened() == False:
            print('Can\'t open the CAM(%d)' % (CAM_ID))
            exit()

        self.face_queue = Queue()
        self.gender_queue = Queue()
        self.age_queue = Queue()
        self.process_gender = Process(target=gender_estimate,
                                      args=(self.face_queue, self.gender_queue))
        self.process_gender.start()
        self.process_age = Process(target=age_estimate,
                                   args=(self.face_queue, self.age_queue))
        self.process_age.start()

        self.w = self.window_size[0]
        self.h = self.window_size[1]
        self.e = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                                 target_size=(self.w, self.h))

    def detect_objects(self, image_np, sess, detection_graph, mot_tracker,
                       img_to_color, face_detect, face_queue, gender_queue, age_queue):
        # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
        image_np_expanded = np.expand_dims(image_np, axis=0)
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # The score is shown on the result image, together with the class label.
        scores = detection_graph.get_tensor_by_name('detection_scores:0')
        classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        # Actual detection.
        (boxes, scores, classes, num_detections) = sess.run(
            [boxes, scores, classes, num_detections],
            feed_dict={image_tensor: image_np_expanded})
        trackers = mot_tracker.update(boxes[0])

        person_ids = [i for i, e in enumerate(classes[0]) if e == 1]
        person_attr = {'age': 'NA', 'gender': 'NA', 'color': 'NA'}
        if len(person_ids) > 0:
            selected_person_id = person_ids[0]
            person_box = boxes[0][selected_person_id]
            person_score = scores[0][selected_person_id]
            person_tracker = trackers[selected_person_id]
            if person_score > 0.6:

                def get_color(q, img):
                    try:
                        start_time = time.monotonic()
                        c = img_to_color.get(img)
                        q.put({"flag": "color", "value": c})
                        elapsed_time = time.monotonic() - start_time
                        print("Color", elapsed_time)
                    except:
                        q.put({"flag": "color", "value": False})

                def detect_face(q, img, face_detect, face_queue, gender_queue, age_queue):
                    start_time = time.monotonic()
                    files = []
                    faces, face_files, rectangles, tgtdir = face_detect.run(img)
                    # put the faces twice: the gender process and the age process
                    # each consume one item from face_queue
                    face_queue.put([face_files, img, tgtdir])
                    face_queue.put([face_files, img, tgtdir])
                    person_gender = gender_queue.get()
                    person_age = age_queue.get()
                    print("gender rcvd", person_gender)
                    print("Age rcvd", person_age)
                    q.put({"flag": "gender", "value": person_gender})
                    q.put({"flag": "age", "value": person_age})
                    elapsed_time = time.monotonic() - start_time
                    print("Age/Gender", elapsed_time)

                person_img = crop_img(image_np, person_box)
                q = Queue()
                procs = []
                process_color = Process(target=get_color, args=(q, person_img))
                procs.append(process_color)
                process_face = Process(target=detect_face,
                                       args=(q, person_img, face_detect, face_queue,
                                             gender_queue, age_queue))
                procs.append(process_face)
                for proc in procs:
                    proc.start()
                results = []
                # three results arrive on q: one color, one gender and one age
                for _ in range(3):
                    results.append(q.get())
                for proc in procs:
                    proc.join()
                for result in results:
                    person_attr[result['flag']] = result['value']
                # print(person_attr)

                # override boxes so only the selected person is visualized
                boxes = np.expand_dims(person_box, axis=0)
                classes = [1]
                scores = np.expand_dims(person_score, axis=0)
                trackers = np.expand_dims(person_tracker, axis=0)
                person_attr = [person_attr]

        # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np, boxes, classes, scores, trackers, person_attr,
            self.category_index, use_normalized_coordinates=True, line_thickness=3)
        return image_np, person_attr

    def detect_start(self):
        with self.detection_graph.as_default():
            with tf.Session(graph=self.detection_graph) as sess:
                # Load modules
                mot_tracker = Sort()
                npz = np.load('./bin/color_extractor/color_names.npz')
                img_to_color = ImageToColor(npz['samples'], npz['labels'])
                face_detect = face_detection_model(
                    'dlib', './bin/age_gender/Model/shape_predictor_68_face_landmarks.dat')
                person_attr = False
                while True:
                    ret, frame = self.cam.read()
                    # Detection runs only once a crop region has been selected
                    # with two right clicks
                    if len(self.right_clicks) == 2:
                        print(self.right_clicks)
                        _y, _x, _d = frame.shape
                        [_c1, _c2] = self.right_clicks
                        crop_box = [
                            _c1[0] / _x,
                            _c1[1] / _y,
                            _c2[0] / _x,
                            _c2[1] / _y,
                        ]
                        cropped_img = crop_img(frame, crop_box)
                        try:
                            image_process, person_attr = self.detect_objects(
                                cropped_img, sess, self.detection_graph, mot_tracker,
                                img_to_color, face_detect, self.face_queue,
                                self.gender_queue, self.age_queue)
                            print("####", person_attr)
                            if isinstance(person_attr, list):
                                if (person_attr[0]['gender'] != 'NA'
                                        and person_attr[0]['gender'] != False):
                                    break
                            else:
                                if (person_attr['gender'] != 'NA'
                                        and person_attr['gender'] != False):
                                    break
                        except Exception as e:
                            print(e)

                    curTime = time.time()
                    sec = curTime - self.prevTime
                    self.prevTime = curTime
                    fps = 1 / sec
                    str1 = "FPS : %0.1f" % fps
                    str2 = "Testing . . ."
                    cv2.putText(frame, str1, (5, 20), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0))
                    cv2.putText(frame, str2, (100, 20), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0))
                    cv2.imshow(self.window_name, frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        self.detect_stop()
                        break
                # plt.figure(figsize=IMAGE_SIZE)
                # plt.imshow(image_process)
                # plt.show()
        if person_attr:
            return person_attr
        else:
            return False

    def detect_stop(self):
        self.cam.release()
        # cv2.destroyWindow(self.window_name)
        cv2.destroyAllWindows()
        # self.process_gender.join()
        # self.process_age.join()
        print("Detect Stop")
        return True

    def pose_start(self):
        print("Pose Start")
        result = False
        while True:
            ret, frame = self.cam.read()
            cropped_img = None
            if len(self.right_clicks) == 2:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # print(self.right_clicks)
                _y, _x, _d = frame.shape
                [_c1, _c2] = self.right_clicks
                crop_box = [
                    _c1[0] / _x,
                    _c1[1] / _y,
                    _c2[0] / _x,
                    _c2[1] / _y,
                ]
                cropped_img = crop_img(frame, crop_box)
                humans = self.e.inference(frame,
                                          resize_to_default=(self.w > 0 and self.h > 0),
                                          upsample_size=4.0)
                if len(humans) > 0:
                    # COCO part 4 is the right wrist, 7 the left wrist
                    if 7 in humans[0].body_parts or 4 in humans[0].body_parts:
                        print("Hands Detected")
                        result = 1
                        break
                image = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False)
                cv2.imshow('tf-pose-estimation result', image)
            cv2.imshow(self.window_name, cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        return result

    def hands_detect_start(self):
        print("Hands detection start")
        result = False
        im_width = 320
        im_height = 180
        self.cam.set(cv2.CAP_PROP_FRAME_WIDTH, im_width)
        self.cam.set(cv2.CAP_PROP_FRAME_HEIGHT, im_height)
        im_width, im_height = (self.cam.get(3), self.cam.get(4))
        score_thresh = 0.2
        # max number of hands we want to detect/track
        num_hands_detect = 2
        while True:
            ret, frame = self.cam.read()
            cropped_img = None
            if len(self.right_clicks) == 2:
                # resized_frame = cv2.resize(frame, (im_width, im_height))
                # print(self.right_clicks)
                _y, _x, _d = frame.shape
                [_c1, _c2] = self.right_clicks
                crop_box = [
                    _c1[0] / _x,
                    _c1[1] / _y,
                    _c2[0] / _x,
                    _c2[1] / _y,
                ]
                cropped_img = crop_img(frame, crop_box)
            # actual detection
            boxes, scores = detector_utils.detect_objects(
                frame, self.hands_detection_graph, self.hands_detection_sess)
            # Check the hand positions:
            # if the hands enter the steering-wheel area, return and start gameplay
            # break
            # draw bounding boxes
            detector_utils.draw_box_on_image(num_hands_detect, score_thresh, scores,
                                             boxes, im_width, im_height, frame)
            cv2.imshow('Hands Detection', frame)
            cv2.imshow(self.window_name, frame)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
        return result
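# A hedged usage sketch for the Detector class above; the call flow is inferred
# from the method names, and target_ip (unused in the shown code) is a
# placeholder:
detector = Detector(target_ip='192.168.0.10')  # placeholder IP
attributes = detector.detect_start()  # blocks until a person is attributed
print(attributes)  # e.g. {'age': ..., 'gender': ..., 'color': ...}
if attributes:
    detector.pose_start()  # returns 1 once a wrist keypoint (4 or 7) appears
detector.detect_stop()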
def get_keypoints(image, model='mobilenet_thin', display=False):
    cocopart_dict = {
        'Nose': 0, 'Neck': 1, 'RShoulder': 2, 'RElbow': 3, 'RWrist': 4,
        'LShoulder': 5, 'LElbow': 6, 'LWrist': 7, 'RHip': 8, 'RKnee': 9,
        'RAnkle': 10, 'LHip': 11, 'LKnee': 12, 'LAnkle': 13, 'REye': 14,
        'LEye': 15, 'REar': 16, 'LEar': 17
    }
    key_points = []

    # logger setup (kept for reference)
    '''
    logger = logging.getLogger('TfPoseEstimatorRun')
    logger.handlers.clear()
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    logger.propagate = False
    '''

    # needs image, model, resize, resize-out-ratio
    w, h = 432, 368  # image size fixed at 432x368
    upsample_size = 4.0  # default
    # not needed per-frame if you use video capture
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))

    # estimate human poses from a single image!
    image_path = image
    image = common.read_imgfile(image_path, None, None)
    if image is None:
        # logger.error('Image can not be read, path=%s' % args.image)
        print(f'Image {image_path} can not be read')
        sys.exit(-1)

    t = time.time()
    # upsample_size defaults to 4.0
    humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                         upsample_size=upsample_size)
    elapsed = time.time() - t

    # for key, value in cocopart_dict.items():
    #     key_points[key] = (humans[0].body_parts[value].x, humans[0].body_parts[value].y)
    for key, value in cocopart_dict.items():  # JS
        # If a keypoint was not tracked, its key is missing from body_parts,
        # so use try/except to substitute a sentinel value instead.
        try:
            key_points.append([
                humans[0].body_parts[value].x,
                humans[0].body_parts[value].y,
                humans[0].body_parts[value].score
            ])
        except:
            key_points.append([-1, -1, 0])
    # pprint.pprint(key_points)

    # display pose_estimation result
    if display == True:
        display_image(image, e)
    return key_points
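# A short usage sketch (the image path is a placeholder). The returned list is
# ordered by COCO part index, so it can be zipped back to the part names:
COCO_PARTS = ['Nose', 'Neck', 'RShoulder', 'RElbow', 'RWrist', 'LShoulder',
              'LElbow', 'LWrist', 'RHip', 'RKnee', 'RAnkle', 'LHip', 'LKnee',
              'LAnkle', 'REye', 'LEye', 'REar', 'LEar']

keypoints = get_keypoints('./images/apink2.jpg', model='mobilenet_thin')
for name, (x, y, score) in zip(COCO_PARTS, keypoints):
    if score > 0:  # [-1, -1, 0] marks an untracked part
        print(f'{name}: x={x:.3f} y={y:.3f} score={score:.2f}')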
empty_body_file_list = []
extra_body_file_list = []
for action_index, action in enumerate(input_action_type):
    direction_path = r"./input/" + action + r'/'
    files_grabbed = glob.glob(os.path.join(direction_path, '*.' + images_format))
    files_count = len(files_grabbed)
    all_file_count += files_count
    body_count = 0
    # if files_count == 0:
    #     logger.error("The directory has no image in type" + images_format + '!')
    #     sys.exit(-1)
    for i, file in enumerate(files_grabbed):
        image = common.read_imgfile(file, None, None)
        time_file_start = time.time()
        humans = e.inference(image, True, 4.0)
        time_file_elapsed = time.time() - time_file_start
        # logger.info('Processed image #%d/%d: %s in %.4f seconds.' % (i, i, file, time_file_elapsed))
        # A tripod in the image background may be recognized by openpose as a body
        for human in humans:
            body_dict = dict()
            human_split = str(human).split('BodyPart:')
            for bodypart in human_split:
                left_parenthesis = bodypart.find('(')
                if left_parenthesis != -1:
                    if left_parenthesis == 2:
                        bodypart_index = int(bodypart[left_parenthesis - 2:left_parenthesis - 1])
                    else:
                        bodypart_index = int(
    img = img[:290, :, :]
    if img.shape[1] > 300:
        img = img[:, :300, :]
    if img.shape[0] < 290:
        pad_0 = 290 - img.shape[0]
        img = np.pad(img, [(0, pad_0), (0, 0), (0, 0)], 'constant', constant_values=0)
    if img.shape[1] < 300:
        pad_1 = 300 - img.shape[1]
        img = np.pad(img, [(0, 0), (0, pad_1), (0, 0)], 'constant', constant_values=0)

    h = estimator.inference(img, resize_to_default=True, upsample_size=4.0)
    if len(h) == 0:
        mask.append(False)
        human = np.zeros([14, 2], dtype=np.float32)
        visibility = np.zeros([14], dtype=np.bool)
    else:
        if len(h) > 1:
            mask.append(False)
        else:
            mask.append(True)
        human, visibility = tf_pose.common.MPIIPart.from_coco(h[0])
        human = np.array(human)
        visibility = np.array(visibility)
        avg_location = np.mean(human[visibility], axis=0)
def main(args):
    if args.input is None:
        device = cv2.CAP_OPENNI2
    else:
        device = os.path.abspath(args.input)
    print(device)
    capture = cv2.VideoCapture(device)
    if args.input is None and not capture.isOpened():
        print("Capture device not opened")
        capture.release()
        return 1
    elif args.input is not None and not capture.isOpened():
        print("File not found")
        capture.release()
        return 1

    fig = plt.figure(figsize=(12, 6))

    # create pose estimator
    retval = capture.grab()
    _, color_im = capture.retrieve(0, cv2.CAP_OPENNI_BGR_IMAGE)
    image_size = color_im.shape
    if args.mode == 'openpose':
        pose_estimator2D = OpPoseEstimator(get_graph_path('cmu'),
                                           target_size=(image_size[1], image_size[0]))
        pose_lifter3D = Prob3dPose(PROB_MODEL_PATH)
    else:
        pose_estimator = LftdPoseEstimator(image_size, SESSION_PATH, PROB_MODEL_PATH)
        pose_estimator.initialise()

    n = 0
    while True:
        n += 1
        retval = capture.grab()
        if not retval:
            break
        retval, color_im = capture.retrieve(0, cv2.CAP_OPENNI_BGR_IMAGE)
        color_im = cv2.cvtColor(color_im, cv2.COLOR_BGR2RGB)

        if args.mode == 'openpose':
            # 2D estimation by OpenPose
            start_time_2D = time.perf_counter()
            estimated_pose_2d = pose_estimator2D.inference(
                color_im, resize_to_default=True, upsample_size=2.0)
            end_time_2D = time.perf_counter()
        else:
            # estimation by LFTD
            estimated_pose_2d, visibility, pose_3d = pose_estimator.estimate(color_im)

        if len(estimated_pose_2d) == 0:
            plt.subplot(1, 1, 1)
            plt.imshow(color_im)
            plt.pause(0.01)
            continue

        if not args.do_3d:
            pose_3d = []
            start_time_3D, end_time_3D = 0, 0
        elif args.mode == 'openpose':
            # lift the 2D joints to 3D
            pose_2d_mpii, visibility = to_mpii_pose_2d(estimated_pose_2d)
            start_time_3D = time.perf_counter()
            transformed_pose_2d, weights = pose_lifter3D.transform_joints(
                np.array(pose_2d_mpii), visibility)
            pose_3d = pose_lifter3D.compute_3d(transformed_pose_2d, weights)
            end_time_3D = time.perf_counter()

        if args.track_one:
            # follow the person whose mean 2D position is closest (L1 norm)
            # to the person tracked in the previous frame
            if 'prev' not in locals():
                prev = np.zeros(2)
            if args.mode == 'openpose':
                means = np.mean(transformed_pose_2d, axis=1)
            else:
                means = np.mean(estimated_pose_2d, axis=1)
            argmin = np.argmin(np.linalg.norm(means - prev, ord=1, axis=1))
            pose_3d = np.expand_dims(pose_3d[argmin], axis=0)
            prev = means[argmin]

        # Show 2D and 3D poses
        display_results(args.mode, color_im, estimated_pose_2d, visibility, pose_3d)
        if args.output:
            fig.savefig(os.path.join(os.path.abspath(args.output),
                                     'out{:09d}.png'.format(n)))
        if args.mode == 'openpose':
            print("OP - 2D: {}, 3D: {}".format(end_time_2D - start_time_2D,
                                               end_time_3D - start_time_3D))
        plt.pause(0.01)
        fig.clf()
    return retval
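# The --track_one branch keeps whichever person's mean joint position is
# nearest (L1 distance) to the person tracked in the previous frame. The same
# selection in isolation, as a standalone sketch with toy data:
import numpy as np

poses_2d = np.random.rand(3, 16, 2)  # toy data: 3 people, 16 joints, (x, y)
prev = np.array([0.5, 0.5])          # tracked person's mean position last frame

means = np.mean(poses_2d, axis=1)    # (3, 2): one mean position per person
argmin = np.argmin(np.linalg.norm(means - prev, ord=1, axis=1))
tracked = poses_2d[argmin]           # the person to keep tracking
prev = means[argmin]                 # carry forward to the next frame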
def extract_frames(csv_paths, video_paths, output_path):
    fps_time = 0
    BODY_PARTS = {
        "Nose": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
        "LShoulder": 5, "LElbow": 6, "LWrist": 7, "RHip": 8, "RKnee": 9,
        "RAnkle": 10, "LHip": 11, "LKnee": 12, "LAnkle": 13, "REye": 14,
        "LEye": 15, "REar": 16, "LEar": 17, "Background": 18
    }
    BODY_PARTS_INTEREST = {
        "Nose": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
        "LShoulder": 5, "LElbow": 6, "LWrist": 7, "REye": 14, "LEye": 15
    }

    # Set up the pose model
    w = 243
    h = 432
    showBG = True
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h))

    # Set up the output features table
    out_data = pd.DataFrame(columns=[
        'nose_x', 'nose_y', 'neck_x', 'neck_y', 'rshoulder_x', 'rshoulder_y',
        'relbow_x', 'relbow_y', 'rwrist_x', 'rwrist_y', 'lshoulder_x',
        'lshoulder_y', 'lelbow_x', 'lelbow_y', 'lwrist_x', 'lwrist_y',
        'reye_x', 'reye_y', 'leye_x', 'leye_y', 'label'
    ])

    # Open an output display window
    cv2.namedWindow('output', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('output', 576, 1024)

    # Extract features from every video
    for idx, (csv_path, video_path) in enumerate(zip(csv_paths, video_paths)):
        # Read label time ranges
        rawdata = pd.read_csv(csv_path, header=0)
        rawdata.columns = ['start_time', 'end_time', 'label']
        cap = cv2.VideoCapture(video_path)
        # cap.set(cv2.CAP_PROP_POS_MSEC, 350*1000)

        # Save a pose to csv every n frames
        counter = 0
        n = 5
        last_label = ''

        # Moving average filter settings; used for filtering out jitter of the pose net
        ma_length = 5                        # Length of the moving average
        part_weight = 1.0 / ma_length        # Weight of a part in the moving average
        ma_counter = 0
        bodypart_array_new = []              # Current set of body parts
        bodypart_array_ma = []               # Moving-average set of body parts
        ma_bodyparts_array = []              # Sets of body parts for computing the moving average
        bodypart_to_del_idx = ma_length - 1  # Index of the set of body parts to overwrite in ma_bodyparts_array

        # Initialize arrays
        for i in range(0, 10):
            bodypart_array_new.insert(i, BodyPart(0, 0, 0.5, 0.5, 0))
            bodypart_array_ma.insert(i, BodyPart(0, 0, 0, 0, 0))  # Must be all zeros
        for i in range(0, ma_length):
            ma_bodyparts_array.insert(i, bodypart_array_ma)
        bodypart_locations = np.zeros(2 * 10)

        if cap.isOpened() is False:
            print("Error opening video stream or file")
        while cap.isOpened():
            ret_val, image = cap.read()
            timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
            # if timestamp > 575:
            #     break
            label = rawdata['label'][(timestamp > rawdata['start_time'])
                                     & (timestamp < rawdata['end_time'])]
            try:
                humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                                     upsample_size=2)
            except:
                break
            if len(humans) > 0:
                humans = [humans[0]]
                # Extract the body parts of interest.
                # If a part is not detected, use the part's last (averaged) location.
                for idx, key in enumerate(BODY_PARTS_INTEREST):
                    try:
                        bodypart_array_new[idx] = humans[0].body_parts[BODY_PARTS[key]]
                    except:
                        bodypart_array_new[idx] = bodypart_array_ma[idx]

                # Perform the moving average
                if ma_counter == ma_length:
                    bodypart_array_delete = ma_bodyparts_array[bodypart_to_del_idx]
                    for idx, part in enumerate(bodypart_array_ma):
                        part.x = part.x + bodypart_array_new[idx].x * part_weight - \
                            bodypart_array_delete[idx].x * part_weight
                        part.y = part.y + bodypart_array_new[idx].y * part_weight - \
                            bodypart_array_delete[idx].y * part_weight
                        bodypart_array_ma[idx] = part
                    ma_bodyparts_array[bodypart_to_del_idx] = copy.deepcopy(bodypart_array_new)
                    bodypart_to_del_idx = (bodypart_to_del_idx - 1) % ma_length
                    bodypart_array = bodypart_array_ma
                else:
                    # We have not seen enough samples yet for the moving average;
                    # just use the current sample as the output of the filter
                    bodypart_array = bodypart_array_new
                    ma_counter = ma_counter + 1
                    ma_bodyparts_array[ma_length - ma_counter] = copy.deepcopy(bodypart_array_new)
                    for idx, part in enumerate(bodypart_array_ma):
                        part.x += bodypart_array_new[idx].x * part_weight
                        part.y += bodypart_array_new[idx].y * part_weight
                        bodypart_array_ma[idx] = part

                # Update the humans array with the moving-average data
                humans[0].body_parts = {}
                for idx, (key, part) in enumerate(zip(BODY_PARTS_INTEREST, bodypart_array)):
                    humans[0].body_parts[BODY_PARTS[key]] = part

                # Draw the skeleton (only the parts of interest)
                if not showBG:
                    image = np.zeros(image.shape)
                image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

                # Generate the feature vector: part locations relative to the neck
                neck = bodypart_array[BODY_PARTS['Neck']]
                for idx, part in enumerate(bodypart_array):
                    bodypart_locations[idx * 2] = part.x - neck.x
                    bodypart_locations[idx * 2 + 1] = part.y - neck.y

                # Add the label and append to the feature matrix
                if label.size != 0:
                    cv2.putText(image, "Label: " + str(label.values[0]), (10, 70),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                    # Check whether the current label has persisted over the past n frames
                    if last_label != label.values[0]:
                        counter = 0
                    else:
                        if counter == n:
                            print(label.values[0])
                            out_data.loc[len(out_data), 'nose_x':'leye_y'] = bodypart_locations
                            out_data.loc[len(out_data) - 1, 'label'] = label.values[0]
                            counter = 0
                        else:
                            if label.values[0] == 'steering':
                                # We have about twice as many steering labels,
                                # so count them at half weight
                                counter += 0.5
                            else:
                                counter += 1
                    last_label = label.values[0]

            cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)), (10, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('output', image)
            fps_time = time.time()
            if cv2.waitKey(1) == 27:
                break
        cap.release()

    # Save features to the output csv
    out_data.to_csv(output_path, index=False)
    cv2.destroyAllWindows()
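# The incremental add/subtract bookkeeping above can be hard to follow. The
# same moving-average smoothing, written as a hedged standalone sketch over
# plain (x, y) arrays with a deque, for comparison (not the code used above):
from collections import deque

import numpy as np


class KeypointSmoother:
    """Moving average over the last `length` keypoint sets."""

    def __init__(self, length=5):
        self.window = deque(maxlen=length)

    def update(self, keypoints):
        # keypoints: array of shape (n_parts, 2) holding (x, y) per part
        self.window.append(np.asarray(keypoints, dtype=float))
        return np.mean(self.window, axis=0)  # smoothed (n_parts, 2)


smoother = KeypointSmoother(length=5)
for _ in range(8):
    noisy = np.random.rand(10, 2)  # stand-in for one frame of pose output
    smooth = smoother.update(noisy)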
def start_game(config, params):
    # Initialize the variables needed before entering the game
    cam = cv2.VideoCapture(0)
    ret, named_window = cam.read()

    # Silhouette matching: turn on the camera, (show an image, wait until the
    # 4 points come within range) x 3 loops, then exit
    # sil = ["1.png", "2.png", "3.png"]  # something like this

    # Game start: ends with one of clear_menu, pause_menu, death_menu
    pause_img = cv2.imread('images/pause.png')
    score_img = cv2.imread('images/score.png')
    gameover_img = cv2.imread('images/gameover.png')

    # HP (lives) related variables
    hp_x = config.imWidth // 2 + 400
    hp_y = config.imHeight // 2 - 345
    hp_yy = config.imHeight // 2 - 300
    hp_w = 50
    hp_h = 42
    hp_image = cv2.imread('images/heart.png')

    w = 432
    h = 368
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h),
                        trt_bool=str2bool("False"))
    global score

    while True:  # a restart comes back here (no need to redo the silhouette)
        params["restart"] = False
        hp = 10       # lives until death (miss 10 times -> death_menu)
        cur_order = 0
        score = 0
        game_patterns = []  # list rebuilt from the spreadsheet values

        # values loaded from the spreadsheet
        for i in params["patterns"]:  # e.g. i = [4.0, 0, 0, 3, 0, 0, 12, 0, 0, 0, ...]
            pattern = []
            if i[10]:
                # i[10] truthy -> this row has a pose, so give the player time
                # to take the pose
                time1 = i[0] - 6.6
                time2 = i[0]
            else:
                # no pose -> give a time window for touching the circle with a
                # body point
                time1 = i[0] - 3
                time2 = i[0] + 1
            # instead of the single time i[0], allow the range time1..time2
            pattern.extend([i[0], time1, time2, False, i[10]])
            # for each of the 9 zones, append a (zone, body part) tuple
            for j in range(1, 10):  # j = 1 .. 9
                if i[j]:  # non-zero -> a circle must appear
                    # the zone number is off by one relative to the spreadsheet
                    # (column 0 holds the time), hence j - 1; i[j] - 1 corrects
                    # values that were written off by one in the spreadsheet
                    pattern.append(tuple([j - 1, i[j] - 1]))
            game_patterns.append(pattern)
        # params["patterns"][0] = [4, 0, 0, 0, 3, 0, 0, 12, 0, 0, 0]
        # -> game_patterns[0] = [4.0, 3.5, 4.2, False, (2, 2), (5, 11)]  (zone, part)

        match_list = []  # rules to try matching within the allowed time window
        # a = input('Press...')
        start_time = time.time()
        resume_time = 0.0
        resume_start = 0.0
        play_music(params["song"], 0)

        while True:  # game play
            ret, named_window = cam.read()
            config.named_window = cv2.resize(named_window, dsize=(1312, 736),
                                             interpolation=cv2.INTER_AREA)
            config.named_window = cv2.flip(config.named_window, 1)
            print(named_window.shape)
            humans = e.inference(named_window, resize_to_default=(w > 0 and h > 0),
                                 upsample_size=4.0)
            if not humans:
                continue
            human = humans[0]
            image_h, image_w = config.named_window.shape[:2]

            # Use the key points found by the Human class's add_pair function
            # (estimator.py), convert them to pixel coordinates and store them
            # in the centers list, so the key points from head to toe can be
            # shown on screen.
            centers = []
            for i in range(common.CocoPart.Background.value):  # 18 parts
                if i not in human.body_parts.keys():
                    centers.append((0, 0))
                else:
                    body_part = human.body_parts[i]
                    center = (image_w - int(body_part.x * image_w + 0.5),
                              int(body_part.y * image_h + 0.5))
                    centers.append(center)

            play_time = time.time() - start_time  # measure play time

            pattern = game_patterns[cur_order]
            # Once a rule passes its time1 and is not yet in match_list (the
            # second check prevents duplicate appends), add it.
            # game_patterns[cur_order][1] is the start of the allowed window.
            if game_patterns[cur_order][1] < play_time and \
                    game_patterns[cur_order] not in match_list:
                match_list.append(game_patterns[cur_order])
                # cur_pattern = Pattern()
                cur_order += 1
                if cur_order > len(game_patterns) - 1:
                    # past the last pattern: pin cur_order; the game is over
                    cur_order = len(game_patterns) - 1

            if match_list:
                # if match_list has any element, hand these arguments to match()
                # i = [4.0, 3.5, 4.2, F, 0 or PATH, (2, 3), (5, 12)]
                match_list = match(config, match_list, centers, hp, play_time)

            if match_list and match_list[0][2] < play_time:
                # the touch window has passed -> lose one life
                hp -= 1
                # the expired rule is always at index 0, since finished rules
                # are removed from match_list
                del match_list[0]
                # match_list.remove(game_patterns[cur_order]) would also work

            cv2.putText(config.named_window, 'score:',
                        (int(config.imWidth / 2 - 600), int(config.imHeight / 2 - 300)),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7, cv2.LINE_8)
            # show the score in real time
            cv2.putText(config.named_window, '%d' % score,
                        (int(config.imWidth / 2 - 600), int(config.imHeight / 2 - 250)),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7, cv2.LINE_8)

            if cur_order == len(game_patterns):
                # the (rebuilt) pattern list is finished -> show clear_menu
                config.named_window = score_img
                clear_menu(params, score)
            if cv2.waitKey(1) & 0xFF == ord('p'):
                params["exit"] = True
            if hp <= 0 or play_time > game_patterns[len(game_patterns) - 1][2] + 5:
                # 5 seconds after the touch window of the last pattern has passed
                mixer.music.stop()
                death_menu(params)  # death
            if params["exit"] == True:
                break
            if params["restart"] == True:  # restart the same game
                break
            if params["menu"] == True:
                break

            for i in range(hp):
                # composite the HP hearts onto the live window, two rows of five
                if i < 5:
                    show_hp(config.named_window, hp_image, hp_x + i * hp_w,
                            hp_y, hp_w, hp_h)
                if i >= 5:
                    show_hp(config.named_window, hp_image, hp_x + (i - 5) * hp_w,
                            hp_yy, hp_w, hp_h)
            cv2.imshow('McgBcg', config.named_window)  # image_h, image_w

        if params["exit"] == True:
            break
        if params["menu"] == True:
            break
live_y_arr = []
orig_x_dict = {}
orig_y_dict = {}
orig_x_arr = []
orig_y_arr = []

e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(432, 368),
                    trt_bool=False)
logger.debug('Image read 1 +')

############################################## LIVE ########################################
image1 = cv2.imread('INPUTS/input1.jpg')
humans1 = e.inference(image1, resize_to_default=(w > 0 and h > 0), upsample_size=4.0)
for human in humans1:
    body_parts = human.body_parts
    for k, v in body_parts.items():
        live_x_dict[v.part_idx] = round(v.x, 2)
        live_y_dict[v.part_idx] = round(v.y, 2)

live_x_arr = form_arr(live_x_dict)
live_y_arr = form_arr(live_y_dict)
print('\n\n******XXXXXXXX LIVE X ARR XXXXXXXXXX************')
print(live_x_arr)
print('******YYYYYYYY LIVE Y ARR YYYYYYYYYY************')
print(live_y_arr)
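# The orig_* dictionaries suggest the live keypoints are compared against a
# reference image. One plausible comparison is a mean absolute difference per
# axis; this is a hedged sketch (the 0.1 threshold is a guess, not from the
# original code):
import numpy as np


def pose_distance(live_x, live_y, orig_x, orig_y):
    """Mean absolute difference between live and reference keypoints."""
    dx = np.abs(np.asarray(live_x) - np.asarray(orig_x))
    dy = np.abs(np.asarray(live_y) - np.asarray(orig_y))
    return float(np.mean(dx + dy))

# Hypothetical usage once both array pairs have been filled via form_arr():
# score = pose_distance(live_x_arr, live_y_arr, orig_x_arr, orig_y_arr)
# print('Poses match' if score < 0.1 else 'Poses differ')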
fig = plt.figure(num='real-time plotting')
sf = fig.add_subplot(111)
plt.title('stream data')
plt.xticks([0, 1500000, 3000000, 4500000, 6000000])
plt.yticks([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
plt.axhline(y=threshold, color='r', linestyle='--', linewidth=2)
line1, = sf.plot([0, 6000000], [0, 1], 'b-')

while cap2.isOpened():  # loop while the video is being captured
    try:
        ret_val1, image1 = cap1.read()
        ret_val2, image2 = cap2.read()

        logger1.debug('image process+')
        humans1 = e1.inference(image1, resize_to_default=(w1 > 0 and h1 > 0),
                               upsample_size=args1.resize_out_ratio)
        logger2.debug('image process+')
        humans2 = e2.inference(image2, resize_to_default=(w2 > 0 and h2 > 0),
                               upsample_size=args2.resize_out_ratio)

        # Video 2: if --showBG=False, render the skeleton only
        if not args2.showBG:
            image2 = np.zeros(image2.shape)

        logger1.debug('postprocess+')
        image1 = TfPoseEstimator.draw_humans(image1, humans1, imgcopy=False)
        logger2.debug('postprocess+')
        image2 = TfPoseEstimator.draw_humans(image2,
class Terrain(object):
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """
        # set up the view window
        self.app = QtGui.QApplication(sys.argv)
        self.window = gl.GLViewWidget()
        self.window.setWindowTitle('Terrain')
        self.window.setGeometry(0, 110, 1920, 1080)
        self.window.setCameraPosition(distance=30, elevation=12)
        self.window.show()

        gx = gl.GLGridItem()
        gy = gl.GLGridItem()
        gz = gl.GLGridItem()
        gx.rotate(90, 0, 1, 0)
        gy.rotate(90, 1, 0, 0)
        gx.translate(-10, 0, 0)
        gy.translate(0, -10, 0)
        gz.translate(0, 0, -10)
        self.window.addItem(gx)
        self.window.addItem(gy)
        self.window.addItem(gz)

        model = 'mobilenet_thin'
        camera = 0

        self.lines = {}
        self.connection = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6],
                           [0, 7], [7, 8], [8, 9], [9, 10], [8, 11], [11, 12],
                           [12, 13], [8, 14], [14, 15], [15, 16]]

        w, h = model_wh('432x368')
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.cam = cv2.VideoCapture(camera)
        ret_val, image = self.cam.read()

        matlabfile = os.path.join(os.getcwd(), 'tf-pose-estimation',
                                  'prob_model_params.mat')
        self.poseLifting = Prob3dPose(matlabfile)
        keypoints = self.mesh(image)

        self.points = gl.GLScatterPlotItem(pos=keypoints,
                                           color=pg.glColor((0, 255, 0)),
                                           size=15)
        self.window.addItem(self.points)
        for n, pts in enumerate(self.connection):
            self.lines[n] = gl.GLLinePlotItem(pos=np.array([keypoints[p] for p in pts]),
                                              color=pg.glColor((0, 0, 255)),
                                              width=3, antialias=True)
            self.window.addItem(self.lines[n])

    def mesh(self, image):
        image_h, image_w = image.shape[:2]
        width = 640
        height = 480
        pose_2d_mpiis = []
        visibilities = []

        humans = self.e.inference(image, upsample_size=4.0)
        for human in humans:
            pose_2d_mpii, visibility = common.MPIIPart.from_coco(human)
            pose_2d_mpiis.append([(int(x * width + 0.5), int(y * height + 0.5))
                                  for x, y in pose_2d_mpii])
            visibilities.append(visibility)

        pose_2d_mpiis = np.array(pose_2d_mpiis)
        visibilities = np.array(visibilities)
        transformed_pose2d, weights = self.poseLifting.transform_joints(
            pose_2d_mpiis, visibilities)
        pose_3d = self.poseLifting.compute_3d(transformed_pose2d, weights)
        keypoints = pose_3d[0].transpose()
        return keypoints / 80

    def update(self):
        """
        update the mesh and shift the noise each time
        """
        ret_val, image = self.cam.read()
        try:
            keypoints = self.mesh(image)
        except AssertionError:
            print('body not in image')
        else:
            self.points.setData(pos=keypoints)
            for n, pts in enumerate(self.connection):
                self.lines[n].setData(pos=np.array([keypoints[p] for p in pts]))

    def start(self):
        """
        get the graphics window open and setup
        """
        if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
            QtGui.QApplication.instance().exec_()

    def animation(self, frametime=10):
        """
        calls the update method to run in a loop
        """
        timer = QtCore.QTimer()
        timer.timeout.connect(self.update)
        timer.start(frametime)
        self.start()
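# A minimal driver for the Terrain class, inferred from its animation()/start()
# pattern (the 10 ms frame time is the method's own default):
if __name__ == '__main__':
    # opens the GLViewWidget, lifts webcam poses to 3D, redraws every 10 ms
    t = Terrain()
    t.animation(frametime=10)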
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
if args.output_video != '':
    output_video = cv2.VideoWriter(args.output_video,
                                   cv2.VideoWriter_fourcc(*'DIVX'),
                                   fps, (width, height))

while cap.isOpened():
    ret_val, image = cap.read()
    try:
        humans = e.inference(image, resize_to_default=(w > 0 and h > 0),
                             upsample_size=4.0)
    except Exception:
        break
    print(args.showBG)
    if not args.showBG:
        image = np.zeros(image.shape)
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    if args.output_video != '':
        output_video.write(image)
    if cv2.waitKey(1) == 27:
        break
def abn():
    print("abn")
    speak.Speak('action detection started')

    # Name of the directory containing the object detection module we're using
    MODEL_NAME = 'inference_graph'

    # Grab the path to the current working directory
    CWD_PATH = os.getcwd()

    # Path to the frozen detection graph .pb file, which contains the model
    # that is used for object detection.
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, 'frozen_inference_graph.pb')

    # Path to the label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, 'training', 'labelmap.pbtxt')

    # Number of classes the object detector can identify
    NUM_CLASSES = 10

    # Load the label map.
    # Label maps map indices to category names, so that when our convolutional
    # network predicts `5`, we know that this corresponds to `king`.
    # Here we use internal utility functions, but anything that returns a
    # dictionary mapping integers to appropriate string labels would be fine.
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    sess = tf.Session(graph=detection_graph)

    # Define input and output tensors (i.e. data) for the object detection classifier.
    # The input tensor is the image.
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # The output tensors are the detection boxes, scores, and classes.
    # Each box represents a part of the image where a particular object was detected.
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Initialize the webcam feed
    video = cv2.VideoCapture(0)
    ret = video.set(3, 1280)
    ret = video.set(4, 720)
    start_time = time.time()
    fps_time = time.time()

    w, h = model_wh(args.resize)
    if w > 0 and h > 0:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    else:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(1280, 720))

    while True:
        # Acquire a frame and expand its dimensions to shape [1, None, None, 3],
        # i.e. a single-column array where each item holds the pixel's RGB value
        ret, frame = video.read()
        frame_expanded = np.expand_dims(frame, axis=0)

        # Perform the actual detection by running the model with the image as input
        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: frame_expanded})

        # Draw the results of the detection (aka 'visualize the results')
        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.60)
        # print(np.squeeze(classes), np.squeeze(boxes))

        # pose
        humans = e.inference(frame, resize_to_default=(w > 0 and h > 0),
                             upsample_size=args.resize_out_ratio)
        image = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False)
        cv2.putText(frame, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        if (time.time() - start_time) > 10:
            break

        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('ProjectInt', frame)
        fps_time = time.time()

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break

    # Clean up
    video.release()
    cv2.destroyAllWindows()
        )  # 'D:/Dataset/Action/my_dataset/crop/1/1_0001/'
        img_name = img_dir + img_path.split('/')[-1]
        # 'D:/Dataset/Action/my_dataset/crop/1/1_0001/img_00001.jpg'
        # print(img_name)
        if int(img_dir.split('_')[-1].replace('/', '')) <= old_file_num[arr]:
            # skip the old clips; only crop the new ones
            break
        if not os.path.exists(img_dir):
            os.mkdir(img_dir)
        img = cv2.imread(img_path)
        image = img[:, 30:410, :].copy()
        humans = e.inference(image, resize_to_default=True, upsample_size=4.0)
        npimg, key_points = TfPoseEstimator.draw_one_human(image, humans,
                                                           imgcopy=False, score=0.8)
        cv2.putText(npimg, "FPS: %f" % (1.0 / (time.time() - fps_time)), (10, 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.imshow('tf-pose-estimation result', npimg)
        fps_time = time.time()
        # print(temp_class.Neck_x, temp_class.Neck_y)
        if key_points[1][0] == 0 or key_points[1][1] == 0:
            err += 1
class PoseEstimator(object):
    """
    The PoseEstimator class manages all operations related to pose estimation.
    It acts as a wrapper on top of the TfPoseEstimator implemented inside the
    openpose model. The class supplies human pose coordinates to requestor objects.
    """
    resize_out_ratio = 4.0  # of no real relevance, kept for the sake of completeness

    def __init__(self, resize='0x0', model='mobilenet_thin'):
        self.humans = None  # list of humans with pose info
        self.image = None
        self.bboxes = []  # list of bbox [x1, y1, x2, y2]
        """
        The two available models are cmu & mobilenet_thin.
        If running in CPU-only mode, mobilenet_thin is recommended.
        Defaults to mobilenet_thin.
        """
        self.model = model
        """
        If a resize value is provided, images will be resized before they are
        processed. default=0x0. Recommended: 432x368, 656x368 or 1312x736.
        """
        self.w, self.h = model_wh(resize)
        self.loadModel()

    def loadModel(self):
        """
        Loads the cmu or mobilenet model into memory
        """
        try:
            if self.w == 0 or self.h == 0:
                self.e = TfPoseEstimator(get_graph_path(self.model),
                                         target_size=(432, 368))
            else:
                self.e = TfPoseEstimator(get_graph_path(self.model),
                                         target_size=(self.w, self.h))
        except MemoryError:
            print("couldn't load model into memory...")

    def infer(self, image):
        """
        Calls the inference API inside tf_pose (openpose), returning the poses
        of humans and drawing the skeletons on the image frame
        """
        self.image = image
        if self.image is None:
            raise Exception('The image is not valid. Check your image.')
        self.humans = self.e.inference(self.image,
                                       resize_to_default=(self.w > 0 and self.h > 0),
                                       upsample_size=self.resize_out_ratio)
        self.image = TfPoseEstimator.draw_humans(self.image, self.humans, imgcopy=False)
        return self.image

    def getHumans(self):
        return self.humans

    def getImage(self):
        return self.image

    def _normalize_values(self, width, height):
        # scale normalized coordinates to the network input size
        if self.w == 0:
            width = width * 432
        else:
            width = width * self.w
        if self.h == 0:
            height = height * 368
        else:
            height = height * self.h
        return width, height

    def getBboxes(self):
        return self.bboxes

    def getKeypoints(self):
        """
        Returns a list of keypoints of all the persons in a frame
        keypt_list = [keypts1, keypts2]
        keypts = [x1, y1, score, ...]
        """
        keypt_list = []
        for human in self.humans:
            keypts = []
            for key, values in human.body_parts.items():
                # print(key, 'x val %.2f' % values.x, 'y val %.2f' % values.y)
                # print('values.part_idx, values.uidx ', values.part_idx, values.uidx)
                x, y = self._normalize_values(values.x, values.y)
                keypts.extend([x, y, values.score])
            keypt_list.append(keypts)
        # print(keypt_list)
        return keypt_list

    def _updateBboxes(self):
        self.bboxes = []
        for human in self.humans:
            min_x, min_y = math.inf, math.inf
            max_x, max_y = -1, -1
            bbox = [min_x, min_y, max_x, max_y]
            for key, values in human.body_parts.items():
                if values.x < min_x:
                    min_x = values.x
                if values.y < min_y:
                    min_y = values.y
                if values.x > max_x:
                    max_x = values.x
                if values.y > max_y:
                    max_y = values.y
            bbox = [min_x, min_y, max_x, max_y]
            self.bboxes.append(bbox)

    def showResults(self):
        """
        Utility method for debug purposes, not called from anywhere
        """
        # print(self.humans)
        cv2.imshow('tf-pose-estimation result', self.image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
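# A hedged usage sketch for the wrapper above (the image paths are placeholders):
import cv2

pose_est = PoseEstimator(resize='432x368', model='mobilenet_thin')
frame = cv2.imread('person.jpg')  # placeholder input
rendered = pose_est.infer(frame)  # draws skeletons onto the frame
for keypts in pose_est.getKeypoints():
    print(len(keypts) // 3, 'keypoints found')
cv2.imwrite('person_pose.jpg', rendered)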
e = TfPoseEstimator(get_graph_path(model_name), target_size=(432, 368))
iteration_list = [10]
for iteration in iteration_list:
    start = time.time()
    for i in range(iteration):
        # estimate human poses from a single image!
        # image = common.read_imgfile(image_path, None, None)
        image = cv2.imread(image_path)
        if image is None:
            sys.exit(-1)
        print("image shape = %s" % str(image.shape))
        t = time.time()
        humans = e.inference(image, resize_to_default=False,
                             upsample_size=resize_out_ratio)
        elapsed = time.time() - t
    end = time.time()
    print("It takes %s sec to run %d images for tf-openpose" % (str(end - start), iteration))

print(image.shape)
print(humans)
image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

import matplotlib.pyplot as plt
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
class Tracker:
    """
    attributes:
        people = dict of people, key is their uuid
        names = dict of names, key is uuid
        frame_count = count of frames tracked
        scan_every_n_frames = # of frames between scans of a person's face
        maximum_difference_to_match = the maximum average difference of points
            at which two poses are still considered the same person
        estimator = TfPoseEstimator
        poses = [] of poses from TfPoseEstimator from the current frame
        faces = [] positions of faces from the current frame
        save_faces_to = None if disabled, a string for an existing dir if enabled
    """

    def __init__(self):
        self.frame_count = 0
        self.scan_every_n_frames = 120
        self.max_face_scans = 5
        self.maximum_difference_to_match = 0.08
        self.names = {}
        self.people = {}
        self.estimator = TfPoseEstimator(get_graph_path("mobilenet_thin"),
                                         target_size=(432, 368))
        self.encodings = {}
        self.save_faces_to = None

    def load_encodings(self, filepath):
        self.encodings = pickle.loads(open(filepath, "rb").read())

    def save_encodings(self, filepath):
        encoding_file = open(filepath, "wb")
        encoding_file.write(pickle.dumps(self.encodings))
        encoding_file.close()

    def create_encodings(self, faces_directory):
        facedirs = [filename for filename in os.listdir(faces_directory)
                    if not isfile(join(faces_directory, filename))]
        faces = dict.fromkeys(facedirs, [])
        encodings = {}
        for name in facedirs:
            faces[name] = []
            for filepath in [filename for filename in os.listdir(join(faces_directory, name))
                             if isfile(join(faces_directory, name, filename))]:
                faces[name].append(join(faces_directory, name, filepath))
        for name in faces:
            encodings[name] = []
            for filepath in faces[name]:
                try:
                    image = cv2.imread(filepath)
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    encoding = face_recognition.face_encodings(
                        image, [(0, 0, image.shape[0], image.shape[1])])
                    encodings[name].append(encoding[0])
                except:
                    continue
        self.encodings = encodings

    def get_pose(self, image):
        w = 432
        h = 368
        return self.estimator.inference(image,
                                        resize_to_default=(w > 0 and h > 0),
                                        upsample_size=4.0)

    def draw_output(self, image, draw_body=True, draw_face=True, draw_label=True):
        if draw_body:
            poses = []
            for person in self.people:
                if person.is_visible:
                    poses.append(person.pose)
            TfPoseEstimator.draw_humans(image, poses, imgcopy=False)
        for person in self.people:
            if not self.people[person].is_visible:
                continue
            if draw_face:
                top, left, bottom, right = self.people[person].face
                top = math.floor(top * image.shape[0])
                bottom = math.floor(bottom * image.shape[0])
                left = math.floor(left * image.shape[1])
                right = math.floor(right * image.shape[1])
                cv2.rectangle(image, (left, top), (right, bottom), (0, 0, 255), 2, 0)
                if draw_label:
                    cv2.putText(image, self.people[person].id, (left, top),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255))
        return image

    def scan_face(self, image, person):
        if not person.is_visible:
            return
        top, left, bottom, right = person.face
        top = math.floor(top * image.shape[0])
        left = math.floor(left * image.shape[1])
        bottom = math.floor(bottom * image.shape[0])
        right = math.floor(right * image.shape[1])
        encoding = face_recognition.face_encodings(image, [(top, right, bottom, left)])
        if len(encoding) <= 0:
            return
        encoding = encoding[0]
        person.set_encoding(encoding)

    def compare_known_faces(self, person):
        if len(self.encodings) == 0:
            return
        name_key = []
        encodings = []
        counts = {}
        for name in self.encodings:
            results = face_recognition.compare_faces(self.encodings[name],
                                                     person.encodings[-1])
            match_count = results.count(True)
            counts[name] = match_count
            if match_count / len(results) >= 0.75:
                break
        biggest_match = max(counts, key=counts.get)
        if counts[biggest_match] <= 3:
            return None
        else:
            return biggest_match

    # Handed a frame to process for tracking
    def process_frame(self, image):
        self.frame_count += 1

        # 1 - Generate all the poses
        self.poses = self.get_pose(image)

        # 2 - Tick each person
        for person in self.people:
            self.people[person].tick()

        # 3 - See whether each pose belongs to someone already in our people,
        #     or to someone new who needs a new Person object
        new_people = []
        for pose in self.poses:
            handled = False
            for person in self.people:
                difference = self.people[person].distance_from_pose(pose)
                if difference < self.maximum_difference_to_match:
                    self.people[person].update(pose)
                    handled = True
                    break
            if handled:
                continue
            else:
                # Create a new person
                person = Person()
                person.update(pose)
                new_people.append(person)
        for person in new_people:
            self.people[person] = person

        # 4 - Now that we've generated the people, "tock" through all of them
        #     so their decay occurs
        for person in self.people:
            # scan the faces of all new people
            if person in new_people:
                self.scan_face(image, person)
            # scan everyone else's face at most every self.scan_every_n_frames
            if person.is_visible and person.last_face_scan % self.scan_every_n_frames == 0 \
                    and len(person.encodings) < self.max_face_scans:
                self.scan_face(image, person)
            if person.last_face_scan == 0:
                id = self.compare_known_faces(person)
                older_person = self.people.get(id)
                if older_person is not None:
                    self.people[id] = person  # re-key the existing entry to this person
                if id is not None:
                    person.id = id
                elif self.save_faces_to:
                    person.save_face(image, self.save_faces_to)
            self.people[person].tock()
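# A hedged driver loop for the tracker (the webcam index and the encodings
# path are placeholders; Person comes from the same project):
import cv2

tracker = Tracker()
tracker.load_encodings('./encodings.p')  # pre-built with create_encodings()

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    tracker.process_frame(frame)
    output = tracker.draw_output(frame.copy())
    cv2.imshow('tracker', output)
    if cv2.waitKey(1) == 27:  # Esc
        break
cap.release()
cv2.destroyAllWindows()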
logger.debug('cam read+')
cam = cv2.VideoCapture(args.camera)
frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))
# MJPG frames don't belong in an .mp4 container; use the mp4v fourcc instead
out = cv2.VideoWriter('video_out.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 30,
                      (frame_width, frame_height))
ret_val, image = cam.read()
logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))
while True:
    ret_val, image = cam.read()

    logger.debug('image process+')
    humans = e.inference(image,
                         resize_to_default=(w > 0 and h > 0),
                         upsample_size=args.resize_out_ratio)

    logger.debug('postprocess+')
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

    logger.debug('show+')
    no_people = len(humans)
    print("no. of people: ", no_people)
    cv2.putText(image, "People: %d" % no_people, (10, 50),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
    cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)), (10, 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.imshow('tf-pose-estimation result', image)
    out.write(image)
    fps_time = time.time()
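# OpenCV silently produces an empty file when the fourcc and container don't
# match, so a sanity check right after constructing the writer is worth it.
# A minimal sketch; the filename, codec, and frame size are illustrative.
import cv2

out = cv2.VideoWriter('video_out.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 30,
                      (640, 480))
if not out.isOpened():
    raise RuntimeError('VideoWriter failed to open; try a different '
                       'fourcc/container pair, e.g. MJPG with .avi')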
else:
    e = TfPoseEstimator(get_graph_path("mobilenet_thin"),
                        target_size=(432, 368))

for is_variable, data_type in enumerate(data_types):
    files = os.listdir(path + data_type)
    files.sort()
    for image_name in files:
        t = time.time()
        split_image_name = image_name.split(".")
        if split_image_name[-1] != "jpg":
            continue
        index = int(split_image_name[0])
        img = common.read_imgfile(path + data_type + "/" + image_name, None,
                                  None)
        humans = e.inference(img, resize_to_default=False, upsample_size=4.0)
        for id, human in enumerate(humans):
            if id > 0:
                print("multiple humans in image: " + image_name)
            output_file.write(str(index) + ", " + str(is_variable))
            for value, body_part_name in enumerate(CocoPart):
                if display_images:
                    if value in human.body_parts:
                        body_part = human.body_parts[value]
                        print(str(value) + " " + body_part_name.name +
                              "- x: " + str(body_part.x) +
                              " y: " + str(body_part.y) +
                              " score: " + str(body_part.score))
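# The loop above relies on enumerate(CocoPart) lining up with the integer
# keys of human.body_parts. A minimal sketch that prints this COCO-18
# index-to-name mapping, assuming the tf_pose package is importable as in
# the scripts above.
from tf_pose.common import CocoPart

# body_parts dicts are keyed by these integer indices (0 = Nose, ...,
# 18 = Background), which is why enumerate(CocoPart) matches them up.
for value, part in enumerate(CocoPart):
    print(value, part.name)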
class Dance:
    def __init__(self, camera_num=0, w=432, h=368, model='mobilenet_thin',
                 resize_out_ratio=4.0):
        # SSH with drone
        self.ssh = None
        self.host_keys = '/Users/rshmyrev/.ssh/known_hosts'
        self.drone_ip = '192.168.1.31'
        self.drone_username = '******'
        self.drone_pass = '******'
        self.ssh_stdin = None
        self.ssh_stdout = None
        self.ssh_stderr = None
        self.init_ssh()

        # Stabilize
        self._send_dima_command('stab')
        logger.info('Drone stabilized')

        # Camera
        self.camera_num = camera_num
        self.cam = None
        self._init_cam()

        # Model
        self.model = model
        self.resize_out_ratio = resize_out_ratio
        self.w = w
        self.h = h
        self.e = None
        self._init_estimator()

        # Image, humans, body parts
        self.image = None
        self.humans = []
        self.human = None
        self.pose = None
        self.hand = {'right': None, 'left': None}
        self.elbow_angle = {'right': None, 'left': None}
        self.wrist_position = {'right': None, 'left': None}
        self.hand_direction = {'right': None, 'left': None}

        # Draw params
        self.time = time.time()
        self.text_params = (cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        self.draw_resize = 1

        # Cmd
        self.cmd = None
        self.prev_cmd = None
        self.stop = False

        # Poses dict
        self.poses = {
            1: {'desc': 'both hands up', 'cmd': 'forward'},
            2: {'desc': 'both hands down', 'cmd': 'land', 'stop': True},
            3: {'desc': 'both hands side', 'cmd': 'backward'},
            4: {'desc': 'right hand up, left hand side', 'cmd': 'left'},
            5: {'desc': 'left hand up, right hand side', 'cmd': 'right'},
            6: {'desc': 'right hand down, left hand side', 'cmd': 'go2',
                'stop': True},
            7: {'desc': 'left hand down, right hand side', 'cmd': 'up'},
        }

    def _init_cam(self):
        self.cam = cv2.VideoCapture(self.camera_num)
        self.cam.read()

    def _init_estimator(self):
        logger.debug('initialization %s : %s' %
                     (self.model, get_graph_path(self.model)))
        if self.w > 0 and self.h > 0:
            self.e = TfPoseEstimator(get_graph_path(self.model),
                                     target_size=(self.w, self.h))
        else:
            self.e = TfPoseEstimator(get_graph_path(self.model),
                                     target_size=(432, 368))

    def init_ssh(self):
        self.ssh = paramiko.SSHClient()
        self.ssh.load_host_keys(self.host_keys)
        self.ssh.connect(self.drone_ip, username=self.drone_username,
                         password=self.drone_pass)

    def reset_params(self):
        self.image = None
        self.humans = []
        self.human = None
        self.pose = None
        for hand_name in ('right', 'left'):
            self.hand[hand_name] = None
            self.elbow_angle[hand_name] = None
            self.wrist_position[hand_name] = None
            self.hand_direction[hand_name] = None
        self.ssh_stdin = None
        self.ssh_stdout = None
        self.ssh_stderr = None

    def get_humans(self):
        _, self.image = self.cam.read()
        logger.info('cam image={:d}x{:d}'.format(self.image.shape[1],
                                                 self.image.shape[0]))
        logger.debug('image process+')
        self.humans = self.e.inference(
            self.image,
            resize_to_default=(self.w > 0 and self.h > 0),
            upsample_size=self.resize_out_ratio)
        # logger.debug('Session: {}'.format(self.e.persistent_sess))
        logger.info('Count of humans: {}'.format(len(self.humans)))
        # logger.debug('Humans parts: {}'.format(self.humans[0]))

    def choose_best_human(self):
        """It's you"""
        if self.humans:
            self.human = self.humans[0]  # by Dima

    def destroy_all_humans(self):
        # TODO
        pass

    def get_hands(self):
        if not self.human:
            return
        # COCO indices 2-4 are one arm's shoulder/elbow/wrist, 5-7 the other
        self.hand['right'] = [(self.human.body_parts[i].x,
                               self.human.body_parts[i].y)
                              for i in range(2, 5)
                              if i in self.human.body_parts]
        self.hand['left'] = [(self.human.body_parts[i].x,
                              self.human.body_parts[i].y)
                             for i in range(5, 8)
                             if i in self.human.body_parts]
        if len(self.hand['right']) != 3:
            self.hand['right'] = None
        if len(self.hand['left']) != 3:
            self.hand['left'] = None

    @staticmethod
    def _elbow_angle(hand, vertical=True):
        """
        :param vertical:
        :param hand: 3 points (x1, x2, x3): x1 = shoulder, x2 = elbow,
            x3 = wrist
        :return: degrees for the x1-x2-x3 angle
        """
        x1, x2, x3 = hand
        if vertical:
            # measure relative to the vertical rather than the shoulder:
            # take the elbow point and shift it along the vertical axis
            x1 = (x2[0], x2[1] + 1)
        v1 = vector(x1, x2)  # upper arm
        v2 = vector(x2, x3)  # forearm
        angle = angle_between(v1, v2)
        # logger.debug('Angle in rads: %f' % angle)
        return np.degrees(angle)

    @staticmethod
    def _wrist_position(hand):
        x1, x2, x3 = hand
        if x3[1] < x1[1] and x3[1] < x2[1]:
            return 'up'
        elif x3[1] > x1[1] and x3[1] > x2[1]:
            return 'down'
        else:
            return ''

    def _hand_direction(self, hand_name):
        angle_gap = 25  # degrees
        angle = self.elbow_angle[hand_name]
        if angle <= angle_gap:
            return '90'
        elif 90 - angle_gap <= angle <= 90 + angle_gap:
            return '180'
        elif 180 - angle_gap <= angle:
            return '270'
        else:
            return ''

    def calculate_hands_direction(self):
        for hand_name in ('right', 'left'):
            if not self.hand[hand_name]:
                continue
            self.elbow_angle[hand_name] = self._elbow_angle(
                self.hand[hand_name])
            self.wrist_position[hand_name] = self._wrist_position(
                self.hand[hand_name])
            self.hand_direction[hand_name] = self._hand_direction(hand_name)
            logger.info(
                '{} hand. Angle: {}, wrist_direction: {}, hand_direction: {}'.
                format(hand_name, int(self.elbow_angle[hand_name]),
                       self.wrist_position[hand_name],
                       self.hand_direction[hand_name]))

    def calculate_pose(self):
        if (not self.hand_direction['right']
                or not self.hand_direction['left']):
            self.pose = None
            return
        right = self.hand_direction['right']
        left = self.hand_direction['left']
        if right == '90' and left == '90':
            self.pose = 1
        elif right == '270' and left == '270':
            self.pose = 2
        elif right == '180' and left == '180':
            self.pose = 3
        elif right == '90' and left == '180':
            self.pose = 4
        elif left == '90' and right == '180':
            self.pose = 5
        elif right == '270' and left == '180':
            self.pose = 6
        elif left == '270' and right == '180':
            self.pose = 7

    def send_pose2drone(self):
        if self.stop:  # the stop flag is raised
            return
        pose = self.poses[self.pose]
        cmd = pose['cmd']
        self.stop = pose.get('stop', False)
        if cmd == self.prev_cmd:  # don't resend the same command
            return
        self._send_dima_command(cmd)
        self.prev_cmd = cmd

    def infinite_loop(self):
        while True:
            self.reset_params()
            self.get_humans()
            if not self.humans:
                continue
            self.choose_best_human()
            if not self.human:
                continue
            self.get_hands()
            self.calculate_hands_direction()
            self.calculate_pose()

            # Draw
            image = TfPoseEstimator.draw_humans(self.image, [self.human],
                                                imgcopy=False)
            # resize
            image = cv2.resize(image, None, fx=self.draw_resize,
                               fy=self.draw_resize)
            # Draw angles and pose
            image = self._draw_angle(image)
            # image = self._draw_hand_direction(image)
            # image = self._draw_wrist_position(image)
            image = self._draw_pose(image)
            image = self._draw_cmd(image)
            image = self._draw_prev_cmd(image)
            # image = self._draw_fps(image)
            cv2.imshow('Result', image)

            if self.pose:
                logger.info('Pose {}, {} '.format(
                    self.pose, self.poses[self.pose]['desc']))
                self.send_pose2drone()

            if cv2.waitKey(1) == 27:
                break
        cv2.destroyAllWindows()

    def _draw_angle(self, npimg):
        image_h, image_w = npimg.shape[:2]
        for hand_name in ('right', 'left'):
            if not self.elbow_angle[hand_name]:
                continue
            center_point = self.hand[hand_name][1]
            center_on_image = (int(center_point[0] * image_w + 3.5),
                               int(center_point[1] * image_h + 0.5))
            cv2.putText(npimg,
                        "Angle: {}".format(int(self.elbow_angle[hand_name])),
                        center_on_image, *self.text_params)
        return npimg

    def _draw_hand_direction(self, npimg):
        image_h, image_w = npimg.shape[:2]
        for hand_name in ('right', 'left'):
            if not self.hand_direction[hand_name]:
                continue
            center_point = self.hand[hand_name][0]
            center_on_image = (int(center_point[0] * image_w + 3.5),
                               int(center_point[1] * image_h + 0.5))
            cv2.putText(
                npimg,
                "Direction: {}".format(self.hand_direction[hand_name]),
                center_on_image, *self.text_params)
        return npimg

    def _draw_wrist_position(self, npimg):
        image_h, image_w = npimg.shape[:2]
        for hand_name in ('right', 'left'):
            if not self.wrist_position[hand_name]:
                continue
            center_point = self.hand[hand_name][2]
            center_on_image = (int(center_point[0] * image_w + 3.5),
                               int(center_point[1] * image_h + 0.5))
            cv2.putText(
                npimg,
                "Wrist position: {}".format(self.wrist_position[hand_name]),
                center_on_image, *self.text_params)
        return npimg

    def _draw_pose(self, npimg):
        if self.pose:
            pose = self.poses[self.pose]
            cv2.putText(npimg, "Pose: %s" % pose['desc'], (10, 10),
                        *self.text_params)
        return npimg

    def _draw_cmd(self, npimg):
        if self.pose:
            pose = self.poses[self.pose]
            cv2.putText(npimg, "Command: %s" % pose['cmd'], (10, 30),
                        *self.text_params)
        return npimg

    def _draw_prev_cmd(self, npimg):
        if self.prev_cmd:
            cv2.putText(npimg, "Prev sent cmd: %s" % self.prev_cmd, (10, 50),
                        *self.text_params)
        return npimg

    def _draw_fps(self, npimg):
        cv2.putText(npimg, "FPS: %f" % (1.0 / (time.time() - self.time)),
                    (10, 10), *self.text_params)
        self.time = time.time()
        return npimg

    def _send_command(self, command):
        # 'source /opt/ros/kinetic/setup.bash'
        # 'source /home/pi/catkin_ws/devel/setup.bash'
        cmd = ('source /opt/ros/kinetic/setup.bash; '
               'source /home/pi/catkin_ws/devel/setup.bash; {}').format(
                   command)
        # self.ssh_stdin, self.ssh_stdout, self.ssh_stderr = self.ssh.exec_command(cmd)
        # if self.ssh_stdout:
        #     logger.info(self.ssh_stdout.read())
        # try not reading the remote std streams
        self.ssh.exec_command(cmd)

    def _send_ros_command(self, command, params):
        cmd = 'rosservice call /{} "{}"'.format(command, json.dumps(params))
        self._send_command(cmd)

    def _send_dima_command(self, filename):
        cmd = 'bash /home/pi/show/{}.sh'.format(filename)
        self._send_command(cmd)

    def get_telemetry(self, frame_id=''):
        command = "get_telemetry"
        params = {'frame_id': frame_id}
        self._send_ros_command(command, params)

    def navigate(self, x=0, y=0, z=0, speed=0.5, frame_id='aruco_map',
                 update_frame=True, auto_arm=True):
        command = "navigate"
        params = {
            'x': x,
            'y': y,
            'z': z,
            'speed': speed,
            'frame_id': frame_id,
            'update_frame': update_frame,
            'auto_arm': auto_arm,
        }
        self._send_ros_command(command, params)

    def square(self, z=1, speed=1, sleep_time=1, update_frame=False):
        # fly the four corners of a square and return to the start
        for x, y in ((1, 1), (1, 2), (2, 2), (2, 1), (1, 1)):
            self.navigate(x=x, y=y, z=z, speed=speed, frame_id='aruco_map',
                          update_frame=update_frame)
            sleep(sleep_time)
        self.land()

    def up(self, z=1, tolerance=0.2):
        """
        Go up by z metres
        :param z: height
        :param tolerance: accuracy of the height check (m)
        """
        start = self.get_telemetry()  # remember the starting point
        self.navigate(z=z, speed=0.5, frame_id='aruco_map',
                      auto_arm=True)  # climb z metres
        while True:  # wait for the climb to finish
            # check the current altitude against the target
            if abs(self.get_telemetry().z - (start.z + z)) < tolerance:
                break
            sleep(0.2)
        # ??? how to implement hovering

    def land(self):
        self._send_ros_command('land', params={})

    def cmd_test(self):
        self._send_dima_command('test')
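# _elbow_angle above leans on vector() and angle_between() helpers that are
# not shown in this file. A minimal sketch of what they could look like under
# that assumption (only the two names and call signatures come from the code
# above; the bodies are illustrative), followed by the obvious way to drive
# the class.
import numpy as np

def vector(p1, p2):
    # direction from point p1 to point p2, as a 2-D numpy vector
    return np.array([p2[0] - p1[0], p2[1] - p1[1]])

def angle_between(v1, v2):
    # unsigned angle between two vectors, in radians
    cos = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
    return np.arccos(np.clip(cos, -1.0, 1.0))

if __name__ == '__main__':
    dance = Dance(camera_num=0)
    try:
        dance.infinite_loop()
    except KeyboardInterrupt:
        dance.land()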
def start_game(config, params):
    cam = cv2.VideoCapture(0)
    ret, named_window = cam.read()

    # hp (health point) attributes
    hp_x = config.imWidth // 2 + 400
    hp_y = config.imHeight // 2 - 345
    hp_yy = config.imHeight // 2 - 300
    hp_w = 50
    hp_h = 42
    hp_image = cv2.imread('images/heart.png')
    score_img = cv2.imread('images/score.png')

    w = 432
    h = 368
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h),
                        trt_bool=str2bool("False"))

    while True:
        params["restart"] = False
        hp = 10
        cur_order = 0
        score = 0
        game_patterns = []
        for i in params["patterns"]:
            # [hit time, earliest match time, latest match time,
            #  matched flag, extra data, (joint, target) pairs...]
            entry = []
            time1 = i[0] - 3
            time2 = i[0] + 1
            entry.extend([i[0], time1, time2, False, i[10]])
            for j in range(1, 10):  # j = 1 ~ 9
                if i[j]:
                    entry.append((j - 1, i[j] - 1))
            game_patterns.append(entry)
        match_list = []  # sets to be checked for scoring; reset each frame
        start_time = time.time()
        play_music(params["song"], 0)

        while True:  # game play
            ret, named_window = cam.read()
            config.named_window = cv2.resize(named_window, dsize=(1312, 736),
                                             interpolation=cv2.INTER_AREA)
            config.named_window = cv2.flip(config.named_window, 1)
            print(named_window.shape)
            humans = e.inference(named_window,
                                 resize_to_default=(w > 0 and h > 0),
                                 upsample_size=4.0)
            if not humans:
                continue
            human = humans[0]

            image_h, image_w = config.named_window.shape[:2]
            centers = []
            for i in range(common.CocoPart.Background.value):
                if i not in human.body_parts.keys():
                    centers.append((0, 0))
                else:
                    body_part = human.body_parts[i]
                    # mirror x because the displayed frame is flipped
                    center = (image_w - int(body_part.x * image_w + 0.5),
                              int(body_part.y * image_h + 0.5))
                    centers.append(center)

            play_time = time.time() - start_time
            pattern = game_patterns[cur_order]
            if pattern[1] < play_time and pattern not in match_list:
                match_list.append(pattern)
                cur_order += 1
                if cur_order > len(game_patterns) - 1:
                    cur_order = len(game_patterns) - 1
            if match_list:
                match_list = match(config, match_list, centers, hp, play_time,
                                   score)
            if match_list and match_list[0][2] < play_time:
                # the pattern is still there, i.e. it expired unmatched
                hp -= 1
                del match_list[0]

            cv2.putText(config.named_window, 'score:',
                        (int(config.imWidth / 2 - 600),
                         int(config.imHeight / 2 - 300)),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7,
                        cv2.LINE_8)
            cv2.putText(config.named_window, '%d' % score,
                        (int(config.imWidth / 2 - 600),
                         int(config.imHeight / 2 - 250)),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7,
                        cv2.LINE_8)

            if cur_order == len(game_patterns):
                config.named_window = score_img
                clear_menu(params, score)
            if cv2.waitKey(1) & 0xFF == ord('p'):
                params["exit"] = True
            if hp <= 0 or play_time > game_patterns[-1][2] + 5:
                mixer.music.stop()
                death_menu(params)
            if params["exit"]:
                break
            if params["restart"]:
                break
            if params["menu"]:
                break

            for i in range(hp):
                if i < 5:
                    show_hp(config.named_window, hp_image, hp_x + i * hp_w,
                            hp_y, hp_w, hp_h)
                else:
                    show_hp(config.named_window, hp_image,
                            hp_x + (i - 5) * hp_w, hp_yy, hp_w, hp_h)
            cv2.imshow('McgBcg', config.named_window)

        if params["exit"]:
            break
        if params["menu"]:
            break
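# Each row of params["patterns"] is unpacked into a game_patterns entry as
# done above. A worked example of that transformation; the input row here is
# made up for illustration.
row = [10, 0, 2, 0, 0, 5, 0, 0, 0, 0, 'extra']  # hit at t=10s, joints 1 and 4

entry = [row[0], row[0] - 3, row[0] + 1, False, row[10]]
for j in range(1, 10):
    if row[j]:
        entry.append((j - 1, row[j] - 1))

# entry == [10, 7, 11, False, 'extra', (1, 1), (4, 4)]
print(entry)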
class SkeletonDetector(object):
    # This class is mainly copied from https://github.com/ildoonet/tf-pose-estimation

    def __init__(self, model="cmu", image_size="432x368"):
        ''' Arguments:
            model {str}: "cmu" or "mobilenet_thin".
            image_size {str}: resize input images before they are processed.
                Recommended: 432x368, 336x288, 304x240, 656x368
        '''
        # -- Check input
        assert (model in ["mobilenet_thin", "cmu"])
        self._w, self._h = _get_input_img_size_from_string(image_size)

        # -- Set up openpose model
        self._model = model
        # Resize heatmaps before they are post-processed. If image_size is
        # small, this should be large.
        self._resize_out_ratio = 4.0
        self._config = _set_config()
        self._tf_pose_estimator = TfPoseEstimator(
            get_graph_path(self._model),
            target_size=(self._w, self._h),
            tf_config=self._config)
        self._prev_t = time.time()
        self._cnt_image = 0

        # -- Set logger
        self._logger = _set_logger()

    def detect(self, image):
        ''' Detect human skeletons from an image.
        Arguments:
            image: RGB image with arbitrary size. It will be resized to
                (self._w, self._h).
        Returns:
            humans {list of class Human}:
                `class Human` is defined in
                "src/githubs/tf-pose-estimation/tf_pose/estimator.py"

                The variable `humans` is returned by the function
                `TfPoseEstimator.inference` which is defined in
                `src/githubs/tf-pose-estimation/tf_pose/estimator.py`.

                I've written a function `self.humans_to_skels_list` to
                extract the skeleton from this `class Human`.
        '''
        self._cnt_image += 1
        if self._cnt_image == 1:
            self._image_h = image.shape[0]
            self._image_w = image.shape[1]
            self._scale_h = 1.0 * self._image_h / self._image_w
        t = time.time()

        # Do inference
        humans = self._tf_pose_estimator.inference(
            image,
            resize_to_default=(self._w > 0 and self._h > 0),
            upsample_size=self._resize_out_ratio)

        # Print result and time cost
        elapsed = time.time() - t
        self._logger.info('inference image in %.4f seconds.' % (elapsed))

        return humans

    def draw(self, img_disp, humans):
        img_disp = TfPoseEstimator.draw_humans(img_disp, humans,
                                               imgcopy=False)
        if IS_DRAW_FPS:
            cv2.putText(img_disp,
                        "fps = {:.1f}".format(
                            (1.0 / (time.time() - self._prev_t))),
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            self._prev_t = time.time()

    def humans_to_skels_list(self, humans, scale_h=None):
        ''' Get skeleton data of (x, y * scale_h) from humans.
        Arguments:
            humans {a class returned by self.detect}
            scale_h {float}: scale each skeleton's y coordinate (height)
                value. Default: (image_height / image_width).
        Returns:
            skeletons {list of list}: a list of skeletons. Each skeleton is
                also a list with a length of 36 (18 joints * 2 coord values).
            scale_h {float}: the resultant height (y coordinate) range.
                The x coordinate is between [0, 1].
                The y coordinate is between [0, scale_h].
        '''
        if scale_h is None:
            scale_h = self._scale_h
        skeletons = []
        NaN = 0  # fill value for joints that were not detected
        for human in humans:
            skeleton = [NaN] * (18 * 2)
            for i, body_part in human.body_parts.items():  # iterate dict
                idx = body_part.part_idx
                skeleton[2 * idx] = body_part.x
                skeleton[2 * idx + 1] = body_part.y * scale_h
            skeletons.append(skeleton)
        return skeletons, scale_h
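# A minimal usage sketch for SkeletonDetector above. The image path is an
# illustrative assumption; everything else is the class's own API.
import cv2

detector = SkeletonDetector(model="mobilenet_thin", image_size="432x368")

image = cv2.imread("test.jpg")  # hypothetical input image
humans = detector.detect(image)
skeletons, scale_h = detector.humans_to_skels_list(humans)

# Each skeleton packs 18 joints as [x0, y0, x1, y1, ...]; undetected joints
# stay at 0.
for skeleton in skeletons:
    print(skeleton[:4])

detector.draw(image, humans)
cv2.imshow("skeletons", image)
cv2.waitKey()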
def get_front_sizes(human_height=HUMAN_HEIGHT):
    dlab = models.segmentation.deeplabv3_resnet101(pretrained=True).eval()
    sizeX, sizeY, minW, minH, maxW, maxH, rgb = segment(dlab,
                                                        '../data/6.jpeg',
                                                        show_orig=True)
    default_width = 432
    default_height = 368
    default_model = 'cmu'
    default_resize = 4.0
    img = common.read_imgfile('../data/new.png')
    e = TfPoseEstimator(get_graph_path(default_model),
                        target_size=(default_width, default_height))
    humans = e.inference(img, resize_to_default=False,
                         upsample_size=default_resize)
    body_parts = humans[0].body_parts
    parts_coordinates = {}

    hand_size_left = arm_size([2, 3, 4], minW, body_parts, sizeX, sizeY)
    hand_size_right = arm_size([5, 6, 7], maxW, body_parts, sizeX, sizeY)

    height_on_image = maxH - minH
    # metres of real height per pixel of silhouette height
    human_in_pix = human_height / height_on_image
    shoulders = height_in_pic((body_parts[2].y + body_parts[5].y) / 2, sizeY)
    hips = int(height_in_pic((body_parts[8].y + body_parts[11].y) / 2, sizeY))

    # waist select
    waist = int(shoulders + (hips - shoulders) / 3 * 2)
    print(waist)
    waist_left_bound = maxW
    waist_right_bound = 0
    waist_left_bound, waist_right_bound = get_size(rgb, waist,
                                                   waist_left_bound,
                                                   waist_right_bound)
    parts_coordinates[20] = Point(waist_left_bound, waist)
    parts_coordinates[21] = Point(waist_right_bound, waist)
    waist_level = human_in_pix * (waist - minH)
    waist = human_in_pix * (waist_right_bound - waist_left_bound)

    # chest select
    chest = int(shoulders + (hand_size_left + hand_size_right) / 10)
    chest_left_bound = maxW
    chest_right_bound = 0
    print(chest)
    chest_left_bound, chest_right_bound = get_size(rgb, chest,
                                                   chest_left_bound,
                                                   chest_right_bound)
    parts_coordinates[18] = Point(chest_left_bound, chest)
    parts_coordinates[19] = Point(chest_right_bound, chest)
    chest_level = human_in_pix * (chest - minH)
    chest = human_in_pix * (chest_right_bound - chest_left_bound)

    # hips select
    hips_left_bound = maxW
    hips_right_bound = 0
    hips_left_bound, hips_right_bound = get_size(rgb, hips, hips_left_bound,
                                                 hips_right_bound)
    parts_coordinates[23] = Point(hips_left_bound, hips)
    parts_coordinates[24] = Point(hips_right_bound, hips)
    hips_level = human_in_pix * (hips - minH)
    hips = human_in_pix * (hips_right_bound - hips_left_bound)

    # leg length
    leg_length = (height_in_pic(body_parts[10].y, sizeY) -
                  height_in_pic(body_parts[9].y, sizeY) +
                  height_in_pic(body_parts[13].y, sizeY) -
                  height_in_pic(body_parts[12].y, sizeY))
    leg = human_in_pix * leg_length

    return chest, hips, waist, leg, chest_level, hips_level, waist_level
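# All the conversions above hinge on the metres-per-pixel factor computed
# from the silhouette height. A worked example with made-up numbers
# (HUMAN_HEIGHT and the pixel bounds here are illustrative).
HUMAN_HEIGHT = 1.75   # assumed real height of the person, in metres
minH, maxH = 40, 740  # hypothetical top/bottom of the silhouette, in pixels

height_on_image = maxH - minH                      # 700 px of silhouette
metres_per_pixel = HUMAN_HEIGHT / height_on_image  # 0.0025 m per pixel

# A waist measured as 160 px wide on the image then corresponds to:
waist_px = 160
print(metres_per_pixel * waist_px)  # 0.4 m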