Example #1
    def __init__(self, cfg, weight, meta, W, H, camera_offset, tcp):

        self.net = darknet.load_net(cfg, weight, 0)
        self.meta = darknet.load_meta(meta)
        self.bridge = CvBridge()
        # receiving image's resolution
        self.W = W
        self.H = H
        # camera's position referencing base_link
        self.CAMERA_OFFSET = camera_offset
        # tcp
        self.TCP = tcp
        # Openpose Pose Detector
        self.pose_detector = PoseDetector(thresh=0.5, visualize=True)

        print('Detector loaded')
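
This snippet is just the constructor of the BlitzDetection class shown in full in Example #9. A minimal, hypothetical instantiation; every path and offset below is a placeholder:

# hypothetical model files and robot geometry
detector = BlitzDetection(cfg='yolov3.cfg', weight='yolov3.weights', meta='coco.data',
                          W=640, H=480,                      # resolution of the incoming images
                          camera_offset=(0.10, 0.00, 1.20),  # camera pose w.r.t. base_link, in metres
                          tcp=(0.40, 0.00, 0.90))            # tool center point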
Example #2
import argparse
import time

import cv2
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA as sklearnPCA

from pose_detector import PoseDetector, draw_person_pose
from hand_detector import HandDetector, draw_hand_keypoints
# (buildGestureDict is a project-local helper defined elsewhere)


def convertData(gesture):
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load model
    pose_detector = PoseDetector("posenet",
                                 "models/coco_posenet.npz",
                                 device=args.gpu)
    hand_detector = HandDetector("handnet",
                                 "models/handnet.npz",
                                 device=args.gpu)
    dataset = buildGestureDict("dataset/")
    gesturedf = pd.read_csv("sample.csv")
    for video in dataset[gesture]["videos"]:
        print("Currently processing the video for " + video["filename"])
        startvideo = time.time()
        cap = cv2.VideoCapture(video["filepath"])
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        amount_of_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        print("Amount of Frames:", amount_of_frames)
        cap.set(cv2.CAP_PROP_FPS, 5)
        ret, img = cap.read()
        counter = 1
        df = pd.DataFrame(columns=["Head", "Left", "Right"])
        frame_tracker = max(1, int(amount_of_frames / 12))  # sample roughly 12 frames per video
        framecounter = 0
        #print(frame_tracker)
        left = 0
        right = 0
        while ret:
            ret, img = cap.read()
            # get video frame
            if not ret:
                print("Failed to capture image")
                break
            person_pose_array, _ = pose_detector(img)
            res_img = cv2.addWeighted(img, 0.6,
                                      draw_person_pose(img, person_pose_array),
                                      0.4, 0)
            if counter % frame_tracker == 0:
                # process only the first detected person in this frame
                firstPerson = True
                for person_pose in person_pose_array:
                    if not firstPerson:
                        continue
                    firstPerson = False
                    unit_length = pose_detector.get_unit_length(person_pose)
                    # hands estimation
                    # print("Estimating hands keypoints...")
                    hands = pose_detector.crop_hands(img, person_pose,
                                                     unit_length)
                    if hands["left"] is not None:
                        hand_img = hands["left"]["img"]
                        bbox = hands["left"]["bbox"]
                        hand_keypoints = hand_detector(hand_img,
                                                       hand_type="left")
                        for x in range(len(hand_keypoints)):
                            if hand_keypoints[x] is not None:
                                hand_keypoints[x] = list(
                                    np.delete(hand_keypoints[x], 2))
                                hand_keypoints[x] = [
                                    int(y) for y in hand_keypoints[x]
                                ]
                        res_img = draw_hand_keypoints(res_img, hand_keypoints,
                                                      (bbox[0], bbox[1]))
                        left = hand_keypoints
                        cv2.rectangle(res_img, (bbox[0], bbox[1]),
                                      (bbox[2], bbox[3]), (255, 255, 255), 1)
                    else:
                        # 21 placeholder coordinates for the missing left hand
                        left = [[1000, 1000] for _ in range(21)]

                    if hands["right"] is not None:
                        hand_img = hands["right"]["img"]
                        bbox = hands["right"]["bbox"]
                        hand_keypoints = hand_detector(hand_img,
                                                       hand_type="right")
                        for x in range(len(hand_keypoints)):
                            if hand_keypoints[x] is not None:
                                hand_keypoints[x] = list(
                                    np.delete(hand_keypoints[x], 2))
                                hand_keypoints[x] = [
                                    int(y) for y in hand_keypoints[x]
                                ]
                        res_img = draw_hand_keypoints(res_img, hand_keypoints,
                                                      (bbox[0], bbox[1]))
                        right = hand_keypoints
                        cv2.rectangle(res_img, (bbox[0], bbox[1]),
                                      (bbox[2], bbox[3]), (255, 255, 255), 1)
                    else:
                        # 21 placeholder coordinates for the missing right hand
                        right = [[1000, 1000] for _ in range(21)]
                    print("Body Pose")
                    person_pose = np.delete(person_pose, 9, 0)
                    person_pose = np.delete(person_pose, 9, 0)
                    person_pose = np.delete(person_pose, 10, 0)
                    person_pose = np.delete(person_pose, 10, 0)
                    person_pose = person_pose.tolist()
                    for z in range(len(person_pose)):
                        if person_pose[z] is not None:
                            person_pose[z] = list(np.delete(person_pose[z], 2))
                            person_pose[z] = [int(a) for a in person_pose[z]]
                    print(person_pose)
                    print("Left")
                    print(left)
                    print("Right")
                    print(right)
                cv2.imshow("result", res_img)
                head = person_pose
                for x in range(len(head)):
                    if head[x] is None:
                        head[x] = [1000, 1000]
                pca = sklearnPCA(n_components=1)
                head = pca.fit_transform(head)
                dfhead = pd.DataFrame(data=head)
                dfhead = dfhead.T
                dfhead.columns = ["head_{}".format(i + 1) for i in range(dfhead.shape[1])]
                for x in range(len(left)):
                    if left[x] is None:
                        left[x] = [1000, 1000]
                pca = sklearnPCA(n_components=1)
                left = pca.fit_transform(left)
                dfleft = pd.DataFrame(data=left)
                dfleft = dfleft.T
                dfleft.columns = ["left_{}".format(i + 1) for i in range(dfleft.shape[1])]
                for x in range(len(right)):
                    if right[x] is None:
                        right[x] = [1000, 1000]
                pca = sklearnPCA(n_components=1)
                right = pca.fit_transform(right)
                dfright = pd.DataFrame(data=right)
                dfright = dfright.T
                dfright.columns = ["right_{}".format(i + 1) for i in range(dfright.shape[1])]
                df2 = pd.concat([dfhead, dfleft, dfright], axis=1)
                df2["frame"] = framecounter
                df2["gesture"] = video["gesture"]
                df2["speaker"] = video["actor"]
                framecounter = framecounter + 1
                df2["frame"] = df2["frame"].astype(int)
                # accumulate this frame's feature row into the gesture DataFrame
                gesturedf = pd.concat([gesturedf, df2], sort=False)
            else:
                cv2.imshow("result", img)
                #print("Frame", counter)
            # advance the frame counter on every frame so the sampling interval holds
            counter = counter + 1
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break  #print(df)
        cap.release()
        cv2.destroyAllWindows()
    gesturedf.to_csv("dataset720new/" + gesture + ".csv", index=False)
    print("Done Recording for: " + gesture)
    print("Took " + str(time.time() - startvideo) + "seconds")

# set the module search path
REPO_ROOT = ''

# import the PoseDetector class
from pose_detector import PoseDetector

armave = {'Rzenwan': 63, 'Lzenwan': 64, 'Ljouwan': 67, 'Rjouwan': 66}
legave = {'Ldaitai': 96, 'Lkatai': 86, 'Rdaitai': 95, 'Rkatai': 90}

# load the model
arch_name = 'posenet'
image_path = os.path.join(REPO_ROOT, 'data', 'person.png')
weight_path = os.path.join(REPO_ROOT, 'models', 'coco_posenet.npz')
model = PoseDetector(arch_name, weight_path)

# 1 is the USB camera, 0 is the built-in camera
cap = cv2.VideoCapture(1)
q = 0
timer = 0
w = 0
code = ""
isBasis = True
zdeg = {}
counting = 1
basisT = False
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
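    # (the original snippet is cut off here; a sketch of how the loop typically
    #  continues in the other examples on this page: run inference, overlay the
    #  skeleton, display, and quit on 'q'.  draw_person_pose would also need to
    #  be imported for this sketch to run.)
    if not ret:
        print("Failed to capture image")
        break
    person_pose_array, _ = model(frame)
    res_img = cv2.addWeighted(frame, 0.6,
                              draw_person_pose(frame, person_pose_array), 0.4, 0)
    cv2.imshow("result", res_img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
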
chainer.using_config('enable_backprop', False)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--img', help='image file path')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load model
    pose_detector = PoseDetector("posenet",
                                 "models/coco_posenet.npz",
                                 device=args.gpu)
    hand_detector = HandDetector("handnet",
                                 "models/handnet.npz",
                                 device=args.gpu)
    face_detector = FaceDetector("facenet",
                                 "models/facenet.npz",
                                 device=args.gpu)

    # read image
    img = cv2.imread(args.img)

    # inference
    print("Estimating pose...")
    person_pose_array, _ = pose_detector(img)
    res_img = cv2.addWeighted(img, 0.6,
                              draw_person_pose(img, person_pose_array), 0.4, 0)
Example #5
from pose_detector import PoseDetector, draw_person_pose

chainer.using_config('enable_backprop', False)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load model
    pose_detector = PoseDetector("posenet",
                                 "models/coco_posenet.npz",
                                 device=args.gpu)

    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    while True:
        # get video frame
        ret, img = cap.read()

        if not ret:
            print("Failed to capture image")
            break

        person_pose_array, _ = pose_detector(img)

    parser.add_argument("--precise",
                        type=str,
                        default=False,
                        help="pose detector mode (precise or not)")
    parser.add_argument("--video_path",
                        "-vp",
                        type=str,
                        default=None,
                        help="video path to make a data")
    args = parser.parse_args()

    # load model
    hand_detector = HandDetector("handnet",
                                 "../models/handnet.npz",
                                 device=args.gpu)
    pose_detector = PoseDetector("posenet",
                                 "../models/coco_posenet.npz",
                                 device=args.gpu,
                                 precise=args.precise)

    # capture the video, analyze it, and save the results
    cap = cv2.VideoCapture(args.video_path)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    if not cap.isOpened():
        sys.exit()
    train_file = open("train.csv", "a")
    train_file.write("hand_position,right_hand,left_hand")

    while True:

        # get video frame
        ret, img = cap.read()
        if not ret:
            break

def main(cap, im_scale=2, view_results=False):
    debug_i = 0
    fps_timer_arr = [0] * 16
    fps = 0

    # load model
    pose_device = 0
    pose_model_dir = '../../Chainer_Realtime_Multi-Person_Pose_Estimation/models'
    pose_detector = PoseDetector("posenet",
                                 f"{pose_model_dir}/coco_posenet.npz",
                                 device=pose_device)
    hand_detector = HandDetector("handnet",
                                 f"{pose_model_dir}/handnet.npz",
                                 device=pose_device)

    # cv2.namedWindow('display', flags=(cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE))
    if view_results: cv2.namedWindow('display')

    video_label_file = VideoLabelFile(cap.video_fname,
                                      fname_add='pre_points_pose')
    labels_current = defaultdict(lambda: [])
    labels_all_previous = video_label_file.load_previous()

    im_input = cap.read()
    im_input_shape = im_input.shape[0:2]

    first_run = True

    while (not cap.eof):
        fps_time_begin = time.perf_counter()
        debug_i += 1

        im_input = cap.read()
        current_frame_id = cap.frame_idx()
        # print(cap.info())

        im_pose = cv2.resize(im_input, (round(im_input_shape[1] / im_scale),
                                        round(im_input_shape[0] / im_scale)))
        if first_run:
            print(
                f"Video size {im_input.shape} -> Model input size {im_pose.shape}"
            )
            first_run = False

        ##########################################
        person_pose_array, _ = pose_detector(im_pose)
        im_display = cv2.addWeighted(
            im_pose, 0.6, draw_person_pose(im_pose, person_pose_array), 0.4, 0)

        for person_pose in person_pose_array:
            unit_length = pose_detector.get_unit_length(person_pose)

            # arr = np.array([a for a in person_pose if a is not None])
            # if arr.any():
            #     arr[:, 0:2] *= im_scale
            #     labels_current[current_frame_id].append(['pre_person_pose', arr.tolist()])

            # hands estimation
            hands = pose_detector.crop_hands(im_pose, person_pose, unit_length)
            if hands["left"] is not None:
                hand_img = hands["left"]["img"]
                bbox = hands["left"]["bbox"]
                hand_keypoints = hand_detector(hand_img, hand_type="left")
                im_display = draw_hand_keypoints(im_display, hand_keypoints,
                                                 (bbox[0], bbox[1]))
                cv2.rectangle(im_display, (bbox[0], bbox[1]),
                              (bbox[2], bbox[3]), (255, 255, 255), 1)

                if hand_keypoints[5] and hand_keypoints[8]:
                    f_points = np.array(
                        [hand_keypoints[5][:2], hand_keypoints[8][:2]])
                    f_points = (f_points +
                                np.array([bbox[0], bbox[1]])) * im_scale
                    #f_points = tuple(map(tuple, f_points.astype(int)))
                    f_points = f_points.astype(int).tolist()
                    labels_current[current_frame_id].append(f_points)

            if hands["right"] is not None:
                hand_img = hands["right"]["img"]
                bbox = hands["right"]["bbox"]
                hand_keypoints = hand_detector(hand_img, hand_type="right")
                im_display = draw_hand_keypoints(im_display, hand_keypoints,
                                                 (bbox[0], bbox[1]))
                cv2.rectangle(im_display, (bbox[0], bbox[1]),
                              (bbox[2], bbox[3]), (255, 255, 255), 1)

                if hand_keypoints[5] and hand_keypoints[8]:
                    f_points = np.array(
                        [hand_keypoints[5][:2], hand_keypoints[8][:2]])
                    f_points = (f_points +
                                np.array([bbox[0], bbox[1]])) * im_scale
                    #f_points = tuple(map(tuple, f_points.astype(int)))
                    f_points = f_points.astype(int).tolist()
                    labels_current[current_frame_id].append(f_points)

        #############################################
        for l in labels_current[current_frame_id]:
            cv2.circle(im_display,
                       (round(l[0][0] / im_scale), round(l[0][1] / im_scale)),
                       10, (255, 0, 0), 2)
            cv2.circle(im_display,
                       (round(l[1][0] / im_scale), round(l[1][1] / im_scale)),
                       10, (0, 255, 0), 2)

        cv2.putText(im_display,
                    f"frame {int(current_frame_id)}, fps: {int(fps)}.",
                    (10, im_display.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    0.8, (255, 255, 255), 2)

        if view_results:
            #cv2.imshow('display', im_display)
            cv2.imshow('display', im_pose)
        else:
            print(".", end="")
            sys.stdout.flush()

        # labels_current[current_frame_id].append

        #############################################
        ## KEYBOARD

        k = cv2.waitKey(5)
        if k == 27:  # esc
            break
        elif k == ord('c'):
            import ipdb
            ipdb.set_trace()
            # ipdb.set_trace()
            # pdb.set_trace()

        fps_timer_arr[debug_i % 16] = time.perf_counter() - fps_time_begin
        fps = int(len(fps_timer_arr) * 1 / sum(fps_timer_arr))

    print(". ")
    # cap.release()
    video_label_file.save_current_labels(labels_current,
                                         append_previous=False,
                                         custom_lists=True)

    if view_results: cv2.destroyAllWindows()
Example #8
    chainer.config.enable_backprop = False
    chainer.config.train = False

    # get directory of data (rgb, depth)
    print('Getting data from: {}'.format(args.data))
    dm = DataManagement(args.data)
    after = dt(2018, 7, 23, 14, 0, 0)
    before = dt(2018, 7, 23, 15, 0, 0)
    datetimes = dm.get_datetimes_in(after, before)

    # camera params
    o3_chain = Open3D_Chain()

    # load model
    pose_detector = PoseDetector("posenet", 
                                os.path.join(abs_op_lib, 
                                "models/coco_posenet.npz"), 
                                device=args.gpu)

    for datetime in datetimes:
        save_path = dm.get_save_directory(datetime)
        save_path = os.path.join(save_path, args.save)
        if not dm.check_path_exists(save_path):
            print('Making a save directory in: {}'.format(save_path))
            os.makedirs(save_path)
        else:
            continue

        rgb_path = dm.get_rgb_path(datetime)
        depth_path = dm.get_depth_path(datetime)

        # sort rgb files before looping
Example #9
class BlitzDetection:
    def __init__(self, cfg, weight, meta, W, H, camera_offset, tcp):

        self.net = darknet.load_net(cfg, weight, 0)
        self.meta = darknet.load_meta(meta)
        self.bridge = CvBridge()
        # receiving image's resolution
        self.W = W
        self.H = H
        # camera's position referencing base_link
        self.CAMERA_OFFSET = camera_offset
        # tcp
        self.TCP = tcp
        # Openpose Pose Detector
        self.pose_detector = PoseDetector(thresh=0.5, visualize=True)

        print('Detector loaded')

    def get_image(self, camera='/camera/color/image_raw'):

        img = rospy.wait_for_message(camera, Image)
        try:
            cv2_img = self.bridge.imgmsg_to_cv2(img, 'bgr8')
        except CvBridgeError as e:
            print(e)

        print "Image Received"
        return cv2_img

    '''
    def get_depth_image(self):

        depth_img = rospy.wait_for_message('/xtion/depth/image_raw', Image)
        try:
            depth_img = self.bridge.imgmsg_to_cv2(depth_img) # depth in mm unit
        except CvBridgeError as e:
            print e

        return depth_img/1000.0 # change to m unit
    '''

    def detection_all(self, thresh=0.7):

        cv_img = self.get_image()
        r = darknet.detect(self.net, self.meta, cv_img)

        if len(r) == 0:
            print('Could not detect anything')
            return None

        detection_list = []
        for item in r:
            name, prob, box_info = item
            if prob >= thresh:
                print('{} detected!'.format(name))
                detection_list.append(item)

        return detection_list

    def detection(self, target='teddy bear'):

        cv2_img = self.get_image()
        r = darknet.detect(self.net, self.meta, cv2_img)

        if len(r) == 0:
            print "Could not detect anything"
            return None

        for item in r:
            if target in item:
                print "Found teddy bear in the image"
                return r
            else:
                pass

        print "No teddy bear in this image"
        return None

    def detection_image_input(self, cv_img, target='teddy bear'):

        r = darknet.detect(self.net, self.meta, cv_img)

        if len(r) == 0:
            print "Could not detect anything"
            return None

        for item in r:
            if target in item:
                print "Found {} in the image".format(target)
                return r
            else:
                pass

        print "No {} in this image".format(target)
        return None

    def target_object(self, r, target='teddy bear'):

        for item in r:
            name, prob, box_info = item
            print(name)

        return [item for item in r if target in item][0]

    def detected_cloud(self, target, box_info):

        cloud = ros_numpy.numpify(
            rospy.wait_for_message('/camera/depth_registered/points',
                                   PointCloud2))

        target_cloud = []
        for i in range(self.H):
            for j in range(self.W):

                point = cloud[i, j]

                if math.isnan(point[0]) or math.isnan(point[1]) or math.isnan(
                        point[2]):
                    target_cloud.append((0, 0, 0))
                    continue

                (x, y, w, h) = box_info
                if j >= x - w / 2 and j <= x + w / 2 and i >= y - h / 2 and i <= y + h / 2:
                    target_cloud.append((point[0], point[1], point[2]))
                else:
                    target_cloud.append((0, 0, 0))

        # visualize target's point cloud
        '''
        fields = [
                    PointField('x', 0, PointField.FLOAT32, 1),
                    PointField('y', 4, PointField.FLOAT32, 1),
                    PointField('z', 8, PointField.FLOAT32, 1),
                 ]
        header = Header()
        header.frame_id = 'camera_depth_optical_frame'
        pub = rospy.Publisher('arm_move_point', PointCloud2, queue_size=2)
        pub.publish(pc2.create_cloud(header, fields, target_cloud))
        '''
        return target_cloud

    def target_position(self, cloud):

        point_list = []

        for point in cloud:

            if point == (0, 0, 0): continue
            else:
                x = self.CAMERA_OFFSET[0] + point[2]
                y = self.CAMERA_OFFSET[1] - point[0]
                z = self.CAMERA_OFFSET[2] - point[1]
                point_list.append((x, y, z))

        sorted_by_depth = sorted(point_list, key=lambda point: point[0])
        object_list = sorted_by_depth[:len(sorted_by_depth) // 2]

        x = sum([item[0] for item in object_list]) / len(object_list)
        y = sum([item[1] for item in object_list]) / len(object_list)
        z = sum([item[2] for item in object_list]) / len(object_list)

        return (x, y, z)

    def tcp_calc(self, x, y, z):

        differ = (x - self.TCP[0], y - self.TCP[1], z - self.TCP[2])

        return differ

    def crop(self, cv_img, x, y, w, h):

        cropped = cv_img[int(y - h / 2):int(y + h / 2),
                         int(x - w / 2):int(x + w / 2)]
        return cropped

    def hand_waving_callback(self, img):

        try:
            rgb_img = self.bridge.imgmsg_to_cv2(img, 'bgr8')
        except CvBridgeError as e:
            print(e)
            return

        self.hand_wave_img = rgb_img
        return

    def is_hand_waving(self):

        N = 10
        i = 0
        '''
        rate = rospy.Rate(5)
        rospy.Subscriber('/xtion/rgb/image_rect_color', Image, self.hand_waving_callback)        
        '''
        while i < N:
            '''
            while self.hand_wave_img is None:
                pass
            '''
            hand_wave_img = self.get_image(
                camera='/xtion/rgb/image_rect_color')
            r = self.detection_image_input(hand_wave_img, 'person')

            if r is None:
                continue

            name, prob, (x, y, w, h) = self.target_object(r, target='person')
            person_img = self.crop(hand_wave_img, x, y, w, h)
            # passing the bridged image straight to Openpose gives garbled results,
            # so the cropped person frame is written to disk first
            cv2.imwrite('hand_waving_frames/person_frame_{}.jpg'.format(i),
                        person_img)
            i += 1

        is_waving = self.pose_detector.predict(N)

        if is_waving:
            print('A person is waving hand')
        else:
            print('A person is not waving hand')

        return
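
A hypothetical end-to-end use of the class, sketched only from its own methods; the detector instance is assumed to have been built as in the sketch after Example #1, and the target label is the class's default 'teddy bear':

r = detector.detection(target='teddy bear')            # YOLO detection on the current camera image
if r is not None:
    name, prob, box_info = detector.target_object(r, target='teddy bear')
    cloud = detector.detected_cloud(name, box_info)     # point cloud restricted to the bounding box
    x, y, z = detector.target_position(cloud)           # target position relative to base_link
    dx, dy, dz = detector.tcp_calc(x, y, z)             # offset from the current tool center point
    print('Move the arm by ({:.3f}, {:.3f}, {:.3f}) m'.format(dx, dy, dz))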
Example #10
def main():
    parser = argparse.ArgumentParser(description='Train pose estimation')
    parser.add_argument('--batchsize', '-B', type=int, default=10,
                        help='Training minibatch size')
    parser.add_argument('--valbatchsize', '-b', type=int, default=4,
                        help='Validation minibatch size')
    parser.add_argument('--log-interval', type=int, default=20)
    parser.add_argument('--vis-interval', type=int, default=20)
    parser.add_argument('--val-interval', type=int, default=1000)
    parser.add_argument('--val-samples', type=int, default=100,
                        help='Number of validation samples')
    parser.add_argument('--iteration', '-i', type=int, default=300000,
                        help='Number of iterations to train')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--optimizer', type=str, default='adabound',
                        choices=['adabound', 'adam'],
                        help='Optimizer')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--domain-randomization', action='store_true')
    parser.add_argument('--augment', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()
    result_output_path = make_fancy_output_dir(args.out, args=args)
    print("output file: {}".format(result_output_path))

    model = OpenPoseNet(
        len(fashion_landmark_utils.UpperClothJointType) + 1,
        len(fashion_landmark_utils.upper_cloth_joint_pairs) * 2)
    from chainer_openpose.datasets.coco import coco_utils
    people_model = OpenPoseNet(
        len(coco_utils.JointType) + 1,
        len(coco_utils.coco_joint_pairs) * 2)
    chainer.serializers.load_npz(
        coco_utils.get_coco_pretrained_model(), people_model)

    # layers whose weights are copied from the COCO-pretrained people model:
    # the shared VGG trunk, the first CPM-stage layers and the middle layers
    # (Mconv2-Mconv6) of refinement stages 2-6; the per-stage output layers,
    # whose shapes depend on the number of joints, are not copied
    layer_names = (
        ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2',
         'conv3_1', 'conv3_2', 'conv3_3', 'conv3_4',
         'conv4_1', 'conv4_2', 'conv4_3_CPM', 'conv4_4_CPM'] +
        ['conv5_{}_CPM_L{}'.format(i, branch)
         for i in range(1, 5) for branch in (1, 2)] +
        ['Mconv{}_stage{}_L{}'.format(m, stage, branch)
         for m in range(2, 7) for stage in range(2, 7) for branch in (1, 2)])
    for layer_name in layer_names:
        model[layer_name].copyparams(people_model[layer_name])
        # model[layer_name].disable_update()

    if args.initmodel:
        print('Load model from {}'.format(args.initmodel))
        chainer.serializers.load_npz(args.initmodel, model)
    train_chain = OpenPoseTrainChain(model)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
        train_chain.to_gpu()

    if args.optimizer == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=1e-4,
                                            beta1=0.9,
                                            beta2=0.999,
                                            eps=1e-08)
    elif args.optimizer == 'adabound':
        optimizer = chainer.optimizers.AdaBound(alpha=1e-4,
                                                beta1=0.9,
                                                beta2=0.999,
                                                eps=1e-08)
    optimizer.setup(train_chain)

    train_datasets = FashionLandmarkKeypointsDataset(split='train')
    train = TransformDataset(train_datasets, Transform(mode='train'))
    val_datasets = FashionLandmarkKeypointsDataset(split='val')
    val = TransformDataset(val_datasets, Transform(mode='val'))

    if args.loaderjob:
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=args.loaderjob)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val_iter = chainer.iterators.SerialIterator(val, args.batchsize)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater,
                               (args.iteration, 'iteration'),
                               result_output_path)

    val_interval = (args.val_interval, 'iteration')
    log_interval = (args.log_interval, 'iteration')
    vis_interval = (args.vis_interval, 'iteration')

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/loss', 'val/loss',
        'main/paf', 'val/paf',
        'main/heatmap', 'val/heatmap',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1))

    # log plotter
    trainer.extend(extensions.PlotReport([
        'main/loss', 'val/loss',
        'main/paf', 'val/paf',
        'main/heatmap', 'val/heatmap'], file_name='loss.png',
                                         trigger=log_interval),
                   trigger=log_interval)

    # Visualization.
    pose_detector = PoseDetector(
        model,
        fashion_landmark_utils.UpperClothJointType,
        fashion_landmark_utils.upper_cloth_joint_pairs,
        device=args.gpu)
    trainer.extend(
        OpenPoseVisReport(
            val_iter,
            pose_detector,
            fashion_landmark_utils.upper_cloth_joint_pairs),
        trigger=vis_interval)

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
    trainer.run()
import chainer
from entity import params
from pose_detector import PoseDetector, draw_person_pose
from face_detector import FaceDetector, draw_face_keypoints
from hand_detector import HandDetector, draw_hand_keypoints

chainer.using_config('enable_backprop', False)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--img', help='image file path')
    parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load model
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=args.gpu)
    hand_detector = HandDetector("handnet", "models/handnet.npz", device=args.gpu)
    face_detector = FaceDetector("facenet", "models/facenet.npz", device=args.gpu)

    # read image
    img = cv2.imread(args.img)

    # inference
    print("Estimating pose...")
    person_pose_array, _ = pose_detector(img)
    res_img = cv2.addWeighted(img, 0.6, draw_person_pose(img, person_pose_array), 0.4, 0)

    # each person detected
    for person_pose in person_pose_array:
        unit_length = pose_detector.get_unit_length(person_pose)
Example #12
def estimate_pose(img_path, gpu=-1):
    # parser = argparse.ArgumentParser(description='Pose detector')
    # parser.add_argument('--img', help='image file path')
    # parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)')
    # args = parser.parse_args()

    # load model
    print("Loading pose detection model...")
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=gpu)
    print("Loading hand detection model...")
    hand_detector = HandDetector("handnet", "models/handnet.npz", device=gpu)
    # face_detector = FaceDetector("facenet", "models/facenet.npz", device=args.gpu)

    # read image
    img = cv2.imread(img_path)

    # inference
    print("Estimating pose...")
    person_pose_array, _ = pose_detector(img)

    res_img = cv2.addWeighted(img, 0.6, draw_person_pose(img, person_pose_array), 0.4, 0)

    # ensures the loop below processes only the first detected person
    has_detected = False

    # each person detected
    for person_pose in person_pose_array:
        if has_detected:
            continue

        has_detected = True

        print("Body:", person_pose)
        unit_length = pose_detector.get_unit_length(person_pose)

        # face estimation
        # print("Estimating face keypoints...")
        # cropped_face_img, bbox = pose_detector.crop_face(img, person_pose, unit_length)
        # if cropped_face_img is not None:
        #     face_keypoints = face_detector(cropped_face_img)
        #     res_img = draw_face_keypoints(res_img, face_keypoints, (bbox[0], bbox[1]))
        #     cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)

        # hands estimation
        print("Estimating hands keypoints...")
        hands = pose_detector.crop_hands(img, person_pose, unit_length)
        if hands["left"] is not None:
            hand_img = hands["left"]["img"]
            bbox = hands["left"]["bbox"]
            hand_keypoints = hand_detector(hand_img, hand_type="left")
            print("Left hand: ", print_arr(hand_keypoints))

            res_img = draw_hand_keypoints(res_img, hand_keypoints, (bbox[0], bbox[1]))
            cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)

        if hands["right"] is not None:
            hand_img = hands["right"]["img"]
            bbox = hands["right"]["bbox"]
            hand_keypoints = hand_detector(hand_img, hand_type="right")
            print("Right hand: ", print_arr(hand_keypoints))
            res_img = draw_hand_keypoints(res_img, hand_keypoints, (bbox[0], bbox[1]))
            cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)

    print('Saving result into result.png...')
    cv2.imwrite('result.png', res_img)
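
A minimal, hypothetical call to the function above; the image path is a placeholder (the same sample path used earlier on this page):

if __name__ == '__main__':
    # hypothetical input image; any picture with at least one visible person works
    estimate_pose('data/person.png', gpu=-1)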