Beispiel #1
0
def parse_image(cv_image):
    """Estimate body and hand poses on one image and draw them.

    Relies on the module-level ``body_estimation`` and ``hand_estimation``
    callables and on the ``util`` drawing/detection helpers.

    Args:
        cv_image: Image array indexed as ``[row, column, channel]``.

    Returns:
        A 3-tuple ``(rgb_image, skeleton, hand_peaks)``:

        * ``rgb_image`` - the canvas returned by ``util.draw_handpose`` (a
          deep copy of ``cv_image`` with body and hand poses drawn on it).
          NOTE(review): despite the name, no channel reordering happens here.
        * ``skeleton`` - list of 18 ``(x, y)`` pairs, one per body part;
          ``(-1, -1)`` where no row of ``subset`` references a candidate.
          When several rows reference the same part, the last row wins.
        * ``hand_peaks`` - one nested list (``peaks.tolist()``) per detected
          hand, in full-image coordinates.
    """
    candidate, subset = body_estimation(cv_image)

    # Draw on a deep copy; the hand crops below are taken from cv_image.
    drawing = util.draw_bodypose(copy.deepcopy(cv_image), candidate, subset)

    # detect hand
    all_hand_peaks = []
    for left, top, side, _is_left in util.handDetect(candidate, subset, cv_image):
        hand_peaks = hand_estimation(cv_image[top:top + side, left:left + side, :])
        # Shift crop-relative coordinates back into the full image. Entries
        # equal to 0 are left untouched (presumably "not detected" - confirm
        # against the hand estimator).
        col_x = hand_peaks[:, 0]
        hand_peaks[:, 0] = np.where(col_x == 0, col_x, col_x + left)
        col_y = hand_peaks[:, 1]
        hand_peaks[:, 1] = np.where(col_y == 0, col_y, col_y + top)
        all_hand_peaks.append(hand_peaks)

    rgb_image = util.draw_handpose(drawing, all_hand_peaks)

    # Collapse the per-person part indices into a single 18-point skeleton.
    skeleton = [(-1, -1)] * 18
    for part in range(18):
        for person in subset:
            index = int(person[part])
            if index != -1:
                px, py = candidate[index][0:2]
                skeleton[part] = (px, py)

    hand_peak_lists = [hand.tolist() for hand in all_hand_peaks]
    return rgb_image, skeleton, hand_peak_lists
Beispiel #2
0
        for i in range(len(subset)):
            if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
                deleteIdx.append(i)
        subset = np.delete(subset, deleteIdx, axis=0)

        # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
        # candidate: x, y, score, id
        return candidate, subset


if __name__ == "__main__":
    # Manual smoke test: load the body model, compute the loss for one
    # annotated sample, then display the predicted joints for a few images.
    body_estimation = Body('../model/pose_iter_146000.caffemodel.pt')
    image = cv2.imread('../images/000003072.jpg')

    # --- loss computation on a single annotated sample ---
    with open('mpii_annotations.json', 'rb') as f:
        annotations = json.load(f)
    jpg_name = '000003072.jpg'
    # Indexing with [0] raises IndexError when no annotation entry matches.
    annotation = [
        entry for entry in annotations if entry['img_paths'] == jpg_name
    ][0]
    loss = body_estimation.inference_and_compute_loss(image, annotation)
    print('sample computed loss: {}'.format(loss))

    # --- model inference: display the joint predictions ---
    for test_image in (
        '../images/000003072.jpg',
        '../images/ski.jpg',
        '../images/person.jpg',
        '../images/mpii_sample.jpg',
    ):
        oriImg = cv2.imread(test_image)  # B,G,R order
        candidate, subset = body_estimation(oriImg)
        canvas = util.draw_bodypose(oriImg, candidate, subset)
        # Reverse the channel order (B,G,R -> R,G,B) before handing the image
        # to matplotlib.
        plt.imshow(canvas[:, :, [2, 1, 0]])
        plt.show()
Beispiel #3
0
import cv2
import util
from hand import Hand
from body import Body
import matplotlib.pyplot as plt
import copy
import numpy as np

# Demo script: run body pose estimation on one image, then run hand pose
# estimation on every hand region derived from the body pose.
# NOTE(review): this excerpt ends inside the hand loop; in the visible lines
# nothing is appended to all_hand_peaks and nothing is displayed or saved.

# Build the two estimators from their weight files.
body_estimation = Body('model/body_pose_model.pth')
hand_estimation = Hand('model/hand_pose_model.pth')

test_image = 'images/demo.jpg'
oriImg = cv2.imread(test_image)  # B,G,R order
candidate, subset = body_estimation(oriImg)
# Draw on a deep copy; the hand crops below are taken from oriImg itself.
canvas = copy.deepcopy(oriImg)
canvas = util.draw_bodypose(canvas, candidate, subset)
# detect hand
# Each entry unpacks to (x, y, w, is_left); the crop below uses w for both
# the height and the width, i.e. a square region starting at (x, y).
hands_list = util.handDetect(candidate, subset, oriImg)

all_hand_peaks = []
for x, y, w, is_left in hands_list:
    # Disabled debugging aids: draw the hand box and its left/right label.
    # cv2.rectangle(canvas, (x, y), (x+w, y+w), (0, 255, 0), 2, lineType=cv2.LINE_AA)
    # cv2.putText(canvas, 'left' if is_left else 'right', (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    # Disabled left/right branching; currently both hands take the same path.
    # if is_left:
    # plt.imshow(oriImg[y:y+w, x:x+w, :][:, :, [2, 1, 0]])
    # plt.show()
    peaks = hand_estimation(oriImg[y:y + w, x:x + w, :])
    # Shift crop-relative coordinates back into the full image. Entries equal
    # to 0 are left untouched (presumably "not detected" - TODO confirm
    # against Hand).
    peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
    peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
    # else:
Beispiel #4
0
def _pad_and_clamp_box(xmin, ymin, xmax, ymax, width, height, pad=50):
    """Grow a detector box by ``pad`` pixels per side and clamp it to the image.

    Lower bounds are clamped to 1 and upper bounds to ``width - 1`` /
    ``height - 1``, matching the limits the inline code used before.

    Returns:
        ``(left, top, right, bottom)`` as Python ints.
    """
    left = max(1, int(xmin) - pad)
    right = min(width - 1, int(xmax) + pad)
    top = max(1, int(ymin) - pad)
    bottom = min(height - 1, int(ymax) + pad)
    return left, top, right, bottom


def _offset_peaks(peaks, dx, dy):
    """Shift crop-relative hand peaks into full-image coordinates, in place.

    Entries equal to 0 are left untouched (presumably "not detected" -
    confirm against the hand estimator). Returns the same ``peaks`` array.
    """
    peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + dx)
    peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + dy)
    return peaks


def detect(oriImg, image_name, mode, model, hand_estimation, body_estimation, rpn, labels):
    """Annotate one image with hand (and optionally body/object) detections.

    The annotated image is written to ``demo/output/<image_name>``.

    Args:
        oriImg: Image array of shape ``(height, width, channel)``.
        image_name: File name of the output image inside ``demo/output``.
        mode: ``"openpose"`` derives hand boxes from the body pose;
            ``"handpose"`` takes them from ``inference_detector(model, ...)``.
        model: Detector handed to ``inference_detector`` (handpose mode only).
        hand_estimation: Callable applied to each hand crop; returns an array
            whose columns 0 and 1 are shifted by the crop's x and y offsets.
        body_estimation: Callable returning ``(candidate, subset)`` for the
            image (openpose mode only).
        rpn: Object detector passed to ``detect_object.detect``. Object
            detection runs only when both ``rpn`` and ``labels`` are truthy.
        labels: Labels passed to ``detect_object.draw_ssd_bbox``.

    Raises:
        ValueError: If ``mode`` is neither ``"openpose"`` nor ``"handpose"``.
            Previously this surfaced later as an UnboundLocalError.
    """
    if mode not in ("openpose", "handpose"):
        raise ValueError(
            "unsupported mode {!r}; expected 'openpose' or 'handpose'".format(mode))

    height, width, _ = oriImg.shape
    canvas = copy.deepcopy(oriImg)
    detect_objects = bool(rpn and labels)
    all_hand_peaks = []

    if mode == "openpose":
        candidate, subset = body_estimation(oriImg)
        canvas = util.draw_bodypose(canvas, candidate, subset)
        # detect hand
        hands_list = util.handDetect(candidate, subset, oriImg)
        if detect_objects:
            # detect object
            detections, scale = detect_object.detect(rpn, oriImg)

        for x, y, w, _is_left in hands_list:
            cv2.rectangle(canvas, (x, y), (x + w, y + w), (0, 255, 0), 2,
                          lineType=cv2.LINE_AA)
            peaks = hand_estimation(oriImg[y:y + w, x:x + w, :])
            all_hand_peaks.append(_offset_peaks(peaks, x, y))

    else:  # mode == "handpose"
        bounding_box = inference_detector(model, oriImg)
        if detect_objects:
            # detect object
            detections, scale = detect_object.detect(rpn, oriImg)

        for xmin, ymin, xmax, ymax, prob in bounding_box[0]:
            # Skip low-confidence boxes.
            if prob < 0.4:
                continue

            left, top, right, bottom = _pad_and_clamp_box(
                xmin, ymin, xmax, ymax, width, height)
            cv2.rectangle(canvas, (left, top), (right, bottom), (0, 0, 255), 2)
            peaks = hand_estimation(oriImg[top:bottom, left:right, :])
            all_hand_peaks.append(_offset_peaks(peaks, left, top))

    if detect_objects:
        canvas = detect_object.draw_ssd_bbox(canvas, detections, scale, labels)
    if all_hand_peaks:
        canvas = util.draw_handpose(canvas, all_hand_peaks)
        canvas = cv2.resize(canvas, (width, height))

    cv2.imwrite(os.path.join('demo', 'output', image_name), canvas)

    return
Beispiel #5
0
def _pad_and_clamp_box(xmin, ymin, xmax, ymax, width, height, pad=50):
    """Grow a detector box by ``pad`` pixels per side and clamp it to the frame.

    Lower bounds are clamped to 1 and upper bounds to ``width - 1`` /
    ``height - 1``, matching the limits the inline code used before.

    Returns:
        ``(left, top, right, bottom)`` as Python ints.
    """
    left = max(1, int(xmin) - pad)
    right = min(width - 1, int(xmax) + pad)
    top = max(1, int(ymin) - pad)
    bottom = min(height - 1, int(ymax) + pad)
    return left, top, right, bottom


def _offset_peaks(peaks, dx, dy):
    """Shift crop-relative hand peaks into full-frame coordinates, in place.

    Entries equal to 0 are left untouched (presumably "not detected" -
    confirm against the hand estimator). Returns the same ``peaks`` array.
    """
    peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + dx)
    peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + dy)
    return peaks


def main(webcamera=False):
    """Annotate a video with hand poses and optional body/object detections.

    Options come from ``get_option()``. Frames are read from camera 0 when
    ``args.video_name == "webcam"``, otherwise from
    ``demo/input/<video_name>``; the annotated frames are written to
    ``demo/output/<name before the first dot>.mp4``. The frame counter is
    printed after every frame and the elapsed time at the end.

    Args:
        webcamera: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If ``args.mode`` is neither ``"openpose"`` nor
            ``"handpose"``. Previously this surfaced as an unbound-name
            error on the first frame.
    """
    args = get_option()
    if args.mode not in ("openpose", "handpose"):
        raise ValueError(
            "unsupported mode {!r}; expected 'openpose' or 'handpose'".format(
                args.mode))

    # Object detection is optional. rpn stays None when it is disabled so the
    # per-frame code can skip it instead of referencing an unbound name.
    rpn = None
    labels = None
    if args.object_detection:
        rpn = ssd_setting.build_ssd('test', 300, 21)
        rpn.load_weights(args.ssd_weight_path)
        from ssd.data import VOC_CLASSES as labels

    #bounding box model
    model = init_detector(args.config_file, args.weight_file, device="cuda:0")
    hand_estimation = Hand(args.hand_weight_path)
    if args.mode == "openpose":
        body_estimation = Body(args.body_weight_path)

    if args.video_name == "webcam":
        video = cv2.VideoCapture(0)
    else:
        video = cv2.VideoCapture(os.path.join('demo', 'input',
                                              args.video_name))

    fps = int(video.get(cv2.CAP_PROP_FPS))
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc("m", "p", "4", "v")
    new_video = cv2.VideoWriter(
        os.path.join('demo', 'output',
                     args.video_name.split('.')[0] + ".mp4"), fourcc, fps,
        (width, height))

    i = 0
    start = time.perf_counter()
    try:
        while True:
            ret, frame = video.read()
            if not ret:
                break

            canvas = copy.deepcopy(frame)
            all_hand_peaks = []

            if args.mode == "openpose":
                candidate, subset = body_estimation(frame)
                canvas = util.draw_bodypose(canvas, candidate, subset)
                # detect hand
                hands_list = util.handDetect(candidate, subset, frame)
                if rpn is not None:
                    # detect object
                    detections, scale = detect_object.detect(rpn, frame)

                for x, y, w, _is_left in hands_list:
                    cv2.rectangle(canvas, (x, y), (x + w, y + w), (0, 255, 0),
                                  2,
                                  lineType=cv2.LINE_AA)
                    peaks = hand_estimation(frame[y:y + w, x:x + w, :])
                    all_hand_peaks.append(_offset_peaks(peaks, x, y))

            else:  # args.mode == "handpose"
                bounding_box = inference_detector(model, frame)
                if rpn is not None:
                    # detect object
                    detections, scale = detect_object.detect(rpn, frame)

                for xmin, ymin, xmax, ymax, prob in bounding_box[0]:
                    # Skip low-confidence boxes.
                    if prob < 0.5:
                        continue

                    left, top, right, bottom = _pad_and_clamp_box(
                        xmin, ymin, xmax, ymax, width, height)
                    cv2.rectangle(canvas, (left, top), (right, bottom),
                                  (0, 0, 255), 2)
                    peaks = hand_estimation(frame[top:bottom, left:right, :])
                    all_hand_peaks.append(_offset_peaks(peaks, left, top))

            if rpn is not None:
                canvas = detect_object.draw_ssd_bbox(canvas, detections, scale,
                                                     labels)
            if all_hand_peaks:
                canvas = util.draw_handpose(canvas, all_hand_peaks)
                canvas = cv2.resize(canvas, (width, height))

            new_video.write(canvas)

            i = i + 1
            print(i)

        end = time.perf_counter()
        print(end - start)
    finally:
        # Release both handles even if a frame fails part-way through.
        video.release()
        new_video.release()
    return
Beispiel #6
0
def _pad_and_clamp_box(xmin, ymin, xmax, ymax, width, height, pad=50):
    """Grow a detector box by ``pad`` pixels per side and clamp it to the image.

    Lower bounds are clamped to 1 and upper bounds to ``width - 1`` /
    ``height - 1``, matching the limits the inline code used before.

    Returns:
        ``(left, top, right, bottom)`` as Python ints.
    """
    left = max(1, int(xmin) - pad)
    right = min(width - 1, int(xmax) + pad)
    top = max(1, int(ymin) - pad)
    bottom = min(height - 1, int(ymax) + pad)
    return left, top, right, bottom


def _offset_peaks(peaks, dx, dy):
    """Shift crop-relative hand peaks into full-image coordinates, in place.

    Entries equal to 0 are left untouched (presumably "not detected" -
    confirm against the hand estimator). Returns the same ``peaks`` array.
    """
    peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + dx)
    peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + dy)
    return peaks


def main():
    """Annotate one image with hand poses (and the body pose in openpose mode).

    Options come from ``get_option()``. The image is read from
    ``demo/input/<image_name>`` and the annotated result is written to
    ``demo/output/<image_name>``.

    Raises:
        ValueError: If ``args.mode`` is neither ``"openpose"`` nor
            ``"handpose"``. Previously this surfaced as an unbound-name error.
        FileNotFoundError: If the input image cannot be read. Previously this
            surfaced as an AttributeError on ``None``.
    """
    args = get_option()
    if args.mode not in ("openpose", "handpose"):
        raise ValueError(
            "unsupported mode {!r}; expected 'openpose' or 'handpose'".format(
                args.mode))

    image_name = args.image_name
    config_file = args.config_file
    weight_file = args.weight_file

    #model load
    model = init_detector(config_file, weight_file, device="cuda:0")
    hand_estimation = Hand('model/hand_pose_model.pth')

    test_image = os.path.join('demo', 'input', image_name)
    oriImg = cv2.imread(test_image)  # B,G,R order
    if oriImg is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError(
            "could not read image: {}".format(test_image))
    height, width, _ = oriImg.shape
    canvas = copy.deepcopy(oriImg)
    all_hand_peaks = []

    if args.mode == "openpose":
        body_estimation = Body('model/body_pose_model.pth')
        candidate, subset = body_estimation(oriImg)
        canvas = util.draw_bodypose(canvas, candidate, subset)
        # detect hand
        hands_list = util.handDetect(candidate, subset, oriImg)

        for x, y, w, _is_left in hands_list:
            cv2.rectangle(canvas, (x, y), (x + w, y + w), (0, 255, 0),
                          2,
                          lineType=cv2.LINE_AA)
            peaks = hand_estimation(oriImg[y:y + w, x:x + w, :])
            all_hand_peaks.append(_offset_peaks(peaks, x, y))

    else:  # args.mode == "handpose"
        bounding_box = inference_detector(model, oriImg)

        for xmin, ymin, xmax, ymax, prob in bounding_box[0]:
            # Skip low-confidence boxes.
            if prob < 0.4:
                continue

            left, top, right, bottom = _pad_and_clamp_box(
                xmin, ymin, xmax, ymax, width, height)
            cv2.rectangle(canvas, (left, top), (right, bottom), (0, 0, 255), 2)
            peaks = hand_estimation(oriImg[top:bottom, left:right, :])
            all_hand_peaks.append(_offset_peaks(peaks, left, top))

    if all_hand_peaks:
        canvas = util.draw_handpose(canvas, all_hand_peaks)
        canvas = cv2.resize(canvas, (width, height))

    cv2.imwrite(os.path.join('demo', 'output', image_name), canvas)
    return
Beispiel #7
0
def _pad_and_clamp_box(xmin, ymin, xmax, ymax, width, height, pad=50):
    """Grow a detector box by ``pad`` pixels per side and clamp it to the frame.

    Lower bounds are clamped to 1 and upper bounds to ``width - 1`` /
    ``height - 1``, matching the limits the inline code used before.

    Returns:
        ``(left, top, right, bottom)`` as Python ints.
    """
    left = max(1, int(xmin) - pad)
    right = min(width - 1, int(xmax) + pad)
    top = max(1, int(ymin) - pad)
    bottom = min(height - 1, int(ymax) + pad)
    return left, top, right, bottom


def _offset_peaks(peaks, dx, dy):
    """Shift crop-relative hand peaks into full-frame coordinates, in place.

    Entries equal to 0 are left untouched (presumably "not detected" -
    confirm against the hand estimator). Returns the same ``peaks`` array.
    """
    peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + dx)
    peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + dy)
    return peaks


def main(webcamera=False):
    """Annotate a video with hand poses (and the body pose in openpose mode).

    Options come from ``get_option()``. Frames are read from camera 0 when
    ``args.video_name == "webcam"``, otherwise from
    ``demo/input/<video_name>``; annotated frames are written to
    ``demo/output/<video_name>``, with ``.mp4`` appended when the name has no
    extension. The frame counter is printed after every frame and the
    elapsed time at the end.

    Args:
        webcamera: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If ``args.mode`` is neither ``"openpose"`` nor
            ``"handpose"``. Previously this surfaced as an unbound-name
            error on the first frame.
    """
    args = get_option()
    if args.mode not in ("openpose", "handpose"):
        raise ValueError(
            "unsupported mode {!r}; expected 'openpose' or 'handpose'".format(
                args.mode))

    video_name = args.video_name
    config_file = args.config_file
    weight_file = args.weight_file

    #bounding box model
    model = init_detector(config_file, weight_file, device="cuda:0")
    hand_estimation = Hand('model/hand_pose_model.pth')
    body_estimation = Body('model/body_pose_model.pth')

    if video_name == "webcam":
        video = cv2.VideoCapture(0)
    else:
        video = cv2.VideoCapture(os.path.join('demo', 'input', video_name))

    fps = int(video.get(cv2.CAP_PROP_FPS))
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # An extension-less name (e.g. "webcam") gives the writer no container
    # to infer, so fall back to .mp4 to match the mp4v codec below. Names
    # that already carry an extension are used unchanged.
    output_name = video_name
    if not os.path.splitext(output_name)[1]:
        output_name += ".mp4"

    fourcc = cv2.VideoWriter_fourcc("m", "p", "4", "v")
    new_video = cv2.VideoWriter(os.path.join('demo', 'output', output_name),
                                fourcc, fps, (width, height))

    i = 0
    start = time.perf_counter()
    try:
        while True:
            ret, frame = video.read()
            if not ret:
                break

            canvas = copy.deepcopy(frame)
            all_hand_peaks = []

            if args.mode == "openpose":
                candidate, subset = body_estimation(frame)
                canvas = util.draw_bodypose(canvas, candidate, subset)
                # detect hand
                hands_list = util.handDetect(candidate, subset, frame)
                for x, y, w, _is_left in hands_list:
                    cv2.rectangle(canvas, (x, y), (x + w, y + w), (0, 255, 0),
                                  2,
                                  lineType=cv2.LINE_AA)
                    peaks = hand_estimation(frame[y:y + w, x:x + w, :])
                    all_hand_peaks.append(_offset_peaks(peaks, x, y))

            else:  # args.mode == "handpose"
                bounding_box = inference_detector(model, frame)
                for xmin, ymin, xmax, ymax, prob in bounding_box[0]:
                    # Skip low-confidence boxes.
                    if prob < 0.5:
                        continue

                    left, top, right, bottom = _pad_and_clamp_box(
                        xmin, ymin, xmax, ymax, width, height)
                    cv2.rectangle(canvas, (left, top), (right, bottom),
                                  (0, 0, 255), 2)
                    peaks = hand_estimation(frame[top:bottom, left:right, :])
                    all_hand_peaks.append(_offset_peaks(peaks, left, top))

            if all_hand_peaks:
                canvas = util.draw_handpose(canvas, all_hand_peaks)
                canvas = cv2.resize(canvas, (width, height))

            new_video.write(canvas)

            i = i + 1
            print(i)

        end = time.perf_counter()
        print(end - start)
    finally:
        # Release both handles even if a frame fails part-way through.
        video.release()
        new_video.release()
    return