Code example #1
def main():
    args = get_args()
    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    print(pre_state_dict['module.body.conv1.weight'].cpu().detach().numpy())
    pretrained_dict = {k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict}
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read image
    img = skimage.io.imread(args.image_path)
    img = torch.from_numpy(img)
    img = img.permute(2,0,1)

    if args.scale != 1.0:
        size1 = int(img.shape[1]/args.scale)
        size2 = int(img.shape[2]/args.scale)
        img = resize(img.float(),(size1,size2))

    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_landmarks, picked_scores = eval_widerface.get_detections(input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    # np_img = resized_img.cpu().permute(1,2,0).numpy()
    np_img = img.cpu().permute(1,2,0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8),cv2.COLOR_BGR2RGB)

    font = cv2.FONT_HERSHEY_SIMPLEX

    for j, boxes in enumerate(picked_boxes):
        if boxes is not None:
            for box, landmark, score in zip(boxes,picked_landmarks[j],picked_scores[j]):
                cv2.rectangle(img,(box[0],box[1]),(box[2],box[3]),(0,0,255),thickness=2)
                cv2.circle(img,(landmark[0],landmark[1]),radius=1,color=(0,0,255),thickness=2)
                cv2.circle(img,(landmark[2],landmark[3]),radius=1,color=(0,255,0),thickness=2)
                cv2.circle(img,(landmark[4],landmark[5]),radius=1,color=(255,0,0),thickness=2)
                cv2.circle(img,(landmark[6],landmark[7]),radius=1,color=(0,255,255),thickness=2)
                cv2.circle(img,(landmark[8],landmark[9]),radius=1,color=(255,255,0),thickness=2)
                cv2.putText(img, text=str(score.item())[:5], org=(box[0],box[1]), fontFace=font, fontScale=0.5,
                            thickness=1, lineType=cv2.LINE_AA, color=(255, 255, 255))

    image_name = args.image_path.split('/')[-1]
    save_path = os.path.join(args.save_path,image_name)
    #cv2.imwrite(save_path, img)
    cv2.imshow('RetinaFace-Pytorch',img)
    cv2.waitKey()
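
All of these examples call a get_args() helper that is not shown. A minimal argparse sketch consistent with the flags used in code example #1 (the option names here are assumptions, not the repository's actual definitions):

import argparse

def get_args():
    parser = argparse.ArgumentParser(description='RetinaFace-Pytorch demo')
    parser.add_argument('--model_path', type=str, required=True,
                        help='path to the trained RetinaFace checkpoint')
    parser.add_argument('--image_path', type=str, help='input image')
    parser.add_argument('--save_path', type=str, default='./out',
                        help='where annotated results are written')
    parser.add_argument('--scale', type=float, default=1.0,
                        help='downscale factor applied before detection')
    return parser.parse_args()
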
Code example #2
def detect_faces(image, model):
    picked_boxes, picked_landmarks, _ = get_detections(image,
                                                       model,
                                                       score_threshold=0.98,
                                                       iou_threshold=0.2)

    scale = 2.5
    faces = []
    for i, boxes in enumerate(picked_boxes):
        if boxes is not None:
            for box, landmarks in zip(boxes, picked_landmarks[i]):
                hor_shift = ((box[2] - box[0]) * (scale - 1)) / 2
                vert_shift = ((box[3] - box[1]) * (scale - 1)) / 2
                x1 = int(max(0, box[0] - hor_shift))
                y1 = int(max(0, box[1] - vert_shift))
                x2 = int(box[2] + hor_shift)
                y2 = int(box[3] + vert_shift)
                face = image[y1:y2, x1:x2]

                landmarks[0::2] -= x1
                landmarks[1::2] -= y1
                faces.append(align(face, landmarks))

    return faces
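
The align() helper used above is not defined in this snippet. A minimal sketch of one common approach, warping the crop so its five landmarks match a fixed template via a similarity transform (the template coordinates and output size here are assumptions):

import cv2
import numpy as np

# Canonical 5-point layout for a 96x112 face crop (assumed values).
REFERENCE_5PTS = np.array([[30.29, 51.69], [65.53, 51.50], [48.02, 71.73],
                           [33.55, 92.37], [62.73, 92.20]], dtype=np.float32)

def align(face, landmarks, out_size=(96, 112)):
    src = np.asarray(landmarks, dtype=np.float32).reshape(5, 2)
    # Rotation + uniform scale + translation mapping the detected landmarks
    # onto the template; the crop is then warped with that transform.
    matrix, _ = cv2.estimateAffinePartial2D(src, REFERENCE_5PTS,
                                            method=cv2.LMEDS)
    return cv2.warpAffine(face, matrix, out_size)
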
Code example #3
def main():
    args = get_args()
    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read video
    cap = cv2.VideoCapture(args.video_path)

    codec = cv2.VideoWriter_fourcc(*'MJPG')

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = 25.0

    out = cv2.VideoWriter(args.save_path, codec, fps, (width, height))

    font = cv2.FONT_HERSHEY_SIMPLEX

    while True:
        ret, img = cap.read()

        if not ret:
            print('End of video or read error.')
            break

        img = torch.from_numpy(img)
        img = img.permute(2, 0, 1)

        if args.scale != 1.0:
            size1 = int(img.shape[1] / args.scale)
            size2 = int(img.shape[2] / args.scale)
            img = resize(img.float(), (size1, size2))

        input_img = img.unsqueeze(0).float().cuda()
        picked_boxes, picked_landmarks, picked_scores = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

        # np_img = resized_img.cpu().permute(1,2,0).numpy()
        np_img = img.cpu().permute(1, 2, 0).numpy()
        img = np_img.astype(np.uint8)

        for j, boxes in enumerate(picked_boxes):
            if boxes is not None:
                for box, landmark, score in zip(boxes, picked_landmarks[j],
                                                picked_scores[j]):
                    cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                                  (0, 0, 255),
                                  thickness=2)
                    cv2.circle(img, (landmark[0], landmark[1]),
                               radius=1,
                               color=(0, 0, 255),
                               thickness=2)
                    cv2.circle(img, (landmark[2], landmark[3]),
                               radius=1,
                               color=(0, 255, 0),
                               thickness=2)
                    cv2.circle(img, (landmark[4], landmark[5]),
                               radius=1,
                               color=(255, 0, 0),
                               thickness=2)
                    cv2.circle(img, (landmark[6], landmark[7]),
                               radius=1,
                               color=(0, 255, 255),
                               thickness=2)
                    cv2.circle(img, (landmark[8], landmark[9]),
                               radius=1,
                               color=(255, 255, 0),
                               thickness=2)
                    cv2.putText(img,
                                text=str(score.item())[:5],
                                org=(box[0], box[1]),
                                fontFace=font,
                                fontScale=0.5,
                                thickness=1,
                                lineType=cv2.LINE_AA,
                                color=(255, 255, 255))

        out.write(img)
        cv2.imshow('RetinaFace-Pytorch', img)
        key = cv2.waitKey(1)
        if key == ord('q'):
            print('Now quit.')
            break

    cap.release()
    out.release()
    cv2.destroyAllWindows()
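
The writer above hard-codes fps = 25.0, which drifts out of sync when the source video runs at a different rate. Where the backend can report it, the rate can be probed from the capture first (a small sketch; cap.get returns 0.0 when the property is unknown):

fps = cap.get(cv2.CAP_PROP_FPS) or 25.0  # fall back to 25 when unreported
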
Code example #4
def main():
    args = get_args()

    # Create the model
    # if args.depth == 18:
    #     RetinaFace = model.resnet18(num_classes=2, pretrained=True)
    # elif args.depth == 34:
    #     RetinaFace = model.resnet34(num_classes=2, pretrained=True)
    # elif args.depth == 50:
    #     RetinaFace = model.resnet50(num_classes=2, pretrained=True)
    # elif args.depth == 101:
    #     RetinaFace = model.resnet101(num_classes=2, pretrained=True)
    # elif args.depth == 152:
    #     RetinaFace = model.resnet152(num_classes=2, pretrained=True)
    # else:
    #     raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # Create torchvision model

    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load trained model
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load('stage_5_68_full_model_epoch_121.pt',
                                map_location='cpu')
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace.to(device)
    RetinaFace.eval()

    import time

    video = cv2.VideoCapture(0)
    # Read image
    while True:
        start = time.time()
        ret, img = video.read()
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = torch.from_numpy(img)
        img = img.permute(2, 0, 1)
        resized_img = img.float()
        # resized_img = resize(img.float(),(360,640))
        # print(resized_img.shape)
        input_img = resized_img.unsqueeze(0).to(device)

        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)
        # print(picked_boxes)
        np_img = resized_img.cpu().permute(1, 2, 0).numpy()
        img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

        for j, boxes in enumerate(picked_boxes):
            if boxes is not None:
                for box, landmark in zip(boxes, picked_landmarks[j]):
                    cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                                  (0, 0, 255),
                                  thickness=2)
                    for i in range(0, 136, 2):
                        cv2.circle(img, (landmark[i], landmark[i + 1]),
                                   radius=1,
                                   color=(0, 0, 255),
                                   thickness=2)
        cv2.imshow('RetinaFace-Pytorch', img)
        print(time.time() - start)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
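
The per-frame timing above brackets an asynchronous CUDA call with time.time(), which can under-report the real cost because kernels may still be running when the clock is read. A sketch of a safer measurement around any model callable (fn here is a placeholder for e.g. the get_detections call):

import time
import torch

def timed(fn, *args, **kwargs):
    if torch.cuda.is_available():
        torch.cuda.synchronize()  # flush GPU work queued before the call
    start = time.time()
    with torch.no_grad():
        result = fn(*args, **kwargs)
    if torch.cuda.is_available():
        torch.cuda.synchronize()  # wait for this call's kernels to finish
    return result, time.time() - start
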
Code example #5
def main():
    args = get_args()

    # Create the model
    # if args.depth == 18:
    #     RetinaFace = model.resnet18(num_classes=2, pretrained=True)
    # elif args.depth == 34:
    #     RetinaFace = model.resnet34(num_classes=2, pretrained=True)
    # elif args.depth == 50:
    #     RetinaFace = model.resnet50(num_classes=2, pretrained=True)
    # elif args.depth == 101:
    #     RetinaFace = model.resnet101(num_classes=2, pretrained=True)
    # elif args.depth == 152:
    #     RetinaFace = model.resnet152(num_classes=2, pretrained=True)
    # else:
    #     raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # Create torchvision model

    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load trained model
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(
        '/versa/elvishelvis/RetinaYang/out/stage_5_68_full_model_epoch_51.pt')
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace.to(device)
    RetinaFace.eval()

    import time

    dataset_val = TrainDataset('./widerface/train/label.txt',
                               transform=transforms.Compose(
                                   [Resizer(640), PadToSquare()]))
    # dataset_val = ValDataset('./widerface/train/label.txt')
    for qq in range(100, 150):
        img = dataset_val[qq]['img']
        # img=cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # img=skimage.io.imread("/versa/elvishelvis/RetinaFace_Pytorch/CelebA/Img/img_celeba.7z/img_celeba/118{}.jpg".format(str(qq)))
        img = img.permute(2, 0, 1)
        resized_img = img.float()
        input_img = resized_img.unsqueeze(0).to(device)
        start = time.time()
        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.9, iou_threshold=0.2)
        print(time.time() - start)
        # print(picked_boxes)
        np_img = resized_img.cpu().permute(1, 2, 0).numpy()
        img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

        for j, boxes in enumerate(picked_boxes):
            if boxes is not None:
                for box, landmark in zip(boxes, picked_landmarks[j]):
                    cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                                  (0, 0, 255),
                                  thickness=2)
                    for i in range(0, 136, 2):
                        cv2.circle(img, (landmark[i], landmark[i + 1]),
                                   radius=1,
                                   color=(0, 0, 255),
                                   thickness=2)

        cv2.imwrite('./RetinaFace-Pytorch{}.jpg'.format(qq),
                    cv2.resize(img, (640, 640)))
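
The resize() helper used throughout these examples is not shown. A plausible minimal implementation for CHW float tensors, assuming it simply wraps torch's interpolation (the repository's actual helper may differ):

import torch.nn.functional as F

def resize(image, size):
    # interpolate expects a batch dimension, hence unsqueeze/squeeze.
    return F.interpolate(image.unsqueeze(0), size=size,
                         mode='bilinear', align_corners=False).squeeze(0)
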
Code example #6
def main():
    args = get_args()
    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict}
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    o = open("out.txt",'w')
    f = open(args.text_path, 'r')
    lines = f.readlines()
    for line in lines:
        o.write(line)
        print(line)
        line = line.rstrip()
        if line.startswith('#'):
            path = args.image_path_prefix + line[2:].replace('/', '\\')
        else:
            path = args.image_path_prefix + line.replace('/', '\\') + ".jpg"

        if not os.path.exists(path):
            continue

        # Read image
        # change by yzk
        img1 = skimage.io.imread(path, as_gray=True).astype(np.float32)
        img2 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
        img = (img2 * 255.0).astype(np.uint8)
        #ori
        # img = skimage.io.imread(path)


        img = torch.from_numpy(img)
        img = img.permute(2,0,1)

        if args.scale != 1.0:
            size1 = int(img.shape[1]/args.scale)
            size2 = int(img.shape[2]/args.scale)
            img = resize(img.float(),(size1,size2))

        input_img = img.unsqueeze(0).float().cuda()
        picked_boxes, picked_landmarks, picked_scores = eval_widerface.get_detections(input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)
        #print(str(picked_boxes[0].shape[0]))
        if picked_boxes is None or picked_boxes[0] is None:
            o.write("0" + '\n')
        else:
            o.write(str(picked_boxes[0].shape[0]) + '\n')
        # np_img = resized_img.cpu().permute(1,2,0).numpy()
        np_img = img.cpu().permute(1,2,0).numpy()
        img = cv2.cvtColor(np_img.astype(np.uint8),cv2.COLOR_BGR2RGB)

        font = cv2.FONT_HERSHEY_SIMPLEX

        for j, boxes in enumerate(picked_boxes):
            if boxes is not None:
                for box, landmark, score in zip(boxes,picked_landmarks[j],picked_scores[j]):
                    det_line = '{} {} {} {} {}'.format(
                        box[0].item(), box[1].item(),
                        box[2].item() - box[0].item(),
                        box[3].item() - box[1].item(), score.item())
                    print(det_line)
                    o.write(det_line + '\n')
                    cv2.rectangle(img,(box[0],box[1]),(box[2],box[3]),(0,0,255),thickness=2)
                    cv2.circle(img,(landmark[0],landmark[1]),radius=1,color=(0,0,255),thickness=2)
                    cv2.circle(img,(landmark[2],landmark[3]),radius=1,color=(0,255,0),thickness=2)
                    cv2.circle(img,(landmark[4],landmark[5]),radius=1,color=(255,0,0),thickness=2)
                    cv2.circle(img,(landmark[6],landmark[7]),radius=1,color=(0,255,255),thickness=2)
                    cv2.circle(img,(landmark[8],landmark[9]),radius=1,color=(255,255,0),thickness=2)
                    cv2.putText(img, text=str(score.item())[:5], org=(box[0],box[1]), fontFace=font, fontScale=0.5,
                                thickness=1, lineType=cv2.LINE_AA, color=(255, 255, 255))


        image_name = path.split('\\')[-1]
        save_path = os.path.join(args.save_path,image_name)
        # cv2.imwrite(save_path, img)
        # cv2.imshow('RetinaFace-Pytorch',img)
        # cv2.waitKey()
    o.close()
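
The k[7:] slicing that every example applies when loading a checkpoint strips the 'module.' prefix that torch.nn.DataParallel adds to state_dict keys. A slightly safer variant that only strips the prefix when it is actually present:

def strip_module_prefix(state_dict):
    # 'module.' is 7 characters long, hence the k[7:] in the examples above.
    return {(k[7:] if k.startswith('module.') else k): v
            for k, v in state_dict.items()}
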
Code example #7
def main():
    args = get_args()

    # Create retinaface
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.f_model)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    print('Retinaface create success.')

    # Create hopenet
    Hopenet = hopenet.Hopenet(torchvision.models.resnet.Bottleneck,
                              [3, 4, 6, 3], 66)

    saved_state_dict = torch.load(args.p_model)
    Hopenet.load_state_dict(saved_state_dict)
    Hopenet = Hopenet.cuda()
    Hopenet.eval()

    print('Hopenet create success.')

    idx_tensor = torch.arange(66, dtype=torch.float32).cuda()

    transformations = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    if args.type == 'image':
        img = cv2.imread(args.image_path)

        img = torch.from_numpy(img)
        img = img.permute(2, 0, 1)

        if args.scale != 1.0:
            size1 = int(img.shape[1] / args.scale)
            size2 = int(img.shape[2] / args.scale)
            img = resize(img.float(), (size1, size2))

        input_img = img.unsqueeze(0).float().cuda()
        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

        np_img = img.cpu().permute(1, 2, 0).numpy()
        img = np_img.astype(np.uint8)

        for j, boxes in enumerate(picked_boxes):
            if boxes is not None:
                for box, landmark in zip(boxes, picked_landmarks[j]):
                    # Crop face
                    x_min = int(box[0])
                    x_max = int(box[2])
                    y_min = int(box[1])
                    y_max = int(box[3])
                    # Clip
                    x_min = max(x_min, 0)
                    x_max = min(x_max, img.shape[1])
                    y_min = max(y_min, 0)
                    y_max = min(y_max, img.shape[0])

                    if x_min >= x_max or y_min >= y_max:
                        continue

                    bbox_height = abs(y_max - y_min)
                    face_img = img[y_min:y_max, x_min:x_max]
                    face_img = Image.fromarray(face_img)

                    # Transform
                    face_img = transformations(face_img)
                    img_shape = face_img.size()
                    face_img = face_img.view(1, img_shape[0], img_shape[1],
                                             img_shape[2])
                    face_img = face_img.cuda()

                    yaw, pitch, roll = Hopenet(face_img)

                    yaw_predicted = F.softmax(yaw, dim=1)
                    pitch_predicted = F.softmax(pitch, dim=1)
                    roll_predicted = F.softmax(roll, dim=1)
                    # Get continuous predictions in degrees.
                    yaw_predicted = torch.sum(
                        yaw_predicted.data[0] * idx_tensor) * 3 - 99
                    pitch_predicted = torch.sum(
                        pitch_predicted.data[0] * idx_tensor) * 3 - 99
                    roll_predicted = torch.sum(
                        roll_predicted.data[0] * idx_tensor) * 3 - 99

                    utils.draw_axis(img,
                                    yaw_predicted,
                                    pitch_predicted,
                                    roll_predicted,
                                    tdx=(x_min + x_max) / 2,
                                    tdy=(y_min + y_max) / 2,
                                    size=bbox_height / 2)
                    cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                                  (255, 0, 255),
                                  thickness=2)
                    # cv2.circle(img,(landmark[0],landmark[1]),radius=1,color=(0,0,255),thickness=2)
                    # cv2.circle(img,(landmark[2],landmark[3]),radius=1,color=(0,255,0),thickness=2)
                    # cv2.circle(img,(landmark[4],landmark[5]),radius=1,color=(255,0,0),thickness=2)
                    # cv2.circle(img,(landmark[6],landmark[7]),radius=1,color=(0,255,255),thickness=2)
                    # cv2.circle(img,(landmark[8],landmark[9]),radius=1,color=(255,255,0),thickness=2)

        cv2.imshow('RetinaFace-Hopenet', img)
        cv2.waitKey()

    else:
        # Read video
        cap = cv2.VideoCapture(args.video_path)

        codec = cv2.VideoWriter_fourcc(*'MJPG')

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = 25.0

        out = cv2.VideoWriter(args.out, codec, fps, (width, height))

        while True:
            ret, img = cap.read()

            if not ret:
                print('End of video or read error.')
                break

            img = torch.from_numpy(img)
            img = img.permute(2, 0, 1)

            if args.scale != 1.0:
                size1 = int(img.shape[1] / args.scale)
                size2 = int(img.shape[2] / args.scale)
                img = resize(img.float(), (size1, size2))

            input_img = img.unsqueeze(0).float().cuda()
            picked_boxes, picked_landmarks = eval_widerface.get_detections(
                input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

            # np_img = resized_img.cpu().permute(1,2,0).numpy()
            np_img = img.cpu().permute(1, 2, 0).numpy()
            img = np_img.astype(np.uint8)

            for j, boxes in enumerate(picked_boxes):
                if boxes is not None:
                    for box, landmark in zip(boxes, picked_landmarks[j]):
                        # Crop face
                        x_min = int(box[0])
                        x_max = int(box[2])
                        y_min = int(box[1])
                        y_max = int(box[3])
                        # Clip
                        x_min = max(x_min, 0)
                        x_max = min(x_max, img.shape[1])
                        y_min = max(y_min, 0)
                        y_max = min(y_max, img.shape[0])

                        if x_min >= x_max or y_min >= y_max:
                            continue

                        bbox_height = abs(y_max - y_min)
                        face_img = img[y_min:y_max, x_min:x_max]
                        face_img = Image.fromarray(face_img)

                        # Transform
                        face_img = transformations(face_img)
                        img_shape = face_img.size()
                        face_img = face_img.view(1, img_shape[0], img_shape[1],
                                                 img_shape[2])
                        face_img = face_img.cuda()

                        yaw, pitch, roll = Hopenet(face_img)

                        yaw_predicted = F.softmax(yaw, dim=1)
                        pitch_predicted = F.softmax(pitch, dim=1)
                        roll_predicted = F.softmax(roll, dim=1)
                        # Get continuous predictions in degrees.
                        yaw_predicted = torch.sum(
                            yaw_predicted.data[0] * idx_tensor) * 3 - 99
                        pitch_predicted = torch.sum(
                            pitch_predicted.data[0] * idx_tensor) * 3 - 99
                        roll_predicted = torch.sum(
                            roll_predicted.data[0] * idx_tensor) * 3 - 99

                        utils.draw_axis(img,
                                        yaw_predicted,
                                        pitch_predicted,
                                        roll_predicted,
                                        tdx=(x_min + x_max) / 2,
                                        tdy=(y_min + y_max) / 2,
                                        size=bbox_height / 2)
                        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                                      (255, 0, 255),
                                      thickness=2)
                        # cv2.rectangle(img,(x_min,y_min),(x_max,y_max),(255,0,255),thickness=2)
                        cv2.circle(img, (landmark[0], landmark[1]),
                                   radius=1,
                                   color=(0, 0, 255),
                                   thickness=2)
                        cv2.circle(img, (landmark[2], landmark[3]),
                                   radius=1,
                                   color=(0, 255, 0),
                                   thickness=2)
                        cv2.circle(img, (landmark[4], landmark[5]),
                                   radius=1,
                                   color=(255, 0, 0),
                                   thickness=2)
                        cv2.circle(img, (landmark[6], landmark[7]),
                                   radius=1,
                                   color=(0, 255, 255),
                                   thickness=2)
                        cv2.circle(img, (landmark[8], landmark[9]),
                                   radius=1,
                                   color=(255, 255, 0),
                                   thickness=2)

            out.write(img)
            cv2.imshow('RetinaFace-Pytorch', img)
            key = cv2.waitKey(1)
            if key == ord('q'):
                print('Now quit.')
                break

        cap.release()
        out.release()
    cv2.destroyAllWindows()
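
The yaw/pitch/roll post-processing above is Hopenet's binned-angle decoding: 66 classification bins, each 3 degrees wide, spanning roughly -99 to +99 degrees, combined as an expectation over bin indices. Factored into a helper, the same computation reads:

import torch
import torch.nn.functional as F

def decode_angle(logits, idx_tensor):
    # Expectation over the 66 bins, then bin units mapped to degrees.
    probs = F.softmax(logits, dim=1)
    return (torch.sum(probs[0] * idx_tensor) * 3 - 99).item()
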
Code example #8
def main():
    args = get_args()

    # Create the model
    # if args.depth == 18:
    #     RetinaFace = model.resnet18(num_classes=2, pretrained=True)
    # elif args.depth == 34:
    #     RetinaFace = model.resnet34(num_classes=2, pretrained=True)
    # elif args.depth == 50:
    #     RetinaFace = model.resnet50(num_classes=2, pretrained=True)
    # elif args.depth == 101:
    #     RetinaFace = model.resnet101(num_classes=2, pretrained=True)
    # elif args.depth == 152:
    #     RetinaFace = model.resnet152(num_classes=2, pretrained=True)
    # else:
    #     raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read image
    img = skimage.io.imread(args.image_path)
    img = torch.from_numpy(img)
    img = img.permute(2, 0, 1)
    padded_img, _ = pad_to_square(img, 0)
    resized_img = resize(padded_img.float(), (640, 640))
    input_img = resized_img.unsqueeze(0).cuda()
    picked_boxes, picked_landmarks = eval_widerface.get_detections(
        input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    np_img = resized_img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    for j, boxes in enumerate(picked_boxes):
        if boxes is not None:
            for box, landmark in zip(boxes, picked_landmarks[j]):
                cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                              (0, 0, 255),
                              thickness=2)
                cv2.circle(img, (landmark[0], landmark[1]),
                           radius=1,
                           color=(0, 0, 255),
                           thickness=2)
                cv2.circle(img, (landmark[2], landmark[3]),
                           radius=1,
                           color=(0, 255, 0),
                           thickness=2)
                cv2.circle(img, (landmark[4], landmark[5]),
                           radius=1,
                           color=(255, 0, 0),
                           thickness=2)
                cv2.circle(img, (landmark[6], landmark[7]),
                           radius=1,
                           color=(0, 255, 255),
                           thickness=2)
                cv2.circle(img, (landmark[8], landmark[9]),
                           radius=1,
                           color=(255, 255, 0),
                           thickness=2)

    image_name = args.image_path.split('/')[-1]
    save_path = os.path.join(args.save_path, image_name)
    cv2.imwrite(save_path, img)
    cv2.imshow('RetinaFace-Pytorch', img)
    cv2.waitKey()
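
pad_to_square() is not defined in this snippet. A minimal sketch of what it plausibly does, padding the shorter side of a CHW tensor with a constant value so the image becomes square (the actual helper may split or order the padding differently):

import torch.nn.functional as F

def pad_to_square(img, pad_value=0):
    _, h, w = img.shape
    diff = abs(h - w)
    # F.pad order for the last two dims is (left, right, top, bottom).
    pad = (0, 0, diff // 2, diff - diff // 2) if h <= w \
        else (diff // 2, diff - diff // 2, 0, 0)
    return F.pad(img, pad, value=pad_value), pad
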
Code example #9
def main():
    args = get_args()
    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Webcam capture left disabled: as written, this loop read frames forever
    # and made the rest of the function unreachable.
    # vc = cv2.VideoCapture(0)
    # while True:  # loop reading video frames
    #     rval, img_raw = vc.read()
    # Read image
    img = skimage.io.imread(args.image_path)
    img = torch.from_numpy(img)
    img = img.permute(2, 0, 1)

    if args.scale != 1.0:
        size1 = int(img.shape[1] / args.scale)
        size2 = int(img.shape[2] / args.scale)
        img = resize(img.float(), (size1, size2))

    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_landmarks = eval_widerface.get_detections(
        input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    # np_img = resized_img.cpu().permute(1,2,0).numpy()
    np_img = img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    for j, boxes in enumerate(picked_boxes):
        if boxes is not None:
            for box, landmark in zip(boxes, picked_landmarks[j]):
                cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                              (0, 0, 255),
                              thickness=2)
                cv2.circle(img, (landmark[0], landmark[1]),
                           radius=1,
                           color=(0, 0, 255),
                           thickness=2)
                cv2.circle(img, (landmark[2], landmark[3]),
                           radius=1,
                           color=(0, 255, 0),
                           thickness=2)
                cv2.circle(img, (landmark[4], landmark[5]),
                           radius=1,
                           color=(255, 0, 0),
                           thickness=2)
                cv2.circle(img, (landmark[6], landmark[7]),
                           radius=1,
                           color=(0, 255, 255),
                           thickness=2)
                cv2.circle(img, (landmark[8], landmark[9]),
                           radius=1,
                           color=(255, 255, 0),
                           thickness=2)

    image_name = args.image_path.split('/')[-1]
    # save_path = os.path.join(args.save_path,image_name)
    # cv2.imwrite(save_path, img)
    cv2.imshow('RetinaFace-Pytorch', img)
    cv2.waitKey()
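
The disabled capture loop at the top of this example suggests live webcam detection was intended. A sketch of how such a loop could wrap the detect-and-draw path (detect_and_draw is hypothetical; it would hold the tensor conversion, get_detections call, and drawing code from the body above):

import cv2

def run_webcam(detect_and_draw):
    vc = cv2.VideoCapture(0)
    while True:
        rval, frame = vc.read()  # loop reading video frames
        if not rval:
            break
        cv2.imshow('RetinaFace-Pytorch', detect_and_draw(frame))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    vc.release()
    cv2.destroyAllWindows()
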
Code example #10
def main():
    args = get_args()

    # Create the model
    # if args.depth == 18:
    #     RetinaFace = model.resnet18(num_classes=2, pretrained=True)
    # elif args.depth == 34:
    #     RetinaFace = model.resnet34(num_classes=2, pretrained=True)
    # elif args.depth == 50:
    #     RetinaFace = model.resnet50(num_classes=2, pretrained=True)
    # elif args.depth == 101:
    #     RetinaFace = model.resnet101(num_classes=2, pretrained=True)
    # elif args.depth == 152:
    #     RetinaFace = model.resnet152(num_classes=2, pretrained=True)
    # else:
    #     raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # Create torchvision model

    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace.cuda()
    RetinaFace.eval()
    import time
    start = time.time()
    for qq in range(400, 500):
        img = skimage.io.imread(
            "/versa/elvishelvis/RetinaFace_Pytorch/CelebA/Img/img_celeba.7z/img_celeba/118{}.jpg"
            .format(str(qq)))
        print(img.shape)
        # img=cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = torch.from_numpy(img)
        img = img.permute(2, 0, 1)
        resized_img = resize(img.float(), (320, 320))
        input_img = resized_img.unsqueeze(0).cuda()
        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)
        # print(picked_boxes)
        np_img = resized_img.cpu().permute(1, 2, 0).numpy()
        img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

        for j, boxes in enumerate(picked_boxes):
            if boxes is not None:
                for box, landmark in zip(boxes, picked_landmarks[j]):
                    cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                                  (0, 0, 255),
                                  thickness=2)
                    for i in range(0, 10, 2):
                        cv2.circle(img, (landmark[i], landmark[i + 1]),
                                   radius=1,
                                   color=(0, 0, 255),
                                   thickness=2)

        cv2.imwrite('RetinaFace-Pytorch{}.jpg'.format(qq),
                    cv2.resize(img, (640, 640)))
    print(time.time() - start)
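
Resizing every input straight to 320x320, as above, distorts non-square images. Combining the resize with padding, as code example #8 does, preserves the aspect ratio (a two-line sketch reusing the pad_to_square and resize helpers sketched earlier):

padded_img, _ = pad_to_square(img.float(), 0)
resized_img = resize(padded_img, (320, 320))
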
Code example #11
def detect_img(img):
    args = get_args()
    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read image
    img = torch.from_numpy(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    img = img.permute(2, 0, 1)

    if args.scale != 1.0:
        size1 = int(img.shape[1] / args.scale)
        size2 = int(img.shape[2] / args.scale)
        img = resize(img.float(), (size1, size2))

    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_landmarks, picked_scores = eval_widerface.get_detections(
        input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    # np_img = resized_img.cpu().permute(1,2,0).numpy()
    np_img = img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    font = cv2.FONT_HERSHEY_SIMPLEX

    for j, boxes in enumerate(picked_boxes):
        if boxes is not None:
            for box, landmark, score in zip(boxes, picked_landmarks[j],
                                            picked_scores[j]):
                cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                              (255, 0, 255),
                              thickness=2)
                cv2.circle(img, (landmark[0], landmark[1]),
                           radius=1,
                           color=(0, 0, 255),
                           thickness=2)
                cv2.circle(img, (landmark[2], landmark[3]),
                           radius=1,
                           color=(0, 255, 0),
                           thickness=2)
                cv2.circle(img, (landmark[4], landmark[5]),
                           radius=1,
                           color=(255, 0, 0),
                           thickness=2)
                cv2.circle(img, (landmark[6], landmark[7]),
                           radius=1,
                           color=(0, 255, 255),
                           thickness=2)
                cv2.circle(img, (landmark[8], landmark[9]),
                           radius=1,
                           color=(255, 255, 0),
                           thickness=2)
                '''
                x = (landmark[0] + landmark[2]) / 2
                y = landmark[3] - (landmark[3] - box[1]) / 3
                cv2.circle(img,(x,y),radius=5,color=(0,0,255),thickness=1)
                '''
                '''
                start_point_x = (landmark[0] + landmark[2]) / 2
                start_point_y = (landmark[1] + landmark[3]) / 2
                end_point_x = (landmark[8] + landmark[6]) / 2
                end_point_y = (landmark[9] + landmark[7]) / 2
                cv2.line(img , (start_point_x,start_point_y),(landmark[4],landmark[5]),color=(255,100,0),thickness=2)
                cv2.line(img , (landmark[4],landmark[5]),(end_point_x,end_point_y),color=(255,255,100),thickness=2)
                '''
                '''
                cv2.rectangle(img,(landmark[0],landmark[1]),(landmark[8],landmark[9]),(0,0,100),thickness=2)
                '''
                cv2.putText(img,
                            text=str(score.item())[:5],
                            org=(box[0], box[1]),
                            fontFace=font,
                            fontScale=0.5,
                            thickness=1,
                            lineType=cv2.LINE_AA,
                            color=(255, 255, 255))

    return img
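
detect_img() rebuilds and reloads the network on every call, which dominates the runtime when many images are processed. A sketch of loading once and reusing the model, following the same loading pattern as the examples above:

_RETINAFACE = None

def get_retinaface(model_path):
    global _RETINAFACE
    if _RETINAFACE is None:
        return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
        model = torchvision_model.create_retinaface(return_layers)
        state = torch.load(model_path)
        model_keys = model.state_dict()
        model.load_state_dict({k[7:]: v for k, v in state.items()
                               if k[7:] in model_keys})
        _RETINAFACE = model.cuda().eval()
    return _RETINAFACE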