Example #1
def main():
    if len(sys.argv) != 2:
        print('incorrect usage: expected a single path-to-images argument')
        return
    path_to_images = sys.argv[1]
    if not os.path.exists(path_to_images):
        print('incorrect path to images')
        return
    people_with_glasses = []

    input_shape = (150, 150, 3)
    print('loading model...')
    model = create_convnet(input_shape)
    model.load_weights('./models/convnet.h5')
    print('start detection')
    for f in os.listdir(path_to_images):
        f_full = os.path.join(path_to_images, f)
        image = detect_face(f_full)
        if image is None:
            print("detector couldn't find a face: %s" % f)
            continue
        result = model.predict(np.array([image]), batch_size=16)
        if result[0] > 0.5:
            people_with_glasses.append(f_full)
    print('done!')
    print("detected %d image (people with glasses)" % len(people_with_glasses))
    people_with_glasses.sort()
    if people_with_glasses:
        print('\n'.join(people_with_glasses))
    with open('results.txt', 'w') as f:
        for path in people_with_glasses:
            f.write(path + '\n')
Example #2
    def add_frame(self, image):
        """Process one frame: segment it, remove the face region from the mask,
        extract the hand, and record either the recognised hand pose or the
        detected motion direction."""

        mask = utils.segment(image)
        face, foundFace = utils.detect_face(image)
        mask = utils.eliminate_face(face, foundFace, mask)

        if self.__using_stabilization:
            mask = utils.stabilize(foundFace, self.__no_of_frames + 1, image,
                                   face, mask)
        hand, hand_contour = utils.get_my_hand(mask, True)

        hand_pose, direction = 'None', 'None'

        if hand_contour is None:
            return
        motion_detected = self.__motion.get_hand_motion(hand_contour)

        if not motion_detected:
            hand_pose = recognise_hand_pose(
                hand,
                directly_from_hand=True,
                model_path='Models/silatra_gesture_signs.sav')
        else:
            direction = motion_detected

        self.__observations.append((hand_pose, direction))
        self.__no_of_frames += 1
Example #3
def read_image(filename):
    img = cv2.imread(filename)
    rectangles = utils.detect_face(img)
    faces = []
    for rect in rectangles:
        landmarks = utils.align_face(img, rect)
        faces.append(((rect.left(), rect.top(), rect.right(),
                       rect.bottom()), landmarks))
    return img, faces
Example #4
def detect_face_and_save(input_folder, output_folder, filename):
    output_path = os.path.join(output_folder, filename)
    if os.path.exists(output_path):
        return  # already processed on a previous run
    path_to_image = os.path.join(input_folder, filename)
    image_array = detect_face(path_to_image)
    if image_array is None:
        return 0  # no face detected in this image
    image = Image.fromarray(image_array)
    image.save(output_path)
    return 1  # face detected, cropped and saved
Example #5
    def load_image(self, filename):
        img = cv2.imread(filename)
        rectangles = utils.detect_face(img)
        if not rectangles:
            return False
        rect = rectangles[0]
        landmarks = utils.align_face(img, rect)
        self.img_rect = (rect.left(), rect.top(), rect.width(), rect.height())
        self.clear_sequence()
        self.add_to_sequence(img, landmarks)
        return True
Example #6
    def _preprocess(color_path, depth_path, label, save_path, length,
                    img_size):
        if not (color_path.exists() and depth_path.exists()):
            print('Sample not found, skipped. {}'.format(
                color_path.parents[0]))
            return

        # read color, depth frames
        color_video = dataio.read_video(color_path)
        depth_video = dataio.read_video(depth_path)
        T, H, W, C = color_video.shape

        if T < length:
            return

        # crop to a square (H, H) video centered on the detected face
        tr_y, tr_x, bl_y, bl_x = utils.detect_face(color_video)
        if tr_y == -1:  # no face detected
            return

        center_x = (tr_x - bl_x) // 2 + bl_x
        left_x = max(center_x - (H // 2), 0)

        color_video = color_video[:, :, left_x:left_x + H]
        depth_video = depth_video[:, :, left_x:left_x + H]

        # resize
        color_video = [
            imresize(img, (img_size, img_size)) for img in color_video
        ]
        depth_video = [
            imresize(img, (img_size, img_size), 'nearest')
            for img in depth_video
        ]
        color_video, depth_video = np.stack(color_video), np.stack(depth_video)
        depth_video = depth_video[..., 0]  # save as grayscale image

        # save
        name = "{}_{}_{}".format(color_path.parents[0].name,
                                 color_path.name[2:7], label)
        dataio.save_video_as_images(color_video, save_path / name / 'color')
        dataio.save_video_as_images(depth_video, save_path / name / 'depth')
        (save_path / 'color').mkdir(parents=True, exist_ok=True)
        (save_path / 'depth').mkdir(parents=True, exist_ok=True)
        dataio.write_video(color_video, save_path / 'color' / (name + ".mp4"))
        dataio.write_video(depth_video, save_path / 'depth' / (name + ".mp4"))

        return [name, T]
Example #7
    def detect(self, img, landmarks=False):
        """
        :param img:
        :param landmarks:
        :return:
        """
        with torch.no_grad():
            batch_boxes, batch_points = detect_face(img, self.min_face_size,
                                                    self.pnet, self.rnet,
                                                    self.onet, self.thresholds,
                                                    self.factor, self.device)
        boxes, conf, points = [], [], []
        for box, point in zip(batch_boxes, batch_points):
            box = np.array(box)  # (n_faces, 5): x1, y1, x2, y2, confidence
            point = np.array(point)  # landmark points for each detected face
            if len(box) == 0:
                boxes.append(None)
                conf.append(None)
                points.append(None)
            elif self.select_largest:
                # sort indices by box area, largest first
                box_order = np.argsort(
                    (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]))[::-1]
                box = box[box_order]
                point = point[box_order]
                boxes.append(box[:, :4])
                conf.append(box[:, 4])
                points.append(point)
            else:
                boxes.append(box[:, :4])
                conf.append(box[:, 4])
                points.append(point)
        boxes = np.array(boxes)
        conf = np.array(conf)
        points = np.array(points)

        if not isinstance(img, (list, tuple)):
            boxes = boxes[0]
            conf = conf[0]
            points = points[0]
        if landmarks:
            return boxes, conf, points

        return boxes, conf
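A hedged usage sketch for the method above: `detector` stands for an instance of the (unnamed) class this method belongs to, the image file is a hypothetical placeholder, and the accepted image type and colour format depend on the surrounding detect_face() helper.

# Usage sketch under the assumptions stated above.
from PIL import Image

img = Image.open('group.jpg')
boxes, conf = detector.detect(img)
boxes, conf, points = detector.detect(img, landmarks=True)  # also return landmark points

# Passing a list of images returns one array of boxes/confidences per input image.
batch_boxes, batch_conf = detector.detect([img, img])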
Example #8
def recognise_hand_pose(image,
                        directly_from_hand=False,
                        model_path='Models/silatra_digits_and_letters.sav',
                        using_stabilization=False,
                        no_of_frames=1):
    '''
    ### SiLaTra Hand Pose Recognition

    Provides classification for an input hand pose image and returns the
    predicted label (a usage sketch follows this example).

    Inputs: (a) Mandatory parameter - the image for which hand pose classification is to be performed.

            (b) Optional parameters (use them only if you understand them):

                (1) directly_from_hand - boolean - Set this to True if the `image` parameter is an already-cropped hand region.
                (2) model_path - string - To use an alternate model, pass the path of its .sav file.
                (3) using_stabilization - boolean - Set this to True if you intend to use object stabilization. Only useful when classifying hand poses from a continuous feed; otherwise it has no effect.
                (4) no_of_frames - integer - Only used when using_stabilization is True; pass the number of the frame from the continuous feed being processed.
    '''

    import pickle
    from sklearn.neighbors import KNeighborsClassifier

    if not directly_from_hand:
        mask = utils.segment(image)
        face, foundFace = utils.detect_face(image)
        mask = utils.eliminate_face(face, foundFace, mask)

        if using_stabilization:
            mask = utils.stabilize(foundFace, no_of_frames, image, face, mask)

        hand = utils.get_my_hand(mask)
        if hand is False:
            return 'No hand pose in image'
        features = utils.extract_features(hand)
    else:
        features = utils.extract_features(image)

    with open(model_path, 'rb') as model_file:
        classifier = pickle.load(model_file)
    hand_pose = classifier.predict([features])[0]

    return hand_pose
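A hedged usage sketch for recognise_hand_pose: the input image path is a hypothetical placeholder, the utils helpers are the same ones used inside the function, and the gesture-sign model path is the one used in Example #2.

# Usage sketch under the assumptions stated above.
import cv2

frame = cv2.imread('sample_sign.jpg')
label = recognise_hand_pose(frame)  # default digits-and-letters model
print('Recognised hand pose:', label)

# When a cropped hand region is already available, skip the internal segmentation:
hand_region = utils.get_my_hand(utils.segment(frame))
label = recognise_hand_pose(hand_region,
                            directly_from_hand=True,
                            model_path='Models/silatra_gesture_signs.sav')
print('Recognised gesture sign:', label)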
Example #9
def extract_oneface(image, margin=16):
    # detecting faces
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    h, w, c = image.shape
    total_boxes, points = detect_face(image, 20, pnet, rnet, onet,
                                      [0.6, 0.7, 0.7], 0.709)
    for bounding_box, keypoints in zip(total_boxes, points.T):
        bounding_boxes = {
            'box': [
                int(bounding_box[0]),
                int(bounding_box[1]),
                int(bounding_box[2] - bounding_box[0]),
                int(bounding_box[3] - bounding_box[1])
            ],
            'confidence':
            bounding_box[-1],
            'keypoints': {
                'left_eye': (int(keypoints[0]), int(keypoints[5])),
                'right_eye': (int(keypoints[1]), int(keypoints[6])),
                'nose': (int(keypoints[2]), int(keypoints[7])),
                'mouth_left': (int(keypoints[3]), int(keypoints[8])),
                'mouth_right': (int(keypoints[4]), int(keypoints[9])),
            }
        }

        bounding_box = bounding_boxes['box']
        keypoints = bounding_boxes['keypoints']

        # align face and extract it out
        align_image = align_face(image, keypoints)
        align_image = cv2.cvtColor(align_image, cv2.COLOR_RGB2BGR)

        xmin = max(bounding_box[0] - margin, 0)
        ymin = max(bounding_box[1] - margin, 0)
        xmax = min(bounding_box[0] + bounding_box[2] + margin, w)
        ymax = min(bounding_box[1] + bounding_box[3] + margin, h)

        crop_image = align_image[ymin:ymax, xmin:xmax, :]
        # "just need only one face"
        return crop_image
Example #10
def recognise_hand_pose(image,
                        directly_from_hand=False,
                        model_path='Models/digits_and_letters.sav',
                        using_stabilization=False,
                        no_of_frames=1):

    import pickle
    from sklearn.neighbors import KNeighborsClassifier

    if not directly_from_hand:
        mask = utils.segment(image)
        face, foundFace = utils.detect_face(image)
        mask = utils.eliminate_face(face, foundFace, mask)

        if using_stabilization:
            mask = utils.stabilize(foundFace, no_of_frames, image, face, mask)

        hand = utils.get_my_hand(mask)
        if hand is False:
            return 'No hand pose in image'
        features = utils.extract_features(hand)
    else:
        features = utils.extract_features(image)

    with open(model_path, 'rb') as model_file:
        classifier = pickle.load(model_file)
    hand_pose = classifier.predict([features])[0]

    return hand_pose
Example #11
    def __init__(self, args):
        self.args = args
        self.detector_faces, self.detector_landmarks = detect_face(args.model)
        self.sunglasses, self.points = load_assets(args.image)

        self.sun_h, self.sun_w, _ = self.sunglasses.shape
Example #12
def run_on_image(net, height_size, cpu, track, smooth, img, stride,
                 upsample_ratio, num_keypoints, threshold):
    global previous_poses
    orig_img = img.copy()
    heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride,
                                            upsample_ratio, cpu)
    score = 0
    total_keypoints_num = 0
    all_keypoints_by_type = []
    for kpt_idx in range(num_keypoints):  # the 19th heatmap channel is background and is skipped
        total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx],
                                                 all_keypoints_by_type,
                                                 total_keypoints_num)

    pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type,
                                                  pafs,
                                                  demo=True)
    for kpt_id in range(all_keypoints.shape[0]):
        all_keypoints[kpt_id, 0] = (
            all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
        all_keypoints[kpt_id, 1] = (
            all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
    current_poses = []
    for n in range(len(pose_entries)):
        if len(pose_entries[n]) == 0:
            continue
        pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
        for kpt_id in range(num_keypoints):
            if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                pose_keypoints[kpt_id, 0] = int(
                    all_keypoints[int(pose_entries[n][kpt_id]), 0])
                pose_keypoints[kpt_id, 1] = int(
                    all_keypoints[int(pose_entries[n][kpt_id]), 1])
        pose = Pose(pose_keypoints, pose_entries[n][18])
        current_poses.append(pose)

    if track:
        track_poses(previous_poses, current_poses, smooth=smooth)
        previous_poses = current_poses
    for pose in current_poses:
        pose.draw(img)
    img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)
    for pose in current_poses:
        r_hand_center, r_hand_width, l_hand_center, l_hand_width = detect_hand(
            pose)

        if -1 not in r_hand_center:
            cv2.circle(img, (r_hand_center[0], r_hand_center[1]), 5,
                       (255, 0, 0), 5)
            cv2.rectangle(img, (r_hand_center[0] - r_hand_width,
                                r_hand_center[1] - r_hand_width),
                          (r_hand_center[0] + r_hand_width,
                           r_hand_center[1] + r_hand_width), (0, 255, 255))
        if -1 not in l_hand_center:
            cv2.circle(img, (l_hand_center[0], l_hand_center[1]), 5,
                       (255, 0, 0), 5)
            cv2.rectangle(img, (l_hand_center[0] - l_hand_width,
                                l_hand_center[1] - l_hand_width),
                          (l_hand_center[0] + l_hand_width,
                           l_hand_center[1] + l_hand_width), (0, 255, 255))

        face_center, face_width = detect_face(pose)
        if -1 not in face_center:
            cv2.rectangle(
                img,
                (face_center[0] - face_width, face_center[1] - face_width),
                (face_center[0] + face_width, face_center[1] + face_width),
                (0, 0, 255))

            if track:
                cv2.putText(img, 'id: {}'.format(pose.id),
                            (face_center[0] - face_width,
                             face_center[1] - face_width - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))

        if -1 not in r_hand_center:
            x, y, h, w, score = detect_touch(face_center, face_width,
                                             r_hand_center, r_hand_width)
            if h != 0:
                cv2.rectangle(img, (x, y), (x + h, y + w), (255, 0, 255))
                cv2.putText(img, f'Score: {score:0.2f}', (x, y - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 0))
        if -1 not in l_hand_center:
            x, y, h, w, score = detect_touch(face_center, face_width,
                                             l_hand_center, l_hand_width)
            if h != 0:
                cv2.rectangle(img, (x, y), (x + h, y + w), (255, 0, 255))
                cv2.putText(img, f'Score: {score:0.2f}', (x, y - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 0))
    cv2.imshow('Lightweight Human Pose Estimation Python Demo', img)
    delay = 1

    key = cv2.waitKey(delay)
    if key == 27:  # esc
        return False
    elif key == 112:  # 'p'
        if delay == 33:
            delay = 0
        else:
            delay = 33
    return score > threshold
Example #13
cv2.namedWindow("detecting face")
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, image = cap.read()
    if ret:
        # resize image
        image_h, image_w, _ = image.shape
        new_h, new_w = int(0.5*image_h), int(0.5*image_w)
        image = cv2.resize(image, (new_w, new_h))

        org_image = image.copy()
        # detecting faces
        # t1 = time.time()
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        total_boxes, points = detect_face(image, 20, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)
        # t2 = time.time()
        # print("time: %.2fms" %((t2-t1)*1000))

        for idx, (bounding_box, keypoints) in enumerate(zip(total_boxes, points.T)):
            bounding_boxes = {
                    'box': [int(bounding_box[0]), int(bounding_box[1]),
                            int(bounding_box[2]-bounding_box[0]), int(bounding_box[3]-bounding_box[1])],
                    'confidence': bounding_box[-1],
                    'keypoints': {
                            'left_eye': (int(keypoints[0]), int(keypoints[5])),
                            'right_eye': (int(keypoints[1]), int(keypoints[6])),
                            'nose': (int(keypoints[2]), int(keypoints[7])),
                            'mouth_left': (int(keypoints[3]), int(keypoints[8])),
                            'mouth_right': (int(keypoints[4]), int(keypoints[9])),
                    }
            }
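            # Hedged completion (an assumption, not part of the original
            # snippet): draw the detected box on the untouched copy of the
            # frame and show it in the window created above, mirroring the
            # conventions of Example #9.
            x, y, w, h = bounding_boxes['box']
            cv2.rectangle(org_image, (x, y), (x + w, y + h), (0, 255, 0), 2)

        cv2.imshow("detecting face", org_image)
    else:
        break

    if cv2.waitKey(1) & 0xFF == ord('q'):  # press 'q' to quit
        break

cap.release()
cv2.destroyAllWindows()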
Example #14
    print("Wrong input format, example : ")
    print("python test.py input_image_path")
    exit()

path_i = args[1]

G = Generator(32)
GR = Generator_R(32)

G.load_state_dict(
    torch.load("Checkpoints/G_22.pt", map_location=torch.device('cpu')))
GR.load_state_dict(
    torch.load("Checkpoints/GR_7.pt", map_location=torch.device('cpu')))

img = plt.imread(path_i)
faces = detect_face(img)

if len(faces) == 0:
    print("No faces detected")
    exit()

resz = cv2.resize(faces[0], (100, 100))
plt.imsave("out_ld.png", resz)

resz = resz.reshape(1, 100, 100, 3)      # add a batch dimension (NHWC)
resz = np.transpose(resz, (0, 3, 1, 2))  # NHWC -> NCHW, as PyTorch expects
resz = torch.from_numpy(resz)
resz = resz.float()
inp = scale(resz)
out1 = infer(G, inp)
out2 = infer(GR, inp)
Example #15
#! /usr/bin/env python
import cv2
from swap import swap_images
from utils import landmark_detection, detect_face, readPoints

if __name__ == '__main__':
    # Capture frames from the default camera

    cap = cv2.VideoCapture(0)

    while True:
        # Capture frame-by-frame
        ret, img = cap.read()
        gray, rects = detect_face(img)
        if len(rects) == 1:
            points1, _ = landmark_detection(gray, rects)
            filename2 = 'donald_trump.jpg'
            img2 = cv2.imread(filename2)
            points2 = readPoints(filename2 + '.txt')

            output = swap_images(img2, img, points2, points1)
            cv2.imshow("Face Swapped", output)
        elif len(rects) == 2:
            points1, points2 = landmark_detection(gray, rects)
            output = swap_images(img, img, points2, points1)
            output = swap_images(img, output, points1, points2)
            cv2.imshow("Face Swapped", output)
        else:
            cv2.imshow("Face Swapped", img)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()