def main():
    if len(sys.argv) != 2:
        print('incorrect format of execution')
        return
    path_to_images = sys.argv[1]
    if not os.path.exists(path_to_images):
        print('incorrect path to images')
        return

    people_with_glasses = []

    input_shape = (150, 150, 3)
    model = create_convnet(input_shape)
    print('loading model...')
    model.load_weights('./models/convnet.h5')

    print('start detection')
    for f in os.listdir(path_to_images):
        f_full = os.path.join(path_to_images, f)
        image = detect_face(f_full)
        if image is None:
            print("detector couldn't find a face: %s" % f)
            continue
        result = model.predict(np.array([image]), batch_size=16)
        if result[0] > 0.5:
            people_with_glasses.append(f_full)

    print('done!')
    print("detected %d images (people with glasses)" % len(people_with_glasses))
    people_with_glasses.sort()
    if people_with_glasses:
        print('\n'.join(people_with_glasses))
    with open('results.txt', 'w') as f:
        for path in people_with_glasses:
            f.write(path + '\n')
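# Entry-point guard: a minimal sketch of how this script is presumably invoked
# (assumed; not shown in the original excerpt):
if __name__ == '__main__':
    main()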
def add_frame(self, image):
    mask = utils.segment(image)
    face, foundFace = utils.detect_face(image)
    mask = utils.eliminate_face(face, foundFace, mask)
    if self.__using_stabilization:
        mask = utils.stabilize(foundFace, self.__no_of_frames + 1, image, face, mask)
    hand, hand_contour = utils.get_my_hand(mask, True)

    hand_pose, direction = 'None', 'None'
    if hand_contour is not None:
        motion_detected = self.__motion.get_hand_motion(hand_contour)
    else:
        return
    if not motion_detected:
        hand_pose = recognise_hand_pose(
            hand,
            directly_from_hand=True,
            model_path='Models/silatra_gesture_signs.sav')
    else:
        direction = motion_detected

    self.__observations.append((hand_pose, direction))
    self.__no_of_frames += 1
def read_image(filename):
    img = cv2.imread(filename)
    rectangles = utils.detect_face(img)
    faces = []
    for rect in rectangles:
        landmarks = utils.align_face(img, rect)
        faces.append(((rect.left(), rect.top(), rect.right(), rect.bottom()),
                      landmarks))
    return img, faces
def detect_face_and_save(input_folder, output_folder, filename):
    output_path = os.path.join(output_folder, filename)
    if os.path.exists(output_path):
        return
    path_to_image = os.path.join(input_folder, filename)
    image_array = detect_face(path_to_image)
    if image_array is None:
        return 0
    image = Image.fromarray(image_array)
    image.save(output_path)
    return 1
def load_image(self, filename):
    img = cv2.imread(filename)
    rectangles = utils.detect_face(img)
    if not rectangles:
        return False
    rect = rectangles[0]
    landmarks = utils.align_face(img, rect)
    self.img_rect = (rect.left(), rect.top(), rect.width(), rect.height())
    self.clear_sequence()
    self.add_to_sequence(img, landmarks)
    return True
def _preprocess(color_path, depth_path, label, save_path, length, img_size):
    if not (color_path.exists() and depth_path.exists()):
        print('Sample not found, skipped. {}'.format(color_path.parents[0]))
        return

    # read color, depth frames
    color_video = dataio.read_video(color_path)
    depth_video = dataio.read_video(depth_path)

    T, H, W, C = color_video.shape
    if T < length:
        return

    # crop to be a square (H, H) video
    tr_y, tr_x, bl_y, bl_x = utils.detect_face(color_video)
    if tr_y == -1:
        return
    center_x = (tr_x - bl_x) // 2 + bl_x
    left_x = max(center_x - (H // 2), 0)
    color_video = color_video[:, :, left_x:left_x + H]
    depth_video = depth_video[:, :, left_x:left_x + H]

    # resize
    color_video = [
        imresize(img, (img_size, img_size)) for img in color_video
    ]
    depth_video = [
        imresize(img, (img_size, img_size), 'nearest') for img in depth_video
    ]
    color_video, depth_video = np.stack(color_video), np.stack(depth_video)
    depth_video = depth_video[..., 0]  # save as grayscale image

    # save
    name = "{}_{}_{}".format(color_path.parents[0].name, color_path.name[2:7], label)
    dataio.save_video_as_images(color_video, save_path / name / 'color')
    dataio.save_video_as_images(depth_video, save_path / name / 'depth')

    (save_path / 'color').mkdir(parents=True, exist_ok=True)
    (save_path / 'depth').mkdir(parents=True, exist_ok=True)
    dataio.write_video(color_video, save_path / 'color' / (name + ".mp4"))
    dataio.write_video(depth_video, save_path / 'depth' / (name + ".mp4"))

    return [name, T]
def detect(self, img, landmarks=False):
    """Detect faces in an image (or in a list/tuple of images).

    :param img: input image, or a list/tuple of images for batched detection
    :param landmarks: if True, also return the facial landmark points
    :return: bounding boxes and confidences (and landmarks if requested)
    """
    with torch.no_grad():
        batch_boxes, batch_points = detect_face(img, self.min_face_size,
                                                self.pnet, self.rnet, self.onet,
                                                self.thresholds, self.factor,
                                                self.device)

    boxes, conf, points = [], [], []
    for box, point in zip(batch_boxes, batch_points):
        box = np.array(box)      # (len(box), 5)
        point = np.array(point)  # (len(box), 5)
        if len(box) == 0:
            boxes.append(None)
            conf.append(None)
            points.append(None)
        elif self.select_largest:
            # sort indices by box area, largest first
            box_order = np.argsort(
                (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]))[::-1]
            box = box[box_order]
            point = point[box_order]
            boxes.append(box[:, :4])
            conf.append(box[:, 4])
            points.append(point)
        else:
            boxes.append(box[:, :4])
            conf.append(box[:, 4])
            points.append(point)

    boxes = np.array(boxes)
    conf = np.array(conf)
    points = np.array(points)

    if not isinstance(img, (list, tuple)):
        boxes = boxes[0]
        conf = conf[0]
        points = points[0]

    if landmarks:
        return boxes, conf, points
    return boxes, conf
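# A minimal usage sketch for the detect method above; `detector` stands for an
# instance of whatever MTCNN-style class this method belongs to (not shown in
# the original excerpt), and 'face.jpg' is a hypothetical path:
#
#     img = cv2.imread('face.jpg')[:, :, ::-1]  # BGR -> RGB
#     boxes, conf = detector.detect(img)
#     boxes, conf, points = detector.detect(img, landmarks=True)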
def recognise_hand_pose(image,
                        directly_from_hand=False,
                        model_path='Models/silatra_digits_and_letters.sav',
                        using_stabilization=False,
                        no_of_frames=1):
    '''
    ### SiLaTra Hand Pose Recognition

    Classifies the hand pose in the input image.

    Inputs:
        (a) Mandatory parameter - the image for which hand pose classification
            is to be performed.
        (b) Optional parameters (use them only if you understand them):
            (1) directly_from_hand - boolean - set this to True if the `image`
                parameter is an already cropped hand region.
            (2) model_path - string - to use an alternate model, pass the path
                of its .sav file.
            (3) using_stabilization - boolean - set this to True to use object
                stabilization. Only useful when classifying hand poses from a
                continuous feed; it has no effect otherwise.
            (4) no_of_frames - integer - ONLY TO BE USED IF using_stabilization
                IS True; pass the index of the frame from the continuous feed
                being processed.
    '''
    import pickle
    from sklearn.neighbors import KNeighborsClassifier

    if not directly_from_hand:
        mask = utils.segment(image)
        face, foundFace = utils.detect_face(image)
        mask = utils.eliminate_face(face, foundFace, mask)
        if using_stabilization:
            mask = utils.stabilize(foundFace, no_of_frames, image, face, mask)
        hand = utils.get_my_hand(mask)
        if hand is False:
            return 'No hand pose in image'
        features = utils.extract_features(hand)
    else:
        features = utils.extract_features(image)

    classifier = pickle.load(open(model_path, 'rb'))
    hand_pose = classifier.predict([features])[0]
    return hand_pose
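# A minimal usage sketch for recognise_hand_pose with the default model; the
# image path 'hand_sign.jpg' and the cv2.imread loading step are assumptions,
# not part of the original source:
#
#     import cv2
#     frame = cv2.imread('hand_sign.jpg')
#     print(recognise_hand_pose(frame))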
def extract_oneface(image, marigin=16):
    # detect faces
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    h, w, c = image.shape
    total_boxes, points = detect_face(image, 20, pnet, rnet, onet,
                                      [0.6, 0.7, 0.7], 0.709)
    for idx, (bounding_box, keypoints) in enumerate(zip(total_boxes, points.T)):
        bounding_boxes = {
            'box': [
                int(bounding_box[0]),
                int(bounding_box[1]),
                int(bounding_box[2] - bounding_box[0]),
                int(bounding_box[3] - bounding_box[1])
            ],
            'confidence': bounding_box[-1],
            'keypoints': {
                'left_eye': (int(keypoints[0]), int(keypoints[5])),
                'right_eye': (int(keypoints[1]), int(keypoints[6])),
                'nose': (int(keypoints[2]), int(keypoints[7])),
                'mouth_left': (int(keypoints[3]), int(keypoints[8])),
                'mouth_right': (int(keypoints[4]), int(keypoints[9])),
            }
        }

        bounding_box = bounding_boxes['box']
        keypoints = bounding_boxes['keypoints']

        # align the face and crop it out (with a margin, clipped to the image)
        align_image = align_face(image, keypoints)
        align_image = cv2.cvtColor(align_image, cv2.COLOR_RGB2BGR)
        xmin = max(bounding_box[0] - marigin, 0)
        ymin = max(bounding_box[1] - marigin, 0)
        xmax = min(bounding_box[0] + bounding_box[2] + marigin, w)
        ymax = min(bounding_box[1] + bounding_box[3] + marigin, h)
        crop_image = align_image[ymin:ymax, xmin:xmax, :]

        # only one face is needed, so return after the first detection
        return crop_image
def recognise_hand_pose(image,
                        directly_from_hand=False,
                        model_path='Models/digits_and_letters.sav',
                        using_stabilization=False,
                        no_of_frames=1):
    import pickle
    from sklearn.neighbors import KNeighborsClassifier

    if not directly_from_hand:
        mask = utils.segment(image)
        face, foundFace = utils.detect_face(image)
        mask = utils.eliminate_face(face, foundFace, mask)
        if using_stabilization:
            mask = utils.stabilize(foundFace, no_of_frames, image, face, mask)
        hand = utils.get_my_hand(mask)
        if hand is False:
            return 'No hand pose in image'
        features = utils.extract_features(hand)
    else:
        features = utils.extract_features(image)

    classifier = pickle.load(open(model_path, 'rb'))
    hand_pose = classifier.predict([features])[0]
    return hand_pose
def __init__(self, args):
    self.args = args
    self.detector_faces, self.detector_landmarks = detect_face(args.model)
    self.sunglasses, self.points = load_assets(args.image)
    self.sun_h, self.sun_w, _ = self.sunglasses.shape
def run_on_image(net, height_size, cpu, track, smooth, img, stride,
                 upsample_ratio, num_keypoints, threshold):
    global previous_poses
    orig_img = img.copy()
    heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride,
                                            upsample_ratio, cpu)
    score = 0

    total_keypoints_num = 0
    all_keypoints_by_type = []
    for kpt_idx in range(num_keypoints):  # 19th for bg
        total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx],
                                                 all_keypoints_by_type,
                                                 total_keypoints_num)

    pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs,
                                                  demo=True)
    for kpt_id in range(all_keypoints.shape[0]):
        all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride /
                                    upsample_ratio - pad[1]) / scale
        all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride /
                                    upsample_ratio - pad[0]) / scale

    current_poses = []
    for n in range(len(pose_entries)):
        if len(pose_entries[n]) == 0:
            continue
        pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
        for kpt_id in range(num_keypoints):
            if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                pose_keypoints[kpt_id, 0] = int(
                    all_keypoints[int(pose_entries[n][kpt_id]), 0])
                pose_keypoints[kpt_id, 1] = int(
                    all_keypoints[int(pose_entries[n][kpt_id]), 1])
        pose = Pose(pose_keypoints, pose_entries[n][18])
        current_poses.append(pose)

    if track:
        track_poses(previous_poses, current_poses, smooth=smooth)
        previous_poses = current_poses
    for pose in current_poses:
        pose.draw(img)
    img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)

    for pose in current_poses:
        # cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
        #               (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]),
        #               (0, 255, 0))
        r_hand_center, r_hand_width, l_hand_center, l_hand_width = detect_hand(pose)
        if -1 not in r_hand_center:
            cv2.circle(img, (r_hand_center[0], r_hand_center[1]), 5,
                       (255, 0, 0), 5)
            cv2.rectangle(img,
                          (r_hand_center[0] - r_hand_width,
                           r_hand_center[1] - r_hand_width),
                          (r_hand_center[0] + r_hand_width,
                           r_hand_center[1] + r_hand_width), (0, 255, 255))
        if -1 not in l_hand_center:
            cv2.circle(img, (l_hand_center[0], l_hand_center[1]), 5,
                       (255, 0, 0), 5)
            cv2.rectangle(img,
                          (l_hand_center[0] - l_hand_width,
                           l_hand_center[1] - l_hand_width),
                          (l_hand_center[0] + l_hand_width,
                           l_hand_center[1] + l_hand_width), (0, 255, 255))
        face_center, face_width = detect_face(pose)
        if -1 not in face_center:
            cv2.rectangle(img,
                          (face_center[0] - face_width,
                           face_center[1] - face_width),
                          (face_center[0] + face_width,
                           face_center[1] + face_width), (0, 0, 255))
        if track:
            cv2.putText(img, 'id: {}'.format(pose.id),
                        (face_center[0] - face_width,
                         face_center[1] - face_width - 16),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
        if -1 not in r_hand_center:
            x, y, h, w, score = detect_touch(face_center, face_width,
                                             r_hand_center, r_hand_width)
            if h != 0:
                cv2.rectangle(img, (x, y), (x + h, y + w), (255, 0, 255))
                cv2.putText(img, f'Score: {score:0.2f}', (x, y - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 0))
        if -1 not in l_hand_center:
            x, y, h, w, score = detect_touch(face_center, face_width,
                                             l_hand_center, l_hand_width)
            if h != 0:
                cv2.rectangle(img, (x, y), (x + h, y + w), (255, 0, 255))
                cv2.putText(img, f'Score: {score:0.2f}', (x, y - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 0))

    cv2.imshow('Lightweight Human Pose Estimation Python Demo', img)
    delay = 1
    detect = False
    key = cv2.waitKey(delay)
    if key == 27:  # esc
        return
    elif key == 112:  # 'p'
        if delay == 33:
            delay = 0
        else:
            delay = 33
    return score > threshold
cv2.namedWindow("detecting face")
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ret, image = cap.read()
    if ret:
        # resize image
        image_h, image_w, _ = image.shape
        new_h, new_w = int(0.5 * image_h), int(0.5 * image_w)
        image = cv2.resize(image, (new_w, new_h))
        org_image = image.copy()

        # detecting faces
        # t1 = time.time()
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        total_boxes, points = detect_face(image, 20, pnet, rnet, onet,
                                          [0.6, 0.7, 0.7], 0.709)
        # t2 = time.time()
        # print("time: %.2fms" % ((t2 - t1) * 1000))

        for idx, (bounding_box, keypoints) in enumerate(zip(total_boxes, points.T)):
            bounding_boxes = {
                'box': [
                    int(bounding_box[0]),
                    int(bounding_box[1]),
                    int(bounding_box[2] - bounding_box[0]),
                    int(bounding_box[3] - bounding_box[1])
                ],
                'confidence': bounding_box[-1],
                'keypoints': {
                    'left_eye': (int(keypoints[0]), int(keypoints[5])),
                    'right_eye': (int(keypoints[1]), int(keypoints[6])),
                    'nose': (int(keypoints[2]), int(keypoints[7])),
                    'mouth_left': (int(keypoints[3]), int(keypoints[8])),
                    'mouth_right': (int(keypoints[4]), int(keypoints[9])),
                }
            }
print("Wrong input format, example : ") print("python test.py input_image_path") exit() path_i = args[1] G = Generator(32) GR = Generator_R(32) G.load_state_dict( torch.load("Checkpoints/G_22.pt", map_location=torch.device('cpu'))) GR.load_state_dict( torch.load("Checkpoints/GR_7.pt", map_location=torch.device('cpu'))) img = plt.imread(path_i) faces = detect_face(img) if len(faces) == 0: print("No faces detected") exit() resz = cv2.resize(faces[0], (100, 100)) plt.imsave("out_ld.png", resz) resz = resz.reshape(1, 100, 100, 3) resz = np.transpose(resz, (0, 3, 1, 2)) resz = torch.from_numpy(resz) resz = resz.float() inp = scale(resz) out1 = infer(G, inp) out2 = infer(GR, inp)
#! /usr/bin/env python
import cv2

from swap import swap_images
from utils import landmark_detection, detect_face, readPoints

if __name__ == '__main__':
    # Read frames from the webcam
    cap = cv2.VideoCapture(0)
    while True:
        # Capture frame-by-frame
        ret, img = cap.read()
        gray, rects = detect_face(img)
        if len(rects) == 1:
            points1, _ = landmark_detection(gray, rects)
            filename2 = 'donald_trump.jpg'
            img2 = cv2.imread(filename2)
            points2 = readPoints(filename2 + '.txt')
            output = swap_images(img2, img, points2, points1)
            cv2.imshow("Face Swapped", output)
        elif len(rects) == 2:
            points1, points2 = landmark_detection(gray, rects)
            output = swap_images(img, img, points2, points1)
            output = swap_images(img, output, points1, points2)
            cv2.imshow("Face Swapped", output)
        else:
            cv2.imshow("Face Swapped", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break  # quit on 'q' (loop body truncated in the original; break assumed)