# corners_homogeneous[:, 0] = corners[:, 1]
# corners_homogeneous[:, 1] = corners[:, 0]
# corners = corners_homogeneous

if white_pixels.shape[0]:
    for i in range(white_pixels.shape[0]):
        cv2.circle(frame, tuple(white_pixels[i, :2].astype(np.int32)), 1, (0, 255, 0), 3)
    # imshow(frame, 3)

    img_virtual, T_img_to_virtual = project_keyboard.project_image(frame, corners)
    # points_virtual = project_keyboard.virtual_keyboard_corners()
    # T_img_to_virtual = project_keyboard.perspective_transformation(corners, points_virtual)

    white_pixels_virtual = white_pixels.dot(T_img_to_virtual.T)
    white_pixels_virtual /= white_pixels_virtual[:, -1, np.newaxis]
    for i in range(white_pixels.shape[0]):
        cv2.circle(img_virtual, tuple(white_pixels_virtual[i, :2].astype(np.int32)), 10, (255, 0, 0), 5)
    imshow(img_virtual, wait=1)

    key = project_keyboard.majority_key_label(white_pixels_virtual)

# if np.max(neg_diff) > 0:
#     print "negative difference"
#     cv2.imshow(window_name, neg_diff)
#     if cv2.waitKey() & 0xFF == 27:
#         break

cap.release()
cv2.destroyAllWindows()
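# Note: the fragments in this file call an imshow() display helper that is not
# shown in this section. The sketch below is an assumption reconstructed only
# from the call sites (imshow(img, 3), imshow(img, window=..., wait=1),
# imshow(img, scale_down=3, window=..., wait=1)); the project's real helper
# may differ.
def imshow(img, scale_down=1, window='debug', wait=None):
    # Optionally shrink the image by an integer factor before displaying it.
    if scale_down > 1:
        img = cv2.resize(img, (img.shape[1] // scale_down, img.shape[0] // scale_down))
    cv2.imshow(window, img)
    # Only pump the GUI event loop when a wait is requested; callers that
    # manage cv2.waitKey() themselves pass no wait argument.
    if wait is not None:
        cv2.waitKey(wait)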
def __init__(self, video_file, camera_side=None, calibration_file=None, use_fixed_parameters=True):
    # Open the video
    self.video_stream = cv2.VideoCapture(video_file)
    if not self.video_stream.isOpened():
        print "error reading video\n"
        quit()

    # Read the first frames (the very first frame is discarded); use them to
    # find the camera side, the zone of interest, and to initialize the baseline
    ret, self.frame = self.video_stream.read()
    ret, self.frame = self.video_stream.read()
    self.corners, self.pos_camera = corners.find_corners(self.frame)
    self.updateBaseline(self.frame)

    # Load calibration parameters (default to the user-supplied file, then
    # override for the known camera positions)
    self.calibration = calibration_file
    if self.pos_camera == "right":
        self.calibration = "../data/calibration/nexus5.mp4.npz"
    elif self.pos_camera == "left":
        self.calibration = "../data/calibration/galaxy_s7-7.mp4.npz"
    if self.calibration:
        npz_calibration = np.load(self.calibration)
        self.camera_matrix = npz_calibration["camera_matrix"]
        self.dist_coefs = npz_calibration["dist_coefs"]

    self.window_name = 'Predictor_' + self.pos_camera

    if use_fixed_parameters:
        if self.pos_camera == 'right':
            npz_parameters = np.load('../data/fixed_parameters/Transformations_right.npz')
            self.corners = npz_parameters["corners"]
            self.T_img_to_virtual = npz_parameters["T_img_to_virtual"]
            self.T_virtual_to_img = npz_parameters["T_virtual_to_img"]
            self.frame = cv2.imread('../data/fixed_parameters/baseline_right.png')
        else:
            if 'individual_keys' in video_file:
                npz_parameters = np.load('../data/fixed_parameters/Transformations_left_ind_keys.npz')
                self.corners = npz_parameters["corners"]
                self.T_img_to_virtual = npz_parameters["T_img_to_virtual"]
                self.T_virtual_to_img = npz_parameters["T_virtual_to_img"]
                self.frame = cv2.imread('../data/fixed_parameters/baseline_left_ind_keys.png')
            else:
                npz_parameters = np.load('../data/fixed_parameters/Transformations_left.npz')
                self.corners = npz_parameters["corners"]
                self.T_img_to_virtual = npz_parameters["T_img_to_virtual"]
                self.T_virtual_to_img = npz_parameters["T_virtual_to_img"]
                self.frame = cv2.imread('../data/fixed_parameters/baseline_left.png')

        self.updateBaseline(self.frame)
        self.key_map = project_keyboard.key_map(self.frame.shape, self.T_virtual_to_img, self.pos_camera)
        self.key_mask = (self.key_map > 0)[:, :, np.newaxis]
    else:
        # Get a suitable frame for the baseline
        cv2.namedWindow(self.window_name + "_baseline", cv2.WINDOW_NORMAL)
        cv2.resizeWindow(self.window_name + "_baseline", 400, 550)
        print "\n******************************************************************************************"
        print "Select baseline. Press Enter if the image is satisfactory, or space to go to the next image"
        print "******************************************************************************************"
        while True:
            self.advanceFrame(filter_corners=False, update_projection=True)
            imshow(self.frame_marked, window=self.window_name + "_baseline")
            if cv2.waitKey() == 10:
                break
        self.updateBaseline(self.frame)
        self.corners, pos_camera = corners.find_corners(self.frame)

    cv2.namedWindow(self.window_name, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(self.window_name, 400, 550)
    cv2.setMouseCallback(self.window_name, self.click_callback)

    self.fp = Filter.ProximityFilter(self.corners, 50)
    self.fb = Filter.ButterworthFilter(self.corners, 0.3, 2)

    # HSV thresholds for the hand
    if self.pos_camera == 'right':
        self.hand_threshold_low = np.array([0, 10, 50])
        self.hand_threshold_high = np.array([50, 255, 255])
        # self.hand_threshold_low = np.array([0, 30, 120])
        # self.hand_threshold_high = np.array([40, 150, 255])
    elif self.pos_camera == 'left':
        self.hand_threshold_low = np.array([0, 50, 80])
        self.hand_threshold_high = np.array([200, 255, 255])

    # Commented-out interactive skin-threshold selection:
    # self.skin_pixel_location = list()
    # print "\n***********************"
    # print "Right click on the skin"
    # print "***********************"
    # cv2.setMouseCallback(self.window_name + "_baseline", self.click_callback)
    # imshow(self.frame, window=self.window_name + "_baseline")
    # cv2.waitKey()
    # low_h_skin = max(self.baseline_hsv[self.skin_pixel_location[0], self.skin_pixel_location[1], 0] - 25, 0)
    # high_h_skin = min(self.baseline_hsv[self.skin_pixel_location[0], self.skin_pixel_location[1], 0] + 25, 255)
    # low_s_skin = max(self.baseline_hsv[self.skin_pixel_location[0], self.skin_pixel_location[1], 1] - 50, 0)
    # high_s_skin = min(self.baseline_hsv[self.skin_pixel_location[0], self.skin_pixel_location[1], 1] + 50, 255)
    # low_v_skin = max(self.baseline_hsv[self.skin_pixel_location[0], self.skin_pixel_location[1], 2] - 50, 0)
    # high_v_skin = min(self.baseline_hsv[self.skin_pixel_location[0], self.skin_pixel_location[1], 2] + 50, 255)
    # self.hand_threshold_low = np.array([low_h_skin, low_s_skin, low_v_skin])
    # self.hand_threshold_high = np.array([high_h_skin, high_s_skin, high_v_skin])
    # print "low skin hsv threshold : ", self.hand_threshold_low
    # print "high skin hsv threshold : ", self.hand_threshold_high
    # print self.baseline_hsv[self.skin_pixel_location[0], self.skin_pixel_location[1]]

    # TODO: tune diff thresholds
    self.pos_diff_threshold_low = np.array([0, 0, 120])
    self.pos_diff_threshold_high = np.array([255, 255, 255])
    self.neg_diff_threshold_low = np.array([0, 0, 120])
    self.neg_diff_threshold_high = np.array([255, 255, 255])
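# Hypothetical usage sketch for the constructor above (the class name
# "Predictor" is inferred from the 'Predictor_' window prefix, and the video
# path is a placeholder, not a file from this repository):
# predictor = Predictor('../data/recordings/example_left.mp4')
# predictor.advanceFrame(filter_corners=False, update_projection=True)
# key_counts = predictor.countKeyDiffs(show_img=True)
# print key_counts.most_common(3)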
def countKeyDiffs(self, show_img=False):
    # start_time = time.time()

    # Use HSV
    frame_hsv = cv2.cvtColor(self.frame, cv2.COLOR_BGR2HSV)

    # Find hand position
    hand_mask = cv2.inRange(frame_hsv, self.hand_threshold_low, self.hand_threshold_high)
    # imshow(hand_mask, self.window_name)
    # print "low skin hsv threshold : ", self.hand_threshold_low
    # print "high skin hsv threshold : ", self.hand_threshold_high
    # imshow(hand_mask, window=self.window_name, wait=1)
    # cv2.waitKey(50)

    # Find differences with the baseline
    pos_diff = (self.baseline_hsv.astype(np.int32) - frame_hsv.astype(np.int32)) * self.key_mask
    neg_diff = (frame_hsv.astype(np.int32) - self.baseline_hsv.astype(np.int32)) * self.key_mask
    pos_diff_v = pos_diff[:, :, 2]
    neg_diff_v = neg_diff[:, :, 2]

    # Clip negative differences in V channel
    pos_diff_v[pos_diff_v < 0] = 0
    neg_diff_v[neg_diff_v < 0] = 0

    # Take abs of negative differences in H, S channels
    pos_diff = np.abs(pos_diff).astype(np.uint8)
    neg_diff = np.abs(neg_diff).astype(np.uint8)

    pos_diff = cv2.inRange(pos_diff, self.pos_diff_threshold_low, self.pos_diff_threshold_high)
    neg_diff = cv2.inRange(neg_diff, self.neg_diff_threshold_low, self.neg_diff_threshold_high)

    # Remove the hand
    # imshow(pos_diff, window=self.window_name, wait=1)
    # imshow(neg_diff, window=self.window_name, wait=1)
    pos_diff[np.argwhere(hand_mask)[:, 0], np.argwhere(hand_mask)[:, 1]] = 0
    neg_diff[np.argwhere(hand_mask)[:, 0], np.argwhere(hand_mask)[:, 1]] = 0
    # imshow(pos_diff, window=self.window_name, wait=1)
    # imshow(neg_diff, window=self.window_name, wait=1)
    # print np.argwhere(hand_mask)

    # Find pixel coordinates
    pixels_pos_diff = np.argwhere(pos_diff)[:, ::-1]
    pixels_neg_diff = np.argwhere(neg_diff)[:, ::-1]

    # Count detected keys for diff pixels
    counts = Counter()
    idx_pos_diff = np.zeros((pixels_pos_diff.shape[0],), np.bool)
    for i in range(pixels_pos_diff.shape[0]):
        key = project_keyboard.key_label(self.key_map, pixels_pos_diff[i])
        if key is None or project_keyboard.is_black(key):
            continue
        counts[key] += 1
        idx_pos_diff[i] = 1
    idx_neg_diff = np.zeros((pixels_neg_diff.shape[0],), np.bool)
    for i in range(pixels_neg_diff.shape[0]):
        key = project_keyboard.key_label(self.key_map, pixels_neg_diff[i])
        if key is None or project_keyboard.is_white(key):
            continue
        counts[key] += 1
        idx_neg_diff[i] = 1

    # if show_img and counts:
    if counts:
        pixels_pos_diff = homogenize(pixels_pos_diff[idx_pos_diff, :]).astype(np.int32)
        pixels_neg_diff = homogenize(pixels_neg_diff[idx_neg_diff, :]).astype(np.int32)
        self.frame_marked[pixels_pos_diff[:, 1], pixels_pos_diff[:, 0]] = np.array((0, 255, 0))
        self.frame_marked[pixels_neg_diff[:, 1], pixels_neg_diff[:, 0]] = np.array((0, 0, 255))

        pixels_pos_diff_virtual = dehomogenize(pixels_pos_diff.dot(self.T_img_to_virtual.T)).astype(np.int32)
        pixels_neg_diff_virtual = dehomogenize(pixels_neg_diff.dot(self.T_img_to_virtual.T)).astype(np.int32)
        pixels_pos_diff_virtual = np.clip(pixels_pos_diff_virtual, 0, np.array(self.img_virtual.shape[:2]) - 1)
        pixels_neg_diff_virtual = np.clip(pixels_neg_diff_virtual, 0, np.array(self.img_virtual.shape[:2]) - 1)
        self.img_virtual[pixels_pos_diff_virtual[:, 0], pixels_pos_diff_virtual[:, 1]] = np.array((0, 255, 0))
        self.img_virtual[pixels_neg_diff_virtual[:, 0], pixels_neg_diff_virtual[:, 1]] = np.array((0, 0, 255))

    if show_img:
        imshow(self.frame_marked, scale_down=3, window=self.window_name, wait=1)
        imshow(self.img_virtual, window=self.window_name + "_virtual", wait=1)

    # print "elapsed time in countKeyDiff : ", time.time() - start_time
    return counts
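# countKeyDiffs() above uses homogenize()/dehomogenize() helpers that are not
# shown in this section. A minimal sketch of the intended behaviour, assuming
# standard homogeneous-coordinate conventions (append a 1, divide by the last
# coordinate); the project's own helpers may differ in detail.
def homogenize(points):
    # (N, 2) pixel coordinates -> (N, 3) homogeneous coordinates.
    return np.column_stack((points, np.ones(points.shape[0])))

def dehomogenize(points):
    # (N, 3) homogeneous coordinates -> (N, 2) by dividing by the last column.
    return points[:, :-1] / points[:, -1, np.newaxis]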
print corners
corners = corners.astype(np.int32)
for c in corners:
    cv2.circle(frame_marked, tuple(c[:2]), 10, (0, 0, 255), 3)

# Find projection matrix and update key map/mask
T_img_to_virtual, T_virtual_to_img = project_keyboard.find_projection(corners)
key_map = project_keyboard.key_map(frame.shape, T_virtual_to_img, pos_camera)
key_mask = (key_map > 0)[:, :, np.newaxis]
# print T_img_to_virtual

# if project_image:
img_virtual = project_keyboard.project_image(frame, key_mask, T_img_to_virtual)
# print img_virtual

cv2.namedWindow('marked image', cv2.WINDOW_NORMAL)
cv2.resizeWindow('marked image', 400, 550)
# cv2.namedWindow('virtual image', cv2.WINDOW_NORMAL)
# cv2.resizeWindow('virtual image', 400, 550)
cv2.imshow('marked image', frame_marked)
# cv2.imshow('baseline', frame)
project_keyboard.imshow(img_virtual, window="virtual image", wait=1)
# cv2.imshow('virtual image', img_virtual)
cv2.waitKey()

if save:
    np.savez(save_folder + 'Transformations_left.npz',
             T_img_to_virtual=T_img_to_virtual,
             T_virtual_to_img=T_virtual_to_img,
             corners=corners)
    cv2.imwrite(save_folder + 'baseline_left.png', frame)
def find_corners(img, pos_camera=None, mark_img=True, show_img=False, img_white_keys=None):
    # Convert to HSV color space
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Equalize V channel
    img_hsv[:, :, 2] = clahe.apply(img_hsv[:, :, 2])
    if show_img:
        imshow(np.hstack((img_hsv[:, :, 0], img_hsv[:, :, 1], img_hsv[:, :, 2])), 3)

    # Threshold white keys
    img_w = cv2.inRange(img_hsv, np.array([20, 0, 200]), np.array([160, 60, 255]))
    img_h = cv2.inRange(img_hsv, np.array([20, 0, 0]), np.array([160, 255, 255]))
    img_s = cv2.inRange(img_hsv, np.array([0, 0, 0]), np.array([255, 60, 255]))
    img_v = cv2.inRange(img_hsv, np.array([0, 0, 200]), np.array([255, 255, 255]))
    if show_img:
        imshow(np.hstack((img_h, img_s, img_v)), 3)

    # Fill white keys with watershed
    img_w[:, :10] = 255
    img_w[:, -10:] = 255
    img_w[:200, :] = 255
    img_w[-200:, :] = 255
    cv2.floodFill(img_w, np.zeros((img_w.shape[0] + 2, img_w.shape[1] + 2), dtype=np.uint8), (0, 0), 0)
    if show_img:
        imshow(img_w, 3)

    img_fg = cv2.morphologyEx(img_w, cv2.MORPH_ERODE, np.ones((10, 10)))
    img_bg = cv2.morphologyEx(img_w, cv2.MORPH_DILATE, np.ones((100, 100)))
    img_bg[:, :10] = 0
    img_bg[:, -10:] = 0
    img_bg[:200, :] = 0
    img_bg[-200:, :] = 0
    img_bg = 255 - img_bg

    markers = np.zeros(img_w.shape, dtype=np.int32)
    markers[img_fg > 0] = 1
    markers[img_bg > 0] = 2
    markers = cv2.watershed(img, markers)
    img_w = (255 * (markers == 1)).astype(np.uint8)
    if show_img:
        imshow(img_w, 3)
    cv2.floodFill(img_w, np.zeros((img_w.shape[0] + 2, img_w.shape[1] + 2), dtype=np.uint8), (0, 0), 0)

    # Find orientation of largest connected component
    _, contours, _ = cv2.findContours(img_w, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    areas = np.array([cv2.contourArea(c) for c in contours])
    vx, vy, x, y = cv2.fitLine(contours[areas.argmax()], cv2.DIST_L2, 0, 0.01, 0.01)
    V = np.vstack((vx, vy))

    # Determine camera position from keyboard orientation
    if pos_camera is None:
        if V[0] * V[1] > 0:
            pos_camera = "right"
        else:
            pos_camera = "left"

    # Dilate image with keyboard-aligned kernel and extract largest connected component
    theta = np.arctan2(V[0], V[1])
    kernel = 255 * np.round(scipy.ndimage.rotate(np.ones((150, 2)), theta * 180 / np.pi)).astype(np.uint8)
    img_cc = cv2.morphologyEx(img_w, cv2.MORPH_DILATE, kernel)
    _, contours, _ = cv2.findContours(img_cc, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    areas = np.array([cv2.contourArea(c) for c in contours])
    contour = np.squeeze(contours[areas.argmax()], axis=1)
    img_cc.fill(0)
    cv2.drawContours(img_cc, contours, areas.argmax(), 255, -1)
    img_w = 255 * np.logical_and(img_w > 0, img_cc > 0).astype(np.uint8)
    if show_img:
        imshow(img_cc, 3)

    # Find combined contour of keyboard segmentations
    _, contours, _ = cv2.findContours(img_w, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    img_w.fill(0)
    if mark_img:
        for i in range(len(contours)):
            cv2.drawContours(img, contours, i, (0, 0, 255), 3)
    for i in range(len(contours)):
        cv2.drawContours(img_w, contours, i, 255, -1)
    areas = np.array([cv2.contourArea(c) for c in contours])
    contours = [np.squeeze(contour, axis=1) for contour in contours]
    contour = np.vstack(contours)
    contour = np.column_stack((contour, np.ones(contour.shape[0])))
    if show_img:
        imshow(img_w, 3)

    # Find corners
    corner_left = contour[contour[:, 0].argmin()].astype(np.int32)
    corner_right = contour[contour[:, 0].argmax()].astype(np.int32)
    corner_bottom = contour[contour.shape[0] - 1 - contour[:, 1][::-1].argmax()].astype(np.int32)
    corner_top = contour[contour[:, 1].argmin()].astype(np.int32)

    # Push up bottom corner to the key's surface
    num_white = 0
    for i in range(40):
        vec_w = img_w[corner_bottom[1] - i, corner_bottom[0] - 20:corner_bottom[0] + 20] > 0
        num_white_next = vec_w.sum()
        if num_white_next - num_white > 3:
            corner_bottom[1] -= i - 1
            break
        num_white = num_white_next

    # Refine corners with subpixel search
    corners = np.row_stack((corner_left[:2], corner_right[:2], corner_bottom[:2], corner_top[:2]))
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.001)
    corners = cv2.cornerSubPix(img_v, np.float32(corners.astype(np.float64)), (10, 10), (-1, -1), criteria)
    corner_left = np.round(np.append(corners[0], 1)).astype(np.int32)
    corner_right = np.round(np.append(corners[1], 1)).astype(np.int32)
    corner_bottom = np.round(np.append(corners[2], 1)).astype(np.int32)
    corner_top = np.round(np.append(corners[3], 1)).astype(np.int32)

    # Determine front and back sides of the keyboard
    if pos_camera == "right":
        corner_back = corner_right
        corner_front = corner_left
        contour_back = img_w.shape[1] - 1 - np.argmax(img_w[corner_top[1]:corner_back[1] + 1, ::-1] > 0, axis=1)
    else:
        corner_back = corner_left
        corner_front = corner_right
        contour_back = np.argmax(img_w[corner_top[1]:corner_back[1] + 1] > 0, axis=1)

    # Find back contour
    idx = np.logical_and(contour_back < img_w.shape[1] - 1, contour_back > 0)
    contour_back = np.column_stack((contour_back,
                                    np.arange(corner_top[1], corner_back[1] + 1),
                                    np.ones((contour_back.shape[0],), dtype=np.int32)))
    contour_back = contour_back[idx]
    contour_back_origin = contour_back - corner_back
    cv2.polylines(img, np.int32([contour_back[:, :2]]), False, (0, 255, 255), 5)
    corner_top = contour_back[contour_back[:, 1].argmin()]

    # Rotate line from vertical position until it hits the back contour
    num_hit = 0
    if pos_camera == "right":
        sign_theta = 1
    else:
        sign_theta = -1
    for theta in np.linspace(0, np.pi / 2, 90):
        line_back = [sign_theta * np.cos(theta), -np.sin(theta), 0]
        num_hit_new = np.sum(np.dot(contour_back_origin, line_back) > 0)
        # Stop when the gradient of hit pixels spikes
        if num_hit_new - num_hit > contour_back.shape[0] / 30 and theta > 0:
            break
        num_hit = num_hit_new
    line_back[-1] = -np.dot(corner_back, line_back)

    # Update contour to include only points close to the line
    contour_back = contour_back[np.abs(np.dot(contour_back, line_back)) < 10, :]
    if mark_img:
        cv2.polylines(img, np.int32([contour_back[:, :2]]), False, (0, 255, 0), 5)
        dir_line_back = np.array([line_back[1], -line_back[0]])
        points_line_back = np.array([2000, -2000])[:, np.newaxis] * dir_line_back[np.newaxis, :] + corner_back[np.newaxis, :2]
        cv2.line(img, tuple(points_line_back[0].astype(np.int32)), tuple(points_line_back[1].astype(np.int32)), (255, 0, 255), 5)

    # Fit least squares line to back contour
    # U, S, VT = np.linalg.svd(contour_back[:, :2] - corner_back[:2])
    U, S, VT = np.linalg.svd(contour_back[:, :2] - contour_back[:, :2].mean(axis=0))
    line_back = np.append(VT[-1], 0)
    line_back[-1] = -np.dot(corner_back, line_back)
    if mark_img:
        dir_line_back = np.array([line_back[1], -line_back[0]])
        points_line_back = np.array([2000, -2000])[:, np.newaxis] * dir_line_back[np.newaxis, :] + corner_back[np.newaxis, :2]
        cv2.line(img, tuple(points_line_back[0].astype(np.int32)), tuple(points_line_back[1].astype(np.int32)), (255, 0, 0), 5)

    # Find intersection between back and top lines
    corner_top_mid = corner_top
    line_top = np.cross(corner_top, corner_front).astype(np.float32)
    line_top /= np.linalg.norm(line_top)
    corner_top = np.cross(line_back, line_top)
    corner_top /= corner_top[-1]
    corner_top = np.round(corner_top).astype(np.int32)

    # Plot corners
    if mark_img:
        cv2.circle(img, tuple(corner_top[:2]), 10, (0, 255, 0), 3)
        cv2.circle(img, tuple(corner_right[:2]), 10, (255, 255, 0), 3)
        cv2.circle(img, tuple(corner_bottom[:2]), 10, (255, 0, 0), 3)
        cv2.circle(img, tuple(corner_left[:2]), 10, (255, 0, 255), 3)
        cv2.circle(img, tuple(corner_top_mid[:2]), 10, (255, 0, 255), 3)

    # Collect corners
    if pos_camera == "left":
        corners = np.row_stack((corner_left, corner_top, corner_right, corner_bottom))
    else:
        corners = np.row_stack((corner_top, corner_right, corner_bottom, corner_left))

    if img_white_keys is not None:
        img_white_keys[:, :] = img_w

    return corners, pos_camera
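# find_corners() above relies on a standard projective-geometry identity: for
# points written as (x, y, 1) and lines as (a, b, c) with ax + by + c = 0, the
# cross product of two points gives the line through them, and the cross
# product of two lines gives their intersection point. A small self-contained
# check of that identity (illustration only, not part of the pipeline; the
# helper names below are hypothetical):
def line_through(p, q):
    # Homogeneous line joining two homogeneous points.
    return np.cross(p, q)

def intersect(l, m):
    # Homogeneous intersection point of two homogeneous lines.
    p = np.cross(l, m)
    return p / p[-1]

# Example: the horizontal line y = 2 and the vertical line x = 3 meet at (3, 2).
l1 = line_through(np.array([0., 2., 1.]), np.array([1., 2., 1.]))
l2 = line_through(np.array([3., 0., 1.]), np.array([3., 1., 1.]))
assert np.allclose(intersect(l1, l2), [3., 2., 1.])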
points_img_flat = points_virtual.dot(T_virtual3d_to_img.T)
points_img_flat /= points_img_flat[:, -1, np.newaxis]
points_virtual_flat = points_img_flat.dot(T_img_to_virtual.T)
points_virtual_flat /= points_virtual_flat[:, -1, np.newaxis]
points_virtual_flat = points_virtual_flat.astype(np.int32)
cv2.circle(img_virtual, tuple(points_virtual_flat[0, :2][::-1]), 10, (0, 0, 255), 3)
cv2.circle(img_virtual, tuple(points_virtual_flat[1, :2][::-1]), 10, (0, 0, 255), 3)
cv2.circle(img_virtual, tuple(points_virtual_flat[2, :2][::-1]), 10, (0, 0, 255), 3)

# Create map of key indices
print(T_virtual3d_to_img)
# img_map = project_keyboard.key_map(img.shape, T_virtual3d_to_img, pos_camera)
img_map = project_keyboard.key_map(img.shape, T_virtual_to_img, pos_camera)
# if show_img:
imshow(img_map, 3)

# Plot black keys
# TODO: Remove
# if pos_camera == "left":
#     black_keys = list(reversed(list(project_keyboard.black_keys())))
# else:
#     black_keys = list(project_keyboard.black_keys())
# colors = [(0, 0, 255), (0, 255, 255), (0, 255, 0), (255, 0, 0), (255, 0, 255)]
# idx = 0
# for key in black_keys:
#     bbox_black_virtual3d = project_keyboard.bounding_box(key)
#     bbox_black_virtual3d = np.column_stack((bbox_black_virtual3d, np.ones(bbox_black_virtual3d.shape[0],)))
#     bbox_black_img = bbox_black_virtual3d.dot(T_virtual3d_to_img.T)
#     bbox_black_img = (bbox_black_img[:, :2] / bbox_black_img[:, 2, np.newaxis]).astype(np.int32)
#     hull_black_img = np.squeeze(cv2.convexHull(bbox_black_img))