def learn_frame(im_depth, skel_pos, offsets_1, offsets_2): ''' Get frame data ''' skel_pos = pts_to_surface(skel_pos, im_depth, thresh=50) ''' Compute features and labels ''' im_labels = get_per_pixel_joints(im_depth, skel_pos, 'geodesic') im_labels[(im_depth==0)*(im_labels>N_MSR_JOINTS)] = N_MSR_JOINTS+1 # im_labels[(im_depth>0)*(im_labels==255)] = N_MSR_JOINTS+1 mask = (im_labels<N_MSR_JOINTS) pixel_loc = np.nonzero(mask) pixel_labels = im_labels[pixel_loc] # embed() features = calculate_rf_features(im_depth, offsets_1, offsets_2, mask=mask) ''' Visualize ''' if 1: im_labels = np.repeat(im_labels[:,:,None], 3, -1) im_labels = display_skeletons(im_labels, skel_pos, (20,), skel_type='Low') cv2.putText(im_labels, "Blue=Truth", (10, 210), cv2.FONT_HERSHEY_DUPLEX, .5, (int(im_labels.max()/2), 0, 0)) cv2.putText(im_labels, "Green=Predict", (10, 230), cv2.FONT_HERSHEY_DUPLEX, .5, (0, int(im_labels.max()/2), 0)) cv2.imshow("feature_space", im_labels/im_labels.max().astype(np.float)) ret = cv2.waitKey(10) if ret > 0: exit return features, pixel_labels
def learn_frame(im_depth, skel_pos, offsets_1, offsets_2): ''' Get frame data ''' skel_pos = pts_to_surface(skel_pos, im_depth, thresh=50) ''' Compute features and labels ''' im_labels = get_per_pixel_joints(im_depth, skel_pos, 'geodesic') im_labels[(im_depth == 0) * (im_labels > N_MSR_JOINTS)] = N_MSR_JOINTS + 1 # im_labels[(im_depth>0)*(im_labels==255)] = N_MSR_JOINTS+1 mask = (im_labels < N_MSR_JOINTS) pixel_loc = np.nonzero(mask) pixel_labels = im_labels[pixel_loc] # embed() features = calculate_rf_features(im_depth, offsets_1, offsets_2, mask=mask) ''' Visualize ''' if 1: im_labels = np.repeat(im_labels[:, :, None], 3, -1) im_labels = display_skeletons(im_labels, skel_pos, (20, ), skel_type='Low') cv2.putText(im_labels, "Blue=Truth", (10, 210), cv2.FONT_HERSHEY_DUPLEX, .5, (int(im_labels.max() / 2), 0, 0)) cv2.putText(im_labels, "Green=Predict", (10, 230), cv2.FONT_HERSHEY_DUPLEX, .5, (0, int(im_labels.max() / 2), 0)) cv2.imshow("feature_space", im_labels / im_labels.max().astype(np.float)) ret = cv2.waitKey(10) if ret > 0: exit return features, pixel_labels
def plotUsers(image, users): # embed() # if type(users) == dict: # users = [u for u in users.values()] # for u in users: # uvw = [-1] for u in users.keys(): if users[u]['tracked']: xyz = users[u]['com'] uvw = skel2depth(np.array([xyz]), image.shape)[0] ''' Only plot if there are valid coordinates (not [0,0,0])''' if uvw[0] > 0: if users[u]['tracked'] and len( users[u]['jointPositions'].keys()) > 0: '''Colorize COM''' cv2.rectangle(image, tuple([uvw[0] - 3, uvw[1] - 3]), tuple([uvw[0] + 3, uvw[1] + 3]), (4000)) '''Plot skeleton''' pts = [j for j in users[u]['jointPositions'].values()] skel = skel2depth(np.array(pts), image.shape) from pyKinectTools.utils.SkeletonUtils import display_skeletons image = display_skeletons(image, skel, color=(image.max(), 0, 0), skel_type='Kinect')
def plotUsers(image, users): # embed() # if type(users) == dict: # users = [u for u in users.values()] # for u in users: # uvw = [-1] for u in users.keys(): if users[u]['tracked']: xyz = users[u]['com'] uvw = skel2depth(np.array([xyz]), image.shape)[0] ''' Only plot if there are valid coordinates (not [0,0,0])''' if uvw[0] > 0: if users[u]['tracked'] and len(users[u]['jointPositions'].keys()) > 0: '''Colorize COM''' cv2.rectangle(image, tuple([uvw[0]-3, uvw[1]-3]), tuple([uvw[0]+3, uvw[1]+3]), (4000)) '''Plot skeleton''' pts = [j for j in users[u]['jointPositions'].values()] skel = skel2depth(np.array(pts), image.shape) from pyKinectTools.utils.SkeletonUtils import display_skeletons image = display_skeletons(image, skel, color=(image.max(),0,0), skel_type='Kinect')
def infer_pose(self, depth): ''' depth : depth image ''' # Compute features and labels # embed() features = calculate_rf_features(depth, self.offsets[0], self.offsets[1]) pixel_loc = np.nonzero(depth > 0) pred = self.rf.predict(features) im_predict = np.ones_like(depth) + N_MSR_JOINTS + 1 im_predict[pixel_loc] = pred skel_pos_pred = pose_mean_shift(im_predict) ''' Visualize ''' if 1: cv2.imshow("prediction1", im_predict / float(im_predict.max())) im_predict = np.repeat(depth[:, :, None].astype(np.float), 3, -1) im_predict[depth > 0] -= depth[depth > 0].min() im_predict /= float(im_predict.max() / 255.) im_predict = im_predict.astype(np.uint8) # im_predict = display_skeletons(im_predict, skel_pos, (255,0,0), SKEL_DISPLAY_MODE) im_predict = display_skeletons(im_predict, skel_pos_pred, (0, 255, 0), SKEL_DISPLAY_MODE) cv2.putText(im_predict, "Blue=Truth", (10, 210), cv2.FONT_HERSHEY_DUPLEX, .5, (int(im_predict.max() / 2), 0, 0)) cv2.putText(im_predict, "Green=Predict", (10, 230), cv2.FONT_HERSHEY_DUPLEX, .5, (0, int(im_predict.max() / 2), 0)) cv2.imshow("prediction", im_predict) ret = cv2.waitKey(10) # if ret > 0: break return skel_pos_pred, im_predict
def infer_pose(self,depth): ''' depth : depth image ''' # Compute features and labels # embed() features = calculate_rf_features(depth, self.offsets[0], self.offsets[1]) pixel_loc = np.nonzero(depth>0) pred = self.rf.predict(features) im_predict = np.ones_like(depth)+N_MSR_JOINTS+1 im_predict[pixel_loc] = pred skel_pos_pred = pose_mean_shift(im_predict) ''' Visualize ''' if 1: cv2.imshow("prediction1", im_predict/float(im_predict.max())) im_predict = np.repeat(depth[:,:,None].astype(np.float), 3, -1) im_predict[depth>0] -= depth[depth>0].min() im_predict /= float(im_predict.max()/255.) im_predict = im_predict.astype(np.uint8) # im_predict = display_skeletons(im_predict, skel_pos, (255,0,0), SKEL_DISPLAY_MODE) im_predict = display_skeletons(im_predict, skel_pos_pred, (0,255,0), SKEL_DISPLAY_MODE) cv2.putText(im_predict, "Blue=Truth", (10, 210), cv2.FONT_HERSHEY_DUPLEX, .5, (int(im_predict.max()/2), 0, 0)) cv2.putText(im_predict, "Green=Predict", (10, 230), cv2.FONT_HERSHEY_DUPLEX, .5, (0, int(im_predict.max()/2), 0)) cv2.imshow("prediction", im_predict) ret = cv2.waitKey(10) # if ret > 0: break return skel_pos_pred, im_predict
def get_per_pixel_joints(im_depth, skel, alg='geodesic', radius=5): ''' Find the closest joint to each pixel using geodesic distances. ---Paramaters--- im_depth : should be masked depth image skel : in image coords alg : 'geodesic' or 'circular' radius : [only for circular] ''' height, width = im_depth.shape distance_ims = np.empty([height, width, N_SKEL_JOINTS]) MAX_VALUE = 32000 # Get rid of edges around joints edge_thresh = 200#10 gradients = np.gradient(im_depth) mag = np.sqrt(gradients[0]**2+gradients[1]**2) im_depth[mag>edge_thresh] = 0 joints_out_of_bounds = np.abs(im_depth[skel[:,0], skel[:,1]] - skel[:,2]) > 100 # Only look at major joints for i, j in zip(SKEL_JOINTS, range(N_SKEL_JOINTS)): if skel[i] in joints_out_of_bounds: distance_ims[:,:,j] = MAX_VALUE continue pos = skel[i] if alg == 'geodesic': x = np.maximum(np.minimum(pos[1], height-1), 0) y = np.maximum(np.minimum(pos[0], width-1), 0) # if j == 0: # pts, min_map = generateKeypoints(np.ascontiguousarray(im_depth), centroid=[x,y], iterations=10, scale=6) im_dist = distance_map(np.ascontiguousarray(im_depth), centroid=[x,y], scale=6) distance_ims[:,:,j] = im_dist if 0: radius = 5 x = np.maximum(np.minimum(pos[1], height-1-radius), radius) y = np.maximum(np.minimum(pos[0], width-1-radius), radius) # print x,y pts = circle(x,y, radius) # print pts[0] max_ = distance_ims[distance_ims[:,:,j]<MAX_VALUE, j].max() distance_ims[distance_ims[:,:,j]>=MAX_VALUE,j] = max_+100 distance_ims[pts[0],pts[1],j] = max_+100 figure(1) subplot(3,5,j+1) imshow(distance_ims[:,:,j]) axis('off') # subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=.1, hspace=.1) tight_layout() subplots_adjust(left=.001, bottom=.001, wspace=.003, hspace=.003) elif alg == 'circular': x = np.maximum(np.minimum(pos[1], height-1-radius), radius) y = np.maximum(np.minimum(pos[0], width-1-radius), radius) pts = draw.circle(x,y, radius) closest_pos[pts] = i else: print "No algorithm type in 'get_per_pixel_joints'" if alg == 'geodesic': closest_pos = np.argmin(distance_ims, -1).astype(np.uint8) closest_pos_all = np.argsort(distance_ims, -1).astype(np.uint8) elif alg == 'circular': closest_pos = np.zeros_like(im_depth) #Change all background pixels # mask = im_depth!=0 # mask = (distance_ims.max(-1)<32000) mask = im_depth!=0 closest_pos[-mask] = N_MSR_JOINTS+1 if 0: # Reorder for occlusions skel_depths = np.array([skel[i,2] for i in SKEL_JOINTS]) skel_XYZ = np.array([skel[i] for i in SKEL_JOINTS]) pos_order = np.argsort(skel_depths).tolist() pos_order.reverse() residual = im_depth[mask] - skel_depths[closest_pos[mask]] im_tmp = np.zeros_like(im_depth) im_tmp[mask] = residual closest_arg = np.argmin(distance_ims+skel_depths, -1) closest_arg[-mask] = -1 # If geo dist < euc distance then kill # Go from back to front. Only care about things ahead from scipy.spatial import distance # euclid_dists = distance.squareform(distance.pdist(depth2world(skel_XYZ, [240,320]))) euclid_dists = distance.squareform(distance.pdist(skel_XYZ)) geo_dists = np.zeros_like(euclid_dists) for i in xrange(len(SKEL_JOINTS)): p = pos_order[i] # If a joint is behind it, don't care -- set to infinity geo_dists[p,:] = [distance_ims[skel[j][0],skel[j][1],p] if j in pos_order[i:] else np.inf for j in SKEL_JOINTS] # Check if it's incorrect at least 2x removed_joints = [] err = (geo_dists*2. < euclid_dists) for i in xrange(len(err)): if err[i].sum(0) >= 1: # removed_joints += [i] distance_ims[:,:,i] = MAX_VALUE closest_pos_e = np.argmin(distance_ims, -1).astype(np.uint8) closest_pos_e[-mask] = N_MSR_JOINTS+1 subplot(2,2,1) closest_pos = display_skeletons(closest_pos, skel, color=(23,), skel_type='Low') imshow(closest_pos); axis('off') subplot(2,2,2) imshow(closest_pos_e); axis('off') subplot(2,2,3) imshow(closest_pos_e==closest_pos); axis('off') # blank = np.ones(300,300) # removed_text = '' # for i in removed_joints: # removed_text += str(i)+" " # cv2.putText() tight_layout() subplots_adjust(left=.001, bottom=.001, wspace=.003, hspace=.003) show() # embed() # closest_pos_e[im_depth==0] = N_MSR_JOINTS+1 # closest_pos2[-mask] = N_MSR_JOINTS+1 # closest_pos2[closest_pos>=32000] = N_MSR_JOINTS+1 # closest_arg[im_depth==0] = N_MSR_JOINTS+1 # figure(1); imshow(closest_arg); # figure(3); imshow(closest_pos2) # # Reorder # im_order = np.zeros_like(im_depth)-1 # for p in range(len(pos_order)): # mask_tmp = closest_pos==p # dists_tmp = distance_ims[mask_tmp, p] + skel[pos_order[p],2] # im_order[mask_tmp] = np.maximum(im_order[mask_tmp], dists_tmp) # figure(100) # imshow(closest_pos) # figure(101) # imshow(im_depth) # show() # embed() return closest_pos
def compute_features(name, vis=False, person_rez=[144,72]): ''' ---Parameters--- filename : base filename for depth/color/skel vis: turn visualize on? person_rez : all hog/hof features should be the same dimensions, so we resize the people to this resolution ---Return--- features: features in a dictionary ''' ''' Get filenames ''' depth_file = name + "depth.bin" color_file = name + "rgb.avi" skeleton_file = name + "skeleton.txt" ''' Read data from each video/sequence ''' try: depthIms, maskIms = read_MSR_depth_ims(depth_file) colorIms = read_MSR_color_ims(color_file) skels_world, skels_im = read_MSR_skeletons(skeleton_file) except: print "Error reading data" return -1 #print depthIms.shape, colorIms.shape framecount = np.minimum(depthIms.shape[0], colorIms.shape[0]) dataset_features = {'framecount':framecount, 'hog':[], 'hof':[], 'skel_image':[], 'skel_world':[]} grayIm_prev = None ''' View all data''' for frame in xrange(framecount): depth = depthIms[frame] mask = maskIms[frame] color = colorIms[frame] # Skeleton in world (w) and image (i) coordinates skel_w = skels_world[frame] skel_i = world2depth(skel_w, rez=[240,320]) ''' Calculate hogs ''' grayIm = (rgb2gray(color) * 255).astype(np.uint8) hogIm = np.zeros_like(depth) person_mask, bounding_boxes, labels = extract_people(grayIm, mask>0) rez = grayIm[bounding_boxes[0]].shape #hog_input_im = sm.imresize(grayIm[bounding_boxes[0]], person_rez) hog_input_im = cv2.resize(grayIm[bounding_boxes[0]], (person_rez[1], person_rez[0])) hogData, hogImBox = hog(hog_input_im, orientations=4, visualise=True) #hogIm[bounding_boxes[0]] = sm.imresize(hogImBox, [rez[0],rez[1]]) hogIm[bounding_boxes[0]] = cv2.resize(hogImBox, (rez[1],rez[0])) # hogIm[bounding_boxes[0]] = hogImBox hogIm *= person_mask ''' Calculate HOF ''' hofIm = np.zeros_like(depth) if grayIm_prev is None: grayIm_prev = np.copy(grayIm) continue else: flow = getFlow(grayIm_prev[bounding_boxes[0]], grayIm[bounding_boxes[0]]) rez = flow.shape bounding_boxes = (bounding_boxes[0][0], bounding_boxes[0][1], slice(0,2)) #hof_input_im = np.dstack([sm.imresize(flow[0], [person_rez[0],person_rez[1]]), # sm.imresize(flow[1], [person_rez[0],person_rez[1]])])i hof_input_im = np.dstack([cv2.resize(flow[0], (person_rez[1],person_rez[0])), cv2.resize(flow[1], (person_rez[1], person_rez[0]))]) hofData, hofImBox = hof(hof_input_im, orientations=5, visualise=True) #hofIm[bounding_boxes[:2]] = sm.imresize(hofImBox, [rez[0],rez[1]]) hofIm[bounding_boxes[:2]] = cv2.resize(hogImBox, (rez[1],rez[0])) hofIm *= person_mask grayIm_prev = np.copy(grayIm) ''' Add features ''' dataset_features['hog'] += [hogData] dataset_features['hof'] += [hofData] dataset_features['skel_image'] += [skel_i] dataset_features['skel_world'] += [skel_w] ''' Plot skeletons on color image''' if vis: color = display_skeletons(color, skel_i) ''' Visualization ''' cv2.imshow("Depth", depth/float(depth.max())) cv2.imshow("HOG", hogIm/float(hogIm.max())) cv2.imshow("RGB", color) cv2.imshow("RGB masked", color*(mask[:,:,None]>0)) cv2.imshow("HOF", hofIm/float(hofIm.max())) ret = cv2.waitKey(10) if ret >= 0: break print "Done calculating ", name return dataset_features
def main(visualize=False, learn=False, actions=None, subjects=None, n_frames=220): # learn = True # learn = False if actions is []: actions = [2] if subjects is []: subjects = [2] # actions = [1] # actions = [1, 2, 3, 4, 5] # subjects = [1] if 1: MHAD = True cam = MHADPlayer(base_dir='/Users/colin/Data/BerkeleyMHAD/', kinect=1, actions=actions, subjects=subjects, reps=[1], get_depth=True, get_color=True, get_skeleton=True, fill_images=False) else: MHAD = False cam = KinectPlayer(base_dir='./', device=1, bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=True, fill_images=False) bg = Image.open( '/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_A.tif') # cam = KinectPlayer(base_dir='./', device=2, bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=True, fill_images=False) # bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_B.tif') cam.bgSubtraction.backgroundModel = np.array(bg.getdata()).reshape( [240, 320]).clip(0, 4500) height, width = cam.depthIm.shape skel_previous = None face_detector = FaceDetector() hand_detector = HandDetector(cam.depthIm.shape) # curve_detector = CurveDetector(cam.depthIm.shape) # Video writer # video_writer = cv2.VideoWriter("/Users/colin/Desktop/test.avi", cv2.cv.CV_FOURCC('M','J','P','G'), 15, (320,240)) # Save Background model # im = Image.fromarray(cam.depthIm.astype(np.int32), 'I') # im.save("/Users/Colin/Desktop/k2.png") # Setup pose database append = True append = False # pose_database = PoseDatabase("PoseDatabase.pkl", learn=learn, search_joints=[0,4,7,10,13], append=append) pose_database = PoseDatabase("PoseDatabase.pkl", learn=learn, search_joints=[0, 2, 4, 5, 7, 10, 13], append=append) # Setup Tracking skel_init, joint_size, constraint_links, features_joints, skel_parts, convert_to_kinect = get_14_joint_properties( ) constraint_values = [] for c in constraint_links: constraint_values += [ np.linalg.norm(skel_init[c[0]] - skel_init[c[1]], 2) ] constraint_values = np.array(constraint_values) skel_current = None #skel_init.copy() skel_previous = None #skel_current.copy() # Evaluation accuracy_all_db = [] accuracy_all_track = [] joint_accuracy_db = [] joint_accuracy_track = [] # geo_accuracy = [] # color_accuracy = [] # lbp_accuracy = [] frame_count = 0 frame_rate = 1 if not MHAD: cam.next(350) frame_prev = 0 # try: if 1: while cam.next(frame_rate): # and frame_count < n_frames: if frame_count - frame_prev > 100: print "" print "Frame #{0:d}".format(frame_count) frame_prev = frame_count if not MHAD: if len(cam.users) == 0: continue else: # cam.users = [np.array(cam.users[0]['jointPositions'].values())] if np.any(cam.users[0][0] == -1): continue cam.users[0][:, 1] *= -1 cam.users_uv_msr = [ cam.camera_model.world2im(cam.users[0], [240, 320]) ] # Apply mask to image if MHAD: mask = cam.get_person(2) > 0 else: mask = cam.get_person() > 0 if np.all(mask == False): continue im_depth = cam.depthIm cam.depthIm[cam.depthIm > 3000] = 0 im_color = cam.colorIm * mask[:, :, None] cam.colorIm *= mask[:, :, None] pose_truth = cam.users[0] pose_truth_uv = cam.users_uv_msr[0] # Get bounding box around person box = nd.find_objects(mask)[0] d = 20 # Widen box box = (slice(np.maximum(box[0].start-d, 0), \ np.minimum(box[0].stop+d, height-1)), \ slice(np.maximum(box[1].start-d, 0), \ np.minimum(box[1].stop+d, width-1))) box_corner = [box[0].start, box[1].start] ''' ---------- ----------------------------------- --------''' ''' ----------- Feature Detector centric approach ---------''' ''' ---------- ----------------------------------- --------''' ''' ---- Calculate Detectors ---- ''' # Face detection face_detector.run(im_color[box]) # Skin detection hand_markers = hand_detector.run(im_color[box], n_peaks=3) # Calculate Geodesic Extrema im_pos = cam.camera_model.im2PosIm( cam.depthIm * mask)[box] * mask[box][:, :, None] geodesic_markers = geodesic_extrema_MPI(im_pos, iterations=5, visualize=False) _, geo_map = geodesic_extrema_MPI(im_pos, iterations=1, visualize=True) geodesic_markers_pos = im_pos[geodesic_markers[:, 0], geodesic_markers[:, 1]] markers = list(geodesic_markers) + list( hand_markers) #+ list(lop_markers) + curve_markers markers = np.array([list(x) for x in markers]) ''' ---- Database lookup ---- ''' if 1: pts_mean = im_pos[(im_pos != 0)[:, :, 2]].mean(0) if learn: # Normalize pose pose_uv = cam.users_uv[0] if np.any(pose_uv == 0): print "skip" frame_count += frame_rate continue pose_database.update(pose_truth - pts_mean) else: # Concatenate markers markers = list(geodesic_markers) + hand_markers markers = np.array([list(x) for x in markers]) # Normalize pose pts = im_pos[markers[:, 0], markers[:, 1]] pts = np.array([x for x in pts if x[0] != 0]) pts -= pts_mean # Get closest pose pose = pose_database.query(pts, knn=5) # embed() for i in range(5): pose_tmp = cam.camera_model.world2im( pose[i] + pts_mean, cam.depthIm.shape) cam.colorIm = display_skeletons(cam.colorIm, pose_tmp, skel_type='Kinect', color=(0, i * 40 + 50, 0)) pose = pose[0] # im_pos -= pts_mean # R,t = IterativeClosestPoint(pose, im_pos.reshape([-1,3])-pts_mean, max_iters=5, min_change=.001, pt_tolerance=10000) # pose = np.dot(R.T, pose.T).T - t # pose = np.dot(R, pose.T).T + t pose += pts_mean pose_uv = cam.camera_model.world2im( pose, cam.depthIm.shape) # print pose surface_map = nd.distance_transform_edt( -nd.binary_erosion(mask[box]), return_distances=False, return_indices=True) try: pose_uv[:, :2] = surface_map[:, pose_uv[:, 0] - box_corner[0], pose_uv[:, 1] - box_corner[1]].T + [ box_corner[0], box_corner[1] ] except: pass pose = cam.camera_model.im2world(pose_uv, cam.depthIm.shape) # print pose ''' ---- Tracker ---- ''' # surface_map = nd.distance_transform_edt(-mask[box], return_distances=False, return_indices=True) # surface_map = nd.distance_transform_edt(im_pos[:,:,2]==0, return_distances=False, return_indices=True) if skel_previous is None: # if 1: skel_previous = pose.copy() skel_current = pose.copy() skel_previous_uv = pose_uv.copy() skel_current_uv = pose_uv.copy() for _ in range(1): # ---- (Step 1A) Find feature coordespondences ---- try: skel_previous_uv[:, : 2] = surface_map[:, skel_previous_uv[:, 0] - box_corner[0], skel_previous_uv[:, 1] - box_corner[1]].T + [ box_corner[0], box_corner[1] ] except: pass skel_current = cam.camera_model.im2world( skel_previous_uv, cam.depthIm.shape) # Alternative method: use kdtree ## Calc euclidian distance between each pixel and all joints px_corr = np.zeros( [im_pos.shape[0], im_pos.shape[1], len(skel_current)]) # for i,s in enumerate(pose): # for i,s in enumerate(skel_current): # px_corr[:,:,i] = np.sqrt(np.sum((im_pos - s)**2, -1))# / joint_size[i]**2 # for i,s in enumerate(pose_uv): # Geodesics for i, s in enumerate(skel_previous_uv): ''' Problem: need to constrain pose_uv to mask ''' _, geo_map = geodesic_extrema_MPI( im_pos, [s[0] - box_corner[0], s[1] - box_corner[1]], iterations=1, visualize=True) px_corr[:, :, i] = geo_map subplot(2, 7, i + 1) # imshow(geo_map, vmin=0, vmax=2000) # axis('off') px_corr[geo_map == 0, i] = 9999 cv2.imshow('gMap', (px_corr.argmin(-1) + 1) / 15. * mask[box]) ## Handle occlusions by argmax'ing over set of skel parts # visible_configurations = list(it.product([0,1], repeat=5))[1:] visible_configurations = [ # [0,1,1,1,1], # [1,0,0,0,0], [1, 1, 1, 1, 1] ] px_visibility_label = np.zeros([ im_pos.shape[0], im_pos.shape[1], len(visible_configurations) ], dtype=np.uint8) visible_scores = np.ones(len(visible_configurations)) * np.inf # Try each occlusion configuration set for i, v in enumerate(visible_configurations): visible_joints = list( it.chain.from_iterable(skel_parts[np.array(v) > 0])) px_visibility_label[:, :, i] = np.argmin( px_corr[:, :, visible_joints], -1) #.reshape([im_pos.shape[0], im_pos.shape[1]]) visible_scores[i] = np.min(px_corr[:, :, visible_joints], -1).sum() # Choose best occlusion configuration occlusion_index = np.argmin(visible_scores) occlusion_configuration = visible_configurations[ occlusion_index] occlusion_set = list( it.chain.from_iterable(skel_parts[np.array( visible_configurations[occlusion_index]) > 0])) # Choose label for pixels based on occlusion configuration px_label = px_visibility_label[:, :, occlusion_index] * mask[box] px_label_flat = px_visibility_label[:, :, occlusion_index][ mask[box]].flatten() visible_joints = [ 1 if x in occlusion_set else 0 for x in range(len(pose)) ] # print visible_joints # Project distance to joint's radius px_joint_displacement = im_pos[ mask[box]] - skel_current[px_label_flat] px_joint_magnitude = np.sqrt( np.sum(px_joint_displacement**2, -1)) joint_mesh_pos = skel_current[ px_label_flat] + px_joint_displacement * ( joint_size[px_label_flat] / px_joint_magnitude)[:, None] px_joint_displacement = joint_mesh_pos - im_pos[mask[box]] # Ensure pts aren't too far away px_joint_displacement[np.abs(px_joint_displacement) > 500] = 0 # embed() if 0: x = im_pos.copy() * 0 x[mask[box]] = joint_mesh_pos for i in range(3): subplot(1, 4, i + 1) imshow(x[:, :, i]) axis('off') subplot(1, 4, 4) imshow((px_label + 1) * mask[box]) # Calc the correspondance change in position for each joint correspondence_displacement = np.zeros([len(skel_current), 3]) ii = 0 for i, _ in enumerate(skel_current): if i in occlusion_set: labels = px_label_flat == i correspondence_displacement[i] = np.sum( px_joint_displacement[px_label_flat == ii], 0 ) / np.sum( px_joint_displacement[px_label_flat == ii] != 0) ii += 1 correspondence_displacement = np.nan_to_num( correspondence_displacement) # print correspondence_displacement # Viz correspondences if 0: x = im_pos.copy() * 0 x[mask[box]] = px_joint_displacement for i in range(3): subplot(1, 4, i + 1) imshow(x[:, :, i]) axis('off') subplot(1, 4, 4) imshow((px_label + 1) * mask[box]) # embed() # for j in range(3): # for i in range(14): # subplot(3,14,j*14+i+1) # imshow(x[:,:,j]*((px_label==i)*mask[box])) # axis('off') show() # ---- (Step 2) Update pose state, x ---- lambda_p = .0 lambda_c = 1. skel_prev_difference = (skel_current - skel_previous) # print skel_prev_difference skel_current = skel_previous \ + lambda_p * skel_prev_difference \ - lambda_c * correspondence_displacement#\ # ---- (Step 3) Add constraints ---- if 1: # A: Link lengths / geometry # skel_current = link_length_constraints(skel_current, constraint_links, constraint_values, alpha=.5) skel_current = geometry_constraints(skel_current, joint_size, alpha=0.5) skel_current = collision_constraints( skel_current, constraint_links) skel_img_box = (cam.camera_model.world2im( skel_current, cam.depthIm.shape) - [box[0].start, box[1].start, 0] ) #/mask_interval skel_img_box = skel_img_box.clip([0, 0, 0], [ box[0].stop - box[0].start - 1, box[1].stop - box[1].start - 1, 9999 ]) # skel_img_box = skel_img_box.clip([0,0,0], [cam.depthIm.shape[0]-1, cam.depthIm.shape[1]-1, 9999]) # B: Ray-cast constraints # embed() skel_current, skel_current_uv = ray_cast_constraints( skel_current, skel_img_box, im_pos, surface_map, joint_size) # skel_img_box -= [box[0].start, box[1].start, 0] # # Map back from mask to image # try: # skel_current_uv[:,:2] = surface_map[:, skel_img_box[:,0], skel_img_box[:,1]].T# + [box_corner[0], box_corner[1]] # except: # pass prob = link_length_probability(skel_current, constraint_links, constraint_values, 100) # print "Prob:", np.mean(prob), np.min(prob), prob print frame_count thresh = .05 if np.min(prob) < thresh: # and frame_count > 1: print 'Resetting pose' for c in constraint_links[prob < thresh]: for cc in c: skel_current_uv[c] = pose_uv[c] - [ box[0].start, box[1].start, 0 ] skel_current[c] = pose[c] # skel_current_uv = pose_uv.copy() - [box[0].start, box[1].start, 0] # skel_current = pose.copy() skel_current_uv = skel_current_uv + [ box[0].start, box[1].start, 0 ] skel_current = cam.camera_model.im2world( skel_current_uv, cam.depthIm.shape) else: skel_current_uv = (cam.camera_model.world2im( skel_current, cam.depthIm.shape)) # skel_img_box = skel_img_box.clip([0,0,0], [cam.depthIm.shape[0]-1, cam.depthIm.shape[1]-1, 9999]) # Update for next round skel_previous = skel_current.copy() skel_previous_uv = skel_current_uv.copy() ''' ---- Accuracy ---- ''' # embed() if 1 and not learn: # pose_truth = cam.users[0] error_db = pose_truth - pose error_track = pose_truth - skel_current # print "Error", error error_l2_db = np.sqrt(np.sum(error_db**2, 1)) error_l2_track = np.sqrt(np.sum(error_track**2, 1)) joint_accuracy_db += [error_l2_db] joint_accuracy_track += [error_l2_track] accuracy_db = np.sum(error_l2_db < 150) / 14. accuracy_track = np.sum(error_l2_track < 150) / 14. print "Current db:", accuracy_db, error_l2_db.mean() print "Current track:", accuracy_track, error_l2_track.mean() print "" accuracy_all_db += [accuracy_db] accuracy_all_track += [accuracy_track] # print "Running avg:", np.mean(accuracy_all) # print "Joint avg (per-joint):", np.mean(joint_accuracy_all, -1) # print "Joint avg (overall):", np.mean(joint_accuracy_all) ''' --- Visualization --- ''' display_markers(cam.colorIm, hand_markers[:2], box, color=(0, 250, 0)) if len(hand_markers) > 2: display_markers(cam.colorIm, [hand_markers[2]], box, color=(0, 200, 0)) display_markers(cam.colorIm, geodesic_markers, box, color=(200, 0, 0)) # display_markers(cam.colorIm, curve_markers, box, color=(0,100,100)) # display_markers(cam.colorIm, lop_markers, box, color=(0,0,200)) cam.colorIm = display_skeletons(cam.colorIm, pose_truth_uv, skel_type='Kinect', color=(0, 255, 0)) cam.colorIm = display_skeletons(cam.colorIm, pose_uv, skel_type='Kinect') cam.colorIm = display_skeletons(cam.colorIm, skel_current_uv, skel_type='Kinect', color=(0, 0, 255)) # cam.visualize(color=True, depth=False) cam.visualize(color=True, depth=True) # embed() # ------------------------------------------------------------ # video_writer.write((geo_clf_map/float(geo_clf_map.max())*255.).astype(np.uint8)) # video_writer.write(cam.colorIm[:,:,[2,1,0]]) frame_count += frame_rate # except: # pass print "-- Results for subject {:d} action {:d}".format( subjects[0], actions[0]) print "Running avg (db):", np.mean(accuracy_all_db) print "Running avg (track):", np.mean(accuracy_all_track) print "Joint avg (overall db):", np.mean(joint_accuracy_db) print "Joint avg (overall track):", np.mean(joint_accuracy_track) # print 'Done' embed() return
def compute_features(name, vis=False, person_rez=[144, 72]): ''' ---Parameters--- filename : base filename for depth/color/skel vis: turn visualize on? person_rez : all hog/hof features should be the same dimensions, so we resize the people to this resolution ---Return--- features: features in a dictionary ''' ''' Get filenames ''' depth_file = name + "depth.bin" color_file = name + "rgb.avi" skeleton_file = name + "skeleton.txt" ''' Read data from each video/sequence ''' try: depthIms, maskIms = read_MSR_depth_ims(depth_file) colorIms = read_MSR_color_ims(color_file) skels_world, skels_im = read_MSR_skeletons(skeleton_file) except: print "Error reading data" return -1 #print depthIms.shape, colorIms.shape framecount = np.minimum(depthIms.shape[0], colorIms.shape[0]) dataset_features = { 'framecount': framecount, 'hog': [], 'hof': [], 'skel_image': [], 'skel_world': [] } grayIm_prev = None ''' View all data''' for frame in xrange(framecount): depth = depthIms[frame] mask = maskIms[frame] color = colorIms[frame] # Skeleton in world (w) and image (i) coordinates skel_w = skels_world[frame] skel_i = world2depth(skel_w, rez=[240, 320]) ''' Calculate hogs ''' grayIm = (rgb2gray(color) * 255).astype(np.uint8) hogIm = np.zeros_like(depth) person_mask, bounding_boxes, labels = extract_people(grayIm, mask > 0) rez = grayIm[bounding_boxes[0]].shape #hog_input_im = sm.imresize(grayIm[bounding_boxes[0]], person_rez) hog_input_im = cv2.resize(grayIm[bounding_boxes[0]], (person_rez[1], person_rez[0])) hogData, hogImBox = hog(hog_input_im, orientations=4, visualise=True) #hogIm[bounding_boxes[0]] = sm.imresize(hogImBox, [rez[0],rez[1]]) hogIm[bounding_boxes[0]] = cv2.resize(hogImBox, (rez[1], rez[0])) # hogIm[bounding_boxes[0]] = hogImBox hogIm *= person_mask ''' Calculate HOF ''' hofIm = np.zeros_like(depth) if grayIm_prev is None: grayIm_prev = np.copy(grayIm) continue else: flow = getFlow(grayIm_prev[bounding_boxes[0]], grayIm[bounding_boxes[0]]) rez = flow.shape bounding_boxes = (bounding_boxes[0][0], bounding_boxes[0][1], slice(0, 2)) #hof_input_im = np.dstack([sm.imresize(flow[0], [person_rez[0],person_rez[1]]), # sm.imresize(flow[1], [person_rez[0],person_rez[1]])])i hof_input_im = np.dstack([ cv2.resize(flow[0], (person_rez[1], person_rez[0])), cv2.resize(flow[1], (person_rez[1], person_rez[0])) ]) hofData, hofImBox = hof(hof_input_im, orientations=5, visualise=True) #hofIm[bounding_boxes[:2]] = sm.imresize(hofImBox, [rez[0],rez[1]]) hofIm[bounding_boxes[:2]] = cv2.resize(hogImBox, (rez[1], rez[0])) hofIm *= person_mask grayIm_prev = np.copy(grayIm) ''' Add features ''' dataset_features['hog'] += [hogData] dataset_features['hof'] += [hofData] dataset_features['skel_image'] += [skel_i] dataset_features['skel_world'] += [skel_w] ''' Plot skeletons on color image''' if vis: color = display_skeletons(color, skel_i) ''' Visualization ''' cv2.imshow("Depth", depth / float(depth.max())) cv2.imshow("HOG", hogIm / float(hogIm.max())) cv2.imshow("RGB", color) cv2.imshow("RGB masked", color * (mask[:, :, None] > 0)) cv2.imshow("HOF", hofIm / float(hofIm.max())) ret = cv2.waitKey(10) if ret >= 0: break print "Done calculating ", name return dataset_features
def main(visualize=False, learn=False): # Init both cameras # fill = True fill = False get_color = True cam = KinectPlayer(base_dir='./', device=1, bg_subtraction=True, get_depth=True, get_color=get_color, get_skeleton=True, fill_images=fill) cam2 = KinectPlayer(base_dir='./', device=2, bg_subtraction=True, get_depth=True, get_color=get_color, get_skeleton=True, fill_images=fill) # Transformation matrix from first to second camera data = pickle.load(open("Registration.dat", 'r')) transform_c1_to_c2 = data['transform'] # Get random forest parameters if learn: rf = RFPose(offset_max=100, feature_count=300) else: rf = RFPose() rf.load_forest() ii = 0 # cam.next(200) all_joint_ims_z = [] all_joint_ims_c = [] while cam.next() and ii < 200: # Update frames cam2.sync_cameras(cam) if ii%10 != 0: ii += 1 continue # Transform skels from cam1 to cam2 cam_skels = [np.array(cam.users[s]['jointPositions'].values()) for s in cam.users.keys()] # Get rid of bad skels cam_skels = [s for s in cam_skels if np.all(s[0] != -1)] if len(cam_skels) == 0: continue ii+=1 # Save images if 1: joint_ims_z = [] joint_ims_c = [] dx = 10 skel_tmp = skel2depth(cam_skels[0], [240,320]) for j_pos in skel_tmp: # embed() joint_ims_z += [cam.depthIm[j_pos[0]-dx:j_pos[0]+dx, j_pos[1]-dx:j_pos[1]+dx]] joint_ims_c += [cam.colorIm[j_pos[0]-dx:j_pos[0]+dx, j_pos[1]-dx:j_pos[1]+dx]] if len(joint_ims_z) > 0: all_joint_ims_z += [joint_ims_z] all_joint_ims_c += [joint_ims_c] if 0: cam2_skels = transform_skels(cam_skels, transform_c1_to_c2, 'image') try: depth = cam2.get_person() if learn: rf.add_frame(depth, cam2_skels[0]) else: rf.infer_pose(depth) if visualize: cam2.depthIm = display_skeletons(cam2.depthIm, cam2_skels[0], (5000,), skel_type='Low') skel1 = kinect_to_msr_skel(skel2depth(cam_skels[0], [240,320])) cam.depthIm = display_skeletons(cam.depthIm, skel1, (5000,), skel_type='Low') cam.visualize() cam2.visualize() except: pass embed() if learn: print "Starting forest" rf.learn_forest() print 'Done'
def main_infer(rf_name=None): ''' ''' if rf_name is None: import os files = os.listdir('Saved_Params/') rf_name = 'Saved_Params/' + files[-1] print "Classifier file:",rf_name # Load classifier data data = pickle.load(open(rf_name)) rf = data['rf'] offsets_1, offsets_2 = data['offsets'] ''' Read data from each video/sequence ''' depthIms = [] skels_world = [] if 1: # VP = KinectPlayer(base_dir='/Users/colin/Data/Office_25Feb2013/', device=2, get_depth=True, get_color=False, get_skeleton=True, get_mask=False) VP = KinectPlayer(base_dir='/Users/colin/Data/Room_close_26Feb13/', device=1, get_depth=True, get_color=False, get_skeleton=True, get_mask=False) _,_ = VP.get_n_skeletons(50) depthIms, skels_world = VP.get_n_skeletons(100) # depthIms = np.array(depthIms)[:,:,::-1] else: # name = 'a01_s01_e02_' name = 'a01_s10_e02_' # name = 'a02_s06_e02_' # name = 'a05_s02_e02_' depth_file = name + "depth.bin" color_file = name + "rgb.avi" skeleton_file = name + "skeleton.txt" try: depthIms, maskIms = read_MSR_depth_ims(depth_file) depthIms *= maskIms depthIms /= 10 # colorIms = read_MSR_color_ims(color_file) skels_world, skels_im = read_MSR_skeletons(skeleton_file) skels_world[:,2]/=10 except: print "Error reading data" ''' Process data ''' all_pred_ims = [] for i in xrange(1, len(depthIms), 1): # try: if 1: print i ''' Get frame data ''' im_depth = depthIms[i] # im_depth[160:, :] = 0 skel_pos = skel2depth(skels_world[i], rez=[240,320]) # ''' --- NOTE THIS 10 PX OFFSET IN THE MSR DATASET !!! --- ''' # skel_pos[:,0] -= 10 skel_pos_pred, im_predict = infer_pose(im_depth, rf, offsets_1, offsets_2) # Overlay skeletons if 1: # colorIm = colorIms[i] # im_predict = colorIm cv2.imshow("forest_prediction", im_predict/float(im_predict.max())) im_predict = np.repeat(im_depth[:,:,None].astype(np.float), 3, -1) # embed() im_predict[im_depth>0] -= im_depth[im_depth>0].min() im_predict /= float(im_predict.max()/255.) im_predict = im_predict.astype(np.uint8) # im_predict = np.repeat(im_predict[:,:,None], 3, -1) # im_predict /= float(im_predict.max())*255 # im_predict = im_predict.astype(np.uint8) im_predict = display_skeletons(im_predict, skel_pos, (255,0,0), SKEL_DISPLAY_MODE) im_predict = display_skeletons(im_predict, skel_pos_pred, (0,255,0), SKEL_DISPLAY_MODE) im_predict = display_skeletons(im_predict, skel_pos, (255,0,0), SKEL_DISPLAY_MODE) im_predict = display_skeletons(im_predict, skel_pos_pred, (0,255,0), SKEL_DISPLAY_MODE) # embed() # max_ = (im_predict * (im_predict < 255)).max() all_pred_ims += [im_predict] ''' Visualize ''' if 1: cv2.putText(im_predict, "Blue=Truth", (10, 210), cv2.FONT_HERSHEY_DUPLEX, .5, (int(im_predict.max()/2), 0, 0)) cv2.putText(im_predict, "Green=Predict", (10, 230), cv2.FONT_HERSHEY_DUPLEX, .5, (0, int(im_predict.max()/2), 0)) cv2.imshow("prediction", im_predict) ret = cv2.waitKey(10) if ret > 0: break time.sleep(.5) # except: # print "Frame failed:", i # break embed()
def main(visualize=False, learn=False, patch_size=32, n_frames=2500): if 1: get_color = True cam = MHADPlayer(base_dir='/Users/colin/Data/BerkeleyMHAD/', kinects=[1], actions=[1], subjects=[1], get_depth=True, get_color=True, get_skeleton=True, fill_images=False) # cam = KinectPlayer(base_dir='./', device=2, bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=True, fill_images=False) # cam.bgSubtraction.backgroundModel = sm.imread('/Users/colin/Data/CIRL_28Feb2013/depth/59/13/47/device_1/depth_59_13_47_4_13_35507.png').clip(0, 4500) # bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/Office_Background_B.tif') # bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_B.tif') # bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/Wall_Background_B.tif') # cam.bgSubtraction.backgroundModel = np.array(bg.getdata()).reshape([240,320]).clip(0, 4500) # embed() # cam = KinectPlayer(base_dir='./', device=2, bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=True, fill_images=False) elif 0: get_color = False cam = EVALPlayer(base_dir='/Users/colin/Data/EVAL/', bg_subtraction=True, get_depth=True, get_skeleton=True, fill_images=False) elif 0: get_color = False cam = MSRPlayer(base_dir='/Users/colin/Data/MSR_DailyActivities/Data/', actions=[1], subjects=[1, 2, 3, 4, 5], bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=True, fill_images=False) embed() height, width = cam.depthIm.shape skel_names = np.array(['head', 'neck', 'torso', 'l_shoulder', 'l_elbow', 'l_hand', \ 'r_shoulder', 'r_elbow', 'r_hand', 'l_hip', 'l_knee', 'l_foot',\ 'r_hip', 'r_knee', 'r_foot']) # skel_init, joint_size, constraint_links, features_joints,convert_to_kinect = get_11_joint_properties() skel_init, joint_size, constraint_links, features_joints, skel_parts, convert_to_kinect = get_13_joint_properties( ) # skel_init, joint_size, constraint_links, features_joints,skel_parts, convert_to_kinect = get_14_joint_properties() # skel_init, joint_size, constraint_links, features_joints,convert_to_kinect = get_15_joint_properties() constraint_values = [] for c in constraint_links: constraint_values += [ np.linalg.norm(skel_init[c[0]] - skel_init[c[1]], 2) ] constraint_values = np.array(constraint_values) skel_current = skel_init.copy() skel_previous = skel_current.copy() face_detector = FaceDetector() hand_template = sm.imread('/Users/colin/Desktop/fist.png')[:, :, 2] hand_template = (255 - hand_template) / 255. if height == 240: hand_template = cv2.resize(hand_template, (10, 10)) else: hand_template = cv2.resize(hand_template, (20, 20)) frame_count = 0 if get_color and height == 240: cam.next(220) accuracy_measurements = {'overall': [], 'per_joint': []} # Video writer # print '1' video_writer = cv2.VideoWriter("/Users/colin/Desktop/test.avi", cv2.cv.CV_FOURCC('M', 'J', 'P', 'G'), 15, (320, 240)) # print '1' # embed() while cam.next(1) and frame_count < n_frames: print "" print "Frame #{0:d}".format(frame_count) # Get rid of bad skeletons if type(cam.users) == dict: cam_skels = [ np.array(cam.users[s]['jointPositions'].values()) for s in cam.users ] else: cam_skels = [np.array(s) for s in cam.users] cam_skels = [s for s in cam_skels if np.all(s[0] != -1)] # Check for skeletons # if len(cam_skels) == 0: # continue # Apply mask to image mask = cam.get_person() > 0 # if mask is False: if 1: if len(cam_skels) > 0: # cam.colorIm = display_skeletons(cam.colorIm[:,:,2], skel_msr_im, (255,), skel_type='Kinect')[:,:,None] cam.colorIm[:, :, 1] = display_skeletons( cam.colorIm[:, :, 2], skel2depth(cam_skels[0], cam.depthIm.shape), (255, ), skel_type='Kinect') ## Max P=31 for LBPs becuase of datatype # tmp = local_binary_pattern(-cam.depthIm, 1, 10)#*(cam.foregroundMask>0) # embed() # tmp = local_occupancy_pattern(cam.depthIm, 31, 20, px_diff_thresh=100)*(cam.foregroundMask>0) # cv2.imshow("LBP", np.abs(tmp.astype(np.float))/float(tmp.max())) cam.colorIm = cam.colorIm[:, :, [0, 2, 1]] cam.visualize() continue # Anonomize # c_masked = cam.colorIm*mask[:,:,None] # d_masked = cam.depthIm*mask # c_masked_neg = cam.colorIm*(-mask[:,:,None]) im_depth = cam.depthIm if get_color: im_color = cam.colorIm im_color *= mask[:, :, None] im_color = np.ascontiguousarray(im_color) im_color = im_color[:, :, [2, 1, 0]] if len(cam_skels) > 0: skel_msr_xyz = cam_skels[0] skel_msr_im = skel2depth(cam_skels[0], cam.depthIm.shape) box = nd.find_objects(mask)[0] d = 20 box = (slice(np.maximum(box[0].start-d, 0), \ np.minimum(box[0].stop+d, height-1)), \ slice(np.maximum(box[1].start-d, 0), \ np.minimum(box[1].stop+d, width-1))) # Face and skin detection if get_color: face_detector.run(im_color[box]) im_skin = rgb2lab(cam.colorIm[box].astype(np.int16))[:, :, 1] # im_skin = skimage.exposure.equalize_hist(im_skin) # im_skin = skimage.exposure.rescale_intensity(im_skin, out_range=[0,1]) im_skin *= im_skin > face_detector.min_threshold im_skin *= im_skin < face_detector.max_threshold # im_skin *= face_detector>.068 skin_match_c = nd.correlate(im_skin, hand_template) # Display Predictions - Color Based matching optima = peak_local_max(skin_match_c, min_distance=20, num_peaks=3, exclude_border=False) # Visualize if len(optima) > 0: optima_values = skin_match_c[optima[:, 0], optima[:, 1]] optima_thresh = np.max(optima_values) / 2 optima = optima.tolist() for i, o in enumerate(optima): if optima_values[i] < optima_thresh: optima.pop(i) break joint = np.array(o) + [box[0].start, box[1].start] circ = np.array(circle(joint[0], joint[1], 5)).T circ = circ.clip([0, 0], [height - 1, width - 1]) cam.colorIm[circ[:, 0], circ[:, 1]] = ( 0, 120 - 30 * i, 0 ) #(255*(i==0),255*(i==1),255*(i==2)) markers = optima # ---------------- Tracking Algorithm ---------------- # ---- Preprocessing ---- if get_color: im_pos = rgbIm2PosIm(cam.depthIm * mask)[box] * mask[box][:, :, None] else: im_pos = cam.posIm[box] cam.depthIm[cam.depthIm == 4500] = 0 im_pos_mean = np.array([ im_pos[:, :, 0][im_pos[:, :, 2] != 0].mean(), im_pos[:, :, 1][im_pos[:, :, 2] != 0].mean(), im_pos[:, :, 2][im_pos[:, :, 2] != 0].mean() ], dtype=np.int16) # Zero-center if skel_current[0, 2] == 0: skel_current += im_pos_mean skel_previous += im_pos_mean # Calculate Geodesic Extrema extrema = geodesic_extrema_MPI(im_pos, iterations=15, visualize=False) if len(extrema) > 0: for i, o in enumerate(extrema): joint = np.array(o) + [box[0].start, box[1].start] circ = np.array(circle(joint[0], joint[1], 5)).T circ = circ.clip([0, 0], [height - 1, width - 1]) cam.colorIm[circ[:, 0], circ[:, 1]] = (0, 0, 200 - 10 * i) # Calculate Z-surface surface_map = nd.distance_transform_edt(-mask[box], return_distances=False, return_indices=True) # Only sample some of the points if 1: mask_interval = 1 feature_radius = 10 else: mask_interval = 3 feature_radius = 2 # Modify the box wrt the sampling box = (slice(box[0].start, box[0].stop, mask_interval), slice(box[1].start, box[1].stop, mask_interval)) im_pos_full = im_pos.copy() im_pos = im_pos[::mask_interval, ::mask_interval] box_height, box_width, _ = im_pos.shape skel_img_box = world2rgb( skel_current, cam.depthIm.shape) - [box[0].start, box[1].start, 0] skel_img_box = skel_img_box.clip( [0, 0, 0], [im_pos.shape[0] - 1, im_pos.shape[1] - 1, 9999]) feature_width = feature_radius * 2 + 1 all_features = [face_detector.face_position, optima, extrema] total_feature_count = np.sum([len(f) for f in all_features]) # Loop through the rest of the constraints for _ in range(1): # ---- (Step 1A) Find feature coordespondences ---- color_feature_displacement = feature_joint_displacements( skel_current, im_pos, all_features[1], features_joints[1], distance_thresh=500) depth_feature_displacement = feature_joint_displacements( skel_current, im_pos, all_features[2], features_joints[2], distance_thresh=500) # Alternative method: use kdtree ## Calc euclidian distance between each pixel and all joints px_corr = np.zeros( [im_pos.shape[0], im_pos.shape[1], len(skel_current)]) for i, s in enumerate(skel_current): px_corr[:, :, i] = np.sqrt(np.sum((im_pos - s)**2, -1)) # / joint_size[i]**2 ## Handle occlusions by argmax'ing over set of skel parts # visible_configurations = list(it.product([0,1], repeat=5))[1:] visible_configurations = [ #[0,1,1,1,1], #[1,0,0,0,0], [1, 1, 1, 1, 1] ] px_visibility_label = np.zeros([ im_pos.shape[0], im_pos.shape[1], len(visible_configurations) ], dtype=np.uint8) visible_scores = np.ones(len(visible_configurations)) * np.inf # Try each occlusion configuration set for i, v in enumerate(visible_configurations): visible_joints = list( it.chain.from_iterable(skel_parts[np.array(v) > 0])) px_visibility_label[:, :, i] = np.argmin( px_corr[:, :, visible_joints], -1) #.reshape([im_pos.shape[0], im_pos.shape[1]]) visible_scores[i] = np.min(px_corr[:, :, visible_joints], -1).sum() # Choose best occlusion configuration occlusion_index = np.argmin(visible_scores) occlusion_configuration = visible_configurations[occlusion_index] occlusion_set = list( it.chain.from_iterable(skel_parts[ np.array(visible_configurations[occlusion_index]) > 0])) # Choose label for pixels based on occlusion configuration px_label = px_visibility_label[:, :, occlusion_index] * mask[box] px_label_flat = px_visibility_label[:, :, occlusion_index][ mask[box]].flatten() visible_joints = [ 1 if x in occlusion_set else 0 for x in range(len(skel_current)) ] # Project distance to joint's radius px_joint_displacement = skel_current[px_label_flat] - im_pos[ mask[box]] px_joint_magnitude = np.sqrt(np.sum(px_joint_displacement**2, -1)) joint_mesh_pos = skel_current[ px_label_flat] + px_joint_displacement * ( joint_size[px_label_flat] / px_joint_magnitude)[:, None] px_joint_displacement = joint_mesh_pos - im_pos[mask[box]] # Calc the correspondance change in position for each joint correspondence_displacement = np.zeros([len(skel_current), 3]) ii = 0 for i, _ in enumerate(skel_current): if i in occlusion_set: labels = px_label_flat == i correspondence_displacement[i] = np.mean( px_joint_displacement[px_label_flat == ii], 0) # correspondence_displacement[ii] = np.sum(px_joint_displacement[px_label_flat==ii], 0) ii += 1 correspondence_displacement = np.nan_to_num( correspondence_displacement) # ---- (Step 2) Update pose state, x ---- lambda_p = .0 lambda_c = .3 lambda_cf = .3 lambda_df = .0 skel_prev_difference = (skel_current - skel_previous) # embed() skel_current = skel_previous \ + lambda_p * skel_prev_difference \ - lambda_c * correspondence_displacement\ - lambda_cf * color_feature_displacement\ - lambda_df * depth_feature_displacement # ---- (Step 3) Add constraints ---- # A: Link lengths / geometry skel_current = link_length_constraints(skel_current, constraint_links, constraint_values, alpha=.5) skel_current = geometry_constraints(skel_current, joint_size, alpha=0.5) # skel_current = collision_constraints(skel_current, constraint_links) skel_img_box = (world2rgb(skel_current, cam.depthIm.shape) - [box[0].start, box[1].start, 0]) / mask_interval skel_img_box = skel_img_box.clip( [0, 0, 0], [cam.depthIm.shape[0] - 1, cam.depthIm.shape[1] - 1, 9999]) # B: Ray-cast constraints skel_current, skel_img_box = ray_cast_constraints( skel_current, skel_img_box, im_pos_full, surface_map, joint_size) # # Map back from mask to image skel_img = skel_img_box + [box[0].start, box[1].start, 0] # Update for next round skel_previous = skel_current.copy() # skel_previous = skel_init.copy() # ---------------------Accuracy -------------------------------------- # Compute accuracy wrt standard Kinect data # skel_im_error = skel_msr_im[:,[1,0]] - skel_img[[0,2,3,4,5,6,7,8,9,10,11,12,13,14],:2] skel_current_kinect = convert_to_kinect(skel_current) try: skel_msr_im_box = np.array([ skel_msr_im[:, 1] - box[0].start, skel_msr_im[:, 0] - box[1].start ]).T.clip([0, 0], [box_height - 1, box_width - 1]) skel_xyz_error = im_pos[ skel_msr_im_box[:, 0], skel_msr_im_box[:, 1]] - skel_current_kinect #skel_current[[0,2,3,4,5,6,7,8,9,10,11,12],:] skel_l2 = np.sqrt(np.sum(skel_xyz_error**2, 1)) # print skel_l2 skel_correct = np.nonzero(skel_l2 < 150)[0] accuracy_measurements['per_joint'] += [skel_l2] accuracy_measurements['overall'] += [ len(skel_correct) / float(len(skel_current_kinect)) * 100 ] print "{0:0.2f}% joints correct".format( len(skel_correct) / float(len(skel_current_kinect)) * 100) print "Overall accuracy: ", np.mean( accuracy_measurements['overall']) except: pass print "Visible:", visible_joints # ----------------------Visualization------------------------------------- # l = line(skel_img_box[joint][0], skel_img_box[joint][1], features[feat][0], features[feat][1]) # skimage.draw.set_color(cam.colorIm[box], l, (255,255,255)) # Add circles to image cam.colorIm = np.ascontiguousarray(cam.colorIm) if 0: #get_color: cam.colorIm = display_skeletons( cam.colorIm, skel_img[:, [1, 0, 2]] * np.array(visible_joints)[:, None], ( 0, 255, ), skel_type='Other', skel_contraints=constraint_links) for i, s in enumerate(skel_img): # if i not in skel_correct: if i not in occlusion_set: c = circle(s[0], s[1], 5) cam.colorIm[c[0], c[1]] = (255, 0, 0) # cam.colorIm = display_skeletons(cam.colorIm, world2rgb(skel_init+im_pos_mean, [240,320])[:,[1,0]], skel_type='Other', skel_contraints=constraint_links) if 1: if len(face_detector.face_position) > 0: for (x, y) in face_detector.face_position: pt1 = (int(y) + box[1].start - 15, int(x) + box[0].start - 15) pt2 = (pt1[0] + int(15), pt1[1] + int(15)) cv2.rectangle(cam.colorIm, pt1, pt2, (255, 0, 0), 3, 8, 0) if len(cam_skels) > 0: # cam.colorIm = display_skeletons(cam.colorIm[:,:,2], skel_msr_im, (255,), skel_type='Kinect')[:,:,None] cam.colorIm[:, :, 1] = display_skeletons(cam.colorIm[:, :, 2], skel_msr_im, (255, ), skel_type='Kinect') cam.visualize() cam.depthIm = local_binary_pattern( cam.depthIm * cam.foregroundMask, 50, 10) cv2.imshow("Depth", cam.depthIm / float(cam.depthIm.max())) # cam2.visualize() # embed() # 3D Visualization if 0: from mayavi import mlab # figure = mlab.figure(1, bgcolor=(0,0,0), fgcolor=(1,1,1)) figure = mlab.figure(1, bgcolor=(1, 1, 1)) figure.scene.disable_render = True mlab.clf() mlab.view(azimuth=-45, elevation=45, roll=0, figure=figure) mlab.points3d(-skel_current[:, 1], -skel_current[:, 0], skel_current[:, 2], scale_factor=100., color=(.5, .5, .5)) for c in constraint_links: x = np.array([skel_current[c[0]][0], skel_current[c[1]][0]]) y = np.array([skel_current[c[0]][1], skel_current[c[1]][1]]) z = np.array([skel_current[c[0]][2], skel_current[c[1]][2]]) mlab.plot3d(-y, -x, z, tube_radius=25., color=(1, 0, 0)) figure.scene.disable_render = False # 3-panel view if 0: subplot(2, 2, 1) scatter(skel_current[:, 1], skel_current[:, 2]) for i, c in enumerate(constraint_links): plot([skel_current[c[0], 1], skel_current[c[1], 1]], [skel_current[c[0], 2], skel_current[c[1], 2]]) axis('equal') subplot(2, 2, 3) scatter(skel_current[:, 1], -skel_current[:, 0]) for i, c in enumerate(constraint_links): plot([skel_current[c[0], 1], skel_current[c[1], 1]], [-skel_current[c[0], 0], -skel_current[c[1], 0]]) axis('equal') subplot(2, 2, 4) scatter(skel_current[:, 2], -skel_current[:, 0]) for i, c in enumerate(constraint_links): plot([skel_current[c[0], 2], skel_current[c[1], 2]], [-skel_current[c[0], 0], -skel_current[c[1], 0]]) axis('equal') # show() # ------------------------------------------------------------ video_writer.write(cam.colorIm[:, :, [2, 1, 0]]) frame_count += 1 print 'Done'
def main(device_id, record, base_dir, frame_difference_percent, get_skeleton, anonomize, viz, motion_lag_time=10, min_fps=0.5): ''' ---parameters--- device_id record base_dir frame_difference_percent get_skeleton anonomize viz motion_lag_time min_fps ''' '''------------ Setup Kinect ------------''' ''' Physical Kinect ''' depthDevice = RealTimeDevice(device=device_id, get_depth=True, get_color=True, get_skeleton=get_skeleton) depthDevice.start() maxFramerate = 100 minFramerate = min_fps recentMotionTime = time.time() imgStoreCount = 100 ''' ------------- Main -------------- ''' ''' Setup time-based stuff ''' prevTime = 0 prevFrame = 0 prevFrameTime = 0 currentFrame = 0 ms = time.time() prevFPSTime = time.time() diff = 0 prevSec = 0; secondCount = 0 prevSecondCountMax = 0 ''' Ensure base folder is there ''' createDirectory(base_dir) skel_dir = None skel_filename = None maskDir = None maskName = None depthOld = [] backgroundModel = None while 1: # try: if 1: depthDevice.update() colorRaw = depthDevice.colorIm depthRaw = depthDevice.depthIm users = depthDevice.users skel = None if len(depthOld) == imgStoreCount: depthOld.pop(0) ''' If framerate is too fast then skip ''' ''' Keep this after update to ensure fast enough kinect refresh ''' if (time.time() - float(ms))*1000 < 1000.0/maxFramerate: continue ''' Get new time info ''' time_ = time.localtime() day = str(time_.tm_yday) hour = str(time_.tm_hour) minute = str(time_.tm_min) second = str(time_.tm_sec) ms = str(time.time()) ms_str = ms ''' Look at how much of the image has changed ''' if depthOld != []: mask = (depthRaw != 0) * (depthOld[0] != 0) diff = (((depthRaw - depthOld[0]).astype(np.int16) > 200)*mask).sum() / (float(mask.sum()) * 100.) ''' We want to watch all video for at least 5 seconds after we seen motion ''' ''' This prevents problems where there is small motion that doesn't trigger the motion detector ''' if diff > frame_difference_percent: recentMotionTime = time.time() depthOld.append(depthRaw) if anonomize: '''Background model''' if backgroundModel is None: bgSubtraction = AdaptiveMixtureOfGaussians(depthRaw, maxGaussians=3, learningRate=0.2, decayRate=0.9, variance=100**2) backgroundModel = bgSubtraction.getModel() continue else: bgSubtraction.update(depthRaw) backgroundModel = bgSubtraction.getModel() cv2.imshow("BG Model", backgroundModel/backgroundModel.max()) foregroundMask = bgSubtraction.get_foreground(thresh=100) ''' Find people ''' foregroundMask, _, _ = extract_people(depthRaw, foregroundMask, minPersonPixThresh=5000, gradientFilter=True, gradThresh=15) else: foregroundMask = None ''' Write to file if there has been substantial change. ''' if diff > frame_difference_percent or time.time()-prevFrameTime > 1/minFramerate or time.time()-recentMotionTime < motion_lag_time: currentFrame += 1 if depthRaw != []: ''' Logical time ''' if second != prevSec: prevSecondCountMax = secondCount secondCount = 0 prevSec = second else: secondCount = int(secondCount) + 1 secondCount = str(secondCount) if len(ms_str) == 1: ms_str = '0' + ms_str if len(secondCount) == 1: secondCount = '0' + secondCount ''' Keep track of framerate ''' if prevTime != second: prevTime = second print "FPS: {0:.1f} Diff: {1:.1f}%".format((currentFrame-prevFrame)/(time.time()-prevFPSTime), diff) prevFrame = currentFrame prevFPSTime = time.time() ''' Create folder/file names ''' if record: if 1: ''' Version 1. 2012 ''' ms_str = str(ms)[str(ms).find(".")+1:] depth_dir = base_dir+'depth/'+day+"/"+hour+"/"+minute+"/device_"+str(device_id) depth_filename = depth_dir + "/depth_"+day+"_"+hour+"_"+minute+"_"+second+"_"+secondCount+"_"+ms_str+".png" color_dir = base_dir+'color/'+day+"/"+hour+"/"+minute+"/device_"+str(device_id) color_filename = color_dir + "/color_"+day+"_"+hour+"_"+minute+"_"+second+"_"+secondCount+"_"+ms_str+".jpg" if get_skeleton: skel_dir = base_dir+'skel/'+day+"/"+hour+"/"+minute+"/device_"+str(device_id) skel_filename = skel_dir + "/skel_"+day+"_"+hour+"_"+minute+"_"+second+"_"+secondCount+"_"+ms_str+"_.dat" if anonomize: maskDir = base_dir+'mask/'+day+"/"+hour+"/"+minute+"/device_"+str(device_id) maskName = maskDir + "/mask_"+day+"_"+hour+"_"+minute+"_"+second+"_"+secondCount+"_"+ms_str+".jpg" else: ''' Version 2. April 2013 ''' base_sub_dir = "{:s}device_{:d}/{:s}/{:s}/{:s}".format(base_dir,device_id,day,hour,minute) depth_dir = "{:s}/depth".format(base_sub_dir) color_dir = "{:s}/color".format(base_sub_dir) depth_filename = "{:s}/depth_{:s}_{:s}_{:s}_{:s}_{:s}_{:s}_.png".format(depth_dir,day,hour,minute,second,secondCount,ms_str) color_filename = "{:s}/color_{:s}_{:s}_{:s}_{:s}_{:s}_{:s}_.jpg".format(color_dir,day,hour,minute,second,secondCount,ms_str) if get_skeleton: skel_dir = "{:s}/skel".format(base_sub_dir) skel_filename = "{:s}/skel_{:s}_{:s}_{:s}_{:s}_{:s}_{:s}_.dat".format(skel_dir,day,hour,minute,second,secondCount,ms_str) if anonomize: maskDir = "{:s}/mask".format(base_sub_dir) maskName = "{:s}/mask_{:s}_{:s}_{:s}_{:s}_{:s}_{:s}_.jpg".format(skel_dir,day,hour,minute,second,secondCount,ms_str) ''' Create folders if they doesn't exist ''' createDirectory(depth_dir) createDirectory(color_dir) if get_skeleton: createDirectory(skel_dir) if anonomize: createDirectory(maskDir) ''' Save data ''' save_frame(depth_filename, depthRaw, color_filename, colorRaw, skel_filename, users, maskName=maskName, mask=foregroundMask) prevFrameTime = time.time() ''' Display skeletons ''' if viz: d = np.array(depthRaw) d /= (np.nanmin([d.max(), 2**16])/256.0) d = d.astype(np.uint8) if get_skeleton: for u_key in users.keys(): u = users[u_key] pt = skel2depth(np.array([u.com]))[0] w = 10 d[pt[1]-w:pt[1]+w, pt[0]-w:pt[0]+w] = 255 w = 3 if u.tracked: pts = skel2depth(np.array(u.jointPositions.values()), d.shape) d = display_skeletons(d, pts, (100,0,0), skel_type='Kinect') if 1: cv2.imshow("imageD", d) if 0: cv2.imshow("imageM", colorRaw + (255-colorRaw)*(foregroundMask>0)[:,:,np.newaxis] + 50*(((foregroundMask)[:,:,np.newaxis]))) if 1: cv2.imshow("imageC", colorRaw) ret = cv2.waitKey(10) if ret >= 0: break
def main(anonomization=False): # Setup kinect data player # cam = KinectPlayer(base_dir='./', bg_subtraction=False, get_depth=True, get_color=True, get_skeleton=False) # actions = [2, 5, 7] actions = [2] actions_labels = ['JumpingJacks', 'Waving', 'Clapping'] action = 'JumpingJacks' subjects = range(1,10) cam = MHADPlayer(base_dir='/Users/colin/Data/BerkeleyMHAD/', kinect=1, actions=actions, subjects=subjects, reps=[1], get_depth=True, get_color=True, get_skeleton=True, fill_images=False) if anonomization: ''' bg_type can be: 'box'[param=max_depth] 'static'[param=background] 'mean' 'median' 'adaptive_mog' See BasePlayer for more details ''' cam.set_bg_model(bg_type='box', param=2600) # Test HOG pedestrian detector # cv2.HOGDescriptor_getDefaultPeopleDetector() # imshow(color[people]) body_data = [[]] framerate = 1 prev_n = len(cam.kinect_folder_names) action_idx = 0 # embed() while cam.next(framerate): if len(cam.kinect_folder_names) != prev_n: # action_idx += 1 body_data += [[]] prev_n = len(cam.kinect_folder_names) body_data[-1] += [cam.users_uv[0]] continue if 0: people_all = list(cv.HOGDetectMultiScale(cv.fromarray(cam.colorIm.mean(-1).astype(np.uint8)), cv.CreateMemStorage(0), hit_threshold=-1.5)) print len(people_all), " people detected" for i,people in enumerate(people_all): # embed() try: print people # tmp = cam.colorIm[people[0][0]:people[0][0]+people[1][0], people[0][1]:people[0][1]+people[1][1]] # tmp = cam.colorIm[people[0][1]:people[0][1]+people[1][1], people[0][0]:people[0][0]+people[1][0]] cam.colorIm[people[0][1]:people[0][1]+people[1][1], people[0][0]:people[0][0]+people[1][0]] += 50 # cv2.imshow("Body2 "+str(i), tmp) # subplot(4, len(people_all)/4, i + 1) # imshow(tmp) # tmp = cam.colorIm[people[0][0]:people[0][0]+people[0][1], people[1][0]:people[1][0]+people[1][1]] # cv2.imshow("Body1 "+str(i), tmp) # tmp = cam.colorIm[people[1][0]:people[1][0]+people[1][1], people[0][0]:people[0][0]+people[0][1]] # print tmp # cv2.imshow("Body "+str(i), tmp) except: pass # show() if anonomization and cam.mask is not None: mask = (sm.imresize(cam.mask, [480,640]) > 0).copy() mask[40:170, 80:140] = True # person in background px = draw.circle(145,285, 40) mask[px] = True # Version 1 - Blur + Circle color = cam.colorIm.copy() color[mask] = cv2.GaussianBlur(color, (29,29), 50)[mask] subplot(1,3,1) imshow(color) # Version 2 - Noise on mask color = cam.colorIm.copy() noise_key = np.random.randint(0, 255, cam.colorIm.shape).astype(np.uint8) color[mask] = color[mask]+noise_key[mask] subplot(1,3,2) imshow(color) # Version 3 - All noise color = cam.colorIm.copy() color = color+noise_key subplot(1,3,3) imshow(color) show() # cam.colorIm = cam.colorIm[:,:,[2,1,0]] # cam.colorIm *= mask[:,:,None] # cam.visualize(color=True, depth=True, text=True, colorize=True, depth_bounds=[0,4000]) # cam.visualize() cam.colorIm = display_skeletons(cam.colorIm, cam.users_uv[0], skel_type='Kinect', color=(0,255,0)) cam.visualize() body_data[actions_labels[action_idx]] += [cam.users_uv[0]] # cam.visualize(color=True, depth=True, text=False, colorize=False, depth_bounds=None) print 'Done' # Pause at the end # embed() import scipy.io as si si.matlab.savemat(action+".mat", {'data':body_data})
def main(visualize=False, learn=False, actions=None, subjects=None, n_frames=220): # learn = True # learn = False if actions is []: actions = [2] if subjects is []: subjects = [2] # actions = [1] # actions = [1, 2, 3, 4, 5] # subjects = [1] if 1: MHAD = True cam = MHADPlayer(base_dir='/Users/colin/Data/BerkeleyMHAD/', kinect=1, actions=actions, subjects=subjects, reps=[1], get_depth=True, get_color=True, get_skeleton=True, fill_images=False) else: MHAD = False cam = KinectPlayer(base_dir='./', device=1, bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=True, fill_images=False) bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_A.tif') # cam = KinectPlayer(base_dir='./', device=2, bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=True, fill_images=False) # bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_B.tif') cam.bgSubtraction.backgroundModel = np.array(bg.getdata()).reshape([240,320]).clip(0, 4500) height, width = cam.depthIm.shape skel_previous = None face_detector = FaceDetector() hand_detector = HandDetector(cam.depthIm.shape) # curve_detector = CurveDetector(cam.depthIm.shape) # Video writer # video_writer = cv2.VideoWriter("/Users/colin/Desktop/test.avi", cv2.cv.CV_FOURCC('M','J','P','G'), 15, (320,240)) # Save Background model # im = Image.fromarray(cam.depthIm.astype(np.int32), 'I') # im.save("/Users/Colin/Desktop/k2.png") # Setup pose database append = True append = False # pose_database = PoseDatabase("PoseDatabase.pkl", learn=learn, search_joints=[0,4,7,10,13], append=append) pose_database = PoseDatabase("PoseDatabase.pkl", learn=learn, search_joints=[0,2,4,5,7,10,13], append=append) # Setup Tracking skel_init, joint_size, constraint_links, features_joints,skel_parts, convert_to_kinect = get_14_joint_properties() constraint_values = [] for c in constraint_links: constraint_values += [np.linalg.norm(skel_init[c[0]]-skel_init[c[1]], 2)] constraint_values = np.array(constraint_values) skel_current = None#skel_init.copy() skel_previous = None#skel_current.copy() # Evaluation accuracy_all_db = [] accuracy_all_track = [] joint_accuracy_db = [] joint_accuracy_track = [] # geo_accuracy = [] # color_accuracy = [] # lbp_accuracy = [] frame_count = 0 frame_rate = 1 if not MHAD: cam.next(350) frame_prev = 0 # try: if 1: while cam.next(frame_rate):# and frame_count < n_frames: if frame_count - frame_prev > 100: print "" print "Frame #{0:d}".format(frame_count) frame_prev = frame_count if not MHAD: if len(cam.users) == 0: continue else: # cam.users = [np.array(cam.users[0]['jointPositions'].values())] if np.any(cam.users[0][0] == -1): continue cam.users[0][:,1] *= -1 cam.users_uv_msr = [cam.camera_model.world2im(cam.users[0], [240,320])] # Apply mask to image if MHAD: mask = cam.get_person(2) > 0 else: mask = cam.get_person() > 0 if np.all(mask==False): continue im_depth = cam.depthIm cam.depthIm[cam.depthIm>3000] = 0 im_color = cam.colorIm*mask[:,:,None] cam.colorIm *= mask[:,:,None] pose_truth = cam.users[0] pose_truth_uv = cam.users_uv_msr[0] # Get bounding box around person box = nd.find_objects(mask)[0] d = 20 # Widen box box = (slice(np.maximum(box[0].start-d, 0), \ np.minimum(box[0].stop+d, height-1)), \ slice(np.maximum(box[1].start-d, 0), \ np.minimum(box[1].stop+d, width-1))) box_corner = [box[0].start,box[1].start] ''' ---------- ----------------------------------- --------''' ''' ----------- Feature Detector centric approach ---------''' ''' ---------- ----------------------------------- --------''' ''' ---- Calculate Detectors ---- ''' # Face detection face_detector.run(im_color[box]) # Skin detection hand_markers = hand_detector.run(im_color[box], n_peaks=3) # Calculate Geodesic Extrema im_pos = cam.camera_model.im2PosIm(cam.depthIm*mask)[box] * mask[box][:,:,None] geodesic_markers = geodesic_extrema_MPI(im_pos, iterations=5, visualize=False) _, geo_map = geodesic_extrema_MPI(im_pos, iterations=1, visualize=True) geodesic_markers_pos = im_pos[geodesic_markers[:,0], geodesic_markers[:,1]] markers = list(geodesic_markers) + list(hand_markers) #+ list(lop_markers) + curve_markers markers = np.array([list(x) for x in markers]) ''' ---- Database lookup ---- ''' if 1: pts_mean = im_pos[(im_pos!=0)[:,:,2]].mean(0) if learn: # Normalize pose pose_uv = cam.users_uv[0] if np.any(pose_uv==0): print "skip" frame_count += frame_rate continue pose_database.update(pose_truth - pts_mean) else: # Concatenate markers markers = list(geodesic_markers) + hand_markers markers = np.array([list(x) for x in markers]) # Normalize pose pts = im_pos[markers[:,0], markers[:,1]] pts = np.array([x for x in pts if x[0] != 0]) pts -= pts_mean # Get closest pose pose = pose_database.query(pts, knn=5) # embed() for i in range(5): pose_tmp = cam.camera_model.world2im(pose[i]+pts_mean, cam.depthIm.shape) cam.colorIm = display_skeletons(cam.colorIm, pose_tmp, skel_type='Kinect', color=(0,i*40+50,0)) pose = pose[0] # im_pos -= pts_mean # R,t = IterativeClosestPoint(pose, im_pos.reshape([-1,3])-pts_mean, max_iters=5, min_change=.001, pt_tolerance=10000) # pose = np.dot(R.T, pose.T).T - t # pose = np.dot(R, pose.T).T + t pose += pts_mean pose_uv = cam.camera_model.world2im(pose, cam.depthIm.shape) # print pose surface_map = nd.distance_transform_edt(-nd.binary_erosion(mask[box]), return_distances=False, return_indices=True) try: pose_uv[:,:2] = surface_map[:, pose_uv[:,0]-box_corner[0], pose_uv[:,1]-box_corner[1]].T + [box_corner[0], box_corner[1]] except: pass pose = cam.camera_model.im2world(pose_uv, cam.depthIm.shape) # print pose ''' ---- Tracker ---- ''' # surface_map = nd.distance_transform_edt(-mask[box], return_distances=False, return_indices=True) # surface_map = nd.distance_transform_edt(im_pos[:,:,2]==0, return_distances=False, return_indices=True) if skel_previous is None: # if 1: skel_previous = pose.copy() skel_current = pose.copy() skel_previous_uv = pose_uv.copy() skel_current_uv = pose_uv.copy() for _ in range(1): # ---- (Step 1A) Find feature coordespondences ---- try: skel_previous_uv[:,:2] = surface_map[:, skel_previous_uv[:,0]-box_corner[0], skel_previous_uv[:,1]-box_corner[1]].T + [box_corner[0], box_corner[1]] except: pass skel_current = cam.camera_model.im2world(skel_previous_uv, cam.depthIm.shape) # Alternative method: use kdtree ## Calc euclidian distance between each pixel and all joints px_corr = np.zeros([im_pos.shape[0], im_pos.shape[1], len(skel_current)]) # for i,s in enumerate(pose): # for i,s in enumerate(skel_current): # px_corr[:,:,i] = np.sqrt(np.sum((im_pos - s)**2, -1))# / joint_size[i]**2 # for i,s in enumerate(pose_uv): # Geodesics for i,s in enumerate(skel_previous_uv): ''' Problem: need to constrain pose_uv to mask ''' _, geo_map = geodesic_extrema_MPI(im_pos, [s[0]-box_corner[0],s[1]-box_corner[1]], iterations=1, visualize=True) px_corr[:,:,i] = geo_map subplot(2,7,i+1) # imshow(geo_map, vmin=0, vmax=2000) # axis('off') px_corr[geo_map==0,i] = 9999 cv2.imshow('gMap', (px_corr.argmin(-1)+1)/15.*mask[box]) ## Handle occlusions by argmax'ing over set of skel parts # visible_configurations = list(it.product([0,1], repeat=5))[1:] visible_configurations = [ # [0,1,1,1,1], # [1,0,0,0,0], [1,1,1,1,1] ] px_visibility_label = np.zeros([im_pos.shape[0], im_pos.shape[1], len(visible_configurations)], dtype=np.uint8) visible_scores = np.ones(len(visible_configurations))*np.inf # Try each occlusion configuration set for i,v in enumerate(visible_configurations): visible_joints = list(it.chain.from_iterable(skel_parts[np.array(v)>0])) px_visibility_label[:,:,i] = np.argmin(px_corr[:,:,visible_joints], -1)#.reshape([im_pos.shape[0], im_pos.shape[1]]) visible_scores[i] = np.min(px_corr[:,:,visible_joints], -1).sum() # Choose best occlusion configuration occlusion_index = np.argmin(visible_scores) occlusion_configuration = visible_configurations[occlusion_index] occlusion_set = list(it.chain.from_iterable(skel_parts[np.array(visible_configurations[occlusion_index])>0])) # Choose label for pixels based on occlusion configuration px_label = px_visibility_label[:,:,occlusion_index]*mask[box] px_label_flat = px_visibility_label[:,:,occlusion_index][mask[box]].flatten() visible_joints = [1 if x in occlusion_set else 0 for x in range(len(pose))] # print visible_joints # Project distance to joint's radius px_joint_displacement = im_pos[mask[box]] - skel_current[px_label_flat] px_joint_magnitude = np.sqrt(np.sum(px_joint_displacement**2,-1)) joint_mesh_pos = skel_current[px_label_flat] + px_joint_displacement*(joint_size[px_label_flat]/px_joint_magnitude)[:,None] px_joint_displacement = joint_mesh_pos - im_pos[mask[box]] # Ensure pts aren't too far away px_joint_displacement[np.abs(px_joint_displacement) > 500] = 0 # embed() if 0: x = im_pos.copy()*0 x[mask[box]] = joint_mesh_pos for i in range(3): subplot(1,4,i+1) imshow(x[:,:,i]) axis('off') subplot(1,4,4) imshow((px_label+1)*mask[box]) # Calc the correspondance change in position for each joint correspondence_displacement = np.zeros([len(skel_current), 3]) ii = 0 for i,_ in enumerate(skel_current): if i in occlusion_set: labels = px_label_flat==i correspondence_displacement[i] = np.sum(px_joint_displacement[px_label_flat==ii], 0) / np.sum(px_joint_displacement[px_label_flat==ii]!=0) ii+=1 correspondence_displacement = np.nan_to_num(correspondence_displacement) # print correspondence_displacement # Viz correspondences if 0: x = im_pos.copy()*0 x[mask[box]] = px_joint_displacement for i in range(3): subplot(1,4,i+1) imshow(x[:,:,i]) axis('off') subplot(1,4,4) imshow((px_label+1)*mask[box]) # embed() # for j in range(3): # for i in range(14): # subplot(3,14,j*14+i+1) # imshow(x[:,:,j]*((px_label==i)*mask[box])) # axis('off') show() # ---- (Step 2) Update pose state, x ---- lambda_p = .0 lambda_c = 1. skel_prev_difference = (skel_current - skel_previous) # print skel_prev_difference skel_current = skel_previous \ + lambda_p * skel_prev_difference \ - lambda_c * correspondence_displacement#\ # ---- (Step 3) Add constraints ---- if 1: # A: Link lengths / geometry # skel_current = link_length_constraints(skel_current, constraint_links, constraint_values, alpha=.5) skel_current = geometry_constraints(skel_current, joint_size, alpha=0.5) skel_current = collision_constraints(skel_current, constraint_links) skel_img_box = (cam.camera_model.world2im(skel_current, cam.depthIm.shape) - [box[0].start, box[1].start, 0])#/mask_interval skel_img_box = skel_img_box.clip([0,0,0], [box[0].stop-box[0].start-1, box[1].stop-box[1].start-1, 9999]) # skel_img_box = skel_img_box.clip([0,0,0], [cam.depthIm.shape[0]-1, cam.depthIm.shape[1]-1, 9999]) # B: Ray-cast constraints # embed() skel_current, skel_current_uv = ray_cast_constraints(skel_current, skel_img_box, im_pos, surface_map, joint_size) # skel_img_box -= [box[0].start, box[1].start, 0] # # Map back from mask to image # try: # skel_current_uv[:,:2] = surface_map[:, skel_img_box[:,0], skel_img_box[:,1]].T# + [box_corner[0], box_corner[1]] # except: # pass prob = link_length_probability(skel_current, constraint_links, constraint_values, 100) # print "Prob:", np.mean(prob), np.min(prob), prob print frame_count thresh = .05 if np.min(prob) < thresh:# and frame_count > 1: print 'Resetting pose' for c in constraint_links[prob<thresh]: for cc in c: skel_current_uv[c] = pose_uv[c] - [box[0].start, box[1].start, 0] skel_current[c] = pose[c] # skel_current_uv = pose_uv.copy() - [box[0].start, box[1].start, 0] # skel_current = pose.copy() skel_current_uv = skel_current_uv + [box[0].start, box[1].start, 0] skel_current = cam.camera_model.im2world(skel_current_uv, cam.depthIm.shape) else: skel_current_uv = (cam.camera_model.world2im(skel_current, cam.depthIm.shape)) # skel_img_box = skel_img_box.clip([0,0,0], [cam.depthIm.shape[0]-1, cam.depthIm.shape[1]-1, 9999]) # Update for next round skel_previous = skel_current.copy() skel_previous_uv = skel_current_uv.copy() ''' ---- Accuracy ---- ''' # embed() if 1 and not learn: # pose_truth = cam.users[0] error_db = pose_truth - pose error_track = pose_truth - skel_current # print "Error", error error_l2_db = np.sqrt(np.sum(error_db**2, 1)) error_l2_track = np.sqrt(np.sum(error_track**2, 1)) joint_accuracy_db += [error_l2_db] joint_accuracy_track += [error_l2_track] accuracy_db = np.sum(error_l2_db < 150) / 14. accuracy_track = np.sum(error_l2_track < 150) / 14. print "Current db:", accuracy_db, error_l2_db.mean() print "Current track:", accuracy_track, error_l2_track.mean() print "" accuracy_all_db += [accuracy_db] accuracy_all_track += [accuracy_track] # print "Running avg:", np.mean(accuracy_all) # print "Joint avg (per-joint):", np.mean(joint_accuracy_all, -1) # print "Joint avg (overall):", np.mean(joint_accuracy_all) ''' --- Visualization --- ''' display_markers(cam.colorIm, hand_markers[:2], box, color=(0,250,0)) if len(hand_markers) > 2: display_markers(cam.colorIm, [hand_markers[2]], box, color=(0,200,0)) display_markers(cam.colorIm, geodesic_markers, box, color=(200,0,0)) # display_markers(cam.colorIm, curve_markers, box, color=(0,100,100)) # display_markers(cam.colorIm, lop_markers, box, color=(0,0,200)) cam.colorIm = display_skeletons(cam.colorIm, pose_truth_uv, skel_type='Kinect', color=(0,255,0)) cam.colorIm = display_skeletons(cam.colorIm, pose_uv, skel_type='Kinect') cam.colorIm = display_skeletons(cam.colorIm, skel_current_uv, skel_type='Kinect', color=(0,0,255)) # cam.visualize(color=True, depth=False) cam.visualize(color=True, depth=True) # embed() # ------------------------------------------------------------ # video_writer.write((geo_clf_map/float(geo_clf_map.max())*255.).astype(np.uint8)) # video_writer.write(cam.colorIm[:,:,[2,1,0]]) frame_count += frame_rate # except: # pass print "-- Results for subject {:d} action {:d}".format(subjects[0],actions[0]) print "Running avg (db):", np.mean(accuracy_all_db) print "Running avg (track):", np.mean(accuracy_all_track) print "Joint avg (overall db):", np.mean(joint_accuracy_db) print "Joint avg (overall track):", np.mean(joint_accuracy_track) # print 'Done' embed() return
def main(visualize=False, learn=False, actions=[1], subjects=[1], n_frames=220): # search_joints=[0,2,4,5,7,10,13] search_joints=range(14) # interactive = True interactive = False save_results = False if 0: learn = False # learn = learn else: learn = True actions = [1] subjects = [1] # actions = range(1,10) # subjects = range(1,9) if 1: dataset = 'MHAD' cam = MHADPlayer(base_dir='/Users/colin/Data/BerkeleyMHAD/', kinect=1, actions=actions, subjects=subjects, reps=[1], get_depth=True, get_color=True, get_skeleton=True, fill_images=False) elif 0: dataset = 'JHU' cam = KinectPlayer(base_dir='./', device=1, bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=True, fill_images=False) bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_A.tif') # bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/Wall_Background_A.tif') # bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/Office_Background_A.tif') # bg = Image.open('/Users/colin/Data/WICU_May2013_C2/WICU_C2_Background.tif') # cam = KinectPlayer(base_dir='./', device=2, bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=True, fill_images=False) # bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_B.tif') cam.bgSubtraction.backgroundModel = np.array(bg.getdata()).reshape([240,320]).clip(0, 4500) - 000. else: # Realtime dataset = 'RT' cam = RealtimePlayer(device=0, edit=True, get_depth=True, get_color=True, get_skeleton=True) # cam.set_bg_model('box', 2500) tmp = cam.depthIm tmp[tmp>4000] = 4000 cam.set_bg_model(bg_type='static', param=tmp) # embed() height, width = cam.depthIm.shape skel_previous = None face_detector = FaceDetector() hand_detector = HandDetector(cam.depthIm.shape) n_joints = 14 # gmm = GMM(n_components=n_joints) kmeans = KMeans(n_clusters=n_joints, n_init=4, max_iter=100) # Video writer # video_writer = cv2.VideoWriter("/Users/colin/Desktop/test.avi", cv2.cv.CV_FOURCC('M','J','P','G'), 15, (640,480)) # Save Background model # im = Image.fromarray(cam.depthIm.astype(np.int32), 'I') # im.save("/Users/Colin/Desktop/k2.png") # Setup pose database append = True # append = False # pose_database = PoseDatabase("PoseDatabase.pkl", learn=learn, search_joints=[0,4,7,10,13], append=append) # pose_database = PoseDatabase("PoseDatabase.pkl", learn=learn, search_joints=search_joints, # append=append, scale=1.1, n_clusters=-1)#1000 pose_database = PoseDatabase("PoseDatabase.pkl", learn=learn, search_joints=search_joints, append=append, scale=1.0, n_clusters=1500) pose_prob = np.ones(len(pose_database.database), dtype=np.float)/len(pose_database.database) # embed() # Setup Tracking skel_init, joint_size, constraint_links, features_joints,skel_parts, convert_to_kinect = get_14_joint_properties() constraint_values = [] for c in constraint_links: constraint_values += [np.linalg.norm(skel_init[c[0]]-skel_init[c[1]], 2)] constraint_values = np.array(constraint_values) skel_current = None#skel_init.copy() skel_previous = None#skel_current.copy() skel_previous_uv = None # Evaluation accuracy_all_db = [] accuracy_all_track = [] joint_accuracy_db = [] joint_accuracy_track = [] if not learn: try: results = pickle.load(open('Accuracy_Results.pkl')) except: results = { 'subject':[], 'action':[], 'accuracy_all':[], 'accuracy_mean':[], 'joints_all':[], 'joint_mean':[], 'joint_median':[]} frame_count = 0 frame_rate = 10 if dataset == 'JHU': cam.next(350) # cam.next(700) pass frame_prev = 0 try: # if 1: while cam.next(frame_rate):# and frame_count < n_frames: # Print every once in a while if frame_count - frame_prev > 99: print "" print "Frame #{0:d}".format(frame_count) frame_prev = frame_count if dataset in ['MHAD', 'JHU']: users = deepcopy(cam.users) else: users = deepcopy(cam.user_skels) ground_truth = False if dataset in ['RT','JHU']: if len(users) > 0: if not np.any(users[0][0] == -1): ground_truth = True users[0][:,1] *= -1 cam.users_uv_msr = [cam.camera_model.world2im(users[0], cam.depthIm.shape)] else: ground_truth = True # Apply mask to image mask = cam.get_person(200) == 1 # > 0 # cv2.imshow('bg',(mask*255).astype(np.uint8)) # cv2.imshow('bg',cam.colorIm) # cv2.waitKey(1) if type(mask)==bool or np.all(mask==False): # print "No mask" continue # cv2.imshow('bg',cam.bgSubtraction.backgroundModel) # cv2.imshow('bg',(mask*255).astype(np.uint8)) im_depth = cam.depthIm # if dataset in ['RT']: # cam.depthIm[cam.depthIm>2500] = 0 if cam.colorIm is not None: im_color = cam.colorIm*mask[:,:,None] cam.colorIm *= mask[:,:,None] if ground_truth: pose_truth = users[0] pose_truth_uv = cam.users_uv_msr[0] # Get bounding box around person box = nd.find_objects(mask)[0] d = 20 # Widen box box = (slice(np.maximum(box[0].start-d, 0), \ np.minimum(box[0].stop+d, height-1)), \ slice(np.maximum(box[1].start-d, 0), \ np.minimum(box[1].stop+d, width-1))) box_corner = [box[0].start,box[1].start] mask_box = mask[box] ''' ---------- ----------------------------------- --------''' ''' ---------- ----------------------------------- --------''' ''' ---- Calculate Detectors ---- ''' # Face detection # face_detector.run(im_color[box]) # Skin detection # hand_markers = hand_detector.run(im_color[box], n_peaks=3) hand_markers = [] # Calculate Geodesic Extrema im_pos = cam.camera_model.im2PosIm(cam.depthIm*mask)[box] # geodesic_markers = geodesic_extrema_MPI(im_pos, iterations=5, visualize=False) if 1: ''' Find pts using kmeans or gmm ''' pts = im_pos[np.nonzero(im_pos)].reshape([-1,3]) # gmm.fit(pts) kmeans.fit(pts) # pts = cam.camera_model.world2im(gmm.means_) pts = cam.camera_model.world2im(kmeans.cluster_centers_) geodesic_markers = pts[:,:2] - box_corner else: ''' Find pts using geodesic extrema ''' geodesic_markers = geodesic_extrema_MPI(im_pos, iterations=10, visualize=False) if len(geodesic_markers) == 0: print "No markers" continue # Concatenate markers markers = list(geodesic_markers) + list(hand_markers) #+ list(lop_markers) + curve_markers markers = np.array([list(x) for x in markers]) if np.any(markers==0): print "Bad markers" continue ''' ---- Database lookup ---- ''' time_t0 = time.time() pts_mean = im_pos[(im_pos!=0)[:,:,2]].mean(0) if learn and ground_truth: # pose_uv = pose_truth_uv if np.any(pose_truth_uv==0): frame_count += frame_rate if not interactive: continue # Markers can be just outside of bounds markers = list(geodesic_markers) + hand_markers markers = np.array([list(x) for x in markers]) # pose_database.update(pose_truth-pts_mean, keys=im_pos[markers[:,0],markers[:,1]]-pts_mean) pose_database.update(pose_truth-pts_mean) if not interactive: continue # else: if 1: # Normalize pose pts = im_pos[markers[:,0], markers[:,1]] pts = np.array([x for x in pts if x[0] != 0]) pts -= pts_mean # Get closest pose # Based on markers/raw positions # poses_obs, pose_error = pose_database.query(pts, knn=1, return_error=True) pose_error = pose_query(pts, np.array(pose_database.database), search_joints=search_joints) # pose_error = query_error(pts, pose_database.trees, search_joints=search_joints) # Based on markers/keys: # pts = im_pos[markers[:,0], markers[:,1]] - pts_mean # # poses, pose_error = pose_database.query_tree(pts, knn=len(pose_database.database), return_error=True) # # poses, pose_error = pose_database.query_flann(pts, knn=len(pose_database.database), return_error=True) # pose_error = np.sqrt(np.sum((pose_database.keys - pts.reshape([27]))**2, 1)) observation_variance = 100. prob_obervation = np.exp(-pose_error / observation_variance) / np.sum(np.exp(-pose_error/observation_variance)) # subplot(2,2,1) # plot(prob_obervation) # subplot(2,2,2) # plot(prob_motion) # subplot(2,2,3) # plot(pose_prob_new) # subplot(2,2,4) # plot(pose_prob) # show() # inference = 'NN' inference = 'Bayes' # inference = 'PF' if inference=='NN': # Nearest neighbor poses_obs, _ = pose_database.query(pts, knn=1, return_error=True) poses = [poses_obs[0]] elif inference=='Bayes': # Bayes if frame_count is 0: poses_obs, _ = pose_database.query(pts, knn=1, return_error=True) skel_previous = poses_obs[0].copy() # poses_m, pose_m_error = pose_database.query(skel_previous-pts_mean, knn=1, return_error=True) pose_m_error = pose_query(skel_previous-pts_mean, np.array(pose_database.database), search_joints=search_joints) # poses_m, pose_m_error = pose_database.query(skel_previous-pts_mean+(np.random.random([3,14])-.5).T*30, knn=5, return_error=True) motion_variance = 10000. prob_motion = np.exp(-pose_m_error / motion_variance) / np.sum(np.exp(-pose_m_error/motion_variance)) pose_prob_new = prob_obervation*prob_motion if pose_prob_new.shape == pose_prob.shape: pose_prob = (pose_prob_new+pose_prob).T/2. else: pose_prob = pose_prob_new.T prob_sorted = np.argsort(pose_prob) poses = [pose_database.database[np.argmax(pose_prob)]] # poses = pose_database.database[prob_sorted[-1:]] # Particle Filter elif inference=='PF': prob_sorted = np.argsort(pose_prob) poses = pose_database.database[prob_sorted[-5:]] ## ICP # im_pos -= pts_mean # R,t = IterativeClosestPoint(pose, im_pos.reshape([-1,3])-pts_mean, max_iters=5, min_change=.001, pt_tolerance=10000) # pose = np.dot(R.T, pose.T).T - t # pose = np.dot(R, pose.T).T + t # scale = 1. # poses *= scale poses += pts_mean # print "DB time:", time.time() - time_t0 ''' ---- Tracker ---- ''' surface_map = nd.distance_transform_edt(-nd.binary_erosion(mask_box), return_distances=False, return_indices=True) if skel_previous_uv is None: skel_previous = poses[0].copy() skel_current = poses[0].copy() pose_tmp = cam.camera_model.world2im(poses[0], cam.depthIm.shape) skel_previous_uv = pose_tmp.copy() skel_current_uv = pose_tmp.copy() pose_weights = np.zeros(len(poses), dtype=np.float) pose_updates = [] pose_updates_uv = [] time_t0 = time.time() # 2) Sample poses if inference in ['PF', 'Bayes']: for pose_i, pose in enumerate(poses): skel_current = skel_previous.copy() skel_current_uv = skel_previous_uv.copy() pose_uv = cam.camera_model.world2im(pose, cam.depthIm.shape) try: pose_uv[:,:2] = surface_map[:, pose_uv[:,0]-box_corner[0], pose_uv[:,1]-box_corner[1]].T + [box_corner[0], box_corner[1]] except: pass pose = cam.camera_model.im2world(pose_uv, cam.depthIm.shape) # ---- (Step 2) Update pose state, x ---- correspondence_displacement = skel_previous - pose lambda_p = .0 lambda_c = 1. skel_prev_difference = (skel_current - skel_previous) # print skel_prev_difference skel_current = skel_previous \ + lambda_p * skel_prev_difference \ - lambda_c * correspondence_displacement#\ # ---- (Step 3) Add constraints ---- # A: Link lengths / geometry # skel_current = link_length_constraints(skel_current, constraint_links, constraint_values, alpha=.5) # skel_current = geometry_constraints(skel_current, joint_size, alpha=0.5) # skel_current = collision_constraints(skel_current, constraint_links) skel_current_uv = (cam.camera_model.world2im(skel_current, cam.depthIm.shape) - [box[0].start, box[1].start, 0])#/mask_interval skel_current_uv = skel_current_uv.clip([0,0,0], [box[0].stop-box[0].start-1, box[1].stop-box[1].start-1, 9999]) # B: Ray-cast constraints skel_current, skel_current_uv = ray_cast_constraints(skel_current, skel_current_uv, im_pos, surface_map, joint_size) # Map back from mask to image # try: # skel_current_uv[:,:2] = surface_map[:, skel_current_uv[:,0], skel_current_uv[:,1]].T# + [box_corner[0], box_corner[1]] # except: # pass # ---- (Step 4) Update the confidence ---- if inference=='PF': time_t1 = time.time() ## Calc distance between each pixel and all joints px_corr = np.zeros([im_pos.shape[0], im_pos.shape[1], 14]) for i,s in enumerate(skel_current): px_corr[:,:,i] = np.sqrt(np.sum((im_pos - s)**2, -1))# / joint_size[i]**2 # for i,s in enumerate(pose_uv): # for i,s in enumerate(skel_current_uv): # ''' Problem: need to constrain pose_uv to mask ''' # _, geo_map = geodesic_extrema_MPI(im_pos, [s[0],s[1]], iterations=1, visualize=True) # px_corr[:,:,i] = geo_map # subplot(2,7,i+1) # imshow(geo_map, vmin=0, vmax=2000) # axis('off') # px_corr[geo_map==0,i] = 9999 px_label = np.argmin(px_corr, -1)*mask_box px_label_flat = px_label[mask_box].flatten() # cv2.imshow('gMap', (px_corr.argmin(-1)+1)/15.*mask_box) # cv2.waitKey(1) # Project distance to joint's radius px_joint_displacement = im_pos[mask_box] - skel_current[px_label_flat] px_joint_magnitude = np.sqrt(np.sum(px_joint_displacement**2,-1)) joint_mesh_pos = skel_current[px_label_flat] + px_joint_displacement*(joint_size[px_label_flat]/px_joint_magnitude)[:,None] px_joint_displacement = joint_mesh_pos - im_pos[mask_box] # Ensure pts aren't too far away (these are noise!) px_joint_displacement[np.abs(px_joint_displacement) > 500] = 0 if 0: x = im_pos.copy()*0 x[mask_box] = joint_mesh_pos for i in range(3): subplot(1,4,i+1) imshow(x[:,:,i]) axis('off') subplot(1,4,4) imshow((px_label+1)*mask_box) # Calc the correspondance change in position for each joint correspondence_displacement = np.zeros([len(skel_current), 3]) ii = 0 for i,_ in enumerate(skel_current): labels = px_label_flat==i correspondence_displacement[i] = np.sum(px_joint_displacement[px_label_flat==ii], 0) / np.sum(px_joint_displacement[px_label_flat==ii]!=0) ii+=1 correspondence_displacement = np.nan_to_num(correspondence_displacement) # print "time:", time.time() - time_t1 # Likelihood motion_variance = 500 prob_motion = np.exp(-np.mean(np.sum((pose-skel_previous)**2,1)/motion_variance**2)) if inference == 'PF': correspondence_variance = 40 prob_coor = np.exp(-np.mean(np.sum(correspondence_displacement**2,1)/correspondence_variance**2)) prob = prob_motion * prob_coor prob = prob_motion # Viz correspondences # x = im_pos.copy()*0 # x[mask_box] = px_joint_displacement # for i in range(3): # subplot(1,4,i+1) # imshow(x[:,:,i]) # axis('off') # subplot(1,4,4) # imshow((px_label+1)*mask_box) # # embed() # # for j in range(3): # # for i in range(14): # # subplot(3,14,j*14+i+1) # # imshow(x[:,:,j]*((px_label==i)*mask_box)) # # axis('off') # show() # prob = link_length_probability(skel_current, constraint_links, constraint_values, 100) # print frame_count # print "Prob:", np.mean(prob)#, np.min(prob), prob # thresh = .05 # if np.min(prob) < thresh: # # print 'Resetting pose' # for c in constraint_links[prob<thresh]: # for cc in c: # skel_current_uv[c] = pose_uv[c] - [box[0].start, box[1].start, 0] # skel_current[c] = pose[c] # skel_current_uv = pose_uv.copy() - [box[0].start, box[1].start, 0] # skel_current = pose.copy() skel_current_uv = skel_current_uv + [box[0].start, box[1].start, 0] skel_current = cam.camera_model.im2world(skel_current_uv, cam.depthIm.shape) # print 'Error:', np.sqrt(np.sum((pose_truth-skel_current)**2, 0)) pose_weights[pose_i] = prob # pose_updates += [skel_current.copy()] # pose_updates_uv += [skel_current_uv.copy()] pose_updates += [pose.copy()] pose_updates_uv += [pose_uv.copy()] if cam.colorIm is not None: cam.colorIm = display_skeletons(cam.colorIm, skel_current_uv, skel_type='Kinect', color=(0,0,pose_i*40+50)) else: cam.depthIm = display_skeletons(cam.depthIm, skel_current_uv, skel_type='Kinect', color=(0,0,pose_i*40+50)) # cam.colorIm = display_skeletons(cam.colorIm, pose_uv, skel_type='Kinect', color=(0,pose_i*40+50,pose_i*40+50)) # print "Tracking time:", time.time() - time_t0 # Update for next round pose_ind = np.argmax(pose_weights) # print "Pickled:", pose_ind skel_previous = pose_updates[pose_ind].copy() skel_previous_uv = pose_updates_uv[pose_ind].copy() # print pose_weights else: pose = poses[0] skel_previous = pose.copy() pose_uv = cam.camera_model.world2im(skel_previous, cam.depthIm.shape) skel_current_uv = pose_uv.copy() skel_previous_uv = pose_uv.copy() ''' ---- Accuracy ---- ''' if ground_truth: error_track = pose_truth - skel_previous error_track *= np.any(pose_truth!=0, 1)[:,None] error_l2_track = np.sqrt(np.sum(error_track**2, 1)) joint_accuracy_track += [error_l2_track] accuracy_track = np.sum(error_l2_track < 150) / n_joints accuracy_all_track += [accuracy_track] print "Current track: {}% {} mm".format(accuracy_track, error_l2_track.mean()) print "Running avg (track):", np.mean(accuracy_all_track) # print "Joint avg (overall track):", np.mean(joint_accuracy_track) print "" ''' --- Visualization --- ''' if visualize: display_markers(cam.colorIm, hand_markers[:2], box, color=(0,250,0)) if len(hand_markers) > 2: display_markers(cam.colorIm, [hand_markers[2]], box, color=(0,200,0)) display_markers(cam.colorIm, geodesic_markers, box, color=(200,0,0)) # display_markers(cam.colorIm, curve_markers, box, color=(0,100,100)) # display_markers(cam.colorIm, lop_markers, box, color=(0,0,200)) if ground_truth: cam.colorIm = display_skeletons(cam.colorIm, pose_truth_uv, skel_type='Kinect', color=(0,255,0)) cam.colorIm = display_skeletons(cam.colorIm, skel_current_uv, skel_type='Kinect', color=(255,0,0)) cam.visualize(color=True, depth=False) # ------------------------------------------------------------ # video_writer.write((geo_clf_map/float(geo_clf_map.max())*255.).astype(np.uint8)) # video_writer.write(cam.colorIm[:,:,[2,1,0]]) frame_count += frame_rate print "Frame:", frame_count except: traceback.print_exc(file=sys.stdout) pass try: print "-- Results for subject {:d} action {:d}".format(subjects[0],actions[0]) except: pass # print "Running avg (db):", np.mean(accuracy_all_db) print "Running mean (track):", np.mean(accuracy_all_track) # print "Joint avg (overall db):", np.mean(joint_accuracy_db) print "Joint mean (overall track):", np.mean(joint_accuracy_track) print "Joint median (overall track):", np.median(joint_accuracy_track) # print 'Done' embed() if learn: pose_database.save() elif save_results: # Save results: results['subject'] += [subjects[0]] results['action'] += [actions[0]] results['accuracy_all'] += [accuracy_all_track] results['accuracy_mean'] += [np.mean(accuracy_all_track)] results['joints_all'] += [joint_accuracy_track] results['joint_mean'] += [np.mean(joint_accuracy_track)] results['joint_median'] += [np.median(joint_accuracy_track)] pickle.dump(results, open('/Users/colin/Data/BerkeleyMHAD/Accuracy_Results.pkl', 'w'))
def main(get_depth, get_color, get_skeleton, visualize): # fill = True fill = False get_color = True # VP = KinectPlayer(base_dir='./', device=device, get_depth=get_depth, get_color=get_color, get_skeleton=get_skeleton, get_mask=get_mask) # VP = KinectPlayer(base_dir='./', device=1, bg_subtraction=True, get_depth=get_depth, get_color=get_color, get_skeleton=get_skeleton, fill_images=fill) VP = KinectPlayer(base_dir='./', device=2, bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=False, fill_images=fill) cam_count = 1 if cam_count == 2: VP2 = KinectPlayer(base_dir='./', device=2, bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=False, fill_images=fill) # Transformation matrix from first to second camera # data = pickle.load(open("./Registration.dat", 'r')) # transform_c1_to_c2 = data['transform'] # print 'aaaaaaaaaaaaaaaa' # embed() while VP.next(): # VP.update_background() # Transform skels from cam1 to cam2 if get_skeleton: VP_skels = [np.array(VP.users[s]['jointPositions'].values()) for s in VP.users.keys()] if cam_count == 2: VP2.sync_cameras(VP) if get_skeleton: VP2_skels = transform_skels(VP_skels, transform_c1_to_c2) VP.colorIm = VP.colorIm[:,:,[2,1,0]] # im_tmp = np.dstack([VP.depthIm/float(VP.depthIm.max()),\ # VP.depthIm/float(VP.depthIm.max()),\ # VP.colorIm.mean(-1)/255]) # im = felzenszwalb(im_tmp, scale=255) # im = felzenszwalb(VP.depthIm/float(VP.depthIm.max()), scale=255) # im = VP.depthIm/float(VP.depthIm.max()) * VP. # im = felzenszwalb(im, scale=255) # cv2.imshow("seg", im/float(im.max())) # cv2.waitKey(10) # embed() # Geodesic extrema if 0: if VP.foregroundMask is not None: im = VP.depthIm * (VP.foregroundMask>0) # cv2.imshow("im--", im/float(im.max())) if 1:#(im>0).sum() > 1000: # Extract person labelMask, boxes, labels, px_counts= extract_people(im, VP.foregroundMask, minPersonPixThresh=3000) # print labels, px_counts if len(labels) > 0: max_ind = np.argmax(px_counts) mask = labelMask==max_ind+1 im[-mask] = 0 edge_thresh = 200#10 gradients = np.gradient(im) mag = np.sqrt(gradients[0]**2+gradients[1]**2) im[mag>edge_thresh] = 0 # Segmentation experiment # im_s = VP.depthIm/float(VP.depthIm.max()) # im_s = felzenszwalb(im, scale=255) # cv2.imshow("seg", im_s/float(im_s.max())) x,y = nd.center_of_mass(im) if im[x,y] == 0: tmp = np.nonzero(im>0) x,y = [tmp[0][0], tmp[1][0]] # min_map = geodesic_extrema_MPI(im, centroid=[x,y], iterations=15) extrema = geodesic_extrema_MPI(im, centroid=[x,y], iterations=15) # embed() # for e, i in zip(extrema, range(20)): # if 0 < i < 6: # box_color = [255, 0, 0] # elif i==0: # box_color = [0, 0, 255] # else: # box_color = [255,255,255] # VP.colorIm[e[0]-4:e[0]+5, e[1]-4:e[1]+5] = box_color # cv2.imshow("ExtremaB", min_map/float(min_map.max())) # cv2.waitKey(500) cv2.waitKey(20) # cv2.imshow("bg model", VP.backgroundModel/float(VP.backgroundModel.max())) # cv2.imshow("foregroundMask", ((VP.foregroundMask>0)*255).astype(np.uint8)) if visualize: if cam_count == 2: for s in VP2_skels: VP2.depthIm = display_skeletons(VP2.depthIm, s, (VP2.depthIm.max(),), skel_type='Kinect') VP2.visualize() VP2.playback_control() VP.visualize()
def get_per_pixel_joints(im_depth, skel, alg='geodesic', radius=5): ''' Find the closest joint to each pixel using geodesic distances. ---Paramaters--- im_depth : should be masked depth image skel : in image coords alg : 'geodesic' or 'circular' radius : [only for circular] ''' height, width = im_depth.shape distance_ims = np.empty([height, width, N_SKEL_JOINTS]) MAX_VALUE = 32000 # Get rid of edges around joints edge_thresh = 200 #10 gradients = np.gradient(im_depth) mag = np.sqrt(gradients[0]**2 + gradients[1]**2) im_depth[mag > edge_thresh] = 0 joints_out_of_bounds = np.abs(im_depth[skel[:, 0], skel[:, 1]] - skel[:, 2]) > 100 # Only look at major joints for i, j in zip(SKEL_JOINTS, range(N_SKEL_JOINTS)): if skel[i] in joints_out_of_bounds: distance_ims[:, :, j] = MAX_VALUE continue pos = skel[i] if alg == 'geodesic': x = np.maximum(np.minimum(pos[1], height - 1), 0) y = np.maximum(np.minimum(pos[0], width - 1), 0) # if j == 0: # pts, min_map = generateKeypoints(np.ascontiguousarray(im_depth), centroid=[x,y], iterations=10, scale=6) im_dist = distance_map(np.ascontiguousarray(im_depth), centroid=[x, y], scale=6) distance_ims[:, :, j] = im_dist if 0: radius = 5 x = np.maximum(np.minimum(pos[1], height - 1 - radius), radius) y = np.maximum(np.minimum(pos[0], width - 1 - radius), radius) # print x,y pts = circle(x, y, radius) # print pts[0] max_ = distance_ims[distance_ims[:, :, j] < MAX_VALUE, j].max() distance_ims[distance_ims[:, :, j] >= MAX_VALUE, j] = max_ + 100 distance_ims[pts[0], pts[1], j] = max_ + 100 figure(1) subplot(3, 5, j + 1) imshow(distance_ims[:, :, j]) axis('off') # subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=.1, hspace=.1) tight_layout() subplots_adjust(left=.001, bottom=.001, wspace=.003, hspace=.003) elif alg == 'circular': x = np.maximum(np.minimum(pos[1], height - 1 - radius), radius) y = np.maximum(np.minimum(pos[0], width - 1 - radius), radius) pts = draw.circle(x, y, radius) closest_pos[pts] = i else: print "No algorithm type in 'get_per_pixel_joints'" if alg == 'geodesic': closest_pos = np.argmin(distance_ims, -1).astype(np.uint8) closest_pos_all = np.argsort(distance_ims, -1).astype(np.uint8) elif alg == 'circular': closest_pos = np.zeros_like(im_depth) #Change all background pixels # mask = im_depth!=0 # mask = (distance_ims.max(-1)<32000) mask = im_depth != 0 closest_pos[-mask] = N_MSR_JOINTS + 1 if 0: # Reorder for occlusions skel_depths = np.array([skel[i, 2] for i in SKEL_JOINTS]) skel_XYZ = np.array([skel[i] for i in SKEL_JOINTS]) pos_order = np.argsort(skel_depths).tolist() pos_order.reverse() residual = im_depth[mask] - skel_depths[closest_pos[mask]] im_tmp = np.zeros_like(im_depth) im_tmp[mask] = residual closest_arg = np.argmin(distance_ims + skel_depths, -1) closest_arg[-mask] = -1 # If geo dist < euc distance then kill # Go from back to front. Only care about things ahead from scipy.spatial import distance # euclid_dists = distance.squareform(distance.pdist(depth2world(skel_XYZ, [240,320]))) euclid_dists = distance.squareform(distance.pdist(skel_XYZ)) geo_dists = np.zeros_like(euclid_dists) for i in xrange(len(SKEL_JOINTS)): p = pos_order[i] # If a joint is behind it, don't care -- set to infinity geo_dists[p, :] = [ distance_ims[skel[j][0], skel[j][1], p] if j in pos_order[i:] else np.inf for j in SKEL_JOINTS ] # Check if it's incorrect at least 2x removed_joints = [] err = (geo_dists * 2. < euclid_dists) for i in xrange(len(err)): if err[i].sum(0) >= 1: # removed_joints += [i] distance_ims[:, :, i] = MAX_VALUE closest_pos_e = np.argmin(distance_ims, -1).astype(np.uint8) closest_pos_e[-mask] = N_MSR_JOINTS + 1 subplot(2, 2, 1) closest_pos = display_skeletons(closest_pos, skel, color=(23, ), skel_type='Low') imshow(closest_pos) axis('off') subplot(2, 2, 2) imshow(closest_pos_e) axis('off') subplot(2, 2, 3) imshow(closest_pos_e == closest_pos) axis('off') # blank = np.ones(300,300) # removed_text = '' # for i in removed_joints: # removed_text += str(i)+" " # cv2.putText() tight_layout() subplots_adjust(left=.001, bottom=.001, wspace=.003, hspace=.003) show() # embed() # closest_pos_e[im_depth==0] = N_MSR_JOINTS+1 # closest_pos2[-mask] = N_MSR_JOINTS+1 # closest_pos2[closest_pos>=32000] = N_MSR_JOINTS+1 # closest_arg[im_depth==0] = N_MSR_JOINTS+1 # figure(1); imshow(closest_arg); # figure(3); imshow(closest_pos2) # # Reorder # im_order = np.zeros_like(im_depth)-1 # for p in range(len(pos_order)): # mask_tmp = closest_pos==p # dists_tmp = distance_ims[mask_tmp, p] + skel[pos_order[p],2] # im_order[mask_tmp] = np.maximum(im_order[mask_tmp], dists_tmp) # figure(100) # imshow(closest_pos) # figure(101) # imshow(im_depth) # show() # embed() return closest_pos
def main(visualize=False, learn=False, actions=None, subjects=None, n_frames=220): # learn = True # learn = False if actions is []: actions = [2] if subjects is []: subjects = [2] # actions = [1] # actions = [1, 2, 3, 4, 5] # subjects = [1] if 1: MHAD = True cam = MHADPlayer(base_dir='/Users/colin/Data/BerkeleyMHAD/', kinect=1, actions=actions, subjects=subjects, reps=[1], get_depth=True, get_color=True, get_skeleton=True, fill_images=False) else: MHAD = False cam = KinectPlayer(base_dir='./', device=2, bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=True, fill_images=False) bg = Image.open( '/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_A.tif') bg = Image.open( '/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_B.tif') cam.bgSubtraction.backgroundModel = np.array(bg.getdata()).reshape( [240, 320]).clip(0, 4500) height, width = cam.depthIm.shape skel_previous = None # clf_geo = pickle.load(open('geodesic_svm_sorted_scaled_5class.pkl')) # clf_color,color_approx = pickle.load(open('color_histogram_approx_svm_5class.pkl')) # clf_lbp,lbp_approx = pickle.load(open('lbp_histogram_approx_svm_5class.pkl')) face_detector = FaceDetector() hand_detector = HandDetector(cam.depthIm.shape) curve_detector = CurveDetector(cam.depthIm.shape) # Video writer # video_writer = cv2.VideoWriter("/Users/colin/Desktop/test.avi", cv2.cv.CV_FOURCC('M','J','P','G'), 15, (320,240)) # Save Background model # im = Image.fromarray(cam.depthIm.astype(np.int32), 'I') # im.save("/Users/Colin/Desktop/k2.png") # Setup pose database append = True append = False pose_database = PoseDatabase("PoseDatabase.pkl", learn=learn, search_joints=[0, 4, 7, 10, 13], append=append) # Per-joint classification head_features = [] hand_features = [] feet_features = [] joint_features = { 'geodesic': [None] * 14, 'color_histograms': [None] * 14, 'lbp': [None] * 14 } # Evaluation accuracy_all = [] joint_accuracy_all = [] geo_accuracy = [] color_accuracy = [] lbp_accuracy = [] frame_count = 0 frame_rate = 2 if not MHAD: cam.next(350) frame_prev = 0 try: # if 1: while cam.next(frame_rate): # and frame_count < n_frames: if frame_count - frame_prev > 100: print "" print "Frame #{0:d}".format(frame_count) frame_prev = frame_count if not MHAD: if len(cam.users) == 0: continue else: # cam.users = [np.array(cam.users[0]['jointPositions'].values())] if np.any(cam.users[0][0] == -1): continue cam.users[0][:, 1] *= -1 cam.users_uv_msr = [ cam.camera_model.world2im(cam.users[0], [240, 320]) ] # Apply mask to image if MHAD: mask = cam.get_person(2) > 0 else: mask = cam.get_person() > 0 if np.all(mask == False): continue im_depth = cam.depthIm cam.depthIm[cam.depthIm > 3000] = 0 im_color = cam.colorIm * mask[:, :, None] cam.colorIm *= mask[:, :, None] pose_truth = cam.users[0] pose_truth_uv = cam.users_uv_msr[0] # Get bounding box around person box = nd.find_objects(mask)[0] d = 20 # Widen box box = (slice(np.maximum(box[0].start-d, 0), \ np.minimum(box[0].stop+d, height-1)), \ slice(np.maximum(box[1].start-d, 0), \ np.minimum(box[1].stop+d, width-1))) box_corner = [box[0].start, box[1].start] ''' ---------- ----------------------------------- --------''' ''' ----------- Feature Detector centric approach ---------''' ''' ---------- ----------------------------------- --------''' ''' ---- Calculate Detectors ---- ''' # Face detection face_detector.run(im_color[box]) # Skin detection hand_markers = hand_detector.run(im_color[box], n_peaks=3) # curve detection # curve_markers = curve_detector.run((im_depth*mask)[box], n_peaks=3) # Calculate LBPs ##Max P=31 for LBPs becuase of datatype # x = local_occupancy_pattern(cam.depthIm[box]*mask[box], [5,5,5],[3,3,3]) # lop_texture = local_binary_pattern_depth(cam.depthIm[box]*mask[box], 10, 20, px_diff_thresh=100)*mask[box] # lop_markers = []#peak_local_max(lop_texture, min_distance=20, num_peaks=5, exclude_border=False) # lbp_texture = local_binary_pattern(cam.depthIm[box]*mask[box], 6, 20)*mask[box] # Calculate Geodesic Extrema im_pos = cam.camera_model.im2PosIm( cam.depthIm * mask)[box] * mask[box][:, :, None] geodesic_markers = geodesic_extrema_MPI(im_pos, iterations=5, visualize=False) # geodesic_markers, geo_map = geodesic_extrema_MPI(im_pos, iterations=5, visualize=True) geodesic_markers_pos = im_pos[geodesic_markers[:, 0], geodesic_markers[:, 1]] markers = list(geodesic_markers) + list( hand_markers) #+ list(lop_markers) + curve_markers markers = np.array([list(x) for x in markers]) if 1: ''' ---- Database lookup ---- ''' pts_mean = im_pos[(im_pos != 0)[:, :, 2]].mean(0) if learn: # Normalize pose pose_uv = cam.users_uv[0] if np.any(pose_uv == 0): print "skip" frame_count += frame_rate continue # print pose_truth[2], pts_mean pose_database.update(pose_truth - pts_mean) else: # Concatenate markers markers = list(geodesic_markers) + hand_markers # markers = list(geodesic_markers) + list(lop_markers) + curve_markers + hand_markers markers = np.array([list(x) for x in markers]) # Normalize pose pts = im_pos[markers[:, 0], markers[:, 1]] pts = np.array([x for x in pts if x[0] != 0]) pts -= pts_mean # Get closest pose pose = pose_database.query(pts, knn=1) # pose = pose_database.weighted_query(pts, knn=1) # pose = pose_database.reverse_query(pts[:,[1,0,2]]) # im_pos -= pts_mean # R,t = IterativeClosestPoint(pose, im_pos.reshape([-1,3])-pts_mean, max_iters=5, min_change=.001, pt_tolerance=10000) # pose = np.dot(R.T, pose.T).T - t # pose = np.dot(R, pose.T).T + t pose += pts_mean pose_uv = cam.camera_model.world2im( pose, cam.depthIm.shape) # Constrain if 0: try: ''' This does worse because the joint may fall to a different part of the body (e.g. hand to torso) which throws the error upward ''' surface_map = nd.distance_transform_edt( im_pos[:, :, 2] == 0, return_distances=False, return_indices=True) pose_uv[:, :2] = surface_map[:, pose_uv[:, 0] - box_corner[0], pose_uv[:, 1] - box_corner[1]].T + [ box_corner[0], box_corner[1] ] pose = cam.camera_model.im2world(pose_uv) # skel_current = link_length_constraints(skel_current, constraint_links, constraint_values, alpha=.5) # skel_current = geometry_constraints(skel_current, joint_size, alpha=0.5) # skel_current = collision_constraints(skel_current, constraint_links) # embed() # pose_uv_box = pose_uv - [box_corner[0], box_corner[1], 0] # pose_uv_box = pose_uv_box.clip([0,0,0], [cam.depthIm.shape[0]-1, cam.depthIm.shape[1]-1, 9999]) # joint_size = np.array([75]*14) # pose_n, pose_uv_n = ray_cast_constraints(pose, pose_uv_box, im_pos, surface_map, joint_size) # print 'Pose',pose,pose_n # pose = pose_n # pose_uv = pose_uv_n + [box_corner[0], box_corner[1], 0] except: print 'error constraining' # skel_previous = np.array(pose, copy=True) display_markers(cam.colorIm, hand_markers[:2], box, color=(0, 250, 0)) if len(hand_markers) > 2: display_markers(cam.colorIm, [hand_markers[2]], box, color=(0, 200, 0)) display_markers(cam.colorIm, geodesic_markers, box, color=(200, 0, 0)) # display_markers(cam.colorIm, curve_markers, box, color=(0,100,100)) # display_markers(cam.colorIm, lop_markers, box, color=(0,0,200)) if 0: ''' ---------- ----------------------------------- --------''' ''' ---------- Feature Descriptor centric approach --------''' ''' ---------- ----------------------------------- --------''' ''' ---- Calculate Descriptors ---- ''' hand_markers = np.array(hand_markers) # Geodesics geodesic_features = relative_marker_positions( im_pos, geodesic_markers_pos[:, [1, 0, 2]]) geodesic_features = np.sort(geodesic_features) # Color Histogram skin = skimage.exposure.rescale_intensity( hand_detector.im_skin, out_range=[0, 255]).astype(np.uint8) color_histograms = local_histograms( skin, n_bins=5, max_bound=255, patch_size=11) * mask[box][:, :, None] # LBP Histogram lbp_texture = local_binary_pattern( cam.depthIm[box] * mask[box], 6, 5) * mask[box] lbp_histograms = local_histograms( lbp_texture.astype(np.uint8), n_bins=10, max_bound=2**6, patch_size=11) * mask[box][:, :, None] # for i in range(10): # subplot(2,5,i+1) # imshow(lbp_histograms[:,:,i]) ''' ---- Per Joint Learning ---- ''' if learn: for ii, i in enumerate(pose_truth_uv): if i[0] != 0: try: if joint_features['geodesic'][ii] is None: joint_features['geodesic'][ ii] = geodesic_features[i[1] - box_corner[0], i[0] - box_corner[1]] else: joint_features['geodesic'][ii] = np.vstack( [ joint_features['geodesic'][ii], (geodesic_features[i[1] - box_corner[0], i[0] - box_corner[1]]) ]) if joint_features['color_histograms'][ ii] is None: joint_features['color_histograms'][ ii] = color_histograms[i[1] - box_corner[0], i[0] - box_corner[1]] else: joint_features['color_histograms'][ ii] = np.vstack([ joint_features['color_histograms'] [ii], deepcopy(color_histograms[ i[1] - box_corner[0], i[0] - box_corner[1]]) ]) if joint_features['lbp'][ii] is None: joint_features['lbp'][ii] = lbp_histograms[ i[1] - box_corner[0], i[0] - box_corner[1]] else: joint_features['lbp'][ii] = np.vstack([ joint_features['lbp'][ii], deepcopy(lbp_histograms[i[1] - box_corner[0], i[0] - box_corner[1]]) ]) except: print "error" ''' ---- Per Joint Classification ---- ''' if not learn: try: # Geodesic clasification tmp = geodesic_features.reshape([-1, 6]) tmp = np.array([x / x[-1] for x in tmp]) tmp = np.nan_to_num(tmp) geo_clf_map = clf_geo.predict(tmp).reshape( im_pos.shape[:2]) * mask[box] geo_clf_labels = geo_clf_map[ pose_truth_uv[[0, 1, 4, 7, 10, 13], 1] - box_corner[0], pose_truth_uv[[0, 1, 4, 7, 10, 13], 0] - box_corner[1]] geo_accuracy += [ geo_clf_labels == [0, 1, 4, 7, 10, 13] ] print 'G', np.mean( geo_accuracy, 0), geo_clf_labels == [0, 1, 4, 7, 10, 13] cv2.imshow('Geo', geo_clf_map / float(geo_clf_map.max())) except: pass try: # Color histogram classification color_test = color_approx.transform( color_histograms.reshape([-1, 5])) color_clf_map = clf_color.predict(color_test).reshape( im_pos.shape[:2]) * mask[box] color_clf_labels = color_clf_map[ pose_truth_uv[[0, 1, 4, 7, 10, 13], 1] - box_corner[0], pose_truth_uv[[0, 1, 4, 7, 10, 13], 0] - box_corner[1]] color_accuracy += [ color_clf_labels == [0, 1, 4, 7, 10, 13] ] print 'C', np.mean( color_accuracy, 0), color_clf_labels == [0, 1, 4, 7, 10, 13] cv2.imshow('Col', color_clf_map / float(color_clf_map.max())) except: pass try: # lbp histogram classification lbp_test = color_approx.transform( lbp_histograms.reshape([-1, 10])) lbp_clf_map = clf_lbp.predict(lbp_test).reshape( im_pos.shape[:2]) * mask[box] lbp_clf_labels = lbp_clf_map[ pose_truth_uv[[0, 1, 4, 7, 10, 13], 1] - box_corner[0], pose_truth_uv[[0, 1, 4, 7, 10, 13], 0] - box_corner[1]] lbp_accuracy += [ lbp_clf_labels == [0, 1, 4, 7, 10, 13] ] print 'L', np.mean( lbp_accuracy, 0), lbp_clf_labels == [0, 1, 4, 7, 10, 13] cv2.imshow('LBP', lbp_clf_map / float(lbp_clf_map.max())) except: pass pose_uv = pose_truth_uv pose = pose_truth # ''' ---- Accuracy ---- ''' if 1 and not learn: # pose_truth = cam.users[0] error = pose_truth - pose # print "Error", error error_l2 = np.sqrt(np.sum(error**2, 1)) # error_l2 = np.sqrt(np.sum(error[:,:2]**2, 1)) joint_accuracy_all += [error_l2] accuracy = np.sum(error_l2 < 150) / 14. accuracy_all += [accuracy] print "Current", accuracy # print "Running avg:", np.mean(accuracy_all) # print "Joint avg (per-joint):", np.mean(joint_accuracy_all, -1) # print "Joint avg (overall):", np.mean(joint_accuracy_all) ''' --- Visualization --- ''' cam.colorIm = display_skeletons(cam.colorIm, pose_truth_uv, skel_type='Kinect', color=(0, 255, 0)) cam.colorIm = display_skeletons(cam.colorIm, pose_uv, skel_type='Kinect') cam.visualize() # print "Extrema:", geo_clf_map[geodesic_markers[:,0], geodesic_markers[:,1]] # print "Skin:", geo_clf_map[hand_markers[:,0], hand_markers[:,1]] # print "Skin val:", hand_detector.skin_match[hand_markers[:,0], hand_markers[:,1]] # hand_data += [[x[0] for x in hand_markers], # [x[1] for x in hand_markers], # list(hand_detector.skin_match[hand_markers[:,0], hand_markers[:,1]])] # ------------------------------------------------------------ # video_writer.write((geo_clf_map/float(geo_clf_map.max())*255.).astype(np.uint8)) # video_writer.write(cam.colorIm[:,:,[2,1,0]]) frame_count += frame_rate except: pass print "-- Results for subject {:d} action {:d}".format( subjects[0], actions[0]) print "Running avg:", np.mean(accuracy_all) print "Joint avg (overall):", np.mean(joint_accuracy_all) # print 'Done' if learn: pose_database.save() print 'Pose database saved' embed() return ''' --- Format Geodesic features ---''' geodesics_train = [] geodesics_labels = [] for i in xrange(len(joint_features['geodesic'])): # joint_features['geodesic'][i] = np.array([np.sort(x) for x in joint_features['geodesic'][i] if x[0] != 0]) joint_features['geodesic'][i] = np.array( [x / x.max() for x in joint_features['geodesic'][i] if x[0] != 0]) ii = i if i not in [0, 1, 4, 7, 10, 13]: ii = 1 else: geodesics_labels += [ i * np.ones(len(joint_features['geodesic'][i])) ] geodesics_train = np.vstack( [joint_features['geodesic'][x] for x in [0, 1, 4, 7, 10, 13]]) # geodesics_train = np.vstack(joint_features['geodesic']) geodesics_labels = np.hstack(geodesics_labels) figure(1) title('Distances of each joint to first 6 geodesic extrema') for i in range(14): subplot(4, 4, i + 1) ylabel('Distance') xlabel('Sample') plot(joint_features['geodesic'][i]) axis([0, 400, 0, 1600]) # Learn geodesic classifier clf_geo = SGDClassifier(n_iter=10000, alpha=.01, n_jobs=-1, class_weight='auto') clf_geo.fit(geodesics_train, geodesics_labels) print clf_geo.score(geodesics_train, geodesics_labels) geodesic_features = np.sort(geodesic_features) sgd_map = clf_geo.predict(geodesic_features.reshape([-1, 6])).reshape( im_pos.shape[:2]) pickle.dump(clf_geo, open('geodesic_svm_sorted_scaled_5class.pkl', 'w'), pickle.HIGHEST_PROTOCOL) # clf_geo = pickle.load(open('geodesic_svm_sorted_scaled_5class.pkl')) ''' --- Color Histogram features ---''' color_train = [] color_labels = [] for i in xrange(len(joint_features['color_histograms'])): ii = i if i not in [0, 1, 4, 7, 10, 13]: ii = 1 else: color_labels += [ i * np.ones(len(joint_features['color_histograms'][i])) ] # color_labels += [i*np.ones(len(joint_features['color_histograms'][i]))] # color_train = np.vstack(joint_features['color_histograms']) color_train = np.vstack( [joint_features['color_histograms'][x] for x in [0, 1, 4, 7, 10, 13]]) color_labels = np.hstack(color_labels) color_approx = AdditiveChi2Sampler() color_approx_train = color_approx.fit_transform(color_train) clf = SGDClassifier(n_iter=10000, alpha=.01, n_jobs=-1, class_weight='auto') clf.fit(color_approx_train, color_labels) print clf.score(color_approx_train, color_labels) color_test = color_approx.transform(color_histograms.reshape([-1, 5])) sgd_map = clf.predict(color_test).reshape(im_pos.shape[:2]) * mask[box] figure(1) title('Color Histograms per Joint') for i in range(14): subplot(4, 4, i + 1) ylabel('Count') xlabel('Sample') plot(joint_features['color_histograms'][i]) axis([0, 10, 0, 30]) for i in range(5): subplot(1, 5, i + 1) imshow(color_histograms[:, :, i]) pickle.dump([clf, color_approx], open('color_histogram_approx_svm_5class.pkl', 'w'), pickle.HIGHEST_PROTOCOL) # clf_color,color_approx = pickle.load(open('color_histogram_approx_svm_5class.pkl')) ''' --- LBP Histogram features ---''' color_train = [] color_labels = [] for i in xrange(len(joint_features['lbp'])): ii = i if i not in [0, 1, 4, 7, 10, 13]: ii = 1 else: color_labels += [i * np.ones(len(joint_features['lbp'][i]))] # color_labels += [i*np.ones(len(joint_features['color_histograms'][i]))] # color_train = np.vstack(joint_features['color_histograms']) color_train = np.vstack( [joint_features['lbp'][x] for x in [0, 1, 4, 7, 10, 13]]) color_labels = np.hstack(color_labels) color_approx = AdditiveChi2Sampler() color_approx_train = color_approx.fit_transform(color_train) clf = SGDClassifier(n_iter=10000, alpha=.01, n_jobs=-1, class_weight='auto') clf.fit(color_approx_train, color_labels) print clf.score(color_approx_train, color_labels) color_test = color_approx.transform(lbp_histograms.reshape([-1, 10])) sgd_map = clf.predict(color_test).reshape(im_pos.shape[:2]) * mask[box] figure(1) title('LBP Histograms per Joint') for i in range(14): subplot(4, 4, i + 1) ylabel('Count') xlabel('Sample') plot(joint_features['lbp'][i]) axis([0, 10, 0, 30]) for i in range(5): subplot(1, 5, i + 1) imshow(color_histograms[:, :, i]) pickle.dump([clf, color_approx], open('lbp_histogram_approx_svm_5class.pkl', 'w'), pickle.HIGHEST_PROTOCOL)
def main_infer(rf_name=None): ''' ''' if rf_name is None: import os files = os.listdir('Saved_Params/') rf_name = 'Saved_Params/' + files[-1] print "Classifier file:", rf_name # Load classifier data data = pickle.load(open(rf_name)) rf = data['rf'] offsets_1, offsets_2 = data['offsets'] ''' Read data from each video/sequence ''' depthIms = [] skels_world = [] if 1: # VP = KinectPlayer(base_dir='/Users/colin/Data/Office_25Feb2013/', device=2, get_depth=True, get_color=False, get_skeleton=True, get_mask=False) VP = KinectPlayer(base_dir='/Users/colin/Data/Room_close_26Feb13/', device=1, get_depth=True, get_color=False, get_skeleton=True, get_mask=False) _, _ = VP.get_n_skeletons(50) depthIms, skels_world = VP.get_n_skeletons(100) # depthIms = np.array(depthIms)[:,:,::-1] else: # name = 'a01_s01_e02_' name = 'a01_s10_e02_' # name = 'a02_s06_e02_' # name = 'a05_s02_e02_' depth_file = name + "depth.bin" color_file = name + "rgb.avi" skeleton_file = name + "skeleton.txt" try: depthIms, maskIms = read_MSR_depth_ims(depth_file) depthIms *= maskIms depthIms /= 10 # colorIms = read_MSR_color_ims(color_file) skels_world, skels_im = read_MSR_skeletons(skeleton_file) skels_world[:, 2] /= 10 except: print "Error reading data" ''' Process data ''' all_pred_ims = [] for i in xrange(1, len(depthIms), 1): # try: if 1: print i ''' Get frame data ''' im_depth = depthIms[i] # im_depth[160:, :] = 0 skel_pos = skel2depth(skels_world[i], rez=[240, 320]) # ''' --- NOTE THIS 10 PX OFFSET IN THE MSR DATASET !!! --- ''' # skel_pos[:,0] -= 10 skel_pos_pred, im_predict = infer_pose(im_depth, rf, offsets_1, offsets_2) # Overlay skeletons if 1: # colorIm = colorIms[i] # im_predict = colorIm cv2.imshow("forest_prediction", im_predict / float(im_predict.max())) im_predict = np.repeat(im_depth[:, :, None].astype(np.float), 3, -1) # embed() im_predict[im_depth > 0] -= im_depth[im_depth > 0].min() im_predict /= float(im_predict.max() / 255.) im_predict = im_predict.astype(np.uint8) # im_predict = np.repeat(im_predict[:,:,None], 3, -1) # im_predict /= float(im_predict.max())*255 # im_predict = im_predict.astype(np.uint8) im_predict = display_skeletons(im_predict, skel_pos, (255, 0, 0), SKEL_DISPLAY_MODE) im_predict = display_skeletons(im_predict, skel_pos_pred, (0, 255, 0), SKEL_DISPLAY_MODE) im_predict = display_skeletons(im_predict, skel_pos, (255, 0, 0), SKEL_DISPLAY_MODE) im_predict = display_skeletons(im_predict, skel_pos_pred, (0, 255, 0), SKEL_DISPLAY_MODE) # embed() # max_ = (im_predict * (im_predict < 255)).max() all_pred_ims += [im_predict] ''' Visualize ''' if 1: cv2.putText(im_predict, "Blue=Truth", (10, 210), cv2.FONT_HERSHEY_DUPLEX, .5, (int(im_predict.max() / 2), 0, 0)) cv2.putText(im_predict, "Green=Predict", (10, 230), cv2.FONT_HERSHEY_DUPLEX, .5, (0, int(im_predict.max() / 2), 0)) cv2.imshow("prediction", im_predict) ret = cv2.waitKey(10) if ret > 0: break time.sleep(.5) # except: # print "Frame failed:", i # break embed()
def main(visualize=False, learn=False, actions=None, subjects=None, n_frames=220): # learn = True # learn = False if actions is []: actions = [2] if subjects is []: subjects = [2] # actions = [1] # actions = [1, 2, 3, 4, 5] # subjects = [1] if 1: MHAD = True cam = MHADPlayer(base_dir='/Users/colin/Data/BerkeleyMHAD/', kinect=1, actions=actions, subjects=subjects, reps=[1], get_depth=True, get_color=True, get_skeleton=True, fill_images=False) else: MHAD = False cam = KinectPlayer(base_dir='./', device=2, bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=True, fill_images=False) bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_A.tif') bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_B.tif') cam.bgSubtraction.backgroundModel = np.array(bg.getdata()).reshape([240,320]).clip(0, 4500) height, width = cam.depthIm.shape skel_previous = None # clf_geo = pickle.load(open('geodesic_svm_sorted_scaled_5class.pkl')) # clf_color,color_approx = pickle.load(open('color_histogram_approx_svm_5class.pkl')) # clf_lbp,lbp_approx = pickle.load(open('lbp_histogram_approx_svm_5class.pkl')) face_detector = FaceDetector() hand_detector = HandDetector(cam.depthIm.shape) curve_detector = CurveDetector(cam.depthIm.shape) # Video writer # video_writer = cv2.VideoWriter("/Users/colin/Desktop/test.avi", cv2.cv.CV_FOURCC('M','J','P','G'), 15, (320,240)) # Save Background model # im = Image.fromarray(cam.depthIm.astype(np.int32), 'I') # im.save("/Users/Colin/Desktop/k2.png") # Setup pose database append = True append = False pose_database = PoseDatabase("PoseDatabase.pkl", learn=learn, search_joints=[0,4,7,10,13], append=append) # Per-joint classification head_features = [] hand_features = [] feet_features = [] joint_features = {'geodesic':[None]*14, 'color_histograms':[None]*14, 'lbp':[None]*14} # Evaluation accuracy_all = [] joint_accuracy_all = [] geo_accuracy = [] color_accuracy = [] lbp_accuracy = [] frame_count = 0 frame_rate = 2 if not MHAD: cam.next(350) frame_prev = 0 try: # if 1: while cam.next(frame_rate):# and frame_count < n_frames: if frame_count - frame_prev > 100: print "" print "Frame #{0:d}".format(frame_count) frame_prev = frame_count if not MHAD: if len(cam.users) == 0: continue else: # cam.users = [np.array(cam.users[0]['jointPositions'].values())] if np.any(cam.users[0][0] == -1): continue cam.users[0][:,1] *= -1 cam.users_uv_msr = [cam.camera_model.world2im(cam.users[0], [240,320])] # Apply mask to image if MHAD: mask = cam.get_person(2) > 0 else: mask = cam.get_person() > 0 if np.all(mask==False): continue im_depth = cam.depthIm cam.depthIm[cam.depthIm>3000] = 0 im_color = cam.colorIm*mask[:,:,None] cam.colorIm *= mask[:,:,None] pose_truth = cam.users[0] pose_truth_uv = cam.users_uv_msr[0] # Get bounding box around person box = nd.find_objects(mask)[0] d = 20 # Widen box box = (slice(np.maximum(box[0].start-d, 0), \ np.minimum(box[0].stop+d, height-1)), \ slice(np.maximum(box[1].start-d, 0), \ np.minimum(box[1].stop+d, width-1))) box_corner = [box[0].start,box[1].start] ''' ---------- ----------------------------------- --------''' ''' ----------- Feature Detector centric approach ---------''' ''' ---------- ----------------------------------- --------''' ''' ---- Calculate Detectors ---- ''' # Face detection face_detector.run(im_color[box]) # Skin detection hand_markers = hand_detector.run(im_color[box], n_peaks=3) # curve detection # curve_markers = curve_detector.run((im_depth*mask)[box], n_peaks=3) # Calculate LBPs ##Max P=31 for LBPs becuase of datatype # x = local_occupancy_pattern(cam.depthIm[box]*mask[box], [5,5,5],[3,3,3]) # lop_texture = local_binary_pattern_depth(cam.depthIm[box]*mask[box], 10, 20, px_diff_thresh=100)*mask[box] # lop_markers = []#peak_local_max(lop_texture, min_distance=20, num_peaks=5, exclude_border=False) # lbp_texture = local_binary_pattern(cam.depthIm[box]*mask[box], 6, 20)*mask[box] # Calculate Geodesic Extrema im_pos = cam.camera_model.im2PosIm(cam.depthIm*mask)[box] * mask[box][:,:,None] geodesic_markers = geodesic_extrema_MPI(im_pos, iterations=5, visualize=False) # geodesic_markers, geo_map = geodesic_extrema_MPI(im_pos, iterations=5, visualize=True) geodesic_markers_pos = im_pos[geodesic_markers[:,0], geodesic_markers[:,1]] markers = list(geodesic_markers) + list(hand_markers) #+ list(lop_markers) + curve_markers markers = np.array([list(x) for x in markers]) if 1: ''' ---- Database lookup ---- ''' pts_mean = im_pos[(im_pos!=0)[:,:,2]].mean(0) if learn: # Normalize pose pose_uv = cam.users_uv[0] if np.any(pose_uv==0): print "skip" frame_count += frame_rate continue # print pose_truth[2], pts_mean pose_database.update(pose_truth - pts_mean) else: # Concatenate markers markers = list(geodesic_markers) + hand_markers # markers = list(geodesic_markers) + list(lop_markers) + curve_markers + hand_markers markers = np.array([list(x) for x in markers]) # Normalize pose pts = im_pos[markers[:,0], markers[:,1]] pts = np.array([x for x in pts if x[0] != 0]) pts -= pts_mean # Get closest pose pose = pose_database.query(pts, knn=1) # pose = pose_database.weighted_query(pts, knn=1) # pose = pose_database.reverse_query(pts[:,[1,0,2]]) # im_pos -= pts_mean # R,t = IterativeClosestPoint(pose, im_pos.reshape([-1,3])-pts_mean, max_iters=5, min_change=.001, pt_tolerance=10000) # pose = np.dot(R.T, pose.T).T - t # pose = np.dot(R, pose.T).T + t pose += pts_mean pose_uv = cam.camera_model.world2im(pose, cam.depthIm.shape) # Constrain if 0: try: ''' This does worse because the joint may fall to a different part of the body (e.g. hand to torso) which throws the error upward ''' surface_map = nd.distance_transform_edt(im_pos[:,:,2]==0, return_distances=False, return_indices=True) pose_uv[:,:2] = surface_map[:, pose_uv[:,0]-box_corner[0], pose_uv[:,1]-box_corner[1]].T + [box_corner[0], box_corner[1]] pose = cam.camera_model.im2world(pose_uv) # skel_current = link_length_constraints(skel_current, constraint_links, constraint_values, alpha=.5) # skel_current = geometry_constraints(skel_current, joint_size, alpha=0.5) # skel_current = collision_constraints(skel_current, constraint_links) # embed() # pose_uv_box = pose_uv - [box_corner[0], box_corner[1], 0] # pose_uv_box = pose_uv_box.clip([0,0,0], [cam.depthIm.shape[0]-1, cam.depthIm.shape[1]-1, 9999]) # joint_size = np.array([75]*14) # pose_n, pose_uv_n = ray_cast_constraints(pose, pose_uv_box, im_pos, surface_map, joint_size) # print 'Pose',pose,pose_n # pose = pose_n # pose_uv = pose_uv_n + [box_corner[0], box_corner[1], 0] except: print 'error constraining' # skel_previous = np.array(pose, copy=True) display_markers(cam.colorIm, hand_markers[:2], box, color=(0,250,0)) if len(hand_markers) > 2: display_markers(cam.colorIm, [hand_markers[2]], box, color=(0,200,0)) display_markers(cam.colorIm, geodesic_markers, box, color=(200,0,0)) # display_markers(cam.colorIm, curve_markers, box, color=(0,100,100)) # display_markers(cam.colorIm, lop_markers, box, color=(0,0,200)) if 0: ''' ---------- ----------------------------------- --------''' ''' ---------- Feature Descriptor centric approach --------''' ''' ---------- ----------------------------------- --------''' ''' ---- Calculate Descriptors ---- ''' hand_markers = np.array(hand_markers) # Geodesics geodesic_features = relative_marker_positions(im_pos, geodesic_markers_pos[:,[1,0,2]]) geodesic_features = np.sort(geodesic_features) # Color Histogram skin = skimage.exposure.rescale_intensity(hand_detector.im_skin, out_range=[0,255]).astype(np.uint8) color_histograms = local_histograms(skin, n_bins=5, max_bound=255, patch_size=11)*mask[box][:,:,None] # LBP Histogram lbp_texture = local_binary_pattern(cam.depthIm[box]*mask[box], 6, 5)*mask[box] lbp_histograms = local_histograms(lbp_texture.astype(np.uint8), n_bins=10, max_bound=2**6, patch_size=11)*mask[box][:,:,None] # for i in range(10): # subplot(2,5,i+1) # imshow(lbp_histograms[:,:,i]) ''' ---- Per Joint Learning ---- ''' if learn: for ii,i in enumerate(pose_truth_uv): if i[0] != 0: try: if joint_features['geodesic'][ii] is None: joint_features['geodesic'][ii] = geodesic_features[i[1]-box_corner[0], i[0]-box_corner[1]] else: joint_features['geodesic'][ii] = np.vstack([joint_features['geodesic'][ii], (geodesic_features[i[1]-box_corner[0], i[0]-box_corner[1]])]) if joint_features['color_histograms'][ii] is None: joint_features['color_histograms'][ii] = color_histograms[i[1]-box_corner[0], i[0]-box_corner[1]] else: joint_features['color_histograms'][ii] = np.vstack([joint_features['color_histograms'][ii], deepcopy(color_histograms[i[1]-box_corner[0], i[0]-box_corner[1]])]) if joint_features['lbp'][ii] is None: joint_features['lbp'][ii] = lbp_histograms[i[1]-box_corner[0], i[0]-box_corner[1]] else: joint_features['lbp'][ii] = np.vstack([joint_features['lbp'][ii], deepcopy(lbp_histograms[i[1]-box_corner[0], i[0]-box_corner[1]])]) except: print "error" ''' ---- Per Joint Classification ---- ''' if not learn: try: # Geodesic clasification tmp = geodesic_features.reshape([-1, 6]) tmp = np.array([x/x[-1] for x in tmp]) tmp = np.nan_to_num(tmp) geo_clf_map = clf_geo.predict(tmp).reshape(im_pos.shape[:2])*mask[box] geo_clf_labels = geo_clf_map[pose_truth_uv[[0,1,4,7,10,13],1]-box_corner[0], pose_truth_uv[[0,1,4,7,10,13],0]-box_corner[1]] geo_accuracy += [geo_clf_labels == [0,1,4,7,10,13]] print 'G',np.mean(geo_accuracy,0), geo_clf_labels==[0,1,4,7,10,13] cv2.imshow('Geo', geo_clf_map/float(geo_clf_map.max())) except: pass try: # Color histogram classification color_test = color_approx.transform(color_histograms.reshape([-1, 5])) color_clf_map = clf_color.predict(color_test).reshape(im_pos.shape[:2])*mask[box] color_clf_labels = color_clf_map[pose_truth_uv[[0,1,4,7,10,13],1]-box_corner[0], pose_truth_uv[[0,1,4,7,10,13],0]-box_corner[1]] color_accuracy += [color_clf_labels == [0,1,4,7,10,13]] print 'C',np.mean(color_accuracy,0), color_clf_labels==[0,1,4,7,10,13] cv2.imshow('Col', color_clf_map/float(color_clf_map.max())) except: pass try: # lbp histogram classification lbp_test = color_approx.transform(lbp_histograms.reshape([-1, 10])) lbp_clf_map = clf_lbp.predict(lbp_test).reshape(im_pos.shape[:2])*mask[box] lbp_clf_labels = lbp_clf_map[pose_truth_uv[[0,1,4,7,10,13],1]-box_corner[0], pose_truth_uv[[0,1,4,7,10,13],0]-box_corner[1]] lbp_accuracy += [lbp_clf_labels == [0,1,4,7,10,13]] print 'L',np.mean(lbp_accuracy,0), lbp_clf_labels==[0,1,4,7,10,13] cv2.imshow('LBP', lbp_clf_map/float(lbp_clf_map.max())) except: pass pose_uv = pose_truth_uv pose = pose_truth # ''' ---- Accuracy ---- ''' if 1 and not learn: # pose_truth = cam.users[0] error = pose_truth - pose # print "Error", error error_l2 = np.sqrt(np.sum(error**2, 1)) # error_l2 = np.sqrt(np.sum(error[:,:2]**2, 1)) joint_accuracy_all += [error_l2] accuracy = np.sum(error_l2 < 150) / 14. accuracy_all += [accuracy] print "Current", accuracy # print "Running avg:", np.mean(accuracy_all) # print "Joint avg (per-joint):", np.mean(joint_accuracy_all, -1) # print "Joint avg (overall):", np.mean(joint_accuracy_all) ''' --- Visualization --- ''' cam.colorIm = display_skeletons(cam.colorIm, pose_truth_uv, skel_type='Kinect', color=(0,255,0)) cam.colorIm = display_skeletons(cam.colorIm, pose_uv, skel_type='Kinect') cam.visualize() # print "Extrema:", geo_clf_map[geodesic_markers[:,0], geodesic_markers[:,1]] # print "Skin:", geo_clf_map[hand_markers[:,0], hand_markers[:,1]] # print "Skin val:", hand_detector.skin_match[hand_markers[:,0], hand_markers[:,1]] # hand_data += [[x[0] for x in hand_markers], # [x[1] for x in hand_markers], # list(hand_detector.skin_match[hand_markers[:,0], hand_markers[:,1]])] # ------------------------------------------------------------ # video_writer.write((geo_clf_map/float(geo_clf_map.max())*255.).astype(np.uint8)) # video_writer.write(cam.colorIm[:,:,[2,1,0]]) frame_count += frame_rate except: pass print "-- Results for subject {:d} action {:d}".format(subjects[0],actions[0]) print "Running avg:", np.mean(accuracy_all) print "Joint avg (overall):", np.mean(joint_accuracy_all) # print 'Done' if learn: pose_database.save() print 'Pose database saved' embed() return ''' --- Format Geodesic features ---''' geodesics_train = [] geodesics_labels = [] for i in xrange(len(joint_features['geodesic'])): # joint_features['geodesic'][i] = np.array([np.sort(x) for x in joint_features['geodesic'][i] if x[0] != 0]) joint_features['geodesic'][i] = np.array([x/x.max() for x in joint_features['geodesic'][i] if x[0] != 0]) ii = i if i not in [0,1,4,7,10,13]: ii=1 else: geodesics_labels += [i*np.ones(len(joint_features['geodesic'][i]))] geodesics_train = np.vstack([joint_features['geodesic'][x] for x in [0,1,4,7,10,13]]) # geodesics_train = np.vstack(joint_features['geodesic']) geodesics_labels = np.hstack(geodesics_labels) figure(1) title('Distances of each joint to first 6 geodesic extrema') for i in range(14): subplot(4,4,i+1) ylabel('Distance') xlabel('Sample') plot(joint_features['geodesic'][i]) axis([0,400,0,1600]) # Learn geodesic classifier clf_geo = SGDClassifier(n_iter=10000, alpha=.01, n_jobs=-1, class_weight='auto') clf_geo.fit(geodesics_train, geodesics_labels) print clf_geo.score(geodesics_train, geodesics_labels) geodesic_features = np.sort(geodesic_features) sgd_map = clf_geo.predict(geodesic_features.reshape([-1, 6])).reshape(im_pos.shape[:2]) pickle.dump(clf_geo, open('geodesic_svm_sorted_scaled_5class.pkl','w'), pickle.HIGHEST_PROTOCOL) # clf_geo = pickle.load(open('geodesic_svm_sorted_scaled_5class.pkl')) ''' --- Color Histogram features ---''' color_train = [] color_labels = [] for i in xrange(len(joint_features['color_histograms'])): ii = i if i not in [0,1,4,7,10,13]: ii=1 else: color_labels += [i*np.ones(len(joint_features['color_histograms'][i]))] # color_labels += [i*np.ones(len(joint_features['color_histograms'][i]))] # color_train = np.vstack(joint_features['color_histograms']) color_train = np.vstack([joint_features['color_histograms'][x] for x in [0,1,4,7,10,13]]) color_labels = np.hstack(color_labels) color_approx = AdditiveChi2Sampler() color_approx_train = color_approx.fit_transform(color_train) clf = SGDClassifier(n_iter=10000, alpha=.01, n_jobs=-1, class_weight='auto') clf.fit(color_approx_train, color_labels) print clf.score(color_approx_train, color_labels) color_test = color_approx.transform(color_histograms.reshape([-1, 5])) sgd_map = clf.predict(color_test).reshape(im_pos.shape[:2])*mask[box] figure(1) title('Color Histograms per Joint') for i in range(14): subplot(4,4,i+1) ylabel('Count') xlabel('Sample') plot(joint_features['color_histograms'][i]) axis([0,10,0,30]) for i in range(5): subplot(1,5,i+1) imshow(color_histograms[:,:,i]) pickle.dump([clf,color_approx], open('color_histogram_approx_svm_5class.pkl','w'), pickle.HIGHEST_PROTOCOL) # clf_color,color_approx = pickle.load(open('color_histogram_approx_svm_5class.pkl')) ''' --- LBP Histogram features ---''' color_train = [] color_labels = [] for i in xrange(len(joint_features['lbp'])): ii = i if i not in [0,1,4,7,10,13]: ii=1 else: color_labels += [i*np.ones(len(joint_features['lbp'][i]))] # color_labels += [i*np.ones(len(joint_features['color_histograms'][i]))] # color_train = np.vstack(joint_features['color_histograms']) color_train = np.vstack([joint_features['lbp'][x] for x in [0,1,4,7,10,13]]) color_labels = np.hstack(color_labels) color_approx = AdditiveChi2Sampler() color_approx_train = color_approx.fit_transform(color_train) clf = SGDClassifier(n_iter=10000, alpha=.01, n_jobs=-1, class_weight='auto') clf.fit(color_approx_train, color_labels) print clf.score(color_approx_train, color_labels) color_test = color_approx.transform(lbp_histograms.reshape([-1, 10])) sgd_map = clf.predict(color_test).reshape(im_pos.shape[:2])*mask[box] figure(1) title('LBP Histograms per Joint') for i in range(14): subplot(4,4,i+1) ylabel('Count') xlabel('Sample') plot(joint_features['lbp'][i]) axis([0,10,0,30]) for i in range(5): subplot(1,5,i+1) imshow(color_histograms[:,:,i]) pickle.dump([clf,color_approx], open('lbp_histogram_approx_svm_5class.pkl','w'), pickle.HIGHEST_PROTOCOL)
def main(visualize=False, learn=False, patch_size=32, n_frames=2500): if 1: get_color = True cam = MHADPlayer(base_dir='/Users/colin/Data/BerkeleyMHAD/', kinects=[1], actions=[1], subjects=[1], get_depth=True, get_color=True, get_skeleton=True, fill_images=False) # cam = KinectPlayer(base_dir='./', device=2, bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=True, fill_images=False) # cam.bgSubtraction.backgroundModel = sm.imread('/Users/colin/Data/CIRL_28Feb2013/depth/59/13/47/device_1/depth_59_13_47_4_13_35507.png').clip(0, 4500) # bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/Office_Background_B.tif') # bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_B.tif') # bg = Image.open('/Users/colin/Data/JHU_RGBD_Pose/Wall_Background_B.tif') # cam.bgSubtraction.backgroundModel = np.array(bg.getdata()).reshape([240,320]).clip(0, 4500) # embed() # cam = KinectPlayer(base_dir='./', device=2, bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=True, fill_images=False) elif 0: get_color = False cam = EVALPlayer(base_dir='/Users/colin/Data/EVAL/', bg_subtraction=True, get_depth=True, get_skeleton=True, fill_images=False) elif 0: get_color = False cam = MSRPlayer(base_dir='/Users/colin/Data/MSR_DailyActivities/Data/', actions=[1], subjects=[1,2,3,4,5], bg_subtraction=True, get_depth=True, get_color=True, get_skeleton=True, fill_images=False) embed() height, width = cam.depthIm.shape skel_names = np.array(['head', 'neck', 'torso', 'l_shoulder', 'l_elbow', 'l_hand', \ 'r_shoulder', 'r_elbow', 'r_hand', 'l_hip', 'l_knee', 'l_foot',\ 'r_hip', 'r_knee', 'r_foot']) # skel_init, joint_size, constraint_links, features_joints,convert_to_kinect = get_11_joint_properties() skel_init, joint_size, constraint_links, features_joints,skel_parts, convert_to_kinect = get_13_joint_properties() # skel_init, joint_size, constraint_links, features_joints,skel_parts, convert_to_kinect = get_14_joint_properties() # skel_init, joint_size, constraint_links, features_joints,convert_to_kinect = get_15_joint_properties() constraint_values = [] for c in constraint_links: constraint_values += [np.linalg.norm(skel_init[c[0]]-skel_init[c[1]], 2)] constraint_values = np.array(constraint_values) skel_current = skel_init.copy() skel_previous = skel_current.copy() face_detector = FaceDetector() hand_template = sm.imread('/Users/colin/Desktop/fist.png')[:,:,2] hand_template = (255 - hand_template)/255. if height == 240: hand_template = cv2.resize(hand_template, (10,10)) else: hand_template = cv2.resize(hand_template, (20,20)) frame_count = 0 if get_color and height==240: cam.next(220) accuracy_measurements = {'overall':[], 'per_joint':[]} # Video writer # print '1' video_writer = cv2.VideoWriter("/Users/colin/Desktop/test.avi", cv2.cv.CV_FOURCC('M','J','P','G'), 15, (320,240)) # print '1' # embed() while cam.next(1) and frame_count < n_frames: print "" print "Frame #{0:d}".format(frame_count) # Get rid of bad skeletons if type(cam.users)==dict: cam_skels = [np.array(cam.users[s]['jointPositions'].values()) for s in cam.users] else: cam_skels = [np.array(s) for s in cam.users] cam_skels = [s for s in cam_skels if np.all(s[0] != -1)] # Check for skeletons # if len(cam_skels) == 0: # continue # Apply mask to image mask = cam.get_person() > 0 # if mask is False: if 1: if len(cam_skels) > 0: # cam.colorIm = display_skeletons(cam.colorIm[:,:,2], skel_msr_im, (255,), skel_type='Kinect')[:,:,None] cam.colorIm[:,:,1] = display_skeletons(cam.colorIm[:,:,2], skel2depth(cam_skels[0], cam.depthIm.shape), (255,), skel_type='Kinect') ## Max P=31 for LBPs becuase of datatype # tmp = local_binary_pattern(-cam.depthIm, 1, 10)#*(cam.foregroundMask>0) # embed() # tmp = local_occupancy_pattern(cam.depthIm, 31, 20, px_diff_thresh=100)*(cam.foregroundMask>0) # cv2.imshow("LBP", np.abs(tmp.astype(np.float))/float(tmp.max())) cam.colorIm = cam.colorIm[:,:,[0,2,1]] cam.visualize() continue # Anonomize # c_masked = cam.colorIm*mask[:,:,None] # d_masked = cam.depthIm*mask # c_masked_neg = cam.colorIm*(-mask[:,:,None]) im_depth = cam.depthIm if get_color: im_color = cam.colorIm im_color *= mask[:,:,None] im_color = np.ascontiguousarray(im_color) im_color = im_color[:,:,[2,1,0]] if len(cam_skels) > 0: skel_msr_xyz = cam_skels[0] skel_msr_im = skel2depth(cam_skels[0], cam.depthIm.shape) box = nd.find_objects(mask)[0] d = 20 box = (slice(np.maximum(box[0].start-d, 0), \ np.minimum(box[0].stop+d, height-1)), \ slice(np.maximum(box[1].start-d, 0), \ np.minimum(box[1].stop+d, width-1))) # Face and skin detection if get_color: face_detector.run(im_color[box]) im_skin = rgb2lab(cam.colorIm[box].astype(np.int16))[:,:,1] # im_skin = skimage.exposure.equalize_hist(im_skin) # im_skin = skimage.exposure.rescale_intensity(im_skin, out_range=[0,1]) im_skin *= im_skin > face_detector.min_threshold im_skin *= im_skin < face_detector.max_threshold # im_skin *= face_detector>.068 skin_match_c = nd.correlate(im_skin, hand_template) # Display Predictions - Color Based matching optima = peak_local_max(skin_match_c, min_distance=20, num_peaks=3, exclude_border=False) # Visualize if len(optima) > 0: optima_values = skin_match_c[optima[:,0], optima[:,1]] optima_thresh = np.max(optima_values) / 2 optima = optima.tolist() for i,o in enumerate(optima): if optima_values[i] < optima_thresh: optima.pop(i) break joint = np.array(o) + [box[0].start, box[1].start] circ = np.array(circle(joint[0],joint[1], 5)).T circ = circ.clip([0,0], [height-1, width-1]) cam.colorIm[circ[:,0], circ[:,1]] = (0,120 - 30*i,0)#(255*(i==0),255*(i==1),255*(i==2)) markers = optima # ---------------- Tracking Algorithm ---------------- # ---- Preprocessing ---- if get_color: im_pos = rgbIm2PosIm(cam.depthIm*mask)[box] * mask[box][:,:,None] else: im_pos = cam.posIm[box] cam.depthIm[cam.depthIm==4500] = 0 im_pos_mean = np.array([ im_pos[:,:,0][im_pos[:,:,2]!=0].mean(), im_pos[:,:,1][im_pos[:,:,2]!=0].mean(), im_pos[:,:,2][im_pos[:,:,2]!=0].mean() ], dtype=np.int16) # Zero-center if skel_current[0,2] == 0: skel_current += im_pos_mean skel_previous += im_pos_mean # Calculate Geodesic Extrema extrema = geodesic_extrema_MPI(im_pos, iterations=15, visualize=False) if len(extrema) > 0: for i,o in enumerate(extrema): joint = np.array(o) + [box[0].start, box[1].start] circ = np.array(circle(joint[0],joint[1], 5)).T circ = circ.clip([0,0], [height-1, width-1]) cam.colorIm[circ[:,0], circ[:,1]] = (0,0,200-10*i) # Calculate Z-surface surface_map = nd.distance_transform_edt(-mask[box], return_distances=False, return_indices=True) # Only sample some of the points if 1: mask_interval = 1 feature_radius = 10 else: mask_interval = 3 feature_radius = 2 # Modify the box wrt the sampling box = (slice(box[0].start, box[0].stop, mask_interval),slice(box[1].start, box[1].stop, mask_interval)) im_pos_full = im_pos.copy() im_pos = im_pos[::mask_interval,::mask_interval] box_height, box_width,_ = im_pos.shape skel_img_box = world2rgb(skel_current, cam.depthIm.shape) - [box[0].start, box[1].start, 0] skel_img_box = skel_img_box.clip([0,0,0], [im_pos.shape[0]-1, im_pos.shape[1]-1, 9999]) feature_width = feature_radius*2+1 all_features = [face_detector.face_position, optima, extrema] total_feature_count = np.sum([len(f) for f in all_features]) # Loop through the rest of the constraints for _ in range(1): # ---- (Step 1A) Find feature coordespondences ---- color_feature_displacement = feature_joint_displacements(skel_current, im_pos, all_features[1], features_joints[1], distance_thresh=500) depth_feature_displacement = feature_joint_displacements(skel_current, im_pos, all_features[2], features_joints[2], distance_thresh=500) # Alternative method: use kdtree ## Calc euclidian distance between each pixel and all joints px_corr = np.zeros([im_pos.shape[0], im_pos.shape[1], len(skel_current)]) for i,s in enumerate(skel_current): px_corr[:,:,i] = np.sqrt(np.sum((im_pos - s)**2, -1))# / joint_size[i]**2 ## Handle occlusions by argmax'ing over set of skel parts # visible_configurations = list(it.product([0,1], repeat=5))[1:] visible_configurations = [ #[0,1,1,1,1], #[1,0,0,0,0], [1,1,1,1,1] ] px_visibility_label = np.zeros([im_pos.shape[0], im_pos.shape[1], len(visible_configurations)], dtype=np.uint8) visible_scores = np.ones(len(visible_configurations))*np.inf # Try each occlusion configuration set for i,v in enumerate(visible_configurations): visible_joints = list(it.chain.from_iterable(skel_parts[np.array(v)>0])) px_visibility_label[:,:,i] = np.argmin(px_corr[:,:,visible_joints], -1)#.reshape([im_pos.shape[0], im_pos.shape[1]]) visible_scores[i] = np.min(px_corr[:,:,visible_joints], -1).sum() # Choose best occlusion configuration occlusion_index = np.argmin(visible_scores) occlusion_configuration = visible_configurations[occlusion_index] occlusion_set = list(it.chain.from_iterable(skel_parts[np.array(visible_configurations[occlusion_index])>0])) # Choose label for pixels based on occlusion configuration px_label = px_visibility_label[:,:,occlusion_index]*mask[box] px_label_flat = px_visibility_label[:,:,occlusion_index][mask[box]].flatten() visible_joints = [1 if x in occlusion_set else 0 for x in range(len(skel_current))] # Project distance to joint's radius px_joint_displacement = skel_current[px_label_flat] - im_pos[mask[box]] px_joint_magnitude = np.sqrt(np.sum(px_joint_displacement**2,-1)) joint_mesh_pos = skel_current[px_label_flat] + px_joint_displacement*(joint_size[px_label_flat]/px_joint_magnitude)[:,None] px_joint_displacement = joint_mesh_pos - im_pos[mask[box]] # Calc the correspondance change in position for each joint correspondence_displacement = np.zeros([len(skel_current), 3]) ii = 0 for i,_ in enumerate(skel_current): if i in occlusion_set: labels = px_label_flat==i correspondence_displacement[i] = np.mean(px_joint_displacement[px_label_flat==ii], 0) # correspondence_displacement[ii] = np.sum(px_joint_displacement[px_label_flat==ii], 0) ii+=1 correspondence_displacement = np.nan_to_num(correspondence_displacement) # ---- (Step 2) Update pose state, x ---- lambda_p = .0 lambda_c = .3 lambda_cf = .3 lambda_df = .0 skel_prev_difference = (skel_current - skel_previous) # embed() skel_current = skel_previous \ + lambda_p * skel_prev_difference \ - lambda_c * correspondence_displacement\ - lambda_cf * color_feature_displacement\ - lambda_df * depth_feature_displacement # ---- (Step 3) Add constraints ---- # A: Link lengths / geometry skel_current = link_length_constraints(skel_current, constraint_links, constraint_values, alpha=.5) skel_current = geometry_constraints(skel_current, joint_size, alpha=0.5) # skel_current = collision_constraints(skel_current, constraint_links) skel_img_box = (world2rgb(skel_current, cam.depthIm.shape) - [box[0].start, box[1].start, 0])/mask_interval skel_img_box = skel_img_box.clip([0,0,0], [cam.depthIm.shape[0]-1, cam.depthIm.shape[1]-1, 9999]) # B: Ray-cast constraints skel_current, skel_img_box = ray_cast_constraints(skel_current, skel_img_box, im_pos_full, surface_map, joint_size) # # Map back from mask to image skel_img = skel_img_box + [box[0].start, box[1].start, 0] # Update for next round skel_previous = skel_current.copy() # skel_previous = skel_init.copy() # ---------------------Accuracy -------------------------------------- # Compute accuracy wrt standard Kinect data # skel_im_error = skel_msr_im[:,[1,0]] - skel_img[[0,2,3,4,5,6,7,8,9,10,11,12,13,14],:2] skel_current_kinect = convert_to_kinect(skel_current) try: skel_msr_im_box = np.array([skel_msr_im[:,1]-box[0].start,skel_msr_im[:,0]-box[1].start]).T.clip([0,0],[box_height-1, box_width-1]) skel_xyz_error = im_pos[skel_msr_im_box[:,0],skel_msr_im_box[:,1]] - skel_current_kinect#skel_current[[0,2,3,4,5,6,7,8,9,10,11,12],:] skel_l2 = np.sqrt(np.sum(skel_xyz_error**2, 1)) # print skel_l2 skel_correct = np.nonzero(skel_l2 < 150)[0] accuracy_measurements['per_joint'] += [skel_l2] accuracy_measurements['overall'] += [len(skel_correct)/float(len(skel_current_kinect))*100] print "{0:0.2f}% joints correct".format(len(skel_correct)/float(len(skel_current_kinect))*100) print "Overall accuracy: ", np.mean(accuracy_measurements['overall']) except: pass print "Visible:", visible_joints # ----------------------Visualization------------------------------------- # l = line(skel_img_box[joint][0], skel_img_box[joint][1], features[feat][0], features[feat][1]) # skimage.draw.set_color(cam.colorIm[box], l, (255,255,255)) # Add circles to image cam.colorIm = np.ascontiguousarray(cam.colorIm) if 0:#get_color: cam.colorIm = display_skeletons(cam.colorIm, skel_img[:,[1,0,2]]*np.array(visible_joints)[:,None], (0,255,), skel_type='Other', skel_contraints=constraint_links) for i,s in enumerate(skel_img): # if i not in skel_correct: if i not in occlusion_set: c = circle(s[0], s[1], 5) cam.colorIm[c[0], c[1]] = (255,0,0) # cam.colorIm = display_skeletons(cam.colorIm, world2rgb(skel_init+im_pos_mean, [240,320])[:,[1,0]], skel_type='Other', skel_contraints=constraint_links) if 1: if len(face_detector.face_position) > 0: for (x, y) in face_detector.face_position: pt1 = (int(y)+box[1].start-15, int(x)+box[0].start-15) pt2 = (pt1[0]+int(15), pt1[1]+int(15)) cv2.rectangle(cam.colorIm, pt1, pt2, (255, 0, 0), 3, 8, 0) if len(cam_skels) > 0: # cam.colorIm = display_skeletons(cam.colorIm[:,:,2], skel_msr_im, (255,), skel_type='Kinect')[:,:,None] cam.colorIm[:,:,1] = display_skeletons(cam.colorIm[:,:,2], skel_msr_im, (255,), skel_type='Kinect') cam.visualize() cam.depthIm = local_binary_pattern(cam.depthIm*cam.foregroundMask, 50, 10) cv2.imshow("Depth", cam.depthIm/float(cam.depthIm.max())) # cam2.visualize() # embed() # 3D Visualization if 0: from mayavi import mlab # figure = mlab.figure(1, bgcolor=(0,0,0), fgcolor=(1,1,1)) figure = mlab.figure(1, bgcolor=(1,1,1)) figure.scene.disable_render = True mlab.clf() mlab.view(azimuth=-45, elevation=45, roll=0, figure=figure) mlab.points3d(-skel_current[:,1], -skel_current[:,0], skel_current[:,2], scale_factor=100., color=(.5,.5,.5)) for c in constraint_links: x = np.array([skel_current[c[0]][0], skel_current[c[1]][0]]) y = np.array([skel_current[c[0]][1], skel_current[c[1]][1]]) z = np.array([skel_current[c[0]][2], skel_current[c[1]][2]]) mlab.plot3d(-y,-x,z, tube_radius=25., color=(1,0,0)) figure.scene.disable_render = False # 3-panel view if 0: subplot(2,2,1) scatter(skel_current[:,1], skel_current[:,2]); for i,c in enumerate(constraint_links): plot([skel_current[c[0],1], skel_current[c[1],1]],[skel_current[c[0],2], skel_current[c[1],2]]) axis('equal') subplot(2,2,3) scatter(skel_current[:,1], -skel_current[:,0]); for i,c in enumerate(constraint_links): plot([skel_current[c[0],1], skel_current[c[1],1]],[-skel_current[c[0],0], -skel_current[c[1],0]]) axis('equal') subplot(2,2,4) scatter(skel_current[:,2], -skel_current[:,0]); for i,c in enumerate(constraint_links): plot([skel_current[c[0],2], skel_current[c[1],2]],[-skel_current[c[0],0], -skel_current[c[1],0]]) axis('equal') # show() # ------------------------------------------------------------ video_writer.write(cam.colorIm[:,:,[2,1,0]]) frame_count+=1 print 'Done'
def main(anonomization=False): # Setup kinect data player # cam = KinectPlayer(base_dir='./', bg_subtraction=False, get_depth=True, get_color=True, get_skeleton=False) # actions = [2, 5, 7] actions = [2] actions_labels = ['JumpingJacks', 'Waving', 'Clapping'] action = 'JumpingJacks' subjects = range(1, 10) cam = MHADPlayer(base_dir='/Users/colin/Data/BerkeleyMHAD/', kinect=1, actions=actions, subjects=subjects, reps=[1], get_depth=True, get_color=True, get_skeleton=True, fill_images=False) if anonomization: ''' bg_type can be: 'box'[param=max_depth] 'static'[param=background] 'mean' 'median' 'adaptive_mog' See BasePlayer for more details ''' cam.set_bg_model(bg_type='box', param=2600) # Test HOG pedestrian detector # cv2.HOGDescriptor_getDefaultPeopleDetector() # imshow(color[people]) body_data = [[]] framerate = 1 prev_n = len(cam.kinect_folder_names) action_idx = 0 # embed() while cam.next(framerate): if len(cam.kinect_folder_names) != prev_n: # action_idx += 1 body_data += [[]] prev_n = len(cam.kinect_folder_names) body_data[-1] += [cam.users_uv[0]] continue if 0: people_all = list( cv.HOGDetectMultiScale(cv.fromarray( cam.colorIm.mean(-1).astype(np.uint8)), cv.CreateMemStorage(0), hit_threshold=-1.5)) print len(people_all), " people detected" for i, people in enumerate(people_all): # embed() try: print people # tmp = cam.colorIm[people[0][0]:people[0][0]+people[1][0], people[0][1]:people[0][1]+people[1][1]] # tmp = cam.colorIm[people[0][1]:people[0][1]+people[1][1], people[0][0]:people[0][0]+people[1][0]] cam.colorIm[people[0][1]:people[0][1] + people[1][1], people[0][0]:people[0][0] + people[1][0]] += 50 # cv2.imshow("Body2 "+str(i), tmp) # subplot(4, len(people_all)/4, i + 1) # imshow(tmp) # tmp = cam.colorIm[people[0][0]:people[0][0]+people[0][1], people[1][0]:people[1][0]+people[1][1]] # cv2.imshow("Body1 "+str(i), tmp) # tmp = cam.colorIm[people[1][0]:people[1][0]+people[1][1], people[0][0]:people[0][0]+people[0][1]] # print tmp # cv2.imshow("Body "+str(i), tmp) except: pass # show() if anonomization and cam.mask is not None: mask = (sm.imresize(cam.mask, [480, 640]) > 0).copy() mask[40:170, 80:140] = True # person in background px = draw.circle(145, 285, 40) mask[px] = True # Version 1 - Blur + Circle color = cam.colorIm.copy() color[mask] = cv2.GaussianBlur(color, (29, 29), 50)[mask] subplot(1, 3, 1) imshow(color) # Version 2 - Noise on mask color = cam.colorIm.copy() noise_key = np.random.randint(0, 255, cam.colorIm.shape).astype(np.uint8) color[mask] = color[mask] + noise_key[mask] subplot(1, 3, 2) imshow(color) # Version 3 - All noise color = cam.colorIm.copy() color = color + noise_key subplot(1, 3, 3) imshow(color) show() # cam.colorIm = cam.colorIm[:,:,[2,1,0]] # cam.colorIm *= mask[:,:,None] # cam.visualize(color=True, depth=True, text=True, colorize=True, depth_bounds=[0,4000]) # cam.visualize() cam.colorIm = display_skeletons(cam.colorIm, cam.users_uv[0], skel_type='Kinect', color=(0, 255, 0)) cam.visualize() body_data[actions_labels[action_idx]] += [cam.users_uv[0]] # cam.visualize(color=True, depth=True, text=False, colorize=False, depth_bounds=None) print 'Done' # Pause at the end # embed() import scipy.io as si si.matlab.savemat(action + ".mat", {'data': body_data})
def main(visualize=False, learn=False): # Init both cameras # fill = True fill = False get_color = True cam = KinectPlayer(base_dir='./', device=1, bg_subtraction=True, get_depth=True, get_color=get_color, get_skeleton=True, fill_images=fill) cam2 = KinectPlayer(base_dir='./', device=2, bg_subtraction=True, get_depth=True, get_color=get_color, get_skeleton=True, fill_images=fill) # Transformation matrix from first to second camera data = pickle.load(open("Registration.dat", 'r')) transform_c1_to_c2 = data['transform'] # Get random forest parameters if learn: rf = RFPose(offset_max=100, feature_count=300) else: rf = RFPose() rf.load_forest() ii = 0 # cam.next(200) all_joint_ims_z = [] all_joint_ims_c = [] while cam.next() and ii < 200: # Update frames cam2.sync_cameras(cam) if ii % 10 != 0: ii += 1 continue # Transform skels from cam1 to cam2 cam_skels = [ np.array(cam.users[s]['jointPositions'].values()) for s in cam.users.keys() ] # Get rid of bad skels cam_skels = [s for s in cam_skels if np.all(s[0] != -1)] if len(cam_skels) == 0: continue ii += 1 # Save images if 1: joint_ims_z = [] joint_ims_c = [] dx = 10 skel_tmp = skel2depth(cam_skels[0], [240, 320]) for j_pos in skel_tmp: # embed() joint_ims_z += [ cam.depthIm[j_pos[0] - dx:j_pos[0] + dx, j_pos[1] - dx:j_pos[1] + dx] ] joint_ims_c += [ cam.colorIm[j_pos[0] - dx:j_pos[0] + dx, j_pos[1] - dx:j_pos[1] + dx] ] if len(joint_ims_z) > 0: all_joint_ims_z += [joint_ims_z] all_joint_ims_c += [joint_ims_c] if 0: cam2_skels = transform_skels(cam_skels, transform_c1_to_c2, 'image') try: depth = cam2.get_person() if learn: rf.add_frame(depth, cam2_skels[0]) else: rf.infer_pose(depth) if visualize: cam2.depthIm = display_skeletons(cam2.depthIm, cam2_skels[0], (5000, ), skel_type='Low') skel1 = kinect_to_msr_skel( skel2depth(cam_skels[0], [240, 320])) cam.depthIm = display_skeletons(cam.depthIm, skel1, (5000, ), skel_type='Low') cam.visualize() cam2.visualize() except: pass embed() if learn: print "Starting forest" rf.learn_forest() print 'Done'