def inference_video(video_path, detector_2d):
    """
    Do image -> 2d points -> 3d points to video.
    :param detector_2d: used 2d joints detector. Can be {alpha_pose, hr_pose}
    :param video_path: relative to outputs
    :return: None
    """
    args = parse_args()
    args.detector_2d = detector_2d

    dir_name = os.path.dirname(video_path)
    dir_name_split = dir_name[:dir_name.rfind('/')]
    new_dir_name = os.path.join(dir_name_split, 'outputvideo')
    basename = os.path.basename(video_path)
    video_name = basename[:basename.rfind('.')]
    args.viz_video = video_path
    # args.viz_output = f'{dir_name}/{args.detector_2d}_{video_name}.mp4'
    args.viz_output = f'{new_dir_name}/{args.detector_2d}_{video_name}.mp4'
    # args.viz_limit = 20
    # args.input_npz = 'outputs/alpha_pose_dance/dance.npz'
    args.evaluate = 'pretrained_h36m_detectron_coco.bin'

    with Timer(video_path):
        main(args)
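# Hypothetical usage sketch (the path below is illustrative, not part of the repo):
# the video is expected to sit somewhere under `outputs/`, and the rendered result is
# written to an `outputvideo` directory that is a sibling of the video's folder, e.g.
#     inference_video('outputs/kobe/video/kobe.mp4', 'alpha_pose')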
def sgdRMSPropNestorov(w0, x, y, f, grad, learning_rate=0.01, batch_size=100, max_epochs=1000,
                       alpha=0.9, delta=1e-6, ro=0.9, eps=1e-6, shuffle=False, rng=None):
    tm = Timer()
    n = x.shape[0]
    n_batches = get_num_batches(n, batch_size)
    w = np.copy(w0)
    v = np.zeros(len(w0), dtype=w0.dtype)  # velocity
    r = np.zeros(len(w0), dtype=w0.dtype)  # gradient accumulation variable
    epoch_losses = np.zeros(max_epochs, dtype=float)
    epoch = 0
    w_best = np.copy(w0)
    loss_best = np.inf
    if n <= batch_size:
        # no need to shuffle since all instances will be used up in one batch
        shuffle = False
    if shuffle:
        shuffled_idxs = np.arange(n)
        if rng is None:
            np.random.shuffle(shuffled_idxs)
        else:
            rng.shuffle(shuffled_idxs)
    else:
        shuffled_idxs = None
    prev_loss = np.inf
    while epoch < max_epochs:
        losses = np.zeros(n_batches, dtype=float)
        for i in range(n_batches):
            xi, yi = get_sgd_batch(x, y, i, batch_size, shuffled_idxs=shuffled_idxs)
            tw = w + alpha * v
            g = grad(tw, xi, yi)
            r[:] = ro * r + (1 - ro) * np.multiply(g, g)
            dw_scale = (learning_rate / (np.sqrt(delta + r)))
            v = alpha * v - np.multiply(dw_scale, g)
            w[:] = w + v
            losses[i] = f(w, xi, yi)
        loss = np.mean(losses)
        if np.isnan(loss):
            logger.debug("loss is nan")
            logger.debug("|w|=%f" % w.dot(w))
            raise ArithmeticError("loss is nan in sgd")
        epoch_losses[epoch] = loss
        if loss < loss_best:
            # pocket algorithm
            np.copyto(w_best, w)
            loss_best = loss
        epoch += 1
        if (loss < eps or np.abs(loss - prev_loss) < eps
                or avg_loss_check(epoch_losses, epoch, n=20, eps=eps)):
            break
        prev_loss = loss
    debug_log_sgd_losses("sgdRMSPropNestorov", epoch_losses, epoch, n=20, timer=tm)
    # logger.debug("epochs: %d" % epoch)
    # logger.debug("net losses:")
    # logger.debug("epoch losses:\n%s" % str(epoch_losses[0:epoch]))
    # logger.debug("best loss: %f" % loss_best)
    return w_best
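# Note on the update above: the difference from plain RMSProp is the Nesterov-style
# lookahead, i.e. the gradient is evaluated at the interim point tw = w + alpha * v
# rather than at w; the accumulator r rescales the step by 1/sqrt(delta + r), and the
# velocity v carries the momentum from one mini-batch to the next.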
def sgd(w0, x, y, f, grad, learning_rate=0.01, batch_size=100, max_epochs=1000,
        eps=1e-6, shuffle=False, rng=None):
    tm = Timer()
    n = x.shape[0]
    n_batches = get_num_batches(n, batch_size)
    w = np.copy(w0)
    epoch_losses = np.zeros(max_epochs, dtype=float)
    epoch = 0
    w_best = np.copy(w0)
    loss_best = np.inf
    if n <= batch_size:
        shuffle = False  # no need to shuffle since all instances will be used up in one batch
    if shuffle:
        shuffled_idxs = np.arange(n)
        if rng is None:
            np.random.shuffle(shuffled_idxs)
        else:
            rng.shuffle(shuffled_idxs)
    else:
        shuffled_idxs = None
    while epoch < max_epochs:
        losses = np.zeros(n_batches, dtype=float)
        for i in range(n_batches):
            xi, yi = get_sgd_batch(x, y, i, batch_size, shuffled_idxs=shuffled_idxs)
            if xi.shape[0] == 0:
                raise ValueError("Batch size of 0")
            g = grad(w, xi, yi)
            w -= learning_rate * g
            losses[i] = f(w, xi, yi)
            if False:  # debug-only check for diverging gradients (disabled)
                g_norm = g.dot(g)
                if np.isnan(g_norm) or np.isinf(g_norm):
                    logger.debug("|grad|=%f, i=%d/%d, epoch:%d" % (g.dot(g), i + 1, n_batches, epoch))
                    logger.debug("|w0|=%f" % w0.dot(w0))
                    raise ArithmeticError("grad is nan/inf in sgd")
        loss = np.mean(losses)
        if np.isnan(loss):
            logger.debug("loss is nan")
            logger.debug("|w|=%f" % w.dot(w))
            raise ArithmeticError("loss is nan in sgd")
        epoch_losses[epoch] = loss
        if loss < loss_best:
            # pocket algorithm
            np.copyto(w_best, w)
            loss_best = loss
        epoch += 1
        if loss < eps:
            break
    debug_log_sgd_losses("sgd", epoch_losses, epoch, n=20, timer=tm)
    # logger.debug("epochs: %d" % epoch)
    # logger.debug("net losses:")
    # logger.debug("epoch losses:\n%s" % str(epoch_losses[0:epoch]))
    # logger.debug("best loss: %f" % loss_best)
    return w_best
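# Minimal usage sketch for sgd() on a least-squares problem. The quadratic loss and
# its gradient below are illustrative assumptions, not part of this module; only the
# sgd() signature above is taken as given.
def _example_sgd_least_squares():
    rng = np.random.RandomState(42)
    x = rng.randn(500, 3)                        # 500 instances, 3 features
    w_true = np.array([1.5, -2.0, 0.5])
    y = x.dot(w_true) + 0.01 * rng.randn(500)    # noisy linear targets

    def f(w, xi, yi):
        # mean squared error on a mini-batch
        return np.mean((xi.dot(w) - yi) ** 2)

    def grad(w, xi, yi):
        # gradient of the mean squared error w.r.t. w
        return 2.0 * xi.T.dot(xi.dot(w) - yi) / xi.shape[0]

    w0 = np.zeros(3, dtype=float)
    w_hat = sgd(w0, x, y, f, grad, learning_rate=0.01, batch_size=100,
                max_epochs=1000, shuffle=True, rng=rng)
    return w_hat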
def inference_video(video_path):
    """
    Do image -> 2d points -> 3d points to video.
    :param video_path: relative to outputs
    """
    args = parse_args()

    dir_name = os.path.dirname(video_path)
    basename = os.path.basename(video_path)
    video_name = basename[:basename.rfind('.')]
    args.viz_video = video_path
    args.viz_output = '{0}/o_{1}.mp4'.format(dir_name, video_name)
    args.basename = video_name
    args.evaluate = 'pretrained_h36m_detectron_coco.bin'

    with Timer(video_path):
        main(args)
def cal_pose_iou_dm_speed_up(all_cors, pose1, pose2, num, mag):
    # with Timer('Matrix calculation'): 0.0006s
    poses_iou = []
    mag_matrix = [-mag, mag, -mag, mag]
    pose1_boxes = np.hstack((pose1, pose1))
    pose2_boxes = np.hstack((pose2, pose2))
    pose1_boxes[:, [2, 1]] = pose1_boxes[:, [1, 2]]
    pose2_boxes[:, [2, 1]] = pose2_boxes[:, [1, 2]]
    pose1_boxes += mag_matrix
    pose2_boxes += mag_matrix
    with Timer('find two pose box iou', show=False):
        for pose1_box, pose2_box in zip(pose1_boxes, pose2_boxes):
            poses_iou.append(find_two_pose_box_iou(pose1_box, pose2_box, all_cors))
    return np.mean(heapq.nlargest(num, poses_iou))
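# Worked trace of the box construction above for a single keypoint (illustrative numbers):
# a keypoint (x, y) = (100, 40) with mag = 10 is first duplicated to [x, y, x, y],
# the column swap turns it into [x, x, y, y] = [100, 100, 40, 40], and adding
# [-mag, mag, -mag, mag] gives the per-joint box [90, 110, 30, 50], i.e.
# [xmin, xmax, ymin, ymax]. The per-joint box IoUs are then averaged over the
# `num` largest values.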
def cal_one_matching(all_cors, all_pids_fff, all_pids_info, cost_matrix, mag, num, pid1,
                     track_vid_next_fid, weights, weights_fff):
    box1_pos = all_pids_info[pid1]['box_pos']
    box1_region_ids = find_region_cors_last(box1_pos, all_cors)
    box1_score = all_pids_info[pid1]['box_score']
    box1_pose = all_pids_info[pid1]['box_pose_pos']
    box1_fff = all_pids_fff[pid1]
    row = np.zeros(cost_matrix.shape[1])
    # print(f"Inner for loop :{track_vid_next_fid['num_boxes']}", end=' ')
    # with Timer(f"Inner for loop: {track_vid_next_fid['num_boxes']}"):
    for pid2 in range(1, track_vid_next_fid['num_boxes'] + 1):
        box2_pos = track_vid_next_fid[pid2]['box_pos']
        # with Timer('find_region_cors_next'):
        box2_region_ids = find_region_cors_next(box2_pos, all_cors)
        box2_score = track_vid_next_fid[pid2]['box_score']
        box2_pose = track_vid_next_fid[pid2]['box_pose_pos']
        # with Timer('Outer calculate'):
        inter = box1_region_ids & box2_region_ids
        union = box1_region_ids | box2_region_ids
        dm_iou = len(inter) / (len(union) + 0.00001)
        # with Timer('cal_bbox_iou'):
        box_iou = cal_bbox_iou(box1_pos, box2_pos)
        with Timer('cal_pose_iou_dm', show=False):
            pose_iou_dm = cal_pose_iou_dm_speed_up(all_cors, box1_pose, box2_pose, num, mag)
        # with Timer('cal_pose_iou'):
        pose_iou = cal_pose_iou(box1_pose, box2_pose, num, mag)
        # with Timer('cal_grade'):
        if box1_fff:
            grade = cal_grade([dm_iou, box_iou, pose_iou_dm, pose_iou, box1_score, box2_score],
                              weights)
        else:
            grade = cal_grade([dm_iou, box_iou, pose_iou_dm, pose_iou, box1_score, box2_score],
                              weights_fff)
        row[pid2 - 1] = grade
    return row
def sgdAdam(w0, x, y, f, grad, learning_rate=0.01, batch_size=100, max_epochs=1000,
            delta=1e-8, ro1=0.9, ro2=0.999, eps=1e-6, shuffle=False, rng=None):
    tm = Timer()
    n = x.shape[0]
    n_batches = get_num_batches(n, batch_size)
    w = np.copy(w0)
    s = np.zeros(len(w0), dtype=w0.dtype)      # first moment variable
    s_hat = np.zeros(len(w0), dtype=w0.dtype)  # first moment corrected for bias
    r = np.zeros(len(w0), dtype=w0.dtype)      # second moment variable
    r_hat = np.zeros(len(w0), dtype=w0.dtype)  # second moment corrected for bias
    t = 0  # time step
    epoch_losses = np.zeros(max_epochs, dtype=float)
    epoch = 0
    w_best = np.copy(w0)
    loss_best = np.inf
    if n <= batch_size:
        # no need to shuffle since all instances will be used up in one batch
        shuffle = False
    if shuffle:
        shuffled_idxs = np.arange(n)
        if rng is None:
            np.random.shuffle(shuffled_idxs)
        else:
            rng.shuffle(shuffled_idxs)
    else:
        shuffled_idxs = None
    prev_loss = np.inf
    while epoch < max_epochs:
        losses = np.zeros(n_batches, dtype=float)
        for i in range(n_batches):
            xi, yi = get_sgd_batch(x, y, i, batch_size, shuffled_idxs=shuffled_idxs)
            g = grad(w, xi, yi)
            t += 1
            s[:] = ro1 * s + (1 - ro1) * g
            r[:] = ro2 * r + (1 - ro2) * np.multiply(g, g)
            # correct bias in first moment
            s_hat[:] = (1. / (1 - ro1 ** t)) * s
            # correct bias in second moment
            r_hat[:] = (1. / (1 - ro2 ** t)) * r
            dw_scale = (learning_rate / (np.sqrt(delta + r_hat)))
            dw = np.multiply(dw_scale, s_hat)
            w[:] = w - dw
            losses[i] = f(w, xi, yi)
        loss = np.mean(losses)
        if np.isnan(loss):
            logger.debug("loss is nan")
            logger.debug("|w|=%f" % w.dot(w))
            raise ArithmeticError("loss is nan in sgd")
        epoch_losses[epoch] = loss
        if loss < loss_best:
            # pocket algorithm
            np.copyto(w_best, w)
            loss_best = loss
        epoch += 1
        if (loss < eps or np.abs(loss - prev_loss) < eps
                or avg_loss_check(epoch_losses, epoch, n=20, eps=eps)):
            break
        prev_loss = loss
    debug_log_sgd_losses("sgdAdam", epoch_losses, epoch, n=20, timer=tm)
    # logger.debug("epochs: %d" % epoch)
    # logger.debug("net losses:")
    # logger.debug("epoch losses:\n%s" % str(epoch_losses[0:epoch]))
    # logger.debug("best loss: %f" % loss_best)
    return w_best
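# The loop above implements the Adam recurrences as written in this module
# (ro1, ro2 are the exponential-decay rates, delta the numerical-stability constant):
#   s_t   = ro1 * s_{t-1} + (1 - ro1) * g_t           (first moment)
#   r_t   = ro2 * r_{t-1} + (1 - ro2) * g_t ** 2      (second moment)
#   s_hat = s_t / (1 - ro1 ** t),  r_hat = r_t / (1 - ro2 ** t)   (bias correction)
#   w    <- w - learning_rate * s_hat / sqrt(delta + r_hat)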
def __init__(self):
    self.log = get_logger("Base")
    self.timer = Timer(self.log)
def main(args):
    """
    See function track for the args information.
    """
    link_len = args.link
    weights = [1, 2, 1, 2, 0, 0]
    weights_fff = [0, 1, 0, 1, 0, 0]
    drop = args.drop
    num = args.num
    mag = args.mag
    match_thres = args.match
    notrack_json = args.in_json
    tracked_json = args.out_json
    image_dir = args.imgdir
    vis_dir = args.visdir

    # if json format is different from "alphapose-forvis.json" (pytorch version)
    if "forvis" not in notrack_json:
        results_forvis = {}
        last_image_name = ' '
        with open(notrack_json) as f:
            results = json.load(f)
            results = remove_irrelevant(results, 1)
            for i in range(len(results)):
                imgpath = results[i]['image_id']
                if last_image_name != imgpath:
                    results_forvis[imgpath] = []
                    results_forvis[imgpath].append({
                        'keypoints': results[i]['keypoints'],
                        'scores': results[i]['score']
                    })
                else:
                    results_forvis[imgpath].append({
                        'keypoints': results[i]['keypoints'],
                        'scores': results[i]['score']
                    })
                last_image_name = imgpath
        notrack_json = os.path.join(os.path.dirname(notrack_json), "alphapose-results-forvis.json")
        with open(notrack_json, 'w') as json_file:
            json_file.write(json.dumps(results_forvis))

    notrack = {}
    track = {}
    num_persons = 0

    # load json file without tracking information
    print("Start loading json file...\n")
    with open(notrack_json, 'r') as f:
        notrack = json.load(f)
        for img_name in tqdm(sorted(notrack.keys())):
            track[img_name] = {'num_boxes': len(notrack[img_name])}
            for bid in range(len(notrack[img_name])):
                track[img_name][bid + 1] = {}
                track[img_name][bid + 1]['box_score'] = notrack[img_name][bid]['scores']
                track[img_name][bid + 1]['box_pos'] = get_box(
                    notrack[img_name][bid]['keypoints'],
                    os.path.join(image_dir, img_name))
                track[img_name][bid + 1]['box_pose_pos'] = np.array(
                    notrack[img_name][bid]['keypoints']).reshape(-1, 3)[:, 0:2]
                track[img_name][bid + 1]['box_pose_score'] = np.array(
                    notrack[img_name][bid]['keypoints']).reshape(-1, 3)[:, -1]

    np.save(f'{args.result_dir}/notrack-bl.npy', track)
    # track = np.load(f'{args.result_dir}/notrack-bl.npy', allow_pickle=True).item()

    # tracking process
    max_pid_id = 0
    frame_list = sorted(list(track.keys()))

    print("Start pose tracking...\n")
    for idx, frame_name in enumerate(tqdm(frame_list[:-1])):
        frame_id = frame_name.split(".")[0]

        next_frame_name = frame_list[idx + 1]
        next_frame_id = next_frame_name.split(".")[0]

        # init tracking info of the first frame in one video
        if idx == 0:
            for pid in range(1, track[frame_name]['num_boxes'] + 1):
                track[frame_name][pid]['new_pid'] = pid
                track[frame_name][pid]['match_score'] = 0

        max_pid_id = max(max_pid_id, track[frame_name]['num_boxes'])
        cor_file = os.path.join(image_dir, "".join([frame_id, '_', next_frame_id, '_orb.txt']))

        # regenerate the missed pair-matching txt
        if not os.path.exists(cor_file) or os.stat(cor_file).st_size < 200:
            img1_path = os.path.join(image_dir, frame_name)
            img2_path = os.path.join(image_dir, next_frame_name)
            orb_matching(img1_path, img2_path, image_dir, frame_id, next_frame_id)

        all_cors = np.loadtxt(cor_file)

        # if there is no people in this frame, then copy the info from former frame
        if track[next_frame_name]['num_boxes'] == 0:
            track[next_frame_name] = copy.deepcopy(track[frame_name])
            continue

        cur_all_pids, cur_all_pids_fff = stack_all_pids(track, frame_list[:-1], idx, max_pid_id, link_len)
        with Timer('best_matching_hungarian'):
            match_indexes, match_scores = best_matching_hungarian(
                all_cors, cur_all_pids, cur_all_pids_fff, track[next_frame_name],
                weights, weights_fff, num, mag)

        for pid1, pid2 in match_indexes:
            if match_scores[pid1][pid2] > match_thres:
                track[next_frame_name][pid2 + 1]['new_pid'] = cur_all_pids[pid1]['new_pid']
                max_pid_id = max(max_pid_id, track[next_frame_name][pid2 + 1]['new_pid'])
                track[next_frame_name][pid2 + 1]['match_score'] = match_scores[pid1][pid2]

        # add the untracked new person
        for next_pid in range(1, track[next_frame_name]['num_boxes'] + 1):
            if 'new_pid' not in track[next_frame_name][next_pid]:
                max_pid_id += 1
                track[next_frame_name][next_pid]['new_pid'] = max_pid_id
                track[next_frame_name][next_pid]['match_score'] = 0

    np.save(f'{args.result_dir}/track-bl.npy', track)
    # track = np.load(f'{args.result_dir}/track-bl.npy').item()

    # calculate number of people
    num_persons = 0
    for fid, frame_name in enumerate(frame_list):
        for pid in range(1, track[frame_name]['num_boxes'] + 1):
            num_persons = max(num_persons, track[frame_name][pid]['new_pid'])
    print("This video contains %d people." % (num_persons))

    # export tracking result into notrack json files
    print("Export tracking results to json...\n")
    for fid, frame_name in enumerate(tqdm(frame_list)):
        for pid in range(track[frame_name]['num_boxes']):
            notrack[frame_name][pid]['idx'] = track[frame_name][pid + 1]['new_pid']
    with open(tracked_json, 'w') as json_file:
        json_file.write(json.dumps(notrack))

    if len(args.visdir) > 0:
        cmap = plt.cm.get_cmap("hsv", num_persons)
        display_pose(image_dir, vis_dir, notrack, cmap)
# super parameters
# 1. look-ahead LINK_LEN frames to find tracked human bbox
# 2. bbox_IoU(deepmatching), bbox_IoU(general), pose_IoU(deepmatching), pose_IoU(general), box1_score, box2_score
# 3. bbox_IoU(deepmatching), bbox_IoU(general), pose_IoU(deepmatching), pose_IoU(general), box1_score, box2_score(Non DeepMatching)
# 4. drop low-score(<DROP) keypoints
# 5. pick high-score(top NUM) keypoints when computing pose_IoU
# 6. box width/height around keypoint for computing pose IoU
# 7. match threshold in Hungarian Matching

# User specific parameters
video_name = os.path.basename(video_name)
video_filename = video_name[:video_name.rfind('.')]
args.imgdir = f'outputs/alpha_pose_{video_filename}/split_image'
args.result_dir = f'outputs/alpha_pose_{video_filename}'
args.in_json = f'{args.result_dir}/alphapose-results.json'
args.out_json = f'{args.result_dir}/poseflow-results.json'
args.visdir = f'{args.result_dir}/poseflow-vis'

main(args)


if __name__ == '__main__':
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"
    os.chdir('../..')
    with Timer('Track'):
        track('kobe.mp4')