def convert_app(det_seq, trk_seq, save_path):
    """Merge per-frame tracker refinements back into detection records.

    For every frame, each detection whose 2D box exactly matches a
    tracker entry inherits that entry's id, refined depth, and refined
    projected center. The detection's 3D location is then recomputed in
    camera coordinates from the (possibly updated) center and depth via
    the frame's intrinsics, and the whole sequence is saved as JSON.

    Args:
        det_seq: per-frame detection dicts with keys 'intrinsics'
            (containing 'cali') and 'prediction' (list of detections).
        trk_seq: per-frame lists of tracker dicts with keys 'det_box',
            'depth', 'id', 'x', 'y'. Assumed aligned with det_seq —
            TODO confirm both sequences cover the same frames.
        save_path: destination path for the updated JSON.
    """
    for det_frm, trk_frm in zip(det_seq, trk_seq):
        cam_calib = np.array(det_frm['intrinsics']['cali'])
        for det_out in det_frm['prediction']:
            # Default depth: z-component of the detector's 3D location.
            depth_pd = det_out['box3d']['location'][2]
            for td in trk_frm:
                # A tracker entry matches when all five 2D-box values
                # (x1, y1, x2, y2, confidence) agree exactly.
                match = np.where(
                    np.sum(bh.get_box2d_array([det_out]) == np.array(
                        td['det_box']), axis=1) == 5)[0]
                if len(match):
                    depth_pd = td['depth']
                    det_out['id'] = td['id']
                    det_out['box3d']['xc'] = td['x']
                    det_out['box3d']['yc'] = td['y']
            # In camera coordinates: back-project the projected center
            # and depth through the intrinsics.
            location = tu.imagetocamera(
                np.array([[det_out['box3d']['xc'],
                           det_out['box3d']['yc']]]),
                np.array([depth_pd]),
                cam_calib)
            det_out['box3d']['location'] = location.tolist()
    print("Saving updated tracking results with {} frames at {}...".format(
        len(det_seq), save_path))
    dump_output_json(save_path, det_seq)
def convert_app(det_placeholder, det_out):
    """Fill placeholder prediction entries with network outputs.

    Each prediction in ``det_placeholder['prediction']`` that matches a
    detector output (exact agreement on the full 2D box including
    confidence, and on the projected center) receives the corresponding
    2D box, projected 3D center, alpha, dimensions, camera-space
    location, and global orientation.

    Args:
        det_placeholder: dict with a 'prediction' list of entries, each
            holding 'box2d' and 'box3d' sub-dicts.
        det_out: dict of per-frame detector arrays ('rois_pd',
            'center_pd', 'alpha_pd', 'depth_pd', 'dim_pd', 'cam_calib').

    Returns:
        det_placeholder, updated in place.
    """
    # No detections in this frame: nothing to merge.
    if not len(det_out['rois_pd']):
        return det_placeholder

    depth_pd = det_out['depth_pd'].copy()
    # Global yaw from observation angle and horizontal image offset.
    rot_y = tu.alpha2rot_y(det_out['alpha_pd'],
                           det_out['center_pd'][:, 0] - cfg.GTA.W // 2,
                           cfg.GTA.FOCAL_LENGTH)
    # In camera coordinates
    location = tu.imagetocamera(det_out['center_pd'], depth_pd,
                                det_out['cam_calib'])

    for prediction in det_placeholder['prediction']:
        pred_box2d = bh.get_box2d_array([prediction])
        pred_cen = bh.get_cen_array([prediction])
        # Require both the full box row (5 values) and the center
        # (2 values) to match exactly.
        roi_match = np.sum(det_out['rois_pd'] == pred_box2d, axis=1) == 5
        cen_match = np.sum(det_out['center_pd'] == pred_cen, axis=1) == 2
        match = np.where(roi_match * cen_match)[0]
        if len(match) == 0:
            continue

        # reshape() assumes exactly one matching row per prediction —
        # TODO confirm detections are unique per frame.
        box = det_out['rois_pd'][match].reshape(5)
        cen = det_out['center_pd'][match].reshape(2)
        alpha = det_out['alpha_pd'][match].reshape(1)
        dim = det_out['dim_pd'][match].reshape(3)
        orient = rot_y[match].reshape(1)
        loc = location[match].reshape(3)

        prediction['box2d']['x1'] = int(box[0])
        prediction['box2d']['y1'] = int(box[1])
        prediction['box2d']['x2'] = int(box[2])
        prediction['box2d']['y2'] = int(box[3])
        prediction['box2d']['confidence'] = float(box[4])
        # 3D box projected center
        prediction['box3d']['xc'] = int(cen[0])
        prediction['box3d']['yc'] = int(cen[1])
        prediction['box3d']['alpha'] = float(alpha)
        prediction['box3d']['dimension'] = dim.tolist()
        prediction['box3d']['location'] = loc.tolist()
        prediction['box3d']['orientation'] = orient.tolist()
    return det_placeholder
def get_3d_box_from_2d(box_2d_cen, depth, rot_y, dim, cam_calib, cam_pose):
    """Lift a 2D projected box center into a 3D bounding box.

    Args:
        box_2d_cen: projected box center(s) in image coordinates.
        depth: depth value(s) associated with the center(s).
        rot_y: yaw rotation of the box.
        dim: box dimensions.
        cam_calib: camera intrinsic calibration.
        cam_pose: camera pose (accepted for interface parity; not used
            by this computation).

    Returns:
        The 3D box produced by ``tu.computeboxes``.
    """
    # Back-project the image-space center through the intrinsics into
    # camera coordinates, then build the oriented box around it.
    center_cam = tu.imagetocamera(box_2d_cen, depth, cam_calib)
    return tu.computeboxes([rot_y], dim, center_cam)