def display_data(file):
    gen = tf.python_io.tf_record_iterator(file)
    for data_i, string_record in enumerate(gen):
        result = tf.train.Example.FromString(string_record)
        features = result.features.feature

        # Maps are np.uint8 arrays. Each has a different size.
        # Wall map: 0 for free space, 255 for walls.
        map_wall = decode_image(features['map_wall'].bytes_list.value[0])

        # Door map: 0 for free space, 255 for doors.
        map_door = decode_image(features['map_door'].bytes_list.value[0])

        # Roomtype map: binary encoding of 8 possible room categories.
        # One state may belong to multiple room categories.
        map_roomtype = decode_image(
            features['map_roomtype'].bytes_list.value[0])

        # Roomid map: pixels correspond to unique room ids.
        # For overlapping rooms the higher ids overwrite the lower ids.
        map_roomid = decode_image(features['map_roomid'].bytes_list.value[0])

        # True states: (x, y, theta). x, y: pixel coordinates; theta: radians.
        # Coordinates index the map as a numpy array: map[x, y].
        true_states = features['states'].bytes_list.value[0]
        true_states = np.frombuffer(true_states, np.float32).reshape((-1, 3))

        # Odometry: each entry is true_states[i+1] - true_states[i].
        # The last row is always [0, 0, 0].
        odometry = features['odometry'].bytes_list.value[0]
        odometry = np.frombuffer(odometry, np.float32).reshape((-1, 3))

        # Observations are encoded as a list of png images.
        rgb = raw_images_to_array(list(features['rgb'].bytes_list.value))
        depth = raw_images_to_array(list(features['depth'].bytes_list.value))

        print("True states (first three)")
        print(true_states[:3])

        print("Odometry (first three)")
        print(odometry[:3])

        print("Plot map and first observation")
        # Note: when plotted as an image, the map should be transposed.
        plt.figure()
        plt.imshow(map_wall.transpose())
        plt.figure()
        plt.imshow(rgb[0])
        plt.show()

        if input("proceed?") != 'y':
            break
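# --- Hedged sketch ---
# display_data above relies on two helpers not shown in this section:
# decode_image (PNG bytes -> np.uint8 array) and raw_images_to_array
# (list of PNG byte strings -> stacked array). A minimal implementation,
# assuming the stored bytes are PNG-encoded, might look like this; the
# originals may differ. (The two-argument decode_image used by the
# detector pipelines below is a separate helper.)
import io

import numpy as np
from PIL import Image


def decode_image(raw_bytes):
    # Decode PNG-encoded bytes into a numpy array.
    return np.array(Image.open(io.BytesIO(raw_bytes)))


def raw_images_to_array(raw_list):
    # Decode a list of PNG-encoded images and stack them along axis 0.
    return np.stack([decode_image(raw) for raw in raw_list], axis=0)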
def topdown_unite_predict(detector,
                          topdown_keypoint_detector,
                          image_list,
                          keypoint_batch_size=1):
    det_timer = detector.get_timer()
    for i, img_file in enumerate(image_list):
        # Decode the image once, in advance of both det and pose prediction.
        det_timer.preprocess_time_s.start()
        image, _ = decode_image(img_file, {})
        det_timer.preprocess_time_s.end()

        if FLAGS.run_benchmark:
            results = detector.predict(
                [image], FLAGS.det_threshold, warmup=10, repeats=10)

            cm, gm, gu = get_current_memory_mb()
            detector.cpu_mem += cm
            detector.gpu_mem += gm
            detector.gpu_util += gu
        else:
            results = detector.predict([image], FLAGS.det_threshold)

        if results['boxes_num'] == 0:
            continue

        keypoint_res = predict_with_given_det(
            image, results, topdown_keypoint_detector, keypoint_batch_size,
            FLAGS.det_threshold, FLAGS.keypoint_threshold,
            FLAGS.run_benchmark)

        if FLAGS.run_benchmark:
            cm, gm, gu = get_current_memory_mb()
            topdown_keypoint_detector.cpu_mem += cm
            topdown_keypoint_detector.gpu_mem += gm
            topdown_keypoint_detector.gpu_util += gu
        else:
            if not os.path.exists(FLAGS.output_dir):
                os.makedirs(FLAGS.output_dir)
            draw_pose(img_file,
                      keypoint_res,
                      visual_thread=FLAGS.keypoint_threshold,
                      save_dir=FLAGS.output_dir)
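# --- Hedged sketch ---
# get_current_memory_mb, used for the benchmark bookkeeping above, is
# imported from elsewhere in the repo. A minimal stand-in (an assumption,
# not the repo's actual utility) could sample current-process CPU memory
# with psutil and GPU memory/utilization with pynvml:
import os

import psutil


def get_current_memory_mb(gpu_id=None):
    # Returns (cpu_mem_mb, gpu_mem_mb, gpu_util_percent).
    process = psutil.Process(os.getpid())
    cpu_mem = process.memory_info().rss / 1024. / 1024.
    gpu_mem, gpu_util = 0., 0.
    if gpu_id is not None:
        import pynvml
        pynvml.nvmlInit()
        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
        gpu_mem = meminfo.used / 1024. / 1024.
        gpu_util = pynvml.nvmlDeviceGetUtilizationRates(handle).gpu
        pynvml.nvmlShutdown()
    return cpu_mem, gpu_mem, gpu_util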
def mot_topdown_unite_predict(mot_detector,
                              topdown_keypoint_detector,
                              image_list,
                              keypoint_batch_size=1,
                              save_res=False):
    det_timer = mot_detector.get_timer()
    store_res = []
    image_list.sort()
    num_classes = mot_detector.num_classes
    for i, img_file in enumerate(image_list):
        # Decode the image once, in advance of both mot and pose prediction.
        det_timer.preprocess_time_s.start()
        image, _ = decode_image(img_file, {})
        det_timer.preprocess_time_s.end()

        if FLAGS.run_benchmark:
            mot_results = mot_detector.predict_image(
                [image], run_benchmark=True, repeats=10)

            cm, gm, gu = get_current_memory_mb()
            mot_detector.cpu_mem += cm
            mot_detector.gpu_mem += gm
            mot_detector.gpu_util += gu
        else:
            mot_results = mot_detector.predict_image([image], visual=False)

        online_tlwhs, online_scores, online_ids = mot_results[
            0]  # only supports bs=1 in the MOT model
        results = convert_mot_to_det(
            online_tlwhs[0],
            online_scores[0])  # only supports a single class for mot + pose
        if results['boxes_num'] == 0:
            continue

        keypoint_res = predict_with_given_det(image, results,
                                              topdown_keypoint_detector,
                                              keypoint_batch_size,
                                              FLAGS.run_benchmark)

        if save_res:
            save_name = img_file if isinstance(img_file, str) else i
            store_res.append([
                save_name, keypoint_res['bbox'],
                [keypoint_res['keypoint'][0], keypoint_res['keypoint'][1]]
            ])

        if FLAGS.run_benchmark:
            cm, gm, gu = get_current_memory_mb()
            topdown_keypoint_detector.cpu_mem += cm
            topdown_keypoint_detector.gpu_mem += gm
            topdown_keypoint_detector.gpu_util += gu
        else:
            if not os.path.exists(FLAGS.output_dir):
                os.makedirs(FLAGS.output_dir)
            visualize_pose(img_file,
                           keypoint_res,
                           visual_thresh=FLAGS.keypoint_threshold,
                           save_dir=FLAGS.output_dir)

    if save_res:
        """
        1) store_res: a list of image_data
        2) image_data: [imageid, rects, [keypoints, scores]]
        3) rects: list of rect [xmin, ymin, xmax, ymax]
        4) keypoints: 17 joints * [x, y, conf], 51 values in total
        5) scores: mean of all joint confidences
        """
        with open("det_keypoint_unite_image_results.json", 'w') as wf:
            json.dump(store_res, wf, indent=4)
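# --- Hedged sketch ---
# convert_mot_to_det, called above, repacks tracker output (tlwh boxes plus
# scores) into the detection-style dict consumed by predict_with_given_det.
# This is a sketch of the assumed layout ([class_id, score, x1, y1, x2, y2]
# per row, with the single class 0); the real helper may differ in details.
import numpy as np


def convert_mot_to_det(tlwhs, scores):
    num_mot = len(tlwhs)
    xyxys = np.zeros((num_mot, 4))
    for i, (x1, y1, w, h) in enumerate(tlwhs):
        # Convert (top-left, width, height) to corner coordinates.
        xyxys[i] = [x1, y1, x1 + w, y1 + h]
    results = {}
    results['boxes'] = np.concatenate(
        (np.zeros((num_mot, 1)), np.array(scores).reshape(-1, 1), xyxys),
        axis=1)
    results['boxes_num'] = np.array([num_mot])
    return results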
def predict_image(self,
                  image_list,
                  run_benchmark=False,
                  repeats=1,
                  visual=True,
                  seq_name=None):
    num_classes = self.num_classes
    image_list.sort()
    ids2names = self.pred_config.labels
    mot_results = []
    for frame_id, img_file in enumerate(image_list):
        batch_image_list = [img_file]  # bs=1 in the MOT model
        frame, _ = decode_image(img_file, {})
        if run_benchmark:
            # preprocess
            inputs = self.preprocess(batch_image_list)  # warmup
            self.det_times.preprocess_time_s.start()
            inputs = self.preprocess(batch_image_list)
            self.det_times.preprocess_time_s.end()

            # model prediction
            result_warmup = self.predict(repeats=repeats)  # warmup
            self.det_times.inference_time_s.start()
            result = self.predict(repeats=repeats)
            self.det_times.inference_time_s.end(repeats=repeats)

            # postprocess
            result_warmup = self.postprocess(inputs, result)  # warmup
            self.det_times.postprocess_time_s.start()
            det_result = self.postprocess(inputs, result)
            self.det_times.postprocess_time_s.end()

            # tracking
            if self.use_reid:
                det_result['frame_id'] = frame_id
                det_result['seq_name'] = seq_name
                det_result['ori_image'] = frame
                det_result = self.reidprocess(det_result)
            result_warmup = self.tracking(det_result)
            self.det_times.tracking_time_s.start()
            if self.use_reid:
                det_result = self.reidprocess(det_result)
            tracking_outs = self.tracking(det_result)
            self.det_times.tracking_time_s.end()
            self.det_times.img_num += 1

            cm, gm, gu = get_current_memory_mb()
            self.cpu_mem += cm
            self.gpu_mem += gm
            self.gpu_util += gu
        else:
            self.det_times.preprocess_time_s.start()
            inputs = self.preprocess(batch_image_list)
            self.det_times.preprocess_time_s.end()

            self.det_times.inference_time_s.start()
            result = self.predict()
            self.det_times.inference_time_s.end()

            self.det_times.postprocess_time_s.start()
            det_result = self.postprocess(inputs, result)
            self.det_times.postprocess_time_s.end()

            # tracking process
            self.det_times.tracking_time_s.start()
            if self.use_reid:
                det_result['frame_id'] = frame_id
                det_result['seq_name'] = seq_name
                det_result['ori_image'] = frame
                det_result = self.reidprocess(det_result)
            tracking_outs = self.tracking(det_result)
            self.det_times.tracking_time_s.end()
            self.det_times.img_num += 1

        online_tlwhs = tracking_outs['online_tlwhs']
        online_scores = tracking_outs['online_scores']
        online_ids = tracking_outs['online_ids']

        mot_results.append([online_tlwhs, online_scores, online_ids])

        if visual:
            if len(image_list) > 1 and frame_id % 10 == 0:
                print('Tracking frame {}'.format(frame_id))
            frame, _ = decode_image(img_file, {})
            if isinstance(online_tlwhs, defaultdict):
                im = plot_tracking_dict(
                    frame,
                    num_classes,
                    online_tlwhs,
                    online_ids,
                    online_scores,
                    frame_id=frame_id,
                    ids2names=[])
            else:
                im = plot_tracking(
                    frame,
                    online_tlwhs,
                    online_ids,
                    online_scores,
                    frame_id=frame_id)
            save_dir = os.path.join(self.output_dir, seq_name)
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            cv2.imwrite(
                os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im)

    return mot_results
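# --- Hedged usage sketch ---
# Driving predict_image above over a frame directory. SDE_Detector and its
# constructor arguments are assumptions standing in for this module's real
# entry point; adjust model_dir/tracker_config to your exported model.
import glob

detector = SDE_Detector(
    model_dir='output_inference/bytetrack',  # assumed export path
    tracker_config='tracker_config.yml',
    device='GPU')
image_list = glob.glob('dataset/mot/seq1/img1/*.jpg')
mot_results = detector.predict_image(image_list, visual=False, seq_name='seq1')
for frame_id, (online_tlwhs, online_scores, online_ids) in enumerate(
        mot_results):
    print('frame {}: {} online targets'.format(frame_id, len(online_ids)))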
def topdown_unite_predict(detector,
                          topdown_keypoint_detector,
                          image_list,
                          keypoint_batch_size=1,
                          save_res=False):
    det_timer = detector.get_timer()
    store_res = []
    for i, img_file in enumerate(image_list):
        # Decode the image once, in advance of both det and pose prediction.
        det_timer.preprocess_time_s.start()
        image, _ = decode_image(img_file, {})
        det_timer.preprocess_time_s.end()

        if FLAGS.run_benchmark:
            results = detector.predict_image(
                [image], run_benchmark=True, repeats=10)

            cm, gm, gu = get_current_memory_mb()
            detector.cpu_mem += cm
            detector.gpu_mem += gm
            detector.gpu_util += gu
        else:
            results = detector.predict_image([image], visual=False)
            results = detector.filter_box(results, FLAGS.det_threshold)

        if results['boxes_num'] > 0:
            keypoint_res = predict_with_given_det(image, results,
                                                  topdown_keypoint_detector,
                                                  keypoint_batch_size,
                                                  FLAGS.run_benchmark)

            if save_res:
                save_name = img_file if isinstance(img_file, str) else i
                store_res.append([
                    save_name, keypoint_res['bbox'],
                    [keypoint_res['keypoint'][0], keypoint_res['keypoint'][1]]
                ])
        else:
            results["keypoint"] = [[], []]
            keypoint_res = results

        if FLAGS.run_benchmark:
            cm, gm, gu = get_current_memory_mb()
            topdown_keypoint_detector.cpu_mem += cm
            topdown_keypoint_detector.gpu_mem += gm
            topdown_keypoint_detector.gpu_util += gu
        else:
            if not os.path.exists(FLAGS.output_dir):
                os.makedirs(FLAGS.output_dir)
            visualize_pose(img_file,
                           keypoint_res,
                           visual_thresh=FLAGS.keypoint_threshold,
                           save_dir=FLAGS.output_dir)

    if save_res:
        """
        1) store_res: a list of image_data
        2) image_data: [imageid, rects, [keypoints, scores]]
        3) rects: list of rect [xmin, ymin, xmax, ymax]
        4) keypoints: 17 joints * [x, y, conf], 51 values in total
        5) scores: mean of all joint confidences
        """
        with open("det_keypoint_unite_image_results.json", 'w') as wf:
            json.dump(store_res, wf, indent=4)
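# --- Hedged sketch ---
# Reading back the JSON written above with save_res=True, following the
# layout documented in the docstring: [imageid, rects, [keypoints, scores]].
import json

with open('det_keypoint_unite_image_results.json') as rf:
    store_res = json.load(rf)

for image_id, rects, (keypoints, scores) in store_res:
    # keypoints: one 17 * [x, y, conf] list per detected person.
    print('{}: {} person(s)'.format(image_id, len(rects)))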
def predict_image(self,
                  image_list,
                  run_benchmark=False,
                  repeats=1,
                  visual=True,
                  seq_name=None):
    mot_results = []
    num_classes = self.num_classes
    image_list.sort()
    ids2names = self.pred_config.labels
    data_type = 'mcmot' if num_classes > 1 else 'mot'
    for frame_id, img_file in enumerate(image_list):
        batch_image_list = [img_file]  # bs=1 in the MOT model
        if run_benchmark:
            # preprocess
            inputs = self.preprocess(batch_image_list)  # warmup
            self.det_times.preprocess_time_s.start()
            inputs = self.preprocess(batch_image_list)
            self.det_times.preprocess_time_s.end()

            # model prediction
            result_warmup = self.predict(repeats=repeats)  # warmup
            self.det_times.inference_time_s.start()
            result = self.predict(repeats=repeats)
            self.det_times.inference_time_s.end(repeats=repeats)

            # postprocess
            result_warmup = self.postprocess(inputs, result)  # warmup
            self.det_times.postprocess_time_s.start()
            det_result = self.postprocess(inputs, result)
            self.det_times.postprocess_time_s.end()

            # tracking
            result_warmup = self.tracking(det_result)
            self.det_times.tracking_time_s.start()
            online_tlwhs, online_scores, online_ids = self.tracking(
                det_result)
            self.det_times.tracking_time_s.end()
            self.det_times.img_num += 1

            cm, gm, gu = get_current_memory_mb()
            self.cpu_mem += cm
            self.gpu_mem += gm
            self.gpu_util += gu
        else:
            self.det_times.preprocess_time_s.start()
            inputs = self.preprocess(batch_image_list)
            self.det_times.preprocess_time_s.end()

            self.det_times.inference_time_s.start()
            result = self.predict()
            self.det_times.inference_time_s.end()

            self.det_times.postprocess_time_s.start()
            det_result = self.postprocess(inputs, result)
            self.det_times.postprocess_time_s.end()

            # tracking process
            self.det_times.tracking_time_s.start()
            online_tlwhs, online_scores, online_ids = self.tracking(
                det_result)
            self.det_times.tracking_time_s.end()
            self.det_times.img_num += 1

        if visual:
            if len(image_list) > 1 and frame_id % 10 == 0:
                print('Tracking frame {}'.format(frame_id))
            frame, _ = decode_image(img_file, {})

            im = plot_tracking_dict(
                frame,
                num_classes,
                online_tlwhs,
                online_ids,
                online_scores,
                frame_id=frame_id,
                ids2names=ids2names)
            if seq_name is None:
                seq_name = image_list[0].split('/')[-2]
            save_dir = os.path.join(self.output_dir, seq_name)
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            cv2.imwrite(
                os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im)

        mot_results.append([online_tlwhs, online_scores, online_ids])
    return mot_results
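# --- Hedged sketch ---
# Dumping the mot_results returned above in the standard MOTChallenge text
# format (frame, id, x, y, w, h, score, -1, -1, -1). Assumes the
# single-class case, where online_tlwhs is a plain list per frame.
def write_mot_results(filename, mot_results):
    with open(filename, 'w') as f:
        for frame_id, (tlwhs, scores, ids) in enumerate(mot_results, 1):
            for (x, y, w, h), score, track_id in zip(tlwhs, scores, ids):
                f.write('{},{},{:.2f},{:.2f},{:.2f},{:.2f},{:.2f},-1,-1,-1\n'
                        .format(frame_id, track_id, x, y, w, h, score))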