class TrackerManager(object):
    def __init__(self, vid_list, init_time):
        self.init_time = init_time
        video_id, camera_id, max_frames, width, height = vid_list[1:]
        self.max_frames = max_frames
        self.tracker = Tracker(init_time, video_id, max_frames, camera_id,
                               width, height)
        self.postprocess_trans = get_postprocess_trans(height, width)
        self.prev_img = None
        self.n = 0

    def process_output(self, dets):
        dets = post_process(dets, self.postprocess_trans)[0]
        self.tracker.step(dets)
        self.n += 1

    def is_done(self):
        return self.n >= self.max_frames

    def finalize(self):
        self.tracker.finalize()
def export_measures(workspace_path: str, dataset: Dataset, tracker: Tracker,
                    overlaps: list, failures: list, times: list):
    # create per-sequence output structure
    speed = len(dataset.sequences) * [0]
    results = len(dataset.sequences) * [0]
    for i, sequence in enumerate(dataset.sequences):
        speed_fps = 1.0 / times[i]
        results[i] = {'sequence_name': sequence.name,
                      'sequence_length': sequence.length,
                      'overlap': overlaps[i],
                      'failures': failures[i],
                      'speed': speed_fps}
        speed[i] = speed_fps

    # average measures
    average_overlap = sum(overlaps) / len(dataset.sequences)
    total_failures = sum(failures)
    average_speed = sum(speed) / len(dataset.sequences)

    # final output structure with all information
    output = {'tracker_name': tracker.name(),
              'results': results,
              'average_overlap': average_overlap,
              'total_failures': total_failures,
              'average_speed': average_speed,
              'total_frames': dataset.number_frames}

    # create output directory and save output in json file
    output_dir = os.path.join(workspace_path, 'analysis', tracker.name())
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    file_path = os.path.join(output_dir, 'results.json')
    with open(file_path, 'w') as f:
        json.dump(output, f, indent=2)

    print_summary(output)

    return output
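# Usage sketch (an assumption, not part of the original sources): calling
# export_measures after an evaluation loop. The stub classes below stand in
# for the real Dataset/Tracker types; os, json, and print_summary from the
# snippet above must be in scope.
if __name__ == '__main__':
    class _StubSequence:
        def __init__(self, name, length):
            self.name, self.length = name, length

    class _StubDataset:
        sequences = [_StubSequence('seq01', 100)]
        number_frames = 100

    class _StubTracker:
        def name(self):
            return 'demo_tracker'

    # One sequence: mean overlap 0.61, 2 failures, 20 ms per frame.
    export_measures('/tmp/workspace', _StubDataset(), _StubTracker(),
                    overlaps=[0.61], failures=[2], times=[0.02])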
def __init__(self, opt):
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')

    print('Creating model...')
    self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    self.model = load_model(self.model, opt.load_model, opt)
    self.model = self.model.to(opt.device)
    self.model.eval()

    # inp = (torch.ones([1, 3, 320, 320]).cuda(),
    #        torch.ones([1, 3, 320, 320]).cuda(),
    #        torch.ones([1, 1, 320, 320]).cuda())
    # pytorch_to_caffe.trans_net(self.model, inp, 'res18')
    # pytorch_to_caffe.save_prototxt('{}.prototxt'.format('res18'))
    # pytorch_to_caffe.save_caffemodel('{}.caffemodel'.format('res18'))

    self.opt = opt
    self.trained_dataset = get_dataset(opt.dataset)
    self.mean = np.array(self.trained_dataset.mean,
                         dtype=np.float32).reshape(1, 1, 3)
    self.std = np.array(self.trained_dataset.std,
                        dtype=np.float32).reshape(1, 1, 3)
    self.pause = not opt.no_pause
    self.rest_focal_length = self.trained_dataset.rest_focal_length \
        if self.opt.test_focal_length < 0 else self.opt.test_focal_length
    self.flip_idx = self.trained_dataset.flip_idx
    self.cnt = 0
    self.pre_images = None
    self.pre_image_ori = None
    self.tracker = Tracker(opt)
    self.debugger = Debugger(opt=opt, dataset=self.trained_dataset)
def reset(person_waiter: WaitingForPerson, person_checker: CheckingPerson,
          tracker: Tracker, temp_checker: TemperatureChecker, looker: Looker):
    """
    Resets the instances to their initial state.
    """
    person_waiter.reset()
    person_checker.reset()
    temp_checker.reset()
    tracker.reset()
    looker.stop()
def __init__(self, opt):
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')

    print('Creating model...')
    self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    self.model = load_model(self.model, opt.load_model, opt)
    self.model = self.model.to(opt.device)
    self.model.eval()

    self.opt = opt
    self.trained_dataset = get_dataset(opt.dataset)
    self.mean = np.array(self.trained_dataset.mean,
                         dtype=np.float32).reshape(1, 1, 3)
    self.std = np.array(self.trained_dataset.std,
                        dtype=np.float32).reshape(1, 1, 3)
    self.pause = not opt.no_pause
    self.rest_focal_length = self.trained_dataset.rest_focal_length \
        if self.opt.test_focal_length < 0 else self.opt.test_focal_length
    self.flip_idx = self.trained_dataset.flip_idx
    self.cnt = 0
    self.pre_images = None
    self.pre_image_ori = None
    self.tracker = Tracker(opt)
    self.debugger = Debugger(opt=opt, dataset=self.trained_dataset)

    self.motion = opt.motion
    if self.motion == 'transformer':
        import sys
        M3_PATH = '/u/jozhang/code/motion3d/'
        sys.path.insert(0, M3_PATH)
        from models.transformer import DPTransformer
        # motion = DPTransformer(2, 64, {'depth': 3, 'heads': 8,
        #                                'dim_head': 8, 'mlp_dim': 64,
        #                                'dropout': 0.})
        # trans_path = '/scratch/cluster/jozhang/logs/hydra/2021-01-30/15-36-54/models/ckpt-latest.dat'
        ckpt = torch.load(opt.transformer_load_path)
        self.transformer = ckpt['model'].cuda()
        print(f'Using transformer motion loaded from {opt.transformer_load_path}')
    elif self.motion == 'zero':
        print('Using no motion model')
    elif self.motion == 'cttrack':
        print('Using cttrack motion model')
    else:
        assert False, f'Unrecognized motion model {self.motion}'

    self.negate_motion = opt.negate_motion
    if self.negate_motion:
        logging.warning('Motion is being negated! Are you sure?')

    self.all_pre_images = []
def main(params):
    config = vars(parser.parse_args())
    # env = gym.make(config['env'])
    env = make_env(config['env'])
    env.seed(seed)
    agent = PPO(env, cfg['agent'])
    tag = params['tag']

    # Initiate the tracker for stats
    tracker = Tracker(
        config['env'],  # env.unwrapped.spec.id,
        tag,
        seed,
        cfg['agent'],
        ['Epoch', 'Ep_Reward', 'Cost'])

    # Train the agent
    agent.train(tracker,
                n_episodes=config['epochs'],
                n_step=config['stepmax'],
                verbose=config['verbose'],
                params=cfg['agent'],
                hyperp=config)
def train_ner_model(wordlists_path):
    master = combine_wordlists(PROJ_PATH + '/' + wordlists_path)
    train_valid_split = int(len(master) * 0.8)
    train_data = master[0:train_valid_split]
    valid_data = master[train_valid_split:]

    (train_sents, word_tokenizer), (train_chars, char_tokenizer), \
        (train_concept, concept_tokenizer) = load_data(train_data)
    (valid_sents, _), (valid_chars, _), (valid_concept, _) = load_data(
        valid_data, word_tokenizer, concept_tokenizer)
    # (test_sents, _), (test_chars, _), (test_concept, _) = load_data(
    #     test_data, word_tokenizer, concept_tokenizer)

    config = json.load(open(PROJ_PATH + '/src/model/config.json', 'r'))
    model_parameters = config['parameters']
    model_parameters['vocab_size'] = len(word_tokenizer.word_index)
    model_parameters['char_dim'] = len(char_tokenizer)
    model_parameters['output_dim'] = len(concept_tokenizer.word_index)
    model_info = config['model_info']

    tracker = Tracker(
        basedir=PROJ_PATH + '/models/',
        desc=model_info['model_description'],
        title=model_info['model_title'],
        enter_desc=False,
        name=model_info['username'],
    )

    model = BiLSTM_CRF(model_parameters)
    model.define_model(
        char_embedding_dim=model_parameters['char_embedding_dim'],
        word_embedding_dim=model_parameters['word_embedding_dim'],
        char_lstm_cell=model_parameters['char_lstm_cell'],
        lstm_cell=model_parameters['lstm_cell'],
    )
    model.generate_model_diagram(tracker.get_model_dir(), tracker.title)
    model.train(train_sents, train_chars, train_concept,
                valid_sents, valid_chars, valid_concept)
    model.save_model(tracker.get_model_dir(), tracker.title)

    # save tokenizers
    pickle.dump(word_tokenizer,
                open(tracker.get_model_dir() + '/word_tokenizer.ser', 'wb'))
    pickle.dump(char_tokenizer,
                open(tracker.get_model_dir() + '/char_tokenizer.ser', 'wb'))
    pickle.dump(concept_tokenizer,
                open(tracker.get_model_dir() + '/concept_tokenizer.ser', 'wb'))

    tracker.log()
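# Inference-side sketch (an assumption, not from the original source): the
# tokenizers serialized above can be reloaded with pickle when the trained
# model is used for prediction. `model_dir` is the directory written by
# tracker.get_model_dir().
import pickle

def load_ner_tokenizers(model_dir):
    # Load the word/char/concept tokenizers saved by train_ner_model.
    with open(model_dir + '/word_tokenizer.ser', 'rb') as f:
        word_tokenizer = pickle.load(f)
    with open(model_dir + '/char_tokenizer.ser', 'rb') as f:
        char_tokenizer = pickle.load(f)
    with open(model_dir + '/concept_tokenizer.ser', 'rb') as f:
        concept_tokenizer = pickle.load(f)
    return word_tokenizer, char_tokenizer, concept_tokenizer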
class VideoManager(object):
    def __init__(self, path, vid_list, model_loading_time):
        init_time = time.time() - model_loading_time
        self.init_time = init_time
        vid_filename = vid_list[0]
        video_path = os.path.join(path, vid_filename)
        self.cap = cv2.VideoCapture(video_path)
        video_id, camera_id, max_frames, width, height = vid_list[1:]
        self.max_frames = max_frames
        self.tracker = Tracker(init_time, video_id, max_frames, camera_id,
                               width, height)
        self.preprocess_function = get_img_transform(height, width,
                                                     new_size=512)
        self.postprocess_trans = get_postprocess_trans(height, width)
        region_mask = get_region_mask(camera_id, height, width)
        self.region_mask = np.where(region_mask, 255, 0).astype(np.uint8)
        self.prev_img = None
        self.n = 0

    def get_img(self):
        ret, frame = self.cap.read()
        frame = cv2.bitwise_and(frame, frame, mask=self.region_mask)
        img = self.preprocess_function(frame)
        img = torch.from_numpy(img).to(torch.device('cuda'))
        self.n += 1
        prev_img = self.prev_img if self.prev_img is not None else img
        self.prev_img = img
        return img, prev_img

    def process_output(self, dets):
        dets = post_process(dets, self.postprocess_trans)[0]
        self.tracker.step(dets)

    def is_done(self):
        return self.n >= self.max_frames

    def finalize(self):
        self.tracker.finalize()
def tracker_thread_fn(q_in, init_time, path, debug=0, new_thresh=0.4,
                      track_thresh=0.2):
    video_id, camera_id, max_frames, width, height = get_video_params(path)
    postprocess_trans = get_postprocess_trans(height, width)
    tracker = Tracker(init_time, video_id, max_frames, camera_id, width,
                      height, new_thresh=new_thresh,
                      track_thresh=track_thresh)

    for i in range(max_frames):
        dets = q_in.get()
        get_time = time.time()
        for k in dets:
            dets[k] = dets[k].detach().cpu().numpy()
        dets = post_process(dets, postprocess_trans,
                            track_thresh=track_thresh)[0]
        tracker.step(dets)
        if debug > 0 and i % 100 == 99:
            frame_time = time.time() - init_time
            FPS = (i + 1) / frame_time
            print("At frame {} FPS {}".format(i + 1, FPS), file=sys.stderr)

    tracker.finalize()
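# Wiring sketch (an assumption, not from the original source): the tracker
# loop above consumes one dict of raw network outputs per frame from q_in,
# so a detector thread can feed it through a bounded queue. `run_detector`
# is a hypothetical producer that pushes exactly max_frames items.
import queue
import threading
import time

def start_tracker_thread(path, run_detector):
    q_in = queue.Queue(maxsize=8)  # bounded, so the GPU cannot run far ahead
    init_time = time.time()
    t = threading.Thread(target=tracker_thread_fn,
                         args=(q_in, init_time, path),
                         kwargs={'debug': 1})
    t.start()
    run_detector(q_in)  # puts per-frame detection dicts onto the queue
    t.join()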
def __init__(self, name_, brush_, infoFile_):
    # Load the properties of the robot from file
    try:
        self._info = json.loads(open(infoFile_, 'r').read())
    except ValueError:
        self._info = {}

    # Call parent constructor
    super(Robot, self).__init__(name_=name_, pos_=self._info["pos"],
                                brush_=brush_)

    # Is the robot stopped
    self._stopped = False
    # Is the robot master
    self._isMaster = False
    # Current zoom level
    self._zoom = 1.0
    # Store all items which belong to the robot
    self._items = [self, ]

    # Associate a tracker to store the path (in m)
    # Tracker only manipulates (x,y) coordinates
    self._tracker = Tracker(self._info["pos"][:2])

    # Show the supervisor information on screen
    self._showSupervisors = True

    # Set envelope
    self._envelope = self._info["envelope"]

    # Cache the bounding rect
    xmin, ymin, xmax, ymax = self.getBounds()
    self._boundingRect = QtCore.QRectF(QtCore.QPointF(xmin, ymin),
                                       QtCore.QPointF(xmax, ymax))

    # Cache the shape
    points = [QtCore.QPointF(p[0], p[1]) for p in self._envelope]
    self._shape = QtGui.QPainterPath()
    self._shape.addPolygon(QtGui.QPolygonF(points))
def main(params):
    config = vars(parser.parse_args())
    channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(file_name=None, side_channels=[channel])
    channel.set_configuration_parameters(time_scale=20.0)
    env = UnityToGymWrapper(unity_env)
    agent = DDQN(env, cfg['agent'])
    tag = 'DDQN'

    # Initiate the tracker for stats
    tracker = Tracker("TurtleBot3", tag, seed, cfg['agent'],
                      ['Epoch', 'Ep_Reward'])

    # Train the agent
    agent.train(tracker,
                n_episodes=config['epochs'],
                verbose=config['verbose'],
                params=cfg['agent'],
                hyperp=config)
class Detector(object):
    def __init__(self, opt):
        if opt.gpus[0] >= 0:
            opt.device = torch.device("cuda")
        else:
            opt.device = torch.device("cpu")

        print("Creating model...")
        self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
        self.model = load_model(self.model, opt.load_model, opt)
        self.model = self.model.to(opt.device)
        self.model.eval()

        self.opt = opt
        self.trained_dataset = get_dataset(opt.dataset)
        self.mean = np.array(self.trained_dataset.mean,
                             dtype=np.float32).reshape(1, 1, 3)
        self.std = np.array(self.trained_dataset.std,
                            dtype=np.float32).reshape(1, 1, 3)
        # self.pause = not opt.no_pause
        self.rest_focal_length = (self.trained_dataset.rest_focal_length
                                  if self.opt.test_focal_length < 0
                                  else self.opt.test_focal_length)
        self.flip_idx = self.trained_dataset.flip_idx
        self.cnt = 0
        self.pre_images = None
        self.pre_image_ori = None
        self.dataset = opt.dataset
        if self.dataset == "nuscenes":
            self.tracker = {}
            for class_name in NUSCENES_TRACKING_NAMES:
                self.tracker[class_name] = Tracker(opt, self.model)
        else:
            self.tracker = Tracker(opt, self.model)
        self.debugger = Debugger(opt=opt, dataset=self.trained_dataset)
        self.img_height = 100
        self.img_width = 100

    def run(self, image_or_path_or_tensor, meta={}, image_info=None):
        load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
        merge_time, track_time, tot_time, display_time = 0, 0, 0, 0
        self.debugger.clear()
        start_time = time.time()

        # read image
        pre_processed = False
        if isinstance(image_or_path_or_tensor, np.ndarray):
            image = image_or_path_or_tensor
        elif type(image_or_path_or_tensor) == type(""):
            image = cv2.imread(image_or_path_or_tensor)
        else:
            image = image_or_path_or_tensor["image"][0].numpy()
            pre_processed_images = image_or_path_or_tensor
            pre_processed = True

        loaded_time = time.time()
        load_time += loaded_time - start_time

        detections = []
        # for multi-scale testing
        for scale in self.opt.test_scales:
            scale_start_time = time.time()
            if not pre_processed:
                # not prefetch testing or demo
                images, meta = self.pre_process(image, scale, meta)
            else:
                # prefetch testing
                images = pre_processed_images["images"][scale][0]
                meta = pre_processed_images["meta"][scale]
                meta = {k: v.numpy()[0] for k, v in meta.items()}
                if "pre_dets" in pre_processed_images["meta"]:
                    meta["pre_dets"] = pre_processed_images["meta"]["pre_dets"]
                if "cur_dets" in pre_processed_images["meta"]:
                    meta["cur_dets"] = pre_processed_images["meta"]["cur_dets"]

            images = images.to(self.opt.device,
                               non_blocking=self.opt.non_block_test)

            # initializing tracker
            pre_hms, pre_inds = None, None
            pre_process_time = time.time()
            pre_time += pre_process_time - scale_start_time

            # run the network
            # output: the output feature maps, only used for visualizing
            # dets: output tensors after extracting peaks
            output, dets, forward_time, FeatureMaps = self.process(
                images, self.pre_images, pre_hms, pre_inds, return_time=True)
            net_time += forward_time - pre_process_time
            decode_time = time.time()
            dec_time += decode_time - forward_time

            # convert the cropped and 4x downsampled output coordinate system
            # back to the input image coordinate system
            result = self.post_process(dets, meta, scale)
            post_process_time = time.time()
            post_time += post_process_time - decode_time
            detections.append(result)
            if self.opt.debug >= 2:
                self.debug(
                    self.debugger,
                    images,
                    result,
                    output,
                    scale,
                    pre_images=self.pre_images
                    if not self.opt.no_pre_img else None,
                    pre_hms=pre_hms,
                )

        # merge multi-scale testing results
        results = self.merge_outputs(detections)
        torch.cuda.synchronize()
        end_time = time.time()
        merge_time += end_time - post_process_time

        # public detection mode in MOT challenge
        if self.opt.public_det:
            results = (pre_processed_images["meta"]["cur_dets"]
                       if self.opt.public_det else None)

        if self.dataset == "nuscenes":
            trans_matrix = np.array(image_info["trans_matrix"], np.float32)

            results_by_class = {}
            ddd_boxes_by_class = {}
            depths_by_class = {}
            ddd_boxes_by_class2 = {}
            ddd_org_boxes_by_class = {}
            ddd_box_submission1 = {}
            ddd_box_submission2 = {}
            for class_name in NUSCENES_TRACKING_NAMES:
                results_by_class[class_name] = []
                ddd_boxes_by_class2[class_name] = []
                ddd_boxes_by_class[class_name] = []
                depths_by_class[class_name] = []
                ddd_org_boxes_by_class[class_name] = []
                ddd_box_submission1[class_name] = []
                ddd_box_submission2[class_name] = []

            for det in results:
                cls_id = int(det["class"])
                class_name = nuscenes_class_name[cls_id - 1]
                if class_name not in NUSCENES_TRACKING_NAMES:
                    continue
                if det["score"] < 0.3:
                    continue
                if class_name == "pedestrian" and det["score"] < 0.35:
                    continue
                results_by_class[class_name].append(det["bbox"].tolist() +
                                                    [det["score"]])
                size = [
                    float(det["dim"][1]),
                    float(det["dim"][2]),
                    float(det["dim"][0]),
                ]
                rot_cam = Quaternion(axis=[0, 1, 0], angle=det["rot_y"])
                translation_submission1 = np.dot(
                    trans_matrix,
                    np.array(
                        [det["loc"][0], det["loc"][1] - size[2],
                         det["loc"][2], 1],
                        np.float32,
                    ),
                ).copy()
                loc = np.array([det["loc"][0], det["loc"][1], det["loc"][2]],
                               np.float32)
                depths_by_class[class_name].append(
                    [float(det["loc"][2])].copy())
                trans = [det["loc"][0], det["loc"][1], det["loc"][2]]
                ddd_org_boxes_by_class[class_name].append(
                    [float(det["dim"][0]), float(det["dim"][1]),
                     float(det["dim"][2])] + trans + [det["rot_y"]])

                box = Box(loc, size, rot_cam, name="2", token="1")
                box.translate(np.array([0, -box.wlh[2] / 2, 0]))
                box.rotate(Quaternion(image_info["cs_record_rot"]))
                box.translate(np.array(image_info["cs_record_trans"]))
                box.rotate(Quaternion(image_info["pose_record_rot"]))
                box.translate(np.array(image_info["pose_record_trans"]))
                rotation = box.orientation
                rotation = [
                    float(rotation.w),
                    float(rotation.x),
                    float(rotation.y),
                    float(rotation.z),
                ]
                ddd_box_submission1[class_name].append(
                    [
                        float(translation_submission1[0]),
                        float(translation_submission1[1]),
                        float(translation_submission1[2]),
                    ].copy() + size.copy() + rotation.copy())

                q = Quaternion(rotation)
                angle = q.angle if q.axis[2] > 0 else -q.angle
                ddd_boxes_by_class[class_name].append([
                    size[2],
                    size[0],
                    size[1],
                    box.center[0],
                    box.center[1],
                    box.center[2],
                    angle,
                ].copy())

            online_targets = []
            for class_name in NUSCENES_TRACKING_NAMES:
                if len(results_by_class[class_name]) > 0 and NMS:
                    boxess = torch.from_numpy(
                        np.array(results_by_class[class_name])[:, :4])
                    scoress = torch.from_numpy(
                        np.array(results_by_class[class_name])[:, -1])
                    if class_name == "bus" or class_name == "truck":
                        ovrlp = 0.7
                    else:
                        ovrlp = 0.8
                    keep, count = nms(boxess, scoress, overlap=ovrlp)
                    keep = keep.data.numpy().tolist()
                    keep = sorted(set(keep))
                    results_by_class[class_name] = np.array(
                        results_by_class[class_name])[keep]
                    ddd_boxes_by_class[class_name] = np.array(
                        ddd_boxes_by_class[class_name])[keep]
                    depths_by_class[class_name] = np.array(
                        depths_by_class[class_name])[keep]
                    ddd_org_boxes_by_class[class_name] = np.array(
                        ddd_org_boxes_by_class[class_name])[keep]
                    ddd_box_submission1[class_name] = np.array(
                        ddd_box_submission1[class_name])[keep]

                online_targets += self.tracker[class_name].update(
                    results_by_class[class_name],
                    FeatureMaps,
                    ddd_boxes=ddd_boxes_by_class[class_name],
                    depths_by_class=depths_by_class[class_name],
                    ddd_org_boxes=ddd_org_boxes_by_class[class_name],
                    submission=ddd_box_submission1[class_name],
                    classe=class_name,
                )
        else:
            online_targets = self.tracker.update(results, FeatureMaps)

        return online_targets

    def _transform_scale(self, image, scale=1):
        """
        Prepare input image in different testing modes.
        Currently supported: fix short size / center crop to a fixed size /
        keep original resolution but pad to a multiple of 32.
        """
        height, width = image.shape[0:2]
        new_height = int(height * scale)
        new_width = int(width * scale)
        if self.opt.fix_short > 0:
            if height < width:
                inp_height = self.opt.fix_short
                inp_width = (int(width / height * self.opt.fix_short) +
                             63) // 64 * 64
            else:
                inp_height = (int(height / width * self.opt.fix_short) +
                              63) // 64 * 64
                inp_width = self.opt.fix_short
            c = np.array([width / 2, height / 2], dtype=np.float32)
            s = np.array([width, height], dtype=np.float32)
        elif self.opt.fix_res:
            inp_height, inp_width = self.opt.input_h, self.opt.input_w
            c = np.array([new_width / 2.0, new_height / 2.0],
                         dtype=np.float32)
            s = max(height, width) * 1.0
            # s = np.array([inp_width, inp_height], dtype=np.float32)
        else:
            inp_height = (new_height | self.opt.pad) + 1
            inp_width = (new_width | self.opt.pad) + 1
            c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
            s = np.array([inp_width, inp_height], dtype=np.float32)
        resized_image = cv2.resize(image, (new_width, new_height))
        return resized_image, c, s, inp_width, inp_height, height, width

    def pre_process(self, image, scale, input_meta={}):
        """
        Crop, resize, and normalize image. Gather meta data for post
        processing and tracking.
        """
        resized_image, c, s, inp_width, inp_height, height, width = \
            self._transform_scale(image)
        trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
        out_height = inp_height // self.opt.down_ratio
        out_width = inp_width // self.opt.down_ratio
        trans_output = get_affine_transform(c, s, 0, [out_width, out_height])
        inp_image = cv2.warpAffine(resized_image, trans_input,
                                   (inp_width, inp_height),
                                   flags=cv2.INTER_LINEAR)
        inp_image = ((inp_image / 255.0 - self.mean) / self.std).astype(
            np.float32)
        images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height,
                                                      inp_width)
        if self.opt.flip_test:
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        meta = {
            "calib": np.array(input_meta["calib"], dtype=np.float32)
            if "calib" in input_meta
            else self._get_default_calib(width, height)
        }
        meta.update({
            "c": c,
            "s": s,
            "height": height,
            "width": width,
            "out_height": out_height,
            "out_width": out_width,
            "inp_height": inp_height,
            "inp_width": inp_width,
            "trans_input": trans_input,
            "trans_output": trans_output,
        })
        if "pre_dets" in input_meta:
            meta["pre_dets"] = input_meta["pre_dets"]
        if "cur_dets" in input_meta:
            meta["cur_dets"] = input_meta["cur_dets"]
        return images, meta

    def _trans_bbox(self, bbox, trans, width, height):
        """
        Transform bounding boxes according to image crop.
        """
        bbox = np.array(copy.deepcopy(bbox), dtype=np.float32)
        bbox[:2] = affine_transform(bbox[:2], trans)
        bbox[2:] = affine_transform(bbox[2:], trans)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, width - 1)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, height - 1)
        return bbox

    def _get_additional_inputs(self, dets, meta, with_hm=True):
        """
        Render input heatmap from previous trackings.
        """
        trans_input, trans_output = meta["trans_input"], meta["trans_output"]
        inp_width, inp_height = meta["inp_width"], meta["inp_height"]
        out_width, out_height = meta["out_width"], meta["out_height"]
        input_hm = np.zeros((1, inp_height, inp_width), dtype=np.float32)

        output_inds = []
        for det in dets:
            if det["score"] < self.opt.pre_thresh or det["active"] == 0:
                continue
            bbox = self._trans_bbox(det["bbox"], trans_input, inp_width,
                                    inp_height)
            bbox_out = self._trans_bbox(det["bbox"], trans_output, out_width,
                                        out_height)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                ct_int = ct.astype(np.int32)
                if with_hm:
                    draw_umich_gaussian(input_hm[0], ct_int, radius)
                ct_out = np.array(
                    [(bbox_out[0] + bbox_out[2]) / 2,
                     (bbox_out[1] + bbox_out[3]) / 2],
                    dtype=np.int32,
                )
                output_inds.append(ct_out[1] * out_width + ct_out[0])
        if with_hm:
            input_hm = input_hm[np.newaxis]
            if self.opt.flip_test:
                input_hm = np.concatenate(
                    (input_hm, input_hm[:, :, :, ::-1]), axis=0)
            input_hm = torch.from_numpy(input_hm).to(self.opt.device)
        output_inds = np.array(output_inds, np.int64).reshape(1, -1)
        output_inds = torch.from_numpy(output_inds).to(self.opt.device)
        return input_hm, output_inds

    def _get_default_calib(self, width, height):
        calib = np.array([
            [self.rest_focal_length, 0, width / 2, 0],
            [0, self.rest_focal_length, height / 2, 0],
            [0, 0, 1, 0],
        ])
        return calib

    def _sigmoid_output(self, output):
        if "hm" in output:
            output["hm"] = output["hm"].sigmoid_()
        if "hm_hp" in output:
            output["hm_hp"] = output["hm_hp"].sigmoid_()
        if "dep" in output:
            output["dep"] = 1.0 / (output["dep"].sigmoid() + 1e-6) - 1.0
            output["dep"] *= self.opt.depth_scale
        return output

    def _flip_output(self, output):
        average_flips = ["hm", "wh", "dep", "dim"]
        neg_average_flips = ["amodel_offset"]
        single_flips = [
            "ltrb",
            "nuscenes_att",
            "velocity",
            "ltrb_amodal",
            "reg",
            "hp_offset",
            "rot",
            "tracking",
            "pre_hm",
        ]
        for head in output:
            if head in average_flips:
                output[head] = (output[head][0:1] +
                                flip_tensor(output[head][1:2])) / 2
            if head in neg_average_flips:
                flipped_tensor = flip_tensor(output[head][1:2])
                flipped_tensor[:, 0::2] *= -1
                output[head] = (output[head][0:1] + flipped_tensor) / 2
            if head in single_flips:
                output[head] = output[head][0:1]
            if head == "hps":
                output["hps"] = (output["hps"][0:1] + flip_lr_off(
                    output["hps"][1:2], self.flip_idx)) / 2
            if head == "hm_hp":
                output["hm_hp"] = (output["hm_hp"][0:1] + flip_lr(
                    output["hm_hp"][1:2], self.flip_idx)) / 2
        return output

    def process(self, images, pre_images=None, pre_hms=None, pre_inds=None,
                return_time=False):
        with torch.no_grad():
            torch.cuda.synchronize()
            output, FeatureMaps = self.model(images, pre_images, pre_hms)
            output = output[-1]
            output = self._sigmoid_output(output)
            output.update({"pre_inds": pre_inds})
            if self.opt.flip_test:
                output = self._flip_output(output)
            torch.cuda.synchronize()
            forward_time = time.time()

            dets = generic_decode(output, K=self.opt.K, opt=self.opt)
            torch.cuda.synchronize()
            for k in dets:
                dets[k] = dets[k].detach().cpu().numpy()
        if return_time:
            return output, dets, forward_time, FeatureMaps
        else:
            return output, dets, FeatureMaps

    def post_process(self, dets, meta, scale=1):
        dets = generic_post_process(
            self.opt,
            dets,
            [meta["c"]],
            [meta["s"]],
            meta["out_height"],
            meta["out_width"],
            self.opt.num_classes,
            [meta["calib"]],
            meta["height"],
            meta["width"],
        )
        self.this_calib = meta["calib"]
        if scale != 1:
            for i in range(len(dets[0])):
                for k in ["bbox", "hps"]:
                    if k in dets[0][i]:
                        dets[0][i][k] = (np.array(dets[0][i][k], np.float32) /
                                         scale).tolist()
        return dets[0]

    def merge_outputs(self, detections):
        assert len(self.opt.test_scales) == 1, "multi_scale not supported!"
        results = []
        for i in range(len(detections[0])):
            if detections[0][i]["score"] > self.opt.out_thresh:
                results.append(detections[0][i])
        return results

    def debug(self, debugger, images, dets, output, scale=1, pre_images=None,
              pre_hms=None):
        img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * self.std + self.mean) * 255.0), 0,
                      255).astype(np.uint8)
        pred = debugger.gen_colormap(output["hm"][0].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, "pred_hm")
        if "hm_hp" in output:
            pred = debugger.gen_colormap_hp(
                output["hm_hp"][0].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, "pred_hmhp")
        if pre_images is not None:
            pre_img = pre_images[0].detach().cpu().numpy().transpose(1, 2, 0)
            pre_img = np.clip(((pre_img * self.std + self.mean) * 255.0), 0,
                              255).astype(np.uint8)
            debugger.add_img(pre_img, "pre_img")
            if pre_hms is not None:
                pre_hm = debugger.gen_colormap(
                    pre_hms[0].detach().cpu().numpy())
                debugger.add_blend_img(pre_img, pre_hm, "pre_hm")

    def show_results(self, debugger, image, results):
        debugger.add_img(image, img_id="generic")
        if self.opt.tracking:
            debugger.add_img(
                self.pre_image_ori
                if self.pre_image_ori is not None else image,
                img_id="previous",
            )
            self.pre_image_ori = image
        for j in range(len(results)):
            if results[j]["score"] > self.opt.vis_thresh:
                if "active" in results[j] and results[j]["active"] == 0:
                    continue
                item = results[j]
                if "bbox" in item:
                    sc = (item["score"]
                          if self.opt.demo == "" or not ("tracking_id" in item)
                          else item["tracking_id"])
                    sc = item["tracking_id"] \
                        if self.opt.show_track_color else sc
                    debugger.add_coco_bbox(item["bbox"], item["class"] - 1,
                                           sc, img_id="generic")
                if "tracking" in item:
                    debugger.add_arrow(item["ct"], item["tracking"],
                                       img_id="generic")
                tracking_id = item["tracking_id"] \
                    if "tracking_id" in item else -1
                if ("tracking_id" in item and self.opt.demo == ""
                        and not self.opt.show_track_color):
                    debugger.add_tracking_id(item["ct"], item["tracking_id"],
                                             img_id="generic")
                if (item["class"] in [1, 2]) and "hps" in item:
                    debugger.add_coco_hp(item["hps"],
                                         tracking_id=tracking_id,
                                         img_id="generic")
        if (len(results) > 0 and "dep" in results[0]
                and "alpha" in results[0] and "dim" in results[0]):
            debugger.add_3d_detection(
                image if not self.opt.qualitative else cv2.resize(
                    debugger.imgs["pred_hm"],
                    (image.shape[1], image.shape[0])),
                False,
                results,
                self.this_calib,
                vis_thresh=self.opt.vis_thresh,
                img_id="ddd_pred",
            )
            debugger.add_bird_view(
                results,
                vis_thresh=self.opt.vis_thresh,
                img_id="bird_pred",
                cnt=self.cnt,
            )
        if self.opt.show_track_color and self.opt.debug == 4:
            del debugger.imgs["generic"], debugger.imgs["bird_pred"]

    def reset_tracking(self, opt):
        if self.dataset == "nuscenes":
            self.tracker = {}
            for class_name in NUSCENES_TRACKING_NAMES:
                self.tracker[class_name] = Tracker(opt, self.model,
                                                   h=self.img_height,
                                                   w=self.img_width)
        else:
            self.tracker = Tracker(opt, self.model, h=self.img_height,
                                   w=self.img_width)
        self.pre_images = None
        self.pre_image_ori = None

    def update_public_detections(self, detections_file):
        self.det_file = pd.read_csv(detections_file, header=None, sep=" ")
        self.det_group = self.det_file.groupby(0)
        self.det_group_keys = self.det_group.indices.keys()
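# Usage sketch (an assumption, not from the original source): a minimal
# per-sequence driver for the nuScenes-mode Detector above. `opt`, the frame
# iterable, and the per-frame image_info dicts (calibration plus ego-pose
# records read by run()) are hypothetical caller-supplied inputs.
def track_sequence(opt, frames, frame_infos):
    detector = Detector(opt)
    all_tracks = []
    for frame, info in zip(frames, frame_infos):
        all_tracks.append(detector.run(frame, image_info=info))
    detector.reset_tracking(opt)  # clear tracker state for the next sequence
    return all_tracks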
def run_single_video_serial(path, debug=0, full_precision=False):
    init_time = time.time()
    if debug >= 1:
        print("Starting for video: {}".format(path), file=sys.stderr)

    video_id, camera_id, max_frames, width, height = get_video_params(path)
    cap = cv2.VideoCapture(path)

    model = create_model()
    model = load_model(model, 'checkpoints/coco_tracking.pth')
    model.to(torch.device('cuda'))
    model.eval()

    tracker = Tracker(init_time, video_id, max_frames, camera_id, width,
                      height, debug=debug)

    preprocess_function = get_img_transform(height, width, new_size=512)
    postprocess_trans = get_postprocess_trans(height, width)

    region_mask = get_region_mask(camera_id, height, width)
    region_mask = np.where(region_mask, 255, 0).astype(np.uint8)
    if debug > 2:
        cv2.imwrite("mask.png", region_mask)

    pre_img = None
    for i in range(max_frames):
        ret, frame = cap.read()
        if debug >= 2:
            cv2.imshow("Frame", frame)
            cv2.waitKey(1)
        tracker.frame = np.copy(frame)
        frame = cv2.bitwise_and(frame, frame, mask=region_mask)
        img = preprocess_function(frame)
        img = torch.from_numpy(img).to(torch.device('cuda'))
        if pre_img is None:
            pre_img = img
        with torch.no_grad():
            with torch.cuda.amp.autocast(enabled=not full_precision):
                out = model(img, pre_img, None)[-1]
                out = sigmoid_output(out)
                dets = generic_decode(out)
        pre_img = img
        for k in dets:
            dets[k] = dets[k].detach().cpu().numpy()
        dets = post_process(dets, postprocess_trans)[0]
        tracker.step(dets)
        if debug >= 1 and i % 100 == 99:
            frame_time = time.time() - init_time
            FPS = (i + 1) / frame_time
            print("At frame {} FPS {}".format(i + 1, FPS), file=sys.stderr)

    tracker.finalize()
    if debug >= 1:
        print("Finished video: {}".format(path), file=sys.stderr)
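# Parallel-driver sketch (an assumption, not from the original source): the
# serial runner above handles one video end to end, so multiple videos can be
# processed by a small pool of worker processes, each loading its own model.
from multiprocessing import get_context

def run_many_videos(paths, workers=2):
    # 'spawn' avoids forking an already-initialized CUDA context into workers.
    with get_context('spawn').Pool(processes=workers) as pool:
        pool.map(run_single_video_serial, paths)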
def run(sequence_dir, detection_file, output_file, max_age, n_init, reid_thr,
        checkpoint_dir):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_file : str
        Path to the tracking output file. This file will contain the
        tracking results on completion.
    max_age : int
        Maximum number of missed frames before a track is deleted.
    n_init : int
        Number of consecutive detections before a track is confirmed.
    reid_thr : float
        Re-identification similarity threshold used for matching.
    checkpoint_dir : str
        Directory containing the re-identification model checkpoint.

    """
    new_npy = encode_newfeat(detection_file, checkpoint_dir)
    seq_info = gather_sequence_info(sequence_dir, new_npy)
    tracker = Tracker(max_age=max_age, n_init=n_init, reid_thr=reid_thr)
    results = []

    if not osp.exists(
            os.path.join("warp_mat", "%s.npy" % seq_info["sequence_name"])):
        if not osp.exists("./warp_mat"):
            os.system('mkdir ./warp_mat')
        warp_matrix = np.array(WarpMatrix(seq_info).mat)
        output_filename = os.path.join("warp_mat",
                                       "%s.npy" % seq_info["sequence_name"])
        np.save(output_filename, warp_matrix, allow_pickle=False)
    else:
        warp_matrix = np.load(
            os.path.join("warp_mat", "%s.npy" % seq_info["sequence_name"]))

    def frame_callback(frame_idx):
        print("Processing %s" % seq_info["sequence_name"],
              "frame %05d" % frame_idx)

        # Load image and generate detections.
        detections = create_detections(seq_info["detections"], frame_idx,
                                       w_img=seq_info["image_size"][1],
                                       h_img=seq_info["image_size"][0])

        # Update tracker.
        tracker.predict(warp_matrix[frame_idx - 2])
        tracker.update(detections, seq_info["sequence_name"], frame_idx,
                       checkpoint_dir)

        # Store results.
        for track in tracker.tracks:
            if track.time_since_update >= 1:
                continue
            bbox = track.to_tlwh2()
            results.append([
                frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]
            ])

    # Run tracker.
    frame_idx = seq_info["min_frame_idx"]
    while frame_idx <= seq_info["max_frame_idx"]:
        frame_callback(frame_idx)
        frame_idx += 1

    # Store results.
    output_path = os.path.dirname(output_file)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    with open(output_file, 'w') as f:
        for row in results:
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' %
                  (row[0], row[1], row[2], row[3], row[4], row[5]), file=f)
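# Batch sketch (an assumption, not from the original source): running the
# tracker over every sequence of a MOTChallenge split. The directory layout
# and hyperparameter values here are hypothetical defaults.
import os

def run_all_sequences(mot_root, det_root, out_root, checkpoint_dir):
    for seq in sorted(os.listdir(mot_root)):
        run(sequence_dir=os.path.join(mot_root, seq),
            detection_file=os.path.join(det_root, seq + '.npy'),
            output_file=os.path.join(out_root, seq + '.txt'),
            max_age=30, n_init=3, reid_thr=0.5,
            checkpoint_dir=checkpoint_dir)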
def video():
    """
    Main loop of the program: reads the data streams and displays the video
    streams and status messages to the user.
    """
    global image_timestamp
    global thermal
    global normal
    global temp

    # Define the variable that remembers the current state: 'waiting', which
    # awaits a person entering the frame, and 'person_detected', which checks
    # continuously if the wearer's mask is worn correctly.
    current_state = 'waiting'
    looker = Looker()
    talker = Talker()
    tracker = Tracker(args['tracker'])
    detector = FaceAndMaskDetector(args['confidence'])
    temp_checker = TemperatureChecker()
    person_waiter = WaitingForPerson(tracker, detector, args['wait'])
    person_checker = CheckingPerson(tracker, talker, detector, temp_checker,
                                    args['value'], args['wait'],
                                    args['threshold'], args['state'],
                                    args['move'])

    while True:
        # Get current frames
        normal_wrapper.set(normal)
        curr_normal = normal_wrapper.get()
        temp_wrapper.set(temp)
        curr_temp = temp_wrapper.get()
        thermal_wrapper.set(thermal)
        curr_thermal = thermal_wrapper.get()

        # While in the 'waiting' state check if a person is in the frame
        if current_state == 'waiting':
            person_waiter.run_prediction(curr_normal)
            # If a person entered the frame, change the current state
            if person_waiter.person_in_frame():
                current_state = 'person_detected'

        # While in the 'person_detected' state check if the person is wearing
        # the mask properly.
        if current_state == 'person_detected':
            person_checker.check_person(curr_normal, curr_temp, looker,
                                        image_timestamp)
            if person_checker.mask_ok:
                print(f'{person_checker.temp_checker.get_temp()} C')
                sleep(3)
                person_checker.speak_temperature()
                reset(person_waiter, person_checker, tracker, temp_checker,
                      looker)
                looker = Looker()
                current_state = 'waiting'
            elif person_checker.lost_tracking:
                reset(person_waiter, person_checker, tracker, temp_checker,
                      looker)
                looker = Looker()
                current_state = 'waiting'

        frame = vstack((curr_normal, curr_thermal))
        # Display the concatenated current frame
        cv.imshow('Video stream', frame)

        # Exit if Q pressed
        if cv.waitKey(1) & 0xFF == ord('q'):
            break

    # Close the video stream, stop the thread that centers the camera on the
    # face, and exit the program
    cv.destroyAllWindows()
    looker.stop()
    sys.exit(0)
# assign unique color to each id
id_name = []
for i in range(0, 1000):
    id_name.append(i)
id_color = IdColor(id_name)

# assign session
tf.Graph().as_default()
sess = tf.Session(config=config)
keras.backend.set_session(sess)

# load tracker
tracker = Tracker(sess, MEM_SIZE, IMG_SIZE, FEATURE_SIZE,
                  ori_height=height, ori_width=width,
                  iou_threshold=0.3, kl_threshold=0.6)
sess.run(tf.global_variables_initializer())
log_dir = '/home/msis_dasol/master_thesis/RAN/for_paper/VGG16_skip_connection/memsize_5'
tf_util.restore_from_dir(sess, os.path.join(log_dir, 'checkpoints'))

# load detector yolov3
yolov3 = YOLOv3(sess)

total_tracking_object = 0
var_sizes = [
    np.product(list(map(int, v.shape))) * v.dtype.size
    for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
]
class CenterTrackOperator(erdos.Operator):
    def __init__(self, camera_stream, obstacle_tracking_stream, flags,
                 camera_setup):
        from dataset.dataset_factory import get_dataset
        from model.model import create_model, load_model
        from opts import opts
        from utils.tracker import Tracker

        camera_stream.add_callback(self.on_frame_msg,
                                   [obstacle_tracking_stream])
        self._flags = flags
        self._logger = erdos.utils.setup_logging(self.config.name,
                                                 self.config.log_file_name)
        self._csv_logger = erdos.utils.setup_csv_logging(
            self.config.name + '-csv', self.config.csv_log_file_name)
        self._camera_setup = camera_setup

        # TODO(ionel): Might have to filter labels when running with a coco
        # and a nuscenes model.
        num_classes = {
            'kitti_tracking': 3,
            'coco': 90,
            'mot': 1,
            'nuscenes': 10
        }
        # Other flags:
        # 1) --K ; max number of output objects.
        # 2) --fix_short ; resizes the height of the image to fix_short, and
        #    the width such that the aspect ratio is maintained.
        # 3) --pre_hm ; pre heat map.
        # 4) --input_w; str(camera_setup.width)
        # 5) --input_h; str(camera_setup.height)
        args = [
            'tracking', '--load_model', flags.center_track_model_path,
            '--dataset', flags.center_track_model, '--test_focal_length',
            str(int(camera_setup.get_focal_length())), '--out_thresh',
            str(flags.obstacle_detection_min_score_threshold),
            '--pre_thresh',
            str(flags.obstacle_detection_min_score_threshold),
            '--new_thresh',
            str(flags.obstacle_detection_min_score_threshold),
            '--track_thresh',
            str(flags.obstacle_detection_min_score_threshold), '--max_age',
            str(flags.obstacle_track_max_age), '--num_classes',
            str(num_classes[flags.center_track_model]), '--tracking',
            '--hungarian'
        ]
        opt = opts().init(args)
        gpu = True
        if gpu:
            opt.device = torch.device('cuda')
        else:
            opt.device = torch.device('cpu')
        self.opt = opt
        self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
        self.model = load_model(self.model, opt.load_model, opt)
        self.model = self.model.to(self.opt.device)
        self.model.eval()

        self.trained_dataset = get_dataset(opt.dataset)
        self.mean = np.array(self.trained_dataset.mean,
                             dtype=np.float32).reshape(1, 1, 3)
        self.std = np.array(self.trained_dataset.std,
                            dtype=np.float32).reshape(1, 1, 3)
        self.rest_focal_length = self.trained_dataset.rest_focal_length \
            if self.opt.test_focal_length < 0 else self.opt.test_focal_length
        self.flip_idx = self.trained_dataset.flip_idx
        self.cnt = 0
        self.pre_images = None
        self.pre_image_ori = None
        self.tracker = Tracker(opt)

    @staticmethod
    def connect(camera_stream):
        obstacle_tracking_stream = erdos.WriteStream()
        return [obstacle_tracking_stream]

    @erdos.profile_method()
    def on_frame_msg(self, msg, obstacle_tracking_stream):
        """Invoked when a FrameMessage is received on the camera stream."""
        self._logger.debug('@{}: {} received frame'.format(
            msg.timestamp, self.config.name))
        assert msg.frame.encoding == 'BGR', 'Expects BGR frames'
        image_np = msg.frame.as_bgr_numpy_array()
        results = self.run_model(image_np)
        obstacles = []
        for res in results:
            track_id = res['tracking_id']
            bbox = res['bbox']
            score = res['score']
            label_id = res['class'] - 1
            if label_id > 80:
                continue
            label = self.trained_dataset.class_name[label_id]
            if label in ['Pedestrian', 'pedestrian']:
                label = 'person'
            elif label == 'Car':
                label = 'car'
            elif label == 'Cyclist':
                label = 'bicycle'
            if label in OBSTACLE_LABELS:
                bounding_box_2D = BoundingBox2D(bbox[0], bbox[2], bbox[1],
                                                bbox[3])
                bounding_box_3D = None
                if 'dim' in res and 'loc' in res and 'rot_y' in res:
                    bounding_box_3D = BoundingBox3D.from_dimensions(
                        res['dim'], res['loc'], res['rot_y'])
                obstacles.append(
                    Obstacle(bounding_box_3D,
                             score,
                             label,
                             track_id,
                             bounding_box_2D=bounding_box_2D))
        obstacle_tracking_stream.send(
            ObstaclesMessage(msg.timestamp, obstacles, 0))

    def run_model(self, image_np, meta={}):
        images, meta = self.pre_process(image_np, meta)
        images = images.to(self.opt.device,
                           non_blocking=self.opt.non_block_test)
        pre_hms, pre_inds = None, None
        if self.pre_images is None:
            self.pre_images = images
            self.tracker.init_track(meta['pre_dets']
                                    if 'pre_dets' in meta else [])
        if self.opt.pre_hm:
            pre_hms, pre_inds = self._get_additional_inputs(
                self.tracker.tracks, meta, with_hm=not self.opt.zero_pre_hm)
        output, dets = self.process(images, self.pre_images, pre_hms,
                                    pre_inds)
        detections = self.post_process(dets, meta)
        # Filter out detections below threshold.
        detections = [
            det for det in detections if det['score'] > self.opt.out_thresh
        ]
        torch.cuda.synchronize()
        public_det = meta['cur_dets'] if self.opt.public_det else None
        # Add tracking id to results.
        results = self.tracker.step(detections, public_det)
        self.pre_images = images
        return results

    def process(self, images, pre_images=None, pre_hms=None, pre_inds=None):
        from model.decode import generic_decode
        with torch.no_grad():
            torch.cuda.synchronize()
            output = self.model(images, pre_images, pre_hms)[-1]
            output = self._sigmoid_output(output)
            output.update({'pre_inds': pre_inds})
            if self.opt.flip_test:
                output = self._flip_output(output)
            torch.cuda.synchronize()
            dets = generic_decode(output, K=self.opt.K, opt=self.opt)
            torch.cuda.synchronize()
            for k in dets:
                dets[k] = dets[k].detach().cpu().numpy()
        return output, dets

    def pre_process(self, image, input_meta={}):
        """
        Crop, resize, and normalize image. Gather meta data for post
        processing and tracking.
        """
        from utils.image import get_affine_transform
        resized_image, c, s, inp_width, inp_height, height, width = \
            self._transform_scale(image)
        trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
        out_height = inp_height // self.opt.down_ratio
        out_width = inp_width // self.opt.down_ratio
        trans_output = get_affine_transform(c, s, 0, [out_width, out_height])
        inp_image = cv2.warpAffine(resized_image, trans_input,
                                   (inp_width, inp_height),
                                   flags=cv2.INTER_LINEAR)
        inp_image = ((inp_image / 255. - self.mean) / self.std).astype(
            np.float32)
        images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height,
                                                      inp_width)
        if self.opt.flip_test:
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        meta = {
            'calib': np.array(input_meta['calib'], dtype=np.float32)
            if 'calib' in input_meta else self._get_default_calib(
                width, height)
        }
        meta.update({
            'c': c,
            's': s,
            'height': height,
            'width': width,
            'out_height': out_height,
            'out_width': out_width,
            'inp_height': inp_height,
            'inp_width': inp_width,
            'trans_input': trans_input,
            'trans_output': trans_output
        })
        if 'pre_dets' in input_meta:
            meta['pre_dets'] = input_meta['pre_dets']
        if 'cur_dets' in input_meta:
            meta['cur_dets'] = input_meta['cur_dets']
        return images, meta

    def _get_default_calib(self, width, height):
        calib = np.array([[self.rest_focal_length, 0, width / 2, 0],
                          [0, self.rest_focal_length, height / 2, 0],
                          [0, 0, 1, 0]])
        return calib

    def _transform_scale(self, image, scale=1):
        """
        Prepare input image in different testing modes.
        Currently supported: fix short size / center crop to a fixed size /
        keep original resolution but pad to a multiple of 32.
        """
        height, width = image.shape[0:2]
        new_height = int(height * scale)
        new_width = int(width * scale)
        if self.opt.fix_short > 0:
            if height < width:
                inp_height = self.opt.fix_short
                inp_width = (int(width / height * self.opt.fix_short) +
                             63) // 64 * 64
            else:
                inp_height = (int(height / width * self.opt.fix_short) +
                              63) // 64 * 64
                inp_width = self.opt.fix_short
            c = np.array([width / 2, height / 2], dtype=np.float32)
            s = np.array([width, height], dtype=np.float32)
        elif self.opt.fix_res:
            inp_height, inp_width = self.opt.input_h, self.opt.input_w
            c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
            s = max(height, width) * 1.0
            # s = np.array([inp_width, inp_height], dtype=np.float32)
        else:
            inp_height = (new_height | self.opt.pad) + 1
            inp_width = (new_width | self.opt.pad) + 1
            c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
            s = np.array([inp_width, inp_height], dtype=np.float32)
        resized_image = cv2.resize(image, (new_width, new_height))
        return resized_image, c, s, inp_width, inp_height, height, width

    def _sigmoid_output(self, output):
        if 'hm' in output:
            output['hm'] = output['hm'].sigmoid_()
        if 'hm_hp' in output:
            output['hm_hp'] = output['hm_hp'].sigmoid_()
        if 'dep' in output:
            output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1.
            output['dep'] *= self.opt.depth_scale
        return output

    def post_process(self, dets, meta):
        from utils.post_process import generic_post_process
        dets = generic_post_process(self.opt, dets, [meta['c']], [meta['s']],
                                    meta['out_height'], meta['out_width'],
                                    self.opt.num_classes, [meta['calib']],
                                    meta['height'], meta['width'])
        self.this_calib = meta['calib']
        return dets[0]
class Detector(object): def __init__(self, opt): if opt.gpus[0] >= 0: opt.device = torch.device('cuda') else: opt.device = torch.device('cpu') print('Creating model...') self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt) self.model = load_model(self.model, opt.load_model, opt) self.model = self.model.to(opt.device) self.model.eval() self.opt = opt self.trained_dataset = get_dataset(opt.dataset) self.mean = np.array(self.trained_dataset.mean, dtype=np.float32).reshape(1, 1, 3) self.std = np.array(self.trained_dataset.std, dtype=np.float32).reshape(1, 1, 3) self.pause = not opt.no_pause self.rest_focal_length = self.trained_dataset.rest_focal_length \ if self.opt.test_focal_length < 0 else self.opt.test_focal_length self.flip_idx = self.trained_dataset.flip_idx self.cnt = 0 self.pre_images = None self.pre_image_ori = None self.tracker = Tracker(opt) self.debugger = Debugger(opt=opt, dataset=self.trained_dataset) def run(self, image_or_path_or_tensor, meta={}): load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0 merge_time, track_time, tot_time, display_time = 0, 0, 0, 0 self.debugger.clear() start_time = time.time() # read image pre_processed = False if isinstance(image_or_path_or_tensor, np.ndarray): image = image_or_path_or_tensor elif type(image_or_path_or_tensor) == type(''): image = cv2.imread(image_or_path_or_tensor) else: image = image_or_path_or_tensor['image'][0].numpy() pre_processed_images = image_or_path_or_tensor pre_processed = True loaded_time = time.time() load_time += (loaded_time - start_time) detections = [] # for multi-scale testing for scale in self.opt.test_scales: scale_start_time = time.time() if not pre_processed: # not prefetch testing or demo images, meta = self.pre_process(image, scale, meta) else: # prefetch testing images = pre_processed_images['images'][scale][0] meta = pre_processed_images['meta'][scale] meta = {k: v.numpy()[0] for k, v in meta.items()} if 'pre_dets' in pre_processed_images['meta']: meta['pre_dets'] = pre_processed_images['meta']['pre_dets'] if 'cur_dets' in pre_processed_images['meta']: meta['cur_dets'] = pre_processed_images['meta']['cur_dets'] images = images.to(self.opt.device, non_blocking=self.opt.non_block_test) # initializing tracker pre_hms, pre_inds = None, None if self.opt.tracking: # initialize the first frame if self.pre_images is None: print('Initialize tracking!') self.pre_images = images self.tracker.init_track(meta['pre_dets'] if 'pre_dets' in meta else []) if self.opt.pre_hm: # render input heatmap from tracker status # pre_inds is not used in the current version. # We used pre_inds for learning an offset from previous image to # the current image. 
pre_hms, pre_inds = self._get_additional_inputs( self.tracker.tracks, meta, with_hm=not self.opt.zero_pre_hm) pre_process_time = time.time() pre_time += pre_process_time - scale_start_time # run the network # output: the output feature maps, only used for visualizing # dets: output tensors after extracting peaks output, dets, forward_time = self.process(images, self.pre_images, pre_hms, pre_inds, return_time=True) net_time += forward_time - pre_process_time decode_time = time.time() dec_time += decode_time - forward_time # convert the cropped and 4x downsampled output coordinate system # back to the input image coordinate system result = self.post_process(dets, meta, scale) post_process_time = time.time() post_time += post_process_time - decode_time detections.append(result) if self.opt.debug >= 2: self.debug(self.debugger, images, result, output, scale, pre_images=self.pre_images if not self.opt.no_pre_img else None, pre_hms=pre_hms) # merge multi-scale testing results results = self.merge_outputs(detections) if self.opt.gpus[0] >= 0: torch.cuda.synchronize() end_time = time.time() merge_time += end_time - post_process_time if self.opt.tracking: # public detection mode in MOT challenge public_det = meta['cur_dets'] if self.opt.public_det else None # add tracking id to results results = self.tracker.step(results, public_det) self.pre_images = images tracking_time = time.time() track_time += tracking_time - end_time tot_time += tracking_time - start_time if self.opt.debug >= 1: self.show_results(self.debugger, image, results) self.cnt += 1 show_results_time = time.time() display_time += show_results_time - end_time # return results and run time ret = { 'results': results, 'tot': tot_time, 'load': load_time, 'pre': pre_time, 'net': net_time, 'dec': dec_time, 'post': post_time, 'merge': merge_time, 'track': track_time, 'display': display_time } if self.opt.save_video: try: # return debug image for saving video ret.update({'generic': self.debugger.imgs['generic']}) except: pass return ret def _transform_scale(self, image, scale=1): ''' Prepare input image in different testing modes. Currently support: fix short size/ center crop to a fixed size/ keep original resolution but pad to a multiplication of 32 ''' height, width = image.shape[0:2] new_height = int(height * scale) new_width = int(width * scale) if self.opt.fix_short > 0: if height < width: inp_height = self.opt.fix_short inp_width = (int(width / height * self.opt.fix_short) + 63) // 64 * 64 else: inp_height = (int(height / width * self.opt.fix_short) + 63) // 64 * 64 inp_width = self.opt.fix_short c = np.array([width / 2, height / 2], dtype=np.float32) s = np.array([width, height], dtype=np.float32) elif self.opt.fix_res: inp_height, inp_width = self.opt.input_h, self.opt.input_w c = np.array([new_width / 2., new_height / 2.], dtype=np.float32) s = max(height, width) * 1.0 # s = np.array([inp_width, inp_height], dtype=np.float32) else: inp_height = (new_height | self.opt.pad) + 1 inp_width = (new_width | self.opt.pad) + 1 c = np.array([new_width // 2, new_height // 2], dtype=np.float32) s = np.array([inp_width, inp_height], dtype=np.float32) resized_image = cv2.resize(image, (new_width, new_height)) return resized_image, c, s, inp_width, inp_height, height, width def pre_process(self, image, scale, input_meta={}): ''' Crop, resize, and normalize image. Gather meta data for post processing and tracking. 
''' resized_image, c, s, inp_width, inp_height, height, width = \ self._transform_scale(image) trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height]) out_height = inp_height // self.opt.down_ratio out_width = inp_width // self.opt.down_ratio trans_output = get_affine_transform(c, s, 0, [out_width, out_height]) inp_image = cv2.warpAffine(resized_image, trans_input, (inp_width, inp_height), flags=cv2.INTER_LINEAR) inp_image = ((inp_image / 255. - self.mean) / self.std).astype( np.float32) images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width) if self.opt.flip_test: images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) images = torch.from_numpy(images) meta = {'calib': np.array(input_meta['calib'], dtype=np.float32) \ if 'calib' in input_meta else \ self._get_default_calib(width, height)} meta.update({ 'c': c, 's': s, 'height': height, 'width': width, 'out_height': out_height, 'out_width': out_width, 'inp_height': inp_height, 'inp_width': inp_width, 'trans_input': trans_input, 'trans_output': trans_output }) if 'pre_dets' in input_meta: meta['pre_dets'] = input_meta['pre_dets'] if 'cur_dets' in input_meta: meta['cur_dets'] = input_meta['cur_dets'] return images, meta def _trans_bbox(self, bbox, trans, width, height): ''' Transform bounding boxes according to image crop. ''' bbox = np.array(copy.deepcopy(bbox), dtype=np.float32) bbox[:2] = affine_transform(bbox[:2], trans) bbox[2:] = affine_transform(bbox[2:], trans) bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, width - 1) bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, height - 1) return bbox def _get_additional_inputs(self, dets, meta, with_hm=True): ''' Render input heatmap from previous trackings. ''' trans_input, trans_output = meta['trans_input'], meta['trans_output'] inp_width, inp_height = meta['inp_width'], meta['inp_height'] out_width, out_height = meta['out_width'], meta['out_height'] input_hm = np.zeros((1, inp_height, inp_width), dtype=np.float32) output_inds = [] for det in dets: if det['score'] < self.opt.pre_thresh or det['active'] == 0: continue bbox = self._trans_bbox(det['bbox'], trans_input, inp_width, inp_height) bbox_out = self._trans_bbox(det['bbox'], trans_output, out_width, out_height) h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] if (h > 0 and w > 0): radius = gaussian_radius((math.ceil(h), math.ceil(w))) radius = max(0, int(radius)) ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) ct_int = ct.astype(np.int32) if with_hm: draw_umich_gaussian(input_hm[0], ct_int, radius) ct_out = np.array([(bbox_out[0] + bbox_out[2]) / 2, (bbox_out[1] + bbox_out[3]) / 2], dtype=np.int32) output_inds.append(ct_out[1] * out_width + ct_out[0]) if with_hm: input_hm = input_hm[np.newaxis] if self.opt.flip_test: input_hm = np.concatenate((input_hm, input_hm[:, :, :, ::-1]), axis=0) input_hm = torch.from_numpy(input_hm).to(self.opt.device) output_inds = np.array(output_inds, np.int64).reshape(1, -1) output_inds = torch.from_numpy(output_inds).to(self.opt.device) return input_hm, output_inds def _get_default_calib(self, width, height): calib = np.array([[self.rest_focal_length, 0, width / 2, 0], [0, self.rest_focal_length, height / 2, 0], [0, 0, 1, 0]]) return calib def _sigmoid_output(self, output): if 'hm' in output: output['hm'] = output['hm'].sigmoid_() if 'hm_hp' in output: output['hm_hp'] = output['hm_hp'].sigmoid_() if 'dep' in output: output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1. 
        output['dep'] *= self.opt.depth_scale
    return output

def _flip_output(self, output):
    average_flips = ['hm', 'wh', 'dep', 'dim']  # TODO: consider tracking_wh
    neg_average_flips = ['amodel_offset']
    single_flips = ['ltrb', 'nuscenes_att', 'velocity', 'ltrb_amodal',
                    'reg', 'hp_offset', 'rot', 'tracking', 'pre_hm']  # TODO: consider iou
    for head in output:
        if head in average_flips:
            output[head] = (output[head][0:1] + flip_tensor(output[head][1:2])) / 2
        if head in neg_average_flips:
            flipped_tensor = flip_tensor(output[head][1:2])
            flipped_tensor[:, 0::2] *= -1
            output[head] = (output[head][0:1] + flipped_tensor) / 2
        if head in single_flips:
            output[head] = output[head][0:1]
        if head == 'hps':
            output['hps'] = (output['hps'][0:1] +
                             flip_lr_off(output['hps'][1:2], self.flip_idx)) / 2
        if head == 'hm_hp':
            output['hm_hp'] = (output['hm_hp'][0:1] +
                               flip_lr(output['hm_hp'][1:2], self.flip_idx)) / 2
    return output

def process(self, images, pre_images=None, pre_hms=None,
            pre_inds=None, return_time=False):
    with torch.no_grad():
        if self.opt.gpus[0] >= 0:
            torch.cuda.synchronize()
        output = self.model(images, pre_images, pre_hms)[-1]
        output = self._sigmoid_output(output)
        output.update({'pre_inds': pre_inds})
        if self.opt.flip_test:
            output = self._flip_output(output)
        if self.opt.gpus[0] >= 0:
            torch.cuda.synchronize()
        forward_time = time.time()

        dets = generic_decode(output, K=self.opt.K, opt=self.opt)
        if self.opt.gpus[0] >= 0:
            torch.cuda.synchronize()
        for k in dets:
            dets[k] = dets[k].detach().cpu().numpy()
    if return_time:
        return output, dets, forward_time
    else:
        return output, dets

def post_process(self, dets, meta, scale=1):
    dets = generic_post_process(self.opt, dets, [meta['c']], [meta['s']],
                                meta['out_height'], meta['out_width'],
                                self.opt.num_classes, [meta['calib']],
                                meta['height'], meta['width'])
    self.this_calib = meta['calib']
    if scale != 1:
        for i in range(len(dets[0])):
            for k in ['bbox', 'hps']:
                if k in dets[0][i]:
                    dets[0][i][k] = (np.array(dets[0][i][k], np.float32) / scale).tolist()
    return dets[0]

def merge_outputs(self, detections):
    assert len(self.opt.test_scales) == 1, 'multi_scale not supported!'
    results = []
    for i in range(len(detections[0])):
        if detections[0][i]['score'] > self.opt.out_thresh:
            results.append(detections[0][i])
    return results

def debug(self, debugger, images, dets, output, scale=1,
          pre_images=None, pre_hms=None):
    img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
    img = np.clip(((img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8)
    pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy())
    debugger.add_blend_img(img, pred, 'pred_hm')
    if 'hm_hp' in output:
        pred = debugger.gen_colormap_hp(output['hm_hp'][0].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hmhp')

    if pre_images is not None:
        pre_img = pre_images[0].detach().cpu().numpy().transpose(1, 2, 0)
        pre_img = np.clip(((pre_img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8)
        debugger.add_img(pre_img, 'pre_img')
        if pre_hms is not None:
            pre_hm = debugger.gen_colormap(pre_hms[0].detach().cpu().numpy())
            debugger.add_blend_img(pre_img, pre_hm, 'pre_hm')

def show_results(self, debugger, image, results):
    debugger.add_img(image, img_id='generic')
    # if self.opt.tracking:
    #     debugger.add_img(
    #         self.pre_image_ori if self.pre_image_ori is not None else image,
    #         img_id='previous')
    #     self.pre_image_ori = image
    for j in range(len(results)):
        if results[j]['score'] > self.opt.vis_thresh:
            if 'active' in results[j] and results[j]['active'] == 0:
                continue
            item = results[j]
            if 'bbox' in item:
                sc = item['score'] if self.opt.demo == '' or \
                    'tracking_id' not in item else item['tracking_id']
                sc = item['tracking_id'] if self.opt.show_track_color else sc
                debugger.add_coco_bbox(item['bbox'], item['class'] - 1,
                                       sc, img_id='generic')
            if 'tracking' in item:
                debugger.add_arrow(item['ct'], item['tracking'], img_id='generic')
            tracking_id = item['tracking_id'] if 'tracking_id' in item else -1
            if 'tracking_id' in item and self.opt.demo == '' and \
                    not self.opt.show_track_color:
                debugger.add_tracking_id(item['ct'], item['tracking_id'],
                                         img_id='generic')
            if (item['class'] in [1, 2]) and 'hps' in item:
                debugger.add_coco_hp(item['hps'], tracking_id=tracking_id,
                                     img_id='generic')

    if len(results) > 0 and 'dep' in results[0] and \
            'alpha' in results[0] and 'dim' in results[0]:
        debugger.add_3d_detection(
            image if not self.opt.qualitative else cv2.resize(
                debugger.imgs['pred_hm'], (image.shape[1], image.shape[0])),
            False, results, self.this_calib,
            vis_thresh=self.opt.vis_thresh, img_id='ddd_pred')
        debugger.add_bird_view(results, vis_thresh=self.opt.vis_thresh,
                               img_id='bird_pred', cnt=self.cnt)
        if self.opt.show_track_color and self.opt.debug == 4:
            del debugger.imgs['generic'], debugger.imgs['bird_pred']
        if 'ddd_pred' in debugger.imgs:
            debugger.imgs['generic'] = debugger.imgs['ddd_pred']
    if self.opt.debug == 4:
        debugger.save_all_imgs(self.opt.debug_dir, prefix='{}'.format(self.cnt))
    else:
        debugger.show_all_imgs(pause=self.pause)

def reset_tracking(self):
    self.tracker.reset()
    self.pre_images = None
    self.pre_image_ori = None
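# --- Illustrative sketch (not part of the original source) -------------------
# _get_additional_inputs above renders the previous tracks into a prior
# heatmap through gaussian_radius / draw_umich_gaussian from the CenterNet
# utilities. The helper below is a hypothetical, self-contained stand-in
# that sketches the same max-merged Gaussian rendering on a plain numpy
# array; it is an assumption for illustration, not the repo's implementation.
import numpy as np


def draw_gaussian_sketch(heatmap, center, radius):
    # Render an unnormalized 2D Gaussian peak of the given radius, keeping
    # the element-wise maximum with whatever is already on the heatmap.
    diameter = 2 * radius + 1
    sigma = diameter / 6.0
    y, x = np.ogrid[-radius:radius + 1, -radius:radius + 1]
    gaussian = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    cx, cy = int(center[0]), int(center[1])
    height, width = heatmap.shape
    # Clip the stamp so it stays inside the heatmap borders.
    left, right = min(cx, radius), min(width - cx, radius + 1)
    top, bottom = min(cy, radius), min(height - cy, radius + 1)
    patch = heatmap[cy - top:cy + bottom, cx - left:cx + right]
    stamp = gaussian[radius - top:radius + bottom, radius - left:radius + right]
    np.maximum(patch, stamp, out=patch)
    return heatmap


# Example: one prior detection centered at (48, 48) with radius 6.
# hm = np.zeros((96, 96), dtype=np.float32)
# draw_gaussian_sketch(hm, center=(48, 48), radius=6)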
def main(opt):
    if opt.verbose:
        print("------------------------")
        print("RUNNING SET UP")
        print("------------------------")
    tf.logging.set_verbosity(40)  # 40 corresponds to logging.ERROR
    random.seed(0)
    Tensor = torch.cuda.FloatTensor if opt.using_cuda else torch.FloatTensor
    os.makedirs(opt.output_folder, exist_ok=True)
    if opt.LSTM:
        opt.max_cosine_distance = 1
        lstm = CombiLSTM()
        checkpoint = torch.load(opt.lstm_model)
        lstm.load_state_dict(checkpoint['state_dict'])
        if opt.using_cuda:
            lstm.cuda()
        lstm.eval()
    else:
        lstm = None
    if opt.combine_features:
        combination_model = CombiNet()
        checkpoint = torch.load(opt.combo_model)
        combination_model.load_state_dict(checkpoint['state_dict'])
        if opt.using_cuda:
            combination_model.cuda()
        combination_model.eval()
    else:
        combination_model = None

    dataset = SequenceDataset(opt.sequence_folder, point_cloud=opt.point_cloud,
                              omni=opt.omni)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False,
                            num_workers=opt.n_cpu, collate_fn=collate_fn)
    appearance_model = create_appearance_model(opt.appearance_model,
                                               opt.aligned_reid_ckpt,
                                               opt.resnet_reid_ckpt,
                                               opt.using_cuda)
    if opt.point_cloud:
        depth_model = create_depth_model(opt.depth_model, opt.depth_config_path)

    if opt.track_3d:
        tracker = Tracker_3d(
            appearance_model=appearance_model, cuda=opt.using_cuda,
            JPDA=opt.JPDA, m_best_sol=opt.m_best_sol, max_age=opt.max_age,
            n_init=opt.n_init, assn_thresh=opt.assn_thresh,
            matching_strategy=opt.matching_strategy,
            gate_full_state=opt.gate_full_state,
            kf_vel_params=(opt.pos_weight_3d, opt.pos_weight, opt.vel_weight,
                           opt.theta_weight, opt.kf_process, opt.kf_2d_meas,
                           opt.kf_3d_meas, opt.initial_uncertainty),
            calib=dataset.calib,
            dummy_node_cost_iou=opt.dummy_node_cost_iou,
            dummy_node_cost_app=opt.dummy_node_cost_app,
            nn_budget=opt.nn_budget, use_imm=opt.use_imm,
            uncertainty_limit=opt.uncertainty_limit,
            gate_limit=opt.gate_limit, omni=opt.omni)
    else:
        tracker = Tracker(
            appearance_model=appearance_model, cuda=opt.using_cuda,
            JPDA=opt.JPDA, m_best_sol=opt.m_best_sol, max_age=opt.max_age,
            n_init=opt.n_init, assn_thresh=opt.assn_thresh,
            matching_strategy=opt.matching_strategy,
            kf_appearance_feature=opt.kf_appearance_feature,
            gate_full_state=opt.gate_full_state,
            kf_vel_params=(opt.pos_weight, opt.vel_weight, opt.kf_process,
                           opt.kf_2d_meas, opt.initial_uncertainty),
            kf_walk_params=(opt.pos_weight, opt.vel_weight, opt.kf_process,
                            opt.kf_2d_meas, opt.initial_uncertainty),
            calib=dataset.calib,
            dummy_node_cost_iou=opt.dummy_node_cost_iou,
            dummy_node_cost_app=opt.dummy_node_cost_app,
            nn_budget=opt.nn_budget, use_imm=opt.use_imm,
            uncertainty_limit=opt.uncertainty_limit,
            optical_flow=opt.optical_flow_initiation,
            gate_limit=opt.gate_limit)

    results = []
    results_3d = []
    n_frames = len(dataloader)
    if opt.log_data:
        full_log = [{'tracks': [], 'detections': [], 'detections_3d': []}
                    for _ in range(n_frames)]
    det_matrix = None
    seq_name = os.path.split(opt.sequence_folder)[-1]
    frame_times = []
    if opt.verbose:
        print("------------------------")
        print("BEGINNING TRACKING OF SEQUENCE %s" % seq_name)
        print("------------------------")
    for frame_idx, img_path, input_img, point_cloud in tqdm(
            dataloader, ncols=100, disable=not opt.verbose):
        # if frame_idx > 120:
        #     break
        # elif frame_idx < 98:
        #     continue
        if opt.log_data:
            full_log[frame_idx]['img_path'] = copy.copy(img_path)
        input_img = input_img.type(Tensor)
        if opt.reference:
            detections, object_ids, det_matrix = read_ground_truth_2d_detections(
                os.path.join(opt.sequence_folder, 'det', opt.ref_det + '.txt'),
                frame_idx, det_matrix, threshold=0,
                nms_threshold=opt.nms_thresh)
        elif opt.ground_truth:
            detections, object_ids, det_matrix = read_ground_truth_2d_detections(
                os.path.join(opt.sequence_folder, 'gt', 'gt.txt'),
                frame_idx, det_matrix, nms_threshold=opt.nms_thresh)
        else:
            raise ValueError("Must specify ground truth or detections")

        # --- START OF TRACKING ---
        start_time = time.time()
        if detections is None or len(detections) == 0:
            tracker.predict()
            if opt.log_data:
                full_log[frame_idx]['predicted_tracks'] = copy.deepcopy(tracker.tracks)
            start_time = time.time()
            tracker.update(input_img, [])
        else:
            total_dets = len(detections)
            patches = get_image_patches(input_img, detections)
            appearance_features = generate_features_batched(
                appearance_model, patches, opt, object_ids)
            if opt.point_cloud:
                if not opt.omni:
                    point_cloud = point_cloud[point_cloud[:, 2] >= 0]
                if opt.fpointnet:
                    boxes_3d, valid_3d, _, scores_3d, depth_features = \
                        generate_detections_3d(
                            depth_model, detections, np.asarray(point_cloud),
                            dataset.calib, input_img.shape,
                            peds=('ped' in opt.ref_det or opt.omni))
                    depth_features = convert_depth_features(depth_features, valid_3d)
                else:
                    boxes_3d, valid_3d = read_ground_truth_3d_detections(
                        os.path.join(opt.sequence_folder, 'gt', '3d_detections.txt'),
                        frame_idx)
                    depth_features = None  # ground-truth boxes carry no depth features
                features, appearance_features = combine_features(
                    appearance_features, depth_features, valid_3d,
                    combination_model, depth_weight=opt.depth_weight)
                # boxes_3d = boxes_3d[valid_3d != -1]  # Old and buggy way of handling missing box
                # detections = detections[valid_3d != -1]
                # Fall back to 2D matching when any 3D box is missing.
                compare_2d = bool(np.any(valid_3d == -1))
                if len(boxes_3d) > 0:
                    detections_3d = []
                    for idx, box in enumerate(boxes_3d):
                        if valid_3d[idx] == -1:
                            detections_3d.append(None)
                        else:
                            detections_3d.append(np.array(box).astype(np.float32))
                else:
                    detections_3d = None
            else:
                appearance_features = [appearance_features[i] for i in range(total_dets)]
                features = [None] * len(appearance_features)
                compare_2d = True
                detections_3d = None
            detections = convert_detections(detections, features,
                                            appearance_features, detections_3d)
            tracker.predict()
            if opt.log_data:
                full_log[frame_idx]['predicted_tracks'] = copy.deepcopy(tracker.tracks)
            start_time = time.time()
            tracker.update(input_img, detections, compare_2d)
        # --- END OF TRACKING ---
        end_time = time.time()
        frame_times.append(end_time - start_time)

        if opt.log_data:
            full_tracks = copy.deepcopy(tracker.tracks)
            temp_tracks = []
            for track in full_tracks:
                bbox = track.to_tlwh(None)
                if not (bbox[0] < -10 or bbox[1] < -10 or
                        bbox[0] + bbox[2] > input_img.shape[2] + 10 or
                        bbox[1] + bbox[3] > input_img.shape[1] + 10):
                    temp_tracks.append(track)
            full_log[frame_idx]['tracks'] = temp_tracks
            full_log[frame_idx]['detections'] = copy.deepcopy(detections)

        for track in tracker.tracks:
            if opt.track_3d:
                bbox_3d = track.to_tlwh3d()
            else:
                bbox = track.to_tlwh(None)
                if (bbox[0] < -10 or bbox[1] < -10 or
                        bbox[0] + bbox[2] > input_img.shape[2] + 10 or
                        bbox[1] + bbox[3] > input_img.shape[1] + 10):
                    continue
                bbox[0] = max(0, bbox[0])  # clip the box to the frame
                bbox[1] = max(0, bbox[1])
                bbox[2] = min(bbox[0] + bbox[2], input_img.shape[2]) - bbox[0]
                bbox[3] = min(bbox[1] + bbox[3], input_img.shape[1]) - bbox[1]
            track_status = 1
            if not track.is_confirmed():  # or track.time_since_update > 0:
                if opt.near_online:
                    if not track.is_confirmed():
                        track_status = 0
                    else:
                        track_status = 2
                        continue
                else:
                    continue
            if opt.near_online:
                if opt.track_3d:
                    results_3d.append([
                        frame_idx, track.track_id,
                        bbox_3d[0], bbox_3d[1], bbox_3d[2], bbox_3d[3],
                        bbox_3d[4], bbox_3d[5], bbox_3d[6], track_status])
                else:
                    results.append([
                        frame_idx, track.track_id,
                        bbox[0], bbox[1], bbox[2], bbox[3],
                        track_status])
                if track_status == 1:
                    # promote this track's earlier tentative rows (status 0)
                    # to confirmed (status 1)
                    for row_i in range(len(results)):
                        if results[row_i][1] == track.track_id:
                            results[row_i][6] = 1
                    if opt.point_cloud:
                        for row_i in range(len(results_3d)):
                            if results_3d[row_i][1] == track.track_id:
                                results_3d[row_i][9] = 1  # status is the last field in 3d rows
            else:
                if opt.track_3d:
                    results_3d.append([
                        frame_idx, track.track_id,
                        bbox_3d[0], bbox_3d[1], bbox_3d[2], bbox_3d[3],
                        bbox_3d[4], bbox_3d[5], bbox_3d[6]])
                else:
                    results.append([
                        frame_idx, track.track_id,
                        bbox[0], bbox[1], bbox[2], bbox[3]])

    frame_times = np.asarray(frame_times)
    if opt.verbose:
        print("------------------------")
        print("COMPLETED TRACKING, SAVING RESULTS")
        print("------------------------")
        print('\n\n', 'Total Tracking Time:', np.sum(frame_times),
              'Average Time Per Frame:', np.mean(frame_times))

    if opt.track_3d:
        output_file_3d = os.path.join(opt.output_folder, seq_name + "_3d.txt")
        if len(results_3d) > 0:
            with open(output_file_3d, 'w+') as f:
                for row in results_3d:
                    if opt.near_online and row[9] != 1:
                        continue
                    print('%d,%d,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.4f,1,1,1,-1' % (
                        row[0], row[1], row[2], row[3], row[4],
                        row[5], row[6], row[7], row[8]), file=f)
    else:
        output_file = os.path.join(opt.output_folder, seq_name + ".txt")
        if len(results) > 0:
            with open(output_file, 'w+') as f:
                for row in results:
                    if opt.near_online and row[6] != 1:
                        continue
                    print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,1,1,-1' % (
                        row[0], row[1], row[2], row[3], row[4], row[5]),
                        file=f)
    if opt.log_data:
        output_file = os.path.join(opt.output_folder, seq_name + ".p")
        with open(output_file, 'wb') as f:
            pickle.dump(full_log, f)
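# --- Illustrative sketch (not part of the original source) -------------------
# main() above writes the 2D results in the MOTChallenge text format:
#   frame, track_id, x, y, w, h, conf, <three ignored fields>
# A minimal, hypothetical reader for those files, useful for sanity-checking
# the output; the function name and column handling are assumptions.
import csv


def load_mot_results_sketch(path):
    # Parse one row per detection: (frame, track_id, x, y, w, h).
    rows = []
    with open(path) as f:
        for frame, tid, x, y, w, h, *_ in csv.reader(f):
            rows.append((int(frame), int(tid),
                         float(x), float(y), float(w), float(h)))
    return rows


# Example: rows = load_mot_results_sketch(os.path.join(opt.output_folder, seq_name + '.txt'))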
def __init__(self, camera_stream, obstacle_tracking_stream, flags,
             camera_setup):
    from dataset.dataset_factory import get_dataset
    from model.model import create_model, load_model
    from opts import opts
    from utils.tracker import Tracker
    camera_stream.add_callback(self.on_frame_msg, [obstacle_tracking_stream])
    self._flags = flags
    self._logger = erdos.utils.setup_logging(self.config.name,
                                             self.config.log_file_name)
    self._csv_logger = erdos.utils.setup_csv_logging(
        self.config.name + '-csv', self.config.csv_log_file_name)
    self._camera_setup = camera_setup
    # TODO(ionel): Might have to filter labels when running with a coco
    # and a nuscenes model.
    num_classes = {'kitti_tracking': 3, 'coco': 90, 'mot': 1, 'nuscenes': 10}
    # Other flags:
    # 1) --K; max number of output objects.
    # 2) --fix_short; resizes the height of the image to fix_short, and
    #    the width such that the aspect ratio is maintained.
    # 3) --pre_hm; pre heat map.
    # 4) --input_w; str(camera_setup.width)
    # 5) --input_h; str(camera_setup.height)
    args = [
        'tracking',
        '--load_model', flags.center_track_model_path,
        '--dataset', flags.center_track_model,
        '--test_focal_length', str(int(camera_setup.get_focal_length())),
        '--out_thresh', str(flags.obstacle_detection_min_score_threshold),
        '--pre_thresh', str(flags.obstacle_detection_min_score_threshold),
        '--new_thresh', str(flags.obstacle_detection_min_score_threshold),
        '--track_thresh', str(flags.obstacle_detection_min_score_threshold),
        '--max_age', str(flags.obstacle_track_max_age),
        '--num_classes', str(num_classes[flags.center_track_model]),
        '--tracking',
        '--hungarian',
    ]
    opt = opts().init(args)
    gpu = True
    if gpu:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')
    self.opt = opt
    self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    self.model = load_model(self.model, opt.load_model, opt)
    self.model = self.model.to(self.opt.device)
    self.model.eval()
    self.trained_dataset = get_dataset(opt.dataset)
    self.mean = np.array(self.trained_dataset.mean,
                         dtype=np.float32).reshape(1, 1, 3)
    self.std = np.array(self.trained_dataset.std,
                        dtype=np.float32).reshape(1, 1, 3)
    self.rest_focal_length = self.trained_dataset.rest_focal_length \
        if self.opt.test_focal_length < 0 else self.opt.test_focal_length
    self.flip_idx = self.trained_dataset.flip_idx
    self.cnt = 0
    self.pre_images = None
    self.pre_image_ori = None
    self.tracker = Tracker(opt)
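# --- Illustrative sketch (not part of the original source) -------------------
# The constructor above registers self.on_frame_msg, but the callback is not
# shown in this file. A hypothetical per-frame body is sketched below; the
# message accessor (msg.frame.as_numpy_array) and the reuse of the detector
# helpers (pre_process / process / post_process / merge_outputs, shown
# earlier in this document) are assumptions for illustration, not the actual
# Pylot API.
def on_frame_msg_sketch(self, msg, obstacle_tracking_stream):
    frame = msg.frame.as_numpy_array()  # assumed BGR image accessor
    images, meta = self.pre_process(frame, scale=1)
    images = images.to(self.opt.device)
    output, dets = self.process(images, self.pre_images)
    results = self.merge_outputs([self.post_process(dets, meta)])
    # Associate detections with existing tracks and remember this frame.
    results = self.tracker.step(results)
    self.pre_images = images
    self.cnt += 1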