def __getitem__(self, item):
    """Fetch one COCO record as a single-frame sequence.

    :param item: int, index into COCODataset.data_items
    :return: dict with keys
        image: one-element list holding the image file path
        anno: per-object binary masks when with_mask is set,
              otherwise xyxy boxes (dummy box if the image has no objects)
    """
    record = COCODataset.data_items[item]
    image_file = record["file_name"]
    height, width = record["height"], record["width"]
    objects = record['annotations']
    if self._hyper_params["with_mask"]:
        # One rasterized binary mask per annotated object.
        masks = [
            self._generate_mask_from_anno(obj['segmentation'], height, width)
            for obj in objects
        ]
        return dict(image=[image_file], anno=masks)
    boxes = [obj['bbox'] for obj in objects]
    if len(boxes) <= 0:
        # Placeholder annotation so downstream code always sees a box.
        boxes = self._DUMMY_ANNO
    return dict(image=[image_file], anno=xywh2xyxy(boxes))
def __getitem__(self, item: int) -> Dict:
    """Return the item-th sequence with its boxes converted from xywh to xyxy."""
    image_files, boxes = self._state["dataset"][item]
    return dict(image=image_files, anno=xywh2xyxy(boxes))
def __call__(self, sampled_data):
    """Crop a template/search pair for SAT training.

    Boxes are derived from each object mask's bounding rectangle; for a
    negative pair the segmentation target is zeroed out.
    """
    template, search = sampled_data["data1"], sampled_data["data2"]

    def _unpack(sample):
        # xyxy box from the tight bounding rectangle of the object mask.
        image, mask = sample["image"], sample["anno"]
        return image, mask, xywh2xyxy(cv2.boundingRect(mask))

    im_temp, mask_temp, bbox_temp = _unpack(template)
    im_curr, mask_curr, bbox_curr = _unpack(search)
    data_dict = crop_track_pair_for_sat(im_temp,
                                        bbox_temp,
                                        im_curr,
                                        bbox_curr,
                                        config=self._hyper_params,
                                        mask_tmp=mask_temp,
                                        mask_curr=mask_curr)
    if sampled_data["is_negative_pair"]:
        # Negative pairs carry no target object: blank the segmentation label.
        data_dict["seg_mask"] *= 0
    return data_dict
def __getitem__(self, item):
    """Fetch one DET image as a single-frame sequence.

    :param item: int, index into self.im_names
    :return: dict(image=[image file path], anno=xyxy boxes)
    """
    subset = self._hyper_params["subset"]
    # Single lookup of the per-image record instead of two dict chains.
    record = DETDataset.data_dict[subset][self.im_names[item]]
    boxes = record["anno"]
    if len(boxes) <= 0:
        # Placeholder annotation for images without any labeled object.
        boxes = self._DUMMY_ANNO
    return dict(image=[record["image_file"]], anno=xywh2xyxy(boxes))
def main(args):
    """Run the interactive SOT demo: load config, build model+pipeline,
    then loop over a video source tracking the selected object.

    Keys: 's' select ROI, 'c' clear init box/template, 'q' quit.
    args fields used: config, device, init_bbox, resize, dump_only,
    video ("webcam" | image-file pattern | video file), output, start_index.
    """
    root_cfg = cfg
    root_cfg.merge_from_file(args.config)
    logger.info("Load experiment configuration at: %s" % args.config)
    # resolve config: make paths absolute, keep only the test section
    root_cfg = complete_path_wt_root_in_cfg(root_cfg, ROOT_PATH)
    root_cfg = root_cfg.test
    task, task_cfg = specify_task(root_cfg)
    task_cfg.freeze()
    window_name = task_cfg.exp_name
    # build model
    model = model_builder.build(task, task_cfg.model)
    # build pipeline
    pipeline = pipeline_builder.build(task, task_cfg.pipeline, model)
    dev = torch.device(args.device)
    pipeline.set_device(dev)
    init_box = None
    template = None
    # a 4-number --init_bbox skips interactive ROI selection
    if len(args.init_bbox) == 4:
        init_box = args.init_bbox
    vw = None
    resize_ratio = args.resize
    dump_only = args.dump_only
    # create video stream
    if args.video == "webcam":
        logger.info("Starting video stream...")
        vs = cv2.VideoCapture(0)
        vs.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P',
                                                           'G'))
    elif not osp.isfile(args.video):
        # not a file path -> treat as an image-file pattern / directory
        logger.info("Starting from video frame image files...")
        vs = ImageFileVideoStream(args.video, init_counter=args.start_index)
    else:
        logger.info("Starting from video file...")
        vs = cv2.VideoCapture(args.video)
    # create video writer to output video
    if args.output:
        if osp.isdir(args.output):
            # directory output -> dump individual frame images
            vw = ImageFileVideoWriter(args.output)
        else:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            # props 3/4 are CAP_PROP_FRAME_WIDTH/HEIGHT
            width, height = vs.get(3), vs.get(4)
            vw = cv2.VideoWriter(
                args.output, fourcc, 25,
                (int(width * resize_ratio), int(height * resize_ratio)))
    # loop over sequence
    while vs.isOpened():
        key = 255  # sentinel: no key pressed this iteration
        ret, frame = vs.read()
        logger.debug("frame: {}".format(ret))
        if ret:
            if template is not None:
                # tracking mode: update pipeline and draw the prediction
                time_a = time.time()
                rect_pred = pipeline.update(frame)
                logger.debug(rect_pred)
                show_frame = frame.copy()
                time_cost = time.time() - time_a
                bbox_pred = xywh2xyxy(rect_pred)
                bbox_pred = tuple(map(int, bbox_pred))
                # NOTE(review): font_size / font_width look like module-level
                # globals — not defined in this function; confirm at file top.
                cv2.putText(show_frame,
                            "track cost: {:.4f} s".format(time_cost),
                            (128, 20), cv2.FONT_HERSHEY_COMPLEX, font_size,
                            (0, 0, 255), font_width)
                cv2.rectangle(show_frame, bbox_pred[:2], bbox_pred[2:],
                              (0, 255, 0))
                # NOTE(review): this inner check is redundant — we are already
                # inside the `template is not None` branch.
                if template is not None:
                    # paste the 128x128 template thumbnail in the corner
                    show_frame[:128, :128] = template
            else:
                show_frame = frame
            show_frame = cv2.resize(
                show_frame,
                (int(show_frame.shape[1] * resize_ratio),
                 int(show_frame.shape[0] * resize_ratio)))  # resize
            if not dump_only:
                cv2.imshow(window_name, show_frame)
            if vw is not None:
                vw.write(show_frame)
        # catch key if interactive (no fixed init box) or not writing output;
        # otherwise skip waitKey so a headless dump runs at full speed
        if (init_box is None) or (vw is None):
            logger.debug("Press key s to select object.")
            key = cv2.waitKey(30) & 0xFF
        logger.debug("key: {}".format(key))
        if key == ord("q"):
            break
        # if the 's' key is selected, we are going to "select" a bounding
        # box to track
        elif key == ord("s"):
            # select the bounding box of the object we want to track (make
            # sure you press ENTER or SPACE after selecting the ROI)
            logger.debug("Select object to track")
            box = cv2.selectROI(window_name,
                                frame,
                                fromCenter=False,
                                showCrosshair=True)
            # selectROI returns (x, y, w, h); reject empty selections
            if box[2] > 0 and box[3] > 0:
                init_box = box
        elif key == ord("c"):
            logger.debug(
                "init_box/template released, press key s to select object.")
            init_box = None
            template = None
        # (re-)initialize tracking once a box exists but no template yet
        if (init_box is not None) and (template is None):
            template = cv2.resize(
                frame[int(init_box[1]):int(init_box[1] + init_box[3]),
                      int(init_box[0]):int(init_box[0] + init_box[2])],
                (128, 128))
            pipeline.init(frame, init_box)
            logger.debug(
                "pipeline initialized with bbox : {}".format(init_box))
    vs.release()
    if vw is not None:
        vw.release()
    cv2.destroyAllWindows()