def run_sequence(self, seq, visualization=None, debug=None, visdom_info=None, multiobj_mode=None):
    """Run the tracker on one sequence and return the tracking output.
    args:
        seq: Sequence to run the tracker on. Must provide init_info() and a multiobj_mode attribute.
        visualization: Visualization flag. When None it falls back to bool(debug) if debug is given,
            otherwise to params.visualization (False if the parameters do not define it).
        debug: Debug level. When None it falls back to params.debug (0 if not defined).
        visdom_info: Visdom info, forwarded to self._init_visdom.
        multiobj_mode: Multi-object mode ('default' or 'parallel'). When None it falls back to
            params.multiobj_mode, then to the tracker class attribute, then to 'default'.
    returns:
        The value returned by self._track_sequence, or by self._track_sequence_rt when self.if_rt is set.
    raises:
        ValueError: For a multi-object sequence whose mode is neither 'default' nor 'parallel'.
    """
    params = self.get_parameters()

    # Resolve the effective debug level and visualization flag.
    debug_level = getattr(params, 'debug', 0) if debug is None else debug
    if visualization is not None:
        show = visualization
    elif debug is None:
        show = getattr(params, 'visualization', False)
    else:
        show = bool(debug)

    params.visualization = show
    params.debug = debug_level

    self._init_visdom(visdom_info, debug_level)
    # The built-in visualization is only used when visdom is not available.
    if show and self.visdom is None:
        self.init_visualization()

    # Get init information
    init_info = seq.init_info()
    single_object = not seq.multiobj_mode

    if multiobj_mode is None:
        class_default = getattr(self.tracker_class, 'multiobj_mode', 'default')
        multiobj_mode = getattr(params, 'multiobj_mode', class_default)

    # Single-object sequences always use a plain tracker, whatever the mode says.
    if single_object or multiobj_mode == 'default':
        tracker = self.create_tracker(params)
    elif multiobj_mode == 'parallel':
        tracker = MultiObjectWrapper(self.tracker_class, params, self.visdom)
    else:
        raise ValueError(
            'Unknown multi object mode {}'.format(multiobj_mode))

    track = self._track_sequence_rt if self.if_rt else self._track_sequence
    return track(tracker, seq, init_info)
def run_webcam(self, debug=None, visdom_info=None):
    """Run the tracker interactively on the default webcam (capture device 0).

    A target is added with two left clicks in the display window (one per box corner).
    Press 'r' to reset the tracker and 'q' to quit. The loop also stops when the camera
    no longer delivers frames.
    args:
        debug: Debug level. When None it falls back to params.debug (0 if not defined).
        visdom_info: Visdom info, forwarded to self._init_visdom.
    raises:
        ValueError: If the configured multi-object mode is neither 'default' nor 'parallel'.
    """
    params = self.get_parameters()

    debug_ = debug
    if debug is None:
        debug_ = getattr(params, 'debug', 0)
    params.debug = debug_

    params.tracker_name = self.name
    params.param_name = self.parameter_name
    self._init_visdom(visdom_info, debug_)

    multiobj_mode = getattr(params, 'multiobj_mode', getattr(self.tracker_class, 'multiobj_mode', 'default'))

    if multiobj_mode == 'default':
        tracker = self.create_tracker(params)
    elif multiobj_mode == 'parallel':
        tracker = MultiObjectWrapper(self.tracker_class, params, self.visdom, fast_load=True)
    else:
        raise ValueError('Unknown multi object mode {}'.format(multiobj_mode))

    class UIControl:
        """Mouse-driven state machine used to draw a new target box."""

        def __init__(self):
            self.mode = 'init'  # init, select, track
            self.target_tl = (-1, -1)
            self.target_br = (-1, -1)
            self.new_init = False

        def mouse_callback(self, event, x, y, flags, param):
            # First click anchors the box, mouse movement stretches it, second click commits it.
            if event == cv.EVENT_LBUTTONDOWN and self.mode == 'init':
                self.target_tl = (x, y)
                self.target_br = (x, y)
                self.mode = 'select'
            elif event == cv.EVENT_MOUSEMOVE and self.mode == 'select':
                self.target_br = (x, y)
            elif event == cv.EVENT_LBUTTONDOWN and self.mode == 'select':
                self.target_br = (x, y)
                self.mode = 'init'
                self.new_init = True

        def get_tl(self):
            # Returns whichever stored corner has the smaller x coordinate.
            return self.target_tl if self.target_tl[0] < self.target_br[0] else self.target_br

        def get_br(self):
            # Returns the other stored corner (the one get_tl did not pick).
            return self.target_br if self.target_tl[0] < self.target_br[0] else self.target_tl

        def get_bb(self):
            # Box as [x, y, w, h], independent of the direction the user dragged in.
            tl = self.get_tl()
            br = self.get_br()
            return [min(tl[0], br[0]), min(tl[1], br[1]), abs(br[0] - tl[0]), abs(br[1] - tl[1])]

    ui_control = UIControl()
    cap = cv.VideoCapture(0)
    try:
        display_name = 'Display: ' + self.name
        cv.namedWindow(display_name, cv.WINDOW_NORMAL | cv.WINDOW_KEEPRATIO)
        cv.resizeWindow(display_name, 960, 720)
        cv.setMouseCallback(display_name, ui_control.mouse_callback)

        next_object_id = 1
        sequence_object_ids = []
        prev_output = OrderedDict()

        while True:
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret or frame is None:
                # Camera missing, unplugged or stream ended: stop instead of crashing on frame.copy().
                print('Failed to read a frame from the webcam, stopping.')
                break
            frame_disp = frame.copy()

            info = OrderedDict()
            info['previous_output'] = prev_output

            if ui_control.new_init:
                # A box was just committed: register it as a new object for the tracker.
                ui_control.new_init = False
                init_state = ui_control.get_bb()

                info['init_object_ids'] = [next_object_id, ]
                info['init_bbox'] = OrderedDict({next_object_id: init_state})
                sequence_object_ids.append(next_object_id)

                next_object_id += 1

            # Draw box
            if ui_control.mode == 'select':
                cv.rectangle(frame_disp, ui_control.get_tl(), ui_control.get_br(), (255, 0, 0), 2)

            # The tracker is only queried once at least one target exists.
            if sequence_object_ids:
                info['sequence_object_ids'] = sequence_object_ids
                out = tracker.track(frame, info)
                prev_output = OrderedDict(out)

                if 'segmentation' in out:
                    frame_disp = overlay_mask(frame_disp, out['segmentation'])

                if 'target_bbox' in out:
                    for obj_id, state in out['target_bbox'].items():
                        state = [int(s) for s in state]
                        cv.rectangle(frame_disp, (state[0], state[1]), (state[2] + state[0], state[3] + state[1]),
                                     _tracker_disp_colors[obj_id], 5)

            # Put text
            font_color = (0, 0, 0)
            cv.putText(frame_disp, 'Select target', (20, 30), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, font_color, 1)
            cv.putText(frame_disp, 'Press r to reset', (20, 55), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, font_color, 1)
            cv.putText(frame_disp, 'Press q to quit', (20, 85), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, font_color, 1)

            # Display the resulting frame
            cv.imshow(display_name, frame_disp)
            key = cv.waitKey(1)
            if key == ord('q'):
                break
            elif key == ord('r'):
                # Forget all targets and re-initialize the tracker with an empty object list.
                next_object_id = 1
                sequence_object_ids = []
                prev_output = OrderedDict()

                info = OrderedDict()
                info['object_ids'] = []
                info['init_object_ids'] = []
                info['init_bbox'] = OrderedDict()
                tracker.initialize(frame, info)
                ui_control.mode = 'init'
    finally:
        # Release the capture and windows even when tracking raised.
        cap.release()
        cv.destroyAllWindows()
def run_video(self, videofilepath, optional_box=None, debug=None, visdom_info=None, save_results=False):
    """Run the tracker on a video file, showing the result in a window.

    The first frame is used for initialization only, either from optional_box or from an
    interactively selected ROI. Press 'r' to re-select the target on the next frame and
    'q' to quit.
    args:
        videofilepath: Path to an existing video file.
        optional_box: Initial box as a list/tuple [x, y, w, h]. None asks the user to select a ROI.
        debug: Debug level. When None it falls back to params.debug (0 if not defined).
        visdom_info: Visdom info, forwarded to self._init_visdom.
        save_results: If True, write all boxes to '<results_dir>/video_<video name>.txt' (tab separated).
    raises:
        ValueError: If the configured multi-object mode is neither 'default' nor 'parallel'.
        AssertionError: If videofilepath is not a file or optional_box is not a 4-element list/tuple.
        SystemExit: If the first frame cannot be read.
    """
    params = self.get_parameters()

    debug_ = debug
    if debug is None:
        debug_ = getattr(params, 'debug', 0)
    params.debug = debug_

    params.tracker_name = self.name
    params.param_name = self.parameter_name
    self._init_visdom(visdom_info, debug_)

    multiobj_mode = getattr(params, 'multiobj_mode', getattr(self.tracker_class, 'multiobj_mode', 'default'))

    if multiobj_mode == 'default':
        tracker = self.create_tracker(params)
        if hasattr(tracker, 'initialize_features'):
            tracker.initialize_features()
    elif multiobj_mode == 'parallel':
        tracker = MultiObjectWrapper(self.tracker_class, params, self.visdom, fast_load=True)
    else:
        raise ValueError('Unknown multi object mode {}'.format(multiobj_mode))

    # Explicit raises (rather than assert statements) so the checks survive `python -O`;
    # the exception type stays AssertionError for existing callers.
    if not os.path.isfile(videofilepath):
        raise AssertionError(
            "Invalid param {}, videofilepath must be a valid videofile".format(videofilepath))
    if optional_box is not None:
        if not isinstance(optional_box, (list, tuple)):
            raise AssertionError("optional_box must be a list or a tuple")
        if len(optional_box) != 4:
            raise AssertionError("valid box's foramt is [x,y,w,h]")

    def _build_init_info(box):
        return {'init_bbox': OrderedDict({1: box}), 'init_object_ids': [1, ], 'object_ids': [1, ],
                'sequence_object_ids': [1, ]}

    output_boxes = []

    cap = cv.VideoCapture(videofilepath)
    try:
        display_name = 'Display: ' + tracker.params.tracker_name
        cv.namedWindow(display_name, cv.WINDOW_NORMAL | cv.WINDOW_KEEPRATIO)
        cv.resizeWindow(display_name, 960, 720)

        def _select_roi(image):
            # Ask the user for a box on a copy of the image; returns [x, y, w, h].
            prompt = image.copy()
            cv.putText(prompt, 'Select target ROI and press ENTER', (20, 30), cv.FONT_HERSHEY_COMPLEX_SMALL,
                       1.5, (0, 0, 0), 1)
            x, y, w, h = cv.selectROI(display_name, prompt, fromCenter=False)
            return [x, y, w, h]

        success, frame = cap.read()
        # Check the read BEFORE displaying: imshow cannot handle a missing frame.
        if success is not True:
            print("Read frame from {} failed.".format(videofilepath))
            raise SystemExit(-1)
        cv.imshow(display_name, frame)

        if optional_box is not None:
            init_state = optional_box
        else:
            init_state = _select_roi(frame)
        tracker.initialize(frame, _build_init_info(init_state))
        output_boxes.append(init_state)

        while True:
            ret, frame = cap.read()

            if frame is None:
                break

            frame_disp = frame.copy()

            # Draw box
            out = tracker.track(frame)
            state = [int(s) for s in out['target_bbox'][1]]
            output_boxes.append(state)

            cv.rectangle(frame_disp, (state[0], state[1]), (state[2] + state[0], state[3] + state[1]),
                         (0, 255, 0), 5)

            font_color = (0, 0, 0)
            cv.putText(frame_disp, 'Tracking!', (20, 30), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, font_color, 1)
            cv.putText(frame_disp, 'Press r to reset', (20, 55), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, font_color, 1)
            cv.putText(frame_disp, 'Press q to quit', (20, 80), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, font_color, 1)

            # Display the resulting frame
            cv.imshow(display_name, frame_disp)
            key = cv.waitKey(1)
            if key == ord('q'):
                break
            elif key == ord('r'):
                # Re-initialize on the next frame; stop cleanly if the video just ended.
                ret, frame = cap.read()
                if frame is None:
                    break
                init_state = _select_roi(frame)
                tracker.initialize(frame, _build_init_info(init_state))
                output_boxes.append(init_state)
    finally:
        # Release the capture and windows even when tracking raised.
        cap.release()
        cv.destroyAllWindows()

    if save_results:
        if not os.path.exists(self.results_dir):
            os.makedirs(self.results_dir)
        video_name = Path(videofilepath).stem
        base_results_path = os.path.join(self.results_dir, 'video_{}'.format(video_name))

        tracked_bb = np.array(output_boxes).astype(int)
        bbox_file = '{}.txt'.format(base_results_path)
        np.savetxt(bbox_file, tracked_bb, delimiter='\t', fmt='%d')
def run_video_no_display(self, videofilepath, output_dir, optional_box=None, debug=None, visdom_info=None, save_results=False):
    """Run the tracker on a video without opening any window.

    Frames are obtained from self.get_frames(videofilepath). Every annotated frame is written
    to output_dir as a numbered .jpg (starting at 00001) and appended to output_dir/result.avi.
    The first frame is used both to initialize the tracker and as the first tracked frame.
    args:
        videofilepath: Video source, forwarded to self.get_frames.
        output_dir: Directory for the annotated frames and result.avi (created if missing).
        optional_box: Initial box as a list/tuple [x, y, w, h].
            NOTE(review): the tracker is only initialized when a box is given; with None,
            tracker.track is called on an uninitialized tracker - confirm whether that is supported.
        debug: Debug level. When None it falls back to params.debug (0 if not defined).
        visdom_info: Visdom info, forwarded to self._init_visdom.
        save_results: If True, write all boxes to '<results_dir>/video_<video name>.txt' (tab separated).
    raises:
        ValueError: If the configured multi-object mode is neither 'default' nor 'parallel'.
        AssertionError: If optional_box is given but is not a 4-element list/tuple.
    """
    params = self.get_parameters()

    debug_ = debug
    if debug is None:
        debug_ = getattr(params, 'debug', 0)
    params.debug = debug_

    params.tracker_name = self.name
    params.param_name = self.parameter_name
    self._init_visdom(visdom_info, debug_)

    multiobj_mode = getattr(params, 'multiobj_mode', getattr(self.tracker_class, 'multiobj_mode', 'default'))

    if multiobj_mode == 'default':
        tracker = self.create_tracker(params)
        if hasattr(tracker, 'initialize_features'):
            tracker.initialize_features()
    elif multiobj_mode == 'parallel':
        tracker = MultiObjectWrapper(self.tracker_class, params, self.visdom, fast_load=True)
    else:
        raise ValueError('Unknown multi object mode {}'.format(multiobj_mode))

    # Explicit raises (rather than assert statements) so the checks survive `python -O`;
    # the exception type stays AssertionError for existing callers.
    if optional_box is not None:
        if not isinstance(optional_box, (list, tuple)):
            raise AssertionError("optional_box must be a list or a tuple")
        if len(optional_box) != 4:
            raise AssertionError("valid box's foramt is [x,y,w,h]")

    def _build_init_info(box):
        return {'init_bbox': OrderedDict({1: box}), 'init_object_ids': [1, ], 'object_ids': [1, ],
                'sequence_object_ids': [1, ]}

    output_boxes = []

    # makedirs also handles nested output directories, which os.mkdir cannot create.
    output_dir_abs = os.path.abspath(output_dir)
    if not os.path.exists(output_dir_abs):
        os.makedirs(output_dir_abs)

    video_writer = None
    frame_count = 0
    try:
        for frame in self.get_frames(videofilepath):
            if frame is None:
                # End of stream: stop iterating but still release the writer and save results.
                break

            if frame_count == 0:
                # The writer needs the frame size, so it is created lazily on the first frame.
                height, width = frame.shape[:2]
                video_writer = cv.VideoWriter(os.path.join(output_dir, "result.avi"),
                                              cv.VideoWriter_fourcc(*'DIVX'), 30, (width, height))
                if optional_box is not None:
                    tracker.initialize(frame, _build_init_info(optional_box))
                    output_boxes.append(optional_box)
            frame_count += 1

            frame_disp = frame.copy()

            # Draw box
            out = tracker.track(frame)
            state = [int(s) for s in out['target_bbox'][1]]
            output_boxes.append(state)

            cv.rectangle(frame_disp, (state[0], state[1]), (state[2] + state[0], state[3] + state[1]),
                         (0, 255, 0), 5)

            font_color = (0, 0, 0)
            cv.putText(frame_disp, 'Tracking!', (20, 30), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, font_color, 1)

            # Save the resulting frame
            cv.imwrite(os.path.join(output_dir, '{0:05d}'.format(frame_count) + ".jpg"), frame_disp)
            video_writer.write(frame_disp)
    finally:
        # The writer does not exist when no frame was produced.
        if video_writer is not None:
            video_writer.release()

    if save_results:
        if not os.path.exists(self.results_dir):
            os.makedirs(self.results_dir)
        video_name = Path(videofilepath).stem
        base_results_path = os.path.join(self.results_dir, 'video_{}'.format(video_name))

        tracked_bb = np.array(output_boxes).astype(int)
        bbox_file = '{}.txt'.format(base_results_path)
        np.savetxt(bbox_file, tracked_bb, delimiter='\t', fmt='%d')
def run_webcam(self, debug=None, visdom_info=None, yolo_dir="/home/ebo/Parrot_CV_Project/local_yolo/yolo-coco"):
    """Run the tracker on the default webcam, initialized automatically by a YOLO person detection.

    YOLO runs on every frame and its detections are shown in a separate "YOLO" window. The first
    detection of class 0 initializes the tracker; later detections are only displayed.
    Press 'r' to reset the tracker (it is re-initialized from the next class-0 detection) and
    'q' to quit. The loop also stops when the camera no longer delivers frames.

    NOTE(review): this file also contains an interactive run_webcam with the same name; if both
    are defined in the same class, this later definition is the one that takes effect - confirm
    that this is intended.
    args:
        debug: Debug level. When None it falls back to params.debug (0 if not defined).
        visdom_info: Visdom info, forwarded to self._init_visdom.
        yolo_dir: Directory containing coco.names, yolov3.weights and yolov3.cfg.
    raises:
        ValueError: If the configured multi-object mode is neither 'default' nor 'parallel'.
    """

    def yolo_search(W, H, frame_yolo):
        """Run YOLO on frame_yolo, draw the kept detections on it and look for class 0.

        Returns (top-left, bottom-right, detection flag, annotated frame). The flag is 1 and the
        corners are valid for the first kept class-0 box; otherwise ((0, 0), (0, 0), 0, frame).
        """
        # if the frame dimensions are empty, grab them
        if W is None or H is None:
            (H, W) = frame_yolo.shape[:2]

        # construct a blob from the input frame and then perform a forward
        # pass of the YOLO object detector, giving us our bounding boxes
        # and associated probabilities
        blob = cv.dnn.blobFromImage(frame_yolo, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        layer_outputs = net.forward(ln)

        boxes = []
        confidences = []
        class_ids = []

        for output in layer_outputs:
            for detection in output:
                # class scores start at index 5 of each detection row
                scores = detection[5:]
                class_id = int(np.argmax(scores))
                confidence = scores[class_id]

                # filter weak prediction and unrelated classes
                if class_id not in outdoor_classes and confidence > 0.5:
                    # YOLO returns the box center followed by width and height, relative to
                    # the image size: scale back and convert to a top-left corner.
                    box = detection[0:4] * np.array([W, H, W, H])
                    (center_x, center_y, width, height) = box.astype("int")
                    x = int(center_x - (width / 2))
                    y = int(center_y - (height / 2))

                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # apply non-maxima suppression to suppress weak, overlapping bounding boxes
        idxs = cv.dnn.NMSBoxes(boxes, confidences, 0.5, 0.3)

        # np.array(...).flatten() copes with an empty result as well as with flat or
        # column-shaped index arrays.
        for i in np.array(idxs).flatten():
            i = int(i)
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            # draw a bounding box rectangle and label on the frame
            color = [int(c) for c in COLORS[class_ids[i]]]
            cv.rectangle(frame_yolo, (x, y), (x + w, y + h), color, 2)
            text = "{}: {:.4f}".format(LABELS[class_ids[i]], confidences[i])
            cv.putText(frame_yolo, text, (x, y - 5), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            if class_ids[i] == 0:
                # First class-0 box wins; boxes after it are not drawn.
                tl_coor = (x, y)  # top left coordinates
                br_coor = ((x + w), (y + h))  # bottom right coordinates
                cv.rectangle(frame_yolo, tl_coor, br_coor, (255, 255, 255), 2)
                return tl_coor, br_coor, 1, frame_yolo

        return (0, 0), (0, 0), 0, frame_yolo

    # load the COCO class labels our YOLO model was trained on
    labels_path = os.path.join(yolo_dir, "coco.names")
    with open(labels_path) as labels_file:
        LABELS = labels_file.read().strip().split("\n")
    # class ids that are ignored by the detector
    outdoor_classes = frozenset([2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 17, 18, 19, 20, 21, 22, 23,
                                 29, 30, 31, 32, 33, 34, 35, 36, 37, 38])

    # initialize a list of colors to represent each possible class label
    np.random.seed(42)
    COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

    # derive the paths to the YOLO weights and model configuration
    weights_path = os.path.join(yolo_dir, "yolov3.weights")
    config_path = os.path.join(yolo_dir, "yolov3.cfg")

    # load our YOLO object detector trained on COCO dataset (80 classes)
    print("[INFO] loading YOLO from disk...")
    net = cv.dnn.readNetFromDarknet(config_path, weights_path)

    # Ask OpenCV for the output layer names directly; indexing the result of
    # getUnconnectedOutLayers() by hand depends on the array shape the installed build returns.
    ln = net.getUnconnectedOutLayersNames()

    (W, H) = (None, None)

    params = self.get_parameters()

    debug_ = debug
    if debug is None:
        debug_ = getattr(params, 'debug', 0)
    params.debug = debug_

    params.tracker_name = self.name
    params.param_name = self.parameter_name
    self._init_visdom(visdom_info, debug_)

    multiobj_mode = getattr(params, 'multiobj_mode', getattr(self.tracker_class, 'multiobj_mode', 'default'))

    if multiobj_mode == 'default':
        tracker = self.create_tracker(params)
    elif multiobj_mode == 'parallel':
        tracker = MultiObjectWrapper(self.tracker_class, params, self.visdom, fast_load=True)
    else:
        raise ValueError('Unknown multi object mode {}'.format(multiobj_mode))

    cap = cv.VideoCapture(0)
    try:
        display_name = 'Display: ' + self.name
        cv.namedWindow(display_name, cv.WINDOW_NORMAL | cv.WINDOW_KEEPRATIO)
        cv.resizeWindow(display_name, 960, 720)

        next_object_id = 1
        sequence_object_ids = []
        prev_output = OrderedDict()
        target_acquired = False  # True once a detection has been handed to the tracker

        while True:
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret or frame is None:
                # Camera missing, unplugged or stream ended: stop instead of crashing on frame.copy().
                print('Failed to read a frame from the webcam, stopping.')
                break
            frame_disp = frame.copy()

            tl_yolo, br_yolo, det_flag, frame_yolo = yolo_search(W, H, frame.copy())

            info = OrderedDict()
            info['previous_output'] = prev_output

            # The first person detection becomes the tracker's target.
            if det_flag == 1 and not target_acquired:
                init_state = [min(tl_yolo[0], br_yolo[0]), min(tl_yolo[1], br_yolo[1]),
                              abs(br_yolo[0] - tl_yolo[0]), abs(br_yolo[1] - tl_yolo[1])]

                info['init_object_ids'] = [next_object_id, ]
                info['init_bbox'] = OrderedDict({next_object_id: init_state})
                sequence_object_ids.append(next_object_id)

                next_object_id += 1
                target_acquired = True

            # The tracker is only queried once at least one target exists.
            if sequence_object_ids:
                info['sequence_object_ids'] = sequence_object_ids
                out = tracker.track(frame, info)
                prev_output = OrderedDict(out)

                if 'segmentation' in out:
                    frame_disp = overlay_mask(frame_disp, out['segmentation'])

                if 'target_bbox' in out:
                    for obj_id, state in out['target_bbox'].items():
                        state = [int(s) for s in state]
                        cv.rectangle(frame_disp, (state[0], state[1]), (state[2] + state[0], state[3] + state[1]),
                                     _tracker_disp_colors[obj_id], 5)

            # Put text
            font_color = (0, 0, 0)
            cv.putText(frame_disp, 'Press r to reset', (20, 25), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, font_color, 1)
            cv.putText(frame_disp, 'Press q to quit', (20, 55), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, font_color, 1)

            # Display the resulting frame
            cv.imshow(display_name, frame_disp)
            cv.imshow("YOLO", frame_yolo)
            key = cv.waitKey(1)
            if key == ord('q'):
                break
            elif key == ord('r'):
                # Forget all targets and re-initialize the tracker with an empty object list.
                next_object_id = 1
                sequence_object_ids = []
                prev_output = OrderedDict()
                # Allow the next detection to initialize the tracker again; without this the
                # tracker could never acquire a target after a reset.
                target_acquired = False

                info = OrderedDict()
                info['object_ids'] = []
                info['init_object_ids'] = []
                info['init_bbox'] = OrderedDict()
                tracker.initialize(frame, info)
    finally:
        # Release the capture and windows even when detection or tracking raised.
        cap.release()
        cv.destroyAllWindows()
def run_video(self, videofilepath, optional_box=None, debug=None, visdom_info=None, save_results=False, yolo_dir="/home/ebo/Parrot_CV_Project/local_yolo/yolo-coco"):
    """Run the tracker on a video file, initialized automatically by a YOLO detection.

    YOLO searches each frame (starting with the second one; the first is only displayed) for
    class 45 ("bowl" according to the original author's note). The first hit initializes the
    tracker, after which YOLO is no longer run and the tracked box is drawn together with
    steering hints (MOVE LEFT/RIGHT/UP/DOWN/FORWARD/BACK) derived from the box position and
    size relative to the frame. Press 'q' to quit.

    NOTE(review): this file also contains an interactive run_video with the same name; if both
    are defined in the same class, this later definition is the one that takes effect - confirm
    that this is intended.
    args:
        videofilepath: Path to an existing video file.
        optional_box: Unused by this implementation; kept for interface compatibility.
        debug: Debug level. When None it falls back to params.debug (0 if not defined).
        visdom_info: Visdom info, forwarded to self._init_visdom.
        save_results: If True, write all boxes to '<results_dir>/video_<video name>.txt' (tab separated).
        yolo_dir: Directory containing coco.names, yolov3.weights and yolov3.cfg.
    raises:
        ValueError: If the configured multi-object mode is neither 'default' nor 'parallel'.
        AssertionError: If videofilepath is not a file.
        SystemExit: If the first frame cannot be read.
    """
    target_class_id = 45  # 0 - person, 65 - remote 45 - bowl

    def yolo_search(W, H, frame_yolo):
        """Run YOLO on frame_yolo, draw the kept detections on it and look for the target class.

        Returns (top-left, bottom-right, detection flag, annotated frame). The flag is 1 and the
        corners are valid for the first kept target-class box; otherwise ((0, 0), (0, 0), 0, frame).
        """
        # if the frame dimensions are empty, grab them
        if W is None or H is None:
            (H, W) = frame_yolo.shape[:2]

        # construct a blob from the input frame and then perform a forward
        # pass of the YOLO object detector, giving us our bounding boxes
        # and associated probabilities
        blob = cv.dnn.blobFromImage(frame_yolo, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        layer_outputs = net.forward(ln)

        boxes = []
        confidences = []
        class_ids = []

        for output in layer_outputs:
            for detection in output:
                # class scores start at index 5 of each detection row
                scores = detection[5:]
                class_id = int(np.argmax(scores))
                confidence = scores[class_id]

                # filter weak prediction and unrelated classes
                if class_id not in outdoor_classes and confidence > 0.5:
                    # YOLO returns the box center followed by width and height, relative to
                    # the image size: scale back and convert to a top-left corner.
                    box = detection[0:4] * np.array([W, H, W, H])
                    (center_x, center_y, width, height) = box.astype("int")
                    x = int(center_x - (width / 2))
                    y = int(center_y - (height / 2))

                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # apply non-maxima suppression to suppress weak, overlapping bounding boxes
        idxs = cv.dnn.NMSBoxes(boxes, confidences, 0.5, 0.3)

        # np.array(...).flatten() copes with an empty result as well as with flat or
        # column-shaped index arrays.
        for i in np.array(idxs).flatten():
            i = int(i)
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            # draw a bounding box rectangle and label on the frame
            color = [int(c) for c in COLORS[class_ids[i]]]
            cv.rectangle(frame_yolo, (x, y), (x + w, y + h), color, 2)
            text = "{}: {:.4f}".format(LABELS[class_ids[i]], confidences[i])
            cv.putText(frame_yolo, text, (x, y - 5), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            if class_ids[i] == target_class_id:
                # First target-class box wins; boxes after it are not drawn.
                tl_coor = (x, y)  # top left coordinates
                br_coor = ((x + w), (y + h))  # bottom right coordinates
                cv.rectangle(frame_yolo, tl_coor, br_coor, (255, 255, 255), 2)
                return tl_coor, br_coor, 1, frame_yolo

        return (0, 0), (0, 0), 0, frame_yolo

    # load the COCO class labels our YOLO model was trained on
    labels_path = os.path.join(yolo_dir, "coco.names")
    with open(labels_path) as labels_file:
        LABELS = labels_file.read().strip().split("\n")
    # class ids that are ignored by the detector
    outdoor_classes = frozenset([2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 17, 18, 19, 20, 21, 22, 23,
                                 29, 30, 31, 32, 33, 34, 35, 36, 37, 38])

    # initialize a list of colors to represent each possible class label
    np.random.seed(42)
    COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

    # derive the paths to the YOLO weights and model configuration
    weights_path = os.path.join(yolo_dir, "yolov3.weights")
    config_path = os.path.join(yolo_dir, "yolov3.cfg")

    # load our YOLO object detector trained on COCO dataset (80 classes)
    print("[INFO] loading YOLO from disk...")
    net = cv.dnn.readNetFromDarknet(config_path, weights_path)

    # Ask OpenCV for the output layer names directly; indexing the result of
    # getUnconnectedOutLayers() by hand depends on the array shape the installed build returns.
    ln = net.getUnconnectedOutLayersNames()

    (W, H) = (None, None)

    params = self.get_parameters()

    debug_ = debug
    if debug is None:
        debug_ = getattr(params, 'debug', 0)
    params.debug = debug_

    params.tracker_name = self.name
    params.param_name = self.parameter_name
    self._init_visdom(visdom_info, debug_)

    multiobj_mode = getattr(params, 'multiobj_mode', getattr(self.tracker_class, 'multiobj_mode', 'default'))

    if multiobj_mode == 'default':
        tracker = self.create_tracker(params)
        if hasattr(tracker, 'initialize_features'):
            tracker.initialize_features()
    elif multiobj_mode == 'parallel':
        tracker = MultiObjectWrapper(self.tracker_class, params, self.visdom, fast_load=True)
    else:
        raise ValueError('Unknown multi object mode {}'.format(multiobj_mode))

    # Explicit raise (rather than an assert statement) so the check survives `python -O`;
    # the exception type stays AssertionError for existing callers.
    if not os.path.isfile(videofilepath):
        raise AssertionError(
            "Invalid param {}, videofilepath must be a valid videofile".format(videofilepath))

    def _build_init_info(box):
        return {'init_bbox': OrderedDict({1: box}), 'init_object_ids': [1, ], 'object_ids': [1, ],
                'sequence_object_ids': [1, ]}

    output_boxes = []
    hint_font = cv.FONT_HERSHEY_SIMPLEX
    hint_color = (0, 0, 255)

    cap = cv.VideoCapture(videofilepath)
    try:
        display_name = 'Display: ' + tracker.params.tracker_name
        cv.namedWindow(display_name, cv.WINDOW_NORMAL | cv.WINDOW_KEEPRATIO)
        cv.resizeWindow(display_name, 960, 720)

        success, frame = cap.read()
        # Check the read BEFORE displaying: imshow cannot handle a missing frame.
        if success is not True:
            print("Read frame from {} failed.".format(videofilepath))
            raise SystemExit(-1)
        cv.imshow(display_name, frame)

        tracking = False  # True once YOLO found the target and the tracker was initialized

        while True:
            ret, frame = cap.read()

            if frame is None:
                break

            frame_disp = frame.copy()

            if W is None or H is None:
                (H, W) = frame_disp.shape[:2]

            if not tracking:
                # Search phase: the displayed frame is the YOLO-annotated copy.
                tl_yolo, br_yolo, det_flag, frame_disp = yolo_search(W, H, frame.copy())
                cv.putText(frame_disp, "Searching: BOWL", (50, 50), hint_font, 1, (255, 0, 0), 2)

                if det_flag == 1:
                    # Hand the detection over to the tracker; YOLO is not run again afterwards.
                    tracking = True
                    init_state = [tl_yolo[0], tl_yolo[1],
                                  abs(br_yolo[0] - tl_yolo[0]), abs(br_yolo[1] - tl_yolo[1])]
                    tracker.initialize(frame, _build_init_info(init_state))
                    output_boxes.append(init_state)

            if tracking:
                # Draw box
                out = tracker.track(frame)
                state = [int(s) for s in out['target_bbox'][1]]
                output_boxes.append(state)

                tl = (state[0], state[1])
                br = (state[2] + state[0], state[3] + state[1])
                w = state[2]
                h = state[3]
                cv.rectangle(frame_disp, tl, br, (0, 255, 0), 5)
                center = (int(tl[0] + w / 2), int(tl[1] + h / 2))
                cv.circle(frame_disp, center, 3, hint_color, -1)
                cv.putText(frame_disp, "FOUND BOWL", (50, 50), hint_font, 0.5, hint_color, 2)

                # Horizontal hint: box center outside the middle 40%-60% band of the width.
                if center[0] < W * 0.40:
                    cv.putText(frame_disp, "MOVE LEFT", (50, 150), hint_font, 0.5, hint_color, 2)
                elif center[0] > W * 0.60:
                    cv.putText(frame_disp, "MOVE RIGHT", (450, 150), hint_font, 0.5, hint_color, 2)

                # Vertical hint: box center outside the middle 40%-60% band of the height.
                if center[1] < H * 0.40:
                    cv.putText(frame_disp, "MOVE UP", (200, 50), hint_font, 0.5, hint_color, 2)
                elif center[1] > H * 0.60:
                    cv.putText(frame_disp, "MOVE DOWN", (200, 300), hint_font, 0.5, hint_color, 2)

                # Distance hint: box area below 5% or above 15% of the frame area.
                if w * h < W * H * 0.05:
                    cv.putText(frame_disp, "MOVE FORWARD", (int(W / 2), int(H / 2)), hint_font, 0.5, hint_color, 2)
                elif w * h > W * H * 0.15:
                    cv.putText(frame_disp, "MOVE BACK", (int(W / 2), int(H / 2)), hint_font, 0.5, hint_color, 2)

            # Display the resulting frame
            cv.imshow(display_name, frame_disp)
            key = cv.waitKey(1)
            if key == ord('q'):
                break
    finally:
        # Release the capture and windows even when detection or tracking raised.
        cap.release()
        cv.destroyAllWindows()

    if save_results:
        if not os.path.exists(self.results_dir):
            os.makedirs(self.results_dir)
        video_name = Path(videofilepath).stem
        base_results_path = os.path.join(self.results_dir, 'video_{}'.format(video_name))

        tracked_bb = np.array(output_boxes).astype(int)
        bbox_file = '{}.txt'.format(base_results_path)
        np.savetxt(bbox_file, tracked_bb, delimiter='\t', fmt='%d')