def draw_data(self):
    disp_image = self.raw_data[0].copy()

    resize_factor = 1
    if max(disp_image.shape) > 480:
        resize_factor = 480.0 / float(max(disp_image.shape))
        disp_image = cv2.resize(disp_image, None, fx=resize_factor, fy=resize_factor)

    for i, mask in enumerate(self.raw_data[2]):
        self.raw_data[2][i] = cv2.resize(mask, None, fx=resize_factor, fy=resize_factor)

    boxes = [resize_factor * b.clone() for b in self.raw_data[1]]

    for i, disp_rect in enumerate(boxes):
        color = ((255 * ((i % 3) > 0)), 255 * ((i + 1) % 2), (255 * (i % 5)) // 4)
        cv2.rectangle(disp_image,
                      (int(disp_rect[0]), int(disp_rect[1])),
                      (int(disp_rect[0] + disp_rect[2]), int(disp_rect[1] + disp_rect[3])),
                      color, 2)

    for i, mask in enumerate(self.raw_data[2], 1):
        disp_image = overlay_mask(disp_image, mask * i)

    disp_image = numpy_to_torch(disp_image).squeeze(0)
    disp_image = disp_image.float()
    self.visdom.image(disp_image, opts={'title': self.title}, win=self.title)
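
# `overlay_mask` and `numpy_to_torch` above are helpers imported from elsewhere
# in the library. For reference, a minimal sketch of what a label-mask overlay
# of this kind can look like (a hypothetical re-implementation, assuming a
# uint8 BGR image, an integer label mask of matching height/width, and the
# module-level `_tracker_disp_colors` dict; not the library's actual code):
def _overlay_mask_sketch(image, mask, alpha=0.5):
    """Blend a per-label color onto `image` wherever `mask` is non-zero."""
    overlay = image.copy()
    for label in np.unique(mask):
        if label == 0:
            continue  # label 0 is background
        overlay[mask == label] = _tracker_disp_colors.get(int(label), (0, 255, 0))
    return cv.addWeighted(image, 1.0 - alpha, overlay, alpha, 0)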
def run_webcam(self, debug=None, visdom_info=None):
    """Run the tracker with the webcam.
    args:
        debug: Debug level.
        visdom_info: Visdom server settings (optional).
    """

    params = self.get_parameters()

    debug_ = debug
    if debug is None:
        debug_ = getattr(params, 'debug', 0)
    params.debug = debug_

    params.tracker_name = self.name
    params.param_name = self.parameter_name

    self._init_visdom(visdom_info, debug_)

    multiobj_mode = getattr(params, 'multiobj_mode', getattr(self.tracker_class, 'multiobj_mode', 'default'))

    if multiobj_mode == 'default':
        tracker = self.create_tracker(params)
    elif multiobj_mode == 'parallel':
        tracker = MultiObjectWrapper(self.tracker_class, params, self.visdom, fast_load=True)
    else:
        raise ValueError('Unknown multi object mode {}'.format(multiobj_mode))

    class UIControl:
        def __init__(self):
            self.mode = 'init'  # init, select, track
            self.target_tl = (-1, -1)
            self.target_br = (-1, -1)
            self.new_init = False

        def mouse_callback(self, event, x, y, flags, param):
            if event == cv.EVENT_LBUTTONDOWN and self.mode == 'init':
                self.target_tl = (x, y)
                self.target_br = (x, y)
                self.mode = 'select'
            elif event == cv.EVENT_MOUSEMOVE and self.mode == 'select':
                self.target_br = (x, y)
            elif event == cv.EVENT_LBUTTONDOWN and self.mode == 'select':
                self.target_br = (x, y)
                self.mode = 'init'
                self.new_init = True

        def get_tl(self):
            return self.target_tl if self.target_tl[0] < self.target_br[0] else self.target_br

        def get_br(self):
            return self.target_br if self.target_tl[0] < self.target_br[0] else self.target_tl

        def get_bb(self):
            tl = self.get_tl()
            br = self.get_br()
            bb = [min(tl[0], br[0]), min(tl[1], br[1]),
                  abs(br[0] - tl[0]), abs(br[1] - tl[1])]
            return bb

    ui_control = UIControl()

    cap = cv.VideoCapture(0)
    display_name = 'Display: ' + self.name
    cv.namedWindow(display_name, cv.WINDOW_NORMAL | cv.WINDOW_KEEPRATIO)
    cv.resizeWindow(display_name, 960, 720)
    cv.setMouseCallback(display_name, ui_control.mouse_callback)

    next_object_id = 1
    sequence_object_ids = []
    prev_output = OrderedDict()
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            break
        frame_disp = frame.copy()

        info = OrderedDict()
        info['previous_output'] = prev_output

        if ui_control.new_init:
            ui_control.new_init = False
            init_state = ui_control.get_bb()

            info['init_object_ids'] = [next_object_id, ]
            info['init_bbox'] = OrderedDict({next_object_id: init_state})
            sequence_object_ids.append(next_object_id)

            next_object_id += 1

        # Draw box
        if ui_control.mode == 'select':
            cv.rectangle(frame_disp, ui_control.get_tl(), ui_control.get_br(), (255, 0, 0), 2)

        if len(sequence_object_ids) > 0:
            info['sequence_object_ids'] = sequence_object_ids
            out = tracker.track(frame, info)
            prev_output = OrderedDict(out)

            if 'segmentation' in out:
                frame_disp = overlay_mask(frame_disp, out['segmentation'])

            if 'target_bbox' in out:
                for obj_id, state in out['target_bbox'].items():
                    state = [int(s) for s in state]
                    cv.rectangle(frame_disp, (state[0], state[1]),
                                 (state[2] + state[0], state[3] + state[1]),
                                 _tracker_disp_colors[obj_id], 5)

        # Put text
        font_color = (0, 0, 0)
        cv.putText(frame_disp, 'Select target', (20, 30), cv.FONT_HERSHEY_COMPLEX_SMALL, 1,
                   font_color, 1)
        cv.putText(frame_disp, 'Press r to reset', (20, 55), cv.FONT_HERSHEY_COMPLEX_SMALL, 1,
                   font_color, 1)
        cv.putText(frame_disp, 'Press q to quit', (20, 85), cv.FONT_HERSHEY_COMPLEX_SMALL, 1,
                   font_color, 1)

        # Display the resulting frame
        cv.imshow(display_name, frame_disp)
        key = cv.waitKey(1)
        if key == ord('q'):
            break
        elif key == ord('r'):
            next_object_id = 1
            sequence_object_ids = []
            prev_output = OrderedDict()

            info = OrderedDict()
            info['object_ids'] = []
            info['init_object_ids'] = []
            info['init_bbox'] = OrderedDict()
            tracker.initialize(frame, info)
            ui_control.mode = 'init'

    # When everything done, release the capture
    cap.release()
    cv.destroyAllWindows()
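
# Example usage: a minimal sketch, assuming the standard pytracking entry point
# (the tracker/parameter names 'dimp'/'dimp50' are illustrative):
#
#   from pytracking.evaluation import Tracker
#   tracker = Tracker('dimp', 'dimp50')
#   tracker.run_webcam(debug=0)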
# YOLO-initialized variant of run_webcam: the target is initialized from the
# first YOLO person detection instead of a user-drawn box. (If both
# definitions live in the same class, this one shadows the version above.)
def run_webcam(self, debug=None, visdom_info=None):
    """Run the tracker with the webcam.
    args:
        debug: Debug level.
        visdom_info: Visdom server settings (optional).
    """

    def yolo_search(W, H, frame_yolo):
        fl = 0
        # If the frame dimensions are empty, grab them.
        if W is None or H is None:
            (H, W) = frame_yolo.shape[:2]

        # Construct a blob from the input frame and then perform a forward
        # pass of the YOLO object detector, giving us our bounding boxes
        # and associated probabilities.
        blob = cv.dnn.blobFromImage(frame_yolo, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        layerOutputs = net.forward(ln)

        # Initialize our lists of detected bounding boxes, confidences,
        # and class IDs, respectively.
        boxes = []
        confidences = []
        classIDs = []

        # Loop over each of the layer outputs.
        for output in layerOutputs:
            # Loop over each of the detections.
            for detection in output:
                # Extract the class ID and confidence (i.e., probability)
                # of the current object detection.
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]

                # Filter out weak predictions and unrelated classes.
                if classID not in outdoor_classes and confidence > 0.5:
                    # Scale the bounding box coordinates back relative to the
                    # size of the image, keeping in mind that YOLO actually
                    # returns the center (x, y)-coordinates of the bounding
                    # box followed by the box's width and height.
                    box = detection[0:4] * np.array([W, H, W, H])
                    (centerX, centerY, width, height) = box.astype("int")

                    # Use the center (x, y)-coordinates to derive the
                    # top-left corner of the bounding box.
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))

                    # Update our lists of bounding box coordinates,
                    # confidences, and class IDs.
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        # Apply non-maxima suppression to suppress weak, overlapping
        # bounding boxes.
        idxs = cv.dnn.NMSBoxes(boxes, confidences, 0.5, 0.3)

        # Ensure at least one detection exists.
        if len(idxs) > 0:
            # Loop over the indexes we are keeping.
            for i in idxs.flatten():
                # Extract the bounding box coordinates.
                (x, y) = (boxes[i][0], boxes[i][1])
                (w, h) = (boxes[i][2], boxes[i][3])

                # Draw a bounding box rectangle and label on the frame.
                color = [int(c) for c in COLORS[classIDs[i]]]
                cv.rectangle(frame_yolo, (x, y), (x + w, y + h), color, 2)
                text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
                cv.putText(frame_yolo, text, (x, y - 5), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                # COCO class 0 is 'person': return the first person detection.
                if classIDs[i] == 0:
                    detection_flag = 1
                    tl_coor = (x, y)  # top-left coordinates
                    br_coor = (x + w, y + h)  # bottom-right coordinates
                    coordinates_text = "{} {}".format(tl_coor, br_coor)
                    cv.rectangle(frame_yolo, tl_coor, br_coor, (255, 255, 255), 2)
                    fl = 1
                    return tl_coor, br_coor, detection_flag, frame_yolo

        # No person was detected.
        if fl == 0:
            return (0, 0), (0, 0), 0, frame_yolo

    # Load the COCO class labels our YOLO model was trained on (coco.names
    # contains the names) and list the class IDs that won't be used.
    det_flag = 0  # detection flag
    labelsPath = "/home/ebo/Parrot_CV_Project/local_yolo/yolo-coco/coco.names"
    LABELS = open(labelsPath).read().strip().split("\n")
    outdoor_classes = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 17, 18, 19, 20, 21, 22, 23,
                       29, 30, 31, 32, 33, 34, 35, 36, 37, 38]

    # Initialize a list of colors to represent each possible class label.
    np.random.seed(42)
    COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

    # Derive the paths to the YOLO weights and model configuration.
    weightsPath = "/home/ebo/Parrot_CV_Project/local_yolo/yolo-coco/yolov3.weights"
    configPath = "/home/ebo/Parrot_CV_Project/local_yolo/yolo-coco/yolov3.cfg"

    # Load our YOLO object detector trained on the COCO dataset (80 classes).
    print("[INFO] loading YOLO from disk...")
    net = cv.dnn.readNetFromDarknet(configPath, weightsPath)

    # Determine only the output layer names from YOLO.
    ln = net.getLayerNames()
    ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    (W, H) = (None, None)
    temp_flag = 0

    params = self.get_parameters()

    debug_ = debug
    if debug is None:
        debug_ = getattr(params, 'debug', 0)
    params.debug = debug_

    params.tracker_name = self.name
    params.param_name = self.parameter_name

    self._init_visdom(visdom_info, debug_)

    multiobj_mode = getattr(params, 'multiobj_mode', getattr(self.tracker_class, 'multiobj_mode', 'default'))

    if multiobj_mode == 'default':
        tracker = self.create_tracker(params)
    elif multiobj_mode == 'parallel':
        tracker = MultiObjectWrapper(self.tracker_class, params, self.visdom, fast_load=True)
    else:
        raise ValueError('Unknown multi object mode {}'.format(multiobj_mode))

    class UIControl:
        def __init__(self):
            self.mode = 'init'  # init, select, track
            self.new_init = False

        def get_bb(self):
            # Build the init box from the YOLO detection
            # (only called when det_flag == 1).
            if det_flag == 1:
                tl = tl_yolo
                br = br_yolo
                bb = [min(tl[0], br[0]), min(tl[1], br[1]),
                      abs(br[0] - tl[0]), abs(br[1] - tl[1])]
                return bb

    ui_control = UIControl()

    cap = cv.VideoCapture(0)
    display_name = 'Display: ' + self.name
    cv.namedWindow(display_name, cv.WINDOW_NORMAL | cv.WINDOW_KEEPRATIO)
    cv.resizeWindow(display_name, 960, 720)

    next_object_id = 1
    sequence_object_ids = []
    prev_output = OrderedDict()
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            break
        frame_disp = frame.copy()

        tl_yolo, br_yolo, det_flag, frame_yolo = yolo_search(W, H, frame.copy())

        info = OrderedDict()
        info['previous_output'] = prev_output

        # If there is a human detection and no target yet, initialize on it.
        if det_flag == 1 and temp_flag == 0:
            init_state = ui_control.get_bb()

            info['init_object_ids'] = [next_object_id, ]
            info['init_bbox'] = OrderedDict({next_object_id: init_state})
            sequence_object_ids.append(next_object_id)

            next_object_id += 1
            temp_flag = 1

        if len(sequence_object_ids) > 0:
            info['sequence_object_ids'] = sequence_object_ids
            out = tracker.track(frame, info)
            prev_output = OrderedDict(out)

            if 'segmentation' in out:
                frame_disp = overlay_mask(frame_disp, out['segmentation'])

            if 'target_bbox' in out:
                for obj_id, state in out['target_bbox'].items():
                    state = [int(s) for s in state]
                    cv.rectangle(frame_disp, (state[0], state[1]),
                                 (state[2] + state[0], state[3] + state[1]),
                                 _tracker_disp_colors[obj_id], 5)

        # Put text
        font_color = (0, 0, 0)
        cv.putText(frame_disp, 'Press r to reset', (20, 25), cv.FONT_HERSHEY_COMPLEX_SMALL, 1,
                   font_color, 1)
        cv.putText(frame_disp, 'Press q to quit', (20, 55), cv.FONT_HERSHEY_COMPLEX_SMALL, 1,
                   font_color, 1)

        # Display the resulting frames
        cv.imshow(display_name, frame_disp)
        cv.imshow("YOLO", frame_yolo)
        key = cv.waitKey(1)
        if key == ord('q'):
            break
        elif key == ord('r'):
            next_object_id = 1
            sequence_object_ids = []
            prev_output = OrderedDict()
            temp_flag = 0  # allow re-initialization from the next YOLO detection

            info = OrderedDict()
            info['object_ids'] = []
            info['init_object_ids'] = []
            info['init_bbox'] = OrderedDict()
            tracker.initialize(frame, info)
            ui_control.mode = 'init'

    # When everything done, release the capture
    cap.release()
    cv.destroyAllWindows()