def get_scaled_bboxes(filename, sf):
    """Load the bboxes saved alongside *filename* and scale them by *sf*.

    Args:
        filename: path to the image whose companion bbox file should be read.
        sf: scale factor applied to every loaded bbox.

    Returns:
        (bboxes, classes): the scaled bboxes and their class labels.
    """
    directory = os.path.dirname(filename)
    bbox_path = os.path.join(directory, bbox_writer.get_bbox_filename(filename))
    raw_bboxes, classes = bbox_writer.read_bboxes(bbox_path)
    return drawing_utils.scale_bboxes(raw_bboxes, sf), classes
def save_frame(orig, frame, bboxes, classes, run_name, frame_count):
    """Write the current frame, its annotated copy, and its bboxes to disk.

    Saves three files into *run_name*: the clean frame (%05d.png), the frame
    with rectangles drawn on it (rect_%05d.png), and the bbox text file
    (%05d.txt). Also pops up a preview window of the saved annotations.
    """
    # The live bboxes were inflated by args.scale for the tracker; undo that
    # so the saved boxes are tight around the objects themselves.
    tight_bboxes = drawing_utils.scale_bboxes(bboxes, 1 / args.scale)

    preview = orig.copy()
    drawing_utils.draw_bboxes(preview, tight_bboxes, classes)
    show_scaled("Saved Frame", preview)

    cv2.imwrite(os.path.join(run_name, "%05d.png" % frame_count), orig)
    cv2.imwrite(os.path.join(run_name, "rect_%05d.png" % frame_count), frame)
    bbox_writer.write_bboxes(tight_bboxes, classes,
            os.path.join(run_name, "%05d.txt" % frame_count))
def showROI(bboxes, frame):
    """Show a zoomed-in window for each bbox's region of interest.

    Each bbox is inflated by 1.2x, clamped to the frame, cropped out, and
    resized so its smaller dimension is IMAGE_SIZE pixels before being shown
    in its own "Image %d" window.

    Args:
        bboxes: sequence of [x, y, w, h] boxes; entries may be None (skipped).
        frame: full 3-channel image the boxes live in.
    """
    scaled_bboxes = drawing_utils.scale_bboxes(bboxes, 1.2)
    h, w, _ = frame.shape
    # Make the smaller of the two ROI dimensions this fixed display size.
    IMAGE_SIZE = 100

    for i, bbox in enumerate(scaled_bboxes):
        if bbox is None:
            continue

        # Grab the part of the frame that we care about, clamped to the image.
        rounded_bbox = bbox.astype(int)
        top_left = rounded_bbox[:2]
        bottom_right = top_left + rounded_bbox[2:]
        xs = np.clip([top_left[0], bottom_right[0]], 0, w)
        ys = np.clip([top_left[1], bottom_right[1]], 0, h)
        roi = frame[ys[0]:ys[1], xs[0]:xs[1]]

        roi_h, roi_w = roi.shape[:2]
        # Bug fix: a bbox entirely outside the frame (or with zero width or
        # height) yields an empty slice; min(...) would be 0 and the original
        # code raised ZeroDivisionError (and cv2.resize cannot handle an
        # empty image). Skip such boxes instead of crashing.
        if roi_h == 0 or roi_w == 0:
            continue

        sf = IMAGE_SIZE / min(roi_h, roi_w)
        roi = cv2.resize(roi, (0, 0), fx=sf, fy=sf)
        cv2.imshow("Image %d" % i, roi)
def refine_bboxes(bboxes, classes, frame, trackers):
    """Tighten each tracked bbox around its object and reinitialize trackers.

    Boxes are refined to be as tight as possible to the object being tracked.
    The tracker is then given the bbox which has been inflated by the original
    scale factor (args.scale), to preserve tracking quality.

    Args:
        bboxes: list of [x, y, w, h] boxes; entries may be None (skipped).
            Mutated in place: refined entries are replaced with the new
            tracker-scaled boxes.
        classes: per-box class labels, parallel to bboxes. Only 'w' (white
            whiffle ball, via Hough circles) and 'c' (orange cube, via HSV
            color segmentation) are refined; other classes are left alone.
        frame: full BGR frame the boxes live in.
        trackers: list of tracker objects, parallel to bboxes. Mutated in
            place: refined entries get freshly initialized trackers.
    """
    # Just in case the tracker is missing something, we scale even further to
    # determine our ROI.
    scaled_bboxes = drawing_utils.scale_bboxes(bboxes, 1.2)
    h, w, _ = frame.shape

    # Very much hard coded for our particular use case.
    for i, bbox in enumerate(scaled_bboxes):
        if bbox is None:
            continue

        # Grab the part of the frame that we care about, clamped to the image.
        rounded_bbox = bbox.astype(int)
        top_left = rounded_bbox[:2]
        bottom_right = top_left + rounded_bbox[2:]
        xs = np.clip([top_left[0], bottom_right[0]], 0, w)
        ys = np.clip([top_left[1], bottom_right[1]], 0, h)
        roi = frame[ys[0]:ys[1], xs[0]:xs[1]]

        # Resize the roi to be a reasonable dimension to see.
        # Make the smaller of the two dimensions a fixed size.
        IMAGE_SIZE = 100
        roi_h, roi_w, _ = roi.shape
        sf = IMAGE_SIZE / min(roi_h, roi_w)
        roi = cv2.resize(roi, (0, 0), fx=sf, fy=sf)

        # Stays None unless one of the refinement branches succeeds below.
        new_bbox = None
        cls = classes[i]
        if cls == 'w':
            # TODO: Tune parameters here, if necessary
            print("Refining white whiffle ball")
            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            min_radius = IMAGE_SIZE // 4
            circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, dp=1,
                    minDist=IMAGE_SIZE/2, param1=30, param2=50,
                    minRadius=min_radius, maxRadius=IMAGE_SIZE//2)
            if circles is None:
                print("NO CIRCLES DETECTED. UHHHH")
                continue

            # Find the biggest circle by area, aka biggest radius
            # (column 2 of each detected circle is its radius).
            biggest_circle_index = np.argmax(circles[0, :, 2])
            biggest_circle = circles[0, biggest_circle_index]
            c = biggest_circle
            # Should not happen given minRadius above, but be defensive.
            if (c[2] < min_radius):
                print("Got an invalid circle?")
                continue

            # draw the outer circle and a dot at the center
            cv2.circle(roi, (c[0], c[1]), c[2], (0, 255, 0), 2)
            cv2.circle(roi, (c[0], c[1]), 2, (0, 0, 255), 3)

            # Use the bounding box of the circle to reinitialize the tracker.
            new_bbox = np.array([c[0] - c[2], c[1] - c[2], 2 * c[2], 2 * c[2]])
        elif cls == 'c':
            print("Refining orange cube")
            # Segment by color: hue below 30 (THRESH_BINARY_INV) AND
            # saturation above an Otsu-chosen split.
            hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
            hsv_blurred = cv2.GaussianBlur(hsv, (5, 5), 0)
            ret, thresh_h = cv2.threshold(hsv_blurred[:, :, 0], 30, 255,
                    cv2.THRESH_BINARY_INV)
            ret, thresh_s = cv2.threshold(hsv_blurred[:, :, 1], 0, 255,
                    cv2.THRESH_BINARY | cv2.THRESH_OTSU)
            mask = cv2.bitwise_and(thresh_h, thresh_s)

            # Clean up the mask a little (opening removes small speckles).
            kernel = np.ones((11,11),np.uint8)
            mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
            # cv2.imshow("Opening", opening)
            roi = cv2.bitwise_and(roi, roi, mask=mask)
            print("made the roi from the mask")

            # Grab the bounding box from the mask.
            conn_stats = cv2.connectedComponentsWithStats(mask, connectivity=4)
            retval, labels, stats, centroids = conn_stats

            # Each stats row is [left, top, width, height, area]; row 0 is
            # the background component. Find the label with the biggest area.
            if len(stats) > 1: # Means we have a non-bg label
                biggest_label = np.argmax(stats[1:, -1]) + 1
                p1 = stats[biggest_label, :2]
                p2 = p1 + stats[biggest_label, 2:-1]
                cv2.rectangle(roi, tuple(p1.astype(int)),
                        tuple(p2.astype(int)), color=(255, 0, 100))
                print("drew the rectangle")
                # [left, top, width, height] — drop the trailing area column.
                new_bbox = stats[biggest_label, :-1]

        cv2.imshow("Image %d" % i, roi)

        if new_bbox is None:
            continue

        print("New bounding box", new_bbox)
        new_bbox = new_bbox / sf # Unscale by the same amount we scaled
        # Translate from ROI-local coordinates back into frame coordinates.
        new_bbox = np.array([*(top_left + new_bbox[:2]), *new_bbox[2:]])
        print("Replacing bbox %d" % i, rounded_bbox, new_bbox)

        # Scale the bbox by the proper scale factor for the tracker, then
        # force the scaled bounding box to be inside the bounds of the image.
        new_bbox_scaled = drawing_utils.scale_bboxes([new_bbox], args.scale)
        new_bbox_scaled = clamp_bboxes(new_bbox_scaled, w, h)
        # if any(new_bbox < 0):
        #     input()

        print("Initializing tracker")
        # Apply the new scaled bbox to both the tracker and the saved ones.
        new_tracker = init_trackers(args.tracker, frame, new_bbox_scaled)[0]
        trackers[i] = new_tracker
        bboxes[i] = new_bbox_scaled[0]
        print("new scaled bbox", bboxes[i])