def match_sift_features(tiles_dir, features_dir, working_dir, jar_file):
    # create a workspace directory if not found
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)

    tile_files = glob.glob(os.path.join(tiles_dir, '*'))
    features_files = glob.glob(os.path.join(features_dir, '*'))

    tiles = load_entire_data(tile_files, features_files)

    # TODO: add all tiles to a kd-tree so it will be faster to find overlap between tiles
    # Iterate over the tiles; for each tile, find the tiles whose bounding
    # boxes overlap it, and match their features.
    # Nested loop:
    #     for each tile_i in [0..N):
    #         for each tile_j in [tile_i+1..N):
    for pair in itertools.combinations(tiles, 2):
        # if the two tiles intersect, match them
        bbox1 = BoundingBox(tiles[pair[0]]['boundingBox'])
        bbox2 = BoundingBox(tiles[pair[1]]['boundingBox'])
        if bbox1.overlap(bbox2):
            print("Matching sift of tiles: {0} and {1}".format(pair[0], pair[1]))
            match_two_tiles_sift_features(tiles[pair[0]], tiles[pair[1]],
                                          jar_file, working_dir)
def match_by_max_pmcc(tiles_file, fixed_tiles, out_fname, jar_file,
                      conf_fname=None, threads_num=None):
    tile_file = tiles_file.replace('file://', '')
    with open(tile_file, 'r') as data_file:
        tilespecs = json.load(data_file)

    # TODO: add all tiles to a kd-tree so it will be faster to find overlap between tiles
    # TODO: limit searches for matches to overlap area of bounding boxes
    # Iterate over the tiles; for each tile, find the tiles whose bounding
    # boxes overlap it, and match their features.
    # Nested loop:
    #     for each tile_i in [0..N):
    #         for each tile_j in [tile_i+1..N):
    indices = []
    for idx1 in range(len(tilespecs)):
        for idx2 in range(idx1 + 1, len(tilespecs)):
            # if the two tiles intersect, match them
            bbox1 = BoundingBox.fromList(tilespecs[idx1]["bbox"])
            bbox2 = BoundingBox.fromList(tilespecs[idx2]["bbox"])
            if bbox1.overlap(bbox2):
                imageUrl1 = tilespecs[idx1]["mipmapLevels"]["0"]["imageUrl"]
                imageUrl2 = tilespecs[idx2]["mipmapLevels"]["0"]["imageUrl"]
                print("Matching by max pmcc: {0} and {1}".format(imageUrl1, imageUrl2))
                indices.append((idx1, idx2))

    match_multiple_pmcc(tiles_file, indices, fixed_tiles, jar_file,
                        out_fname, conf_fname, threads_num)
def match_by_max_pmcc(tiles_dir, working_dir, jar_file):
    # create a workspace directory if not found
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)

    tile_files = glob.glob(os.path.join(tiles_dir, '*'))

    tiles = load_entire_data(tile_files)

    # TODO: add all tiles to a kd-tree so it will be faster to find overlap between tiles
    # Iterate over the tiles; for each tile, find the tiles whose bounding
    # boxes overlap it, and match their features.
    # Nested loop:
    #     for each tile_i in [0..N):
    #         for each tile_j in [tile_i+1..N):
    for pair in itertools.combinations(tiles, 2):
        # if the two tiles intersect, match them
        bbox1 = BoundingBox(tiles[pair[0]]['boundingBox'])
        bbox2 = BoundingBox(tiles[pair[1]]['boundingBox'])
        if bbox1.overlap(bbox2):
            print("Matching by max pmcc tiles: {0} and {1}".format(pair[0], pair[1]))
            match_two_tiles_by_max_pmcc_features(tiles[pair[0]], tiles[pair[1]],
                                                 jar_file, working_dir)
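# The TODO repeated in the matching functions above asks for a spatial index
# so that candidate pairs are found faster than with the O(N^2) nested loop.
# A minimal sketch of that idea using the `rtree` package (an R-tree rather
# than a kd-tree, since it answers rectangle-overlap queries directly).
# `candidate_pairs` and the plain (minx, miny, maxx, maxy) tuples are
# illustrative, not part of the original code.
from rtree import index

def candidate_pairs(bboxes):
    """Yield (i, j) index pairs, i < j, whose rectangles overlap."""
    rt = index.Index()
    for i, bbox in enumerate(bboxes):
        rt.insert(i, bbox)
    for i, bbox in enumerate(bboxes):
        for j in rt.intersection(bbox):
            if i < j:  # report each pair once; also skips the self-hit j == i
                yield i, j

# Usage: list(candidate_pairs([(0, 0, 10, 10), (5, 5, 15, 15), (20, 20, 30, 30)]))
# -> [(0, 1)]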
def bounding_box_naive_handler(event):
    # relies on the module-level convex_hull, plot_data, ax and fig objects
    box = BoundingBox(convex_hull)
    plot_data.clean_plot(which='lines')
    box.append(box[0])  # close the polygon so the last segment returns to the start
    ax.plot(*zip(*box))
    fig.canvas.draw()
    fig.canvas.flush_events()
def _generate_boxes(self) -> None:
    for i in tqdm(range(len(self._templates))):
        h, w = np.array(self._templates[i].shape) * self._best_scale
        if self._is_staff:
            self._boxes.append([
                BoundingBox(0, pt[0], self._reader.img_width, h)
                for pt in zip(*self._best_locations[i])
            ])
        else:
            self._boxes.append([
                BoundingBox(pt[1], pt[0], w, h)
                for pt in zip(*self._best_locations[i])
            ])
    self._boxes = [j for i in self._boxes for j in i]
def fg_bboxes(seg, frame_id, params):
    bboxes = []
    roi = cv2.imread(params['roi_path'], cv2.IMREAD_GRAYSCALE) / 255
    segmentation = seg * roi
    contours, _ = cv2.findContours(segmentation.astype(np.uint8),
                                   cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    idx = 0
    for c in contours:
        rect = cv2.boundingRect(c)
        if rect[2] < 50 or rect[3] < 50 or rect[2] / rect[3] < 0.8:
            continue  # discard contours that are too small or too narrow
        x, y, w, h = rect
        # TODO: pick up bikes in the gt
        bboxes.append(BoundingBox(id=idx, label='car', frame=frame_id,
                                  xtl=x, ytl=y, xbr=x + w, ybr=y + h))
        idx += 1
    return discard_overlapping_bboxes(bboxes)
def read_annotations(path, grouped=True, use_parked=False):
    with open(path) as f:
        tracks = xmltodict.parse(f.read())['annotations']['track']

    annotations = []
    for track in tracks:
        id = track['@id']
        label = track['@label']
        if label != 'car':
            continue

        for box in track['box']:
            is_parked = box['attribute']['#text'] == 'true'
            if not use_parked and is_parked:
                continue
            annotations.append(BoundingBox(
                id=int(id),
                label=label,
                frame=int(box['@frame']),
                xtl=float(box['@xtl']),
                ytl=float(box['@ytl']),
                xbr=float(box['@xbr']),
                ybr=float(box['@ybr']),
                occluded=box['@occluded'] == '1',
                parked=is_parked
            ))

    if grouped:
        return group_by_frame(annotations)
    return annotations
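# For reference, the CVAT-style XML shape read_annotations expects, inferred
# from the lookups above (values illustrative; the single attribute per box
# is assumed to be the "parked" flag):
#
# <annotations>
#   <track id="0" label="car">
#     <box frame="0" xtl="10.0" ytl="20.0" xbr="50.0" ybr="60.0" occluded="0">
#       <attribute name="parked">false</attribute>
#     </box>
#   </track>
# </annotations>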
def read_detections(path, grouped=False, confidenceThr=0.5):
    """
    Format: <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z>
    """
    with open(path) as f:
        lines = f.readlines()

    detections = []
    for line in lines:
        det = line.split(sep=',')
        if float(det[6]) >= confidenceThr:
            detections.append(BoundingBox(
                id=int(det[1]),
                label='car',
                frame=int(det[0]) - 1,  # MOT frames are 1-based; shift to 0-based
                xtl=float(det[2]),
                ytl=float(det[3]),
                xbr=float(det[2]) + float(det[4]),
                ybr=float(det[3]) + float(det[5]),
                confidence=float(det[6])
            ))

    if grouped:
        return group_by_frame(detections)
    return detections
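# A quick check of the MOT-style line format parsed above; the numbers are
# made-up sample values.
sample = "1,3,200.0,150.0,80.0,60.0,0.87,-1,-1,-1"
det = sample.split(sep=',')
assert int(det[0]) - 1 == 0                    # frame 1 becomes 0-based frame 0
assert float(det[2]) + float(det[4]) == 280.0  # xbr = bb_left + bb_width
assert float(det[3]) + float(det[5]) == 210.0  # ybr = bb_top + bb_height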
class Node:
    def __init__(self, solid):
        assert isinstance(solid, TopoDS_Solid)
        self._solid = copy.deepcopy(solid)
        props = GlobalProperties(solid)
        x1, y1, z1, x2, y2, z2 = props.bbox()
        xmin = min(x1, x2)
        xmax = max(x1, x2)
        ymin = min(y1, y2)
        ymax = max(y1, y2)
        zmin = min(z1, z2)
        zmax = max(z1, z2)
        self._bbox = BoundingBox(xmin, ymin, zmin, xmax, ymax, zmax)

    def solid(self):
        return self._solid

    def bbox(self):
        return self._bbox

    def __hash__(self):
        return self._bbox.__hash__()
        # return self._solid.__hash__() + self._bbox.__hash__()

    def __eq__(self, other):
        # Ignore the shapes for now. TopoDS equality is based on memory
        # location, so it doesn't work, and no other way to check geometry
        # equality has been found yet. In theory the bbox should be enough
        # for this use case anyway.
        return self._bbox == other.bbox()

    def __ne__(self, other):
        return not self.__eq__(other)
def detect(self, image):
    # Image.ANTIALIAS is the pre-Pillow-10 name of Image.LANCZOS
    resizedImage = Image.fromarray(image).resize(
        (self._inputWidth, self._inputHeight), Image.ANTIALIAS)
    self._setInputTensor(resizedImage)

    startTime = time.monotonic()
    self._interpreter.invoke()
    elapsedTime = (time.monotonic() - startTime) * 1000
    logging.info("%.0f ms", elapsedTime)

    boxes = self._getOutputTensor(0)
    classes = self._getOutputTensor(1)
    scores = self._getOutputTensor(2)
    count = int(self._getOutputTensor(3))

    results = []
    for i in range(count):
        if scores[i] < self._scoreThreshold:
            continue
        if classes[i] not in self._interestedClasses:
            continue
        # model outputs are normalized to [0, 1]; scale to pixel coordinates
        ymin, xmin, ymax, xmax = boxes[i]
        xmin = int(xmin * image.shape[1])
        xmax = int(xmax * image.shape[1])
        ymin = int(ymin * image.shape[0])
        ymax = int(ymax * image.shape[0])
        results.append(BoundingBox(xmin, ymin, xmax, ymax))
    return results
def _json_to_bboxes(line):
    bboxes = []
    json_dicts = json.loads(line)
    for curr_dict in json_dicts:
        bbox = BoundingBox(curr_dict["left"], curr_dict["right"],
                           curr_dict["top"], curr_dict["bottom"],
                           curr_dict["label"])
        bboxes.append(bbox)
    return bboxes
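# Example of the per-line JSON layout _json_to_bboxes expects; the field
# names come from the dictionary lookups above, the values are illustrative.
line = '[{"left": 10, "right": 50, "top": 20, "bottom": 60, "label": "car"}]'
# _json_to_bboxes(line) -> [BoundingBox(10, 50, 20, 60, "car")]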
def load_tiles(tiles_spec_fname, bbox):
    relevant_tiles = []
    with open(tiles_spec_fname, 'r') as data_file:
        data = json.load(data_file)
        for tile in data:
            tile_bbox = BoundingBox(tile['boundingBox'])
            if bbox.overlap(tile_bbox):
                relevant_tiles.append(tile)
    return relevant_tiles
def filter_tiles(tiles_fname, out_fname, bbox):
    # parse the bounding box arguments
    bbox = BoundingBox.fromStr(bbox)

    # load all tiles from the tile-spec json file that are relevant to our bounding box
    relevant_tiles = load_tiles(tiles_fname, bbox)

    # Create a tile-spec file that includes all relevant tiles
    with open(out_fname, 'w') as outfile:
        json.dump(relevant_tiles, outfile, sort_keys=True, indent=4)
def update_bbox(self, bbox):
    img_h, img_w, _ = self.image.shape
    offset_x1 = (bbox[0] - 0.25) * img_w
    offset_x2 = (bbox[2] - 0.75) * img_w
    offset_y1 = (bbox[1] - 0.25) * img_h
    offset_y2 = (bbox[3] - 0.75) * img_h

    if not self.predicted_bbox:
        new_x1 = self.roi.x1 + offset_x1
        new_x2 = self.roi.x2 + offset_x2
        new_y1 = self.roi.y1 + offset_y1
        new_y2 = self.roi.y2 + offset_y2
        self.predicted_bbox = BoundingBox(new_x1, new_y1, new_x2, new_y2)
    else:
        new_x1 = self.predicted_bbox.x1 + offset_x1
        new_x2 = self.predicted_bbox.x2 + offset_x2
        new_y1 = self.predicted_bbox.y1 + offset_y1
        new_y2 = self.predicted_bbox.y2 + offset_y2
        self.predicted_bbox.update_coordinates(new_x1, new_x2, new_y1, new_y2)
def get_background_map_image_and_bounding_box(X):
    try:
        # Fetch map using TomTom Maps API
        bbox = get_bounding_box(X)
        background_image = get_tomtom_map_raster_image(bbox.min_lon, bbox.min_lat,
                                                       bbox.max_lon, bbox.max_lat)
    except OSError as e:
        # Alternatively, use the offline version
        print('Error: "{}"; using pre-fetched image...'.format(e))
        bbox = BoundingBox(20.39, 44.78, 20.52, 44.84)
        background_image = plt.imread('staticimage.png')
    return background_image, bbox
def get_bounding_box(X):
    min_lat = min(X[:, 0])
    max_lat = max(X[:, 0])
    min_lon = min(X[:, 1])
    max_lon = max(X[:, 1])
    bbox = BoundingBox(min_lon=min_lon - 0.05 * (max_lon - min_lon),
                       min_lat=min_lat - 0.05 * (max_lat - min_lat),
                       max_lon=max_lon + 0.05 * (max_lon - min_lon),
                       max_lat=max_lat + 0.05 * (max_lat - min_lat))
    return bbox
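# Worked example of the 5% padding in get_bounding_box, with made-up
# coordinates: a lon span of 0.10 and a lat span of 0.04 get padded by
# 0.005 and 0.002 respectively on each side.
import numpy as np
X = np.array([[44.79, 20.40],
              [44.83, 20.50]])
bbox = get_bounding_box(X)
# bbox covers lon 20.395..20.505 and lat 44.788..44.832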
class Solid:
    """
    Quick wrapper for TopoDS_Solid so we can have hashing and equality
    that don't depend on memory locations.
    """

    def __init__(self, solid):
        assert isinstance(solid, TopoDS_Solid)
        self._solid = solid
        props = GlobalProperties(solid)
        x1, y1, z1, x2, y2, z2 = props.bbox()
        xmin = min(x1, x2)
        xmax = max(x1, x2)
        ymin = min(y1, y2)
        ymax = max(y1, y2)
        zmin = min(z1, z2)
        zmax = max(z1, z2)
        self._bbox = BoundingBox(xmin, ymin, zmin, xmax, ymax, zmax)

    def bbox(self):
        return self._bbox

    def solid(self):
        return self._solid

    def __hash__(self):
        # TODO: remove this function? Or force it to not be used?
        return self._bbox.__hash__()

    def __eq__(self, other):
        self_com = GpropsFromShape(self._solid).volume().CentreOfMass()
        other_com = GpropsFromShape(other._solid).volume().CentreOfMass()
        # Not ideal for equality, but we need to handle minor deviations.
        # TODO: does this tolerance really need to be so big?
        return (self._bbox.eq_within_tolerance(other._bbox, tolerance=0.1)
                and distance(self_com, other_com) < 0.1)

    def __ne__(self, other):
        return not self.__eq__(other)
def detect(self, frame: VideoFrame, input_w=256, input_h=256,
           class_threshold=0.6, labels=["person"],
           anchors=[[116, 90, 156, 198, 373, 326],
                    [30, 61, 62, 45, 59, 119],
                    [10, 13, 16, 30, 33, 23]]):
    """
    Input parameters:
        input_w/h: model input size
        class_threshold: candidate classes above this confidence are kept
        labels: the classes it recognizes (you can add lots of things, fun)
        anchors: the BBs are derived from these

    Processing steps:
        1. Load and preprocess the image
        2. Run the model
        3. Build the BoundingBoxes
        4. Correct the BB sizes
        5. Handle overlaps
        6. Label the BBs

    Output:
        boxes: bounding boxes
        labels: predicted class (in our case always "person", so it could even be removed)
        scores: ~confidence
    """
    image, image_w, image_h = self.load_image_pixels(frame, (input_w, input_h))
    yhat = self.model.predict(image)

    boxes = list()
    for i in range(len(yhat)):
        boxes += yolo.decode_netout(yhat[i][0], anchors[i], class_threshold,
                                    input_h, input_w)
    self.correct_yolo_boxes(boxes, image_h, image_w, input_h, input_w)
    yolo.do_nms(boxes, 0.5)
    boxes, labels, scores = self.get_boxes(boxes, labels, class_threshold)

    ret_boxes = []
    for box in boxes:
        y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
        width, height = x2 - x1, y2 - y1
        ret_boxes.append(BoundingBox(x1, y1, width, height))
    return ret_boxes, scores
def filter_tiles(tiles_fname, working_dir, bbox):
    # parse the bounding box arguments
    bbox = BoundingBox(bbox)

    # create a workspace directory if not found
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)

    # load all tiles from the tile-spec json file that are relevant to our bounding box
    relevant_tiles = load_tiles(tiles_fname, bbox)

    # Create a tile-spec file for each relevant tile
    create_single_tile_specs(relevant_tiles, working_dir)
def to_bounding_box(
        gt_boxes: List[Dict[str, torch.Tensor]],
        pred_boxes: List[Dict[str, torch.Tensor]]
) -> Tuple[List[BoundingBox], List[BoundingBox]]:
    gt_parsed_boxes, pred_parsed_boxes = [], []

    # for each image
    for i, (gts, preds) in enumerate(zip(gt_boxes, pred_boxes)):
        image_name = str(i)

        # process ground truth bounding boxes
        boxes = list(gts['boxes'].detach().cpu().numpy())
        classes = list(gts['labels'].detach().cpu().numpy())
        for box, box_class in zip(boxes, classes):
            coords = tuple(box)
            b = BoundingBox(image_name=image_name,
                            class_id=box_class,
                            coordinates=coords,
                            type_coordinates=CoordinatesType.ABSOLUTE,
                            bb_type=BBType.GROUND_TRUTH,
                            format=BBFormat.XYX2Y2)
            gt_parsed_boxes.append(b)

        # process detections
        boxes = list(preds['boxes'].detach().cpu().numpy())
        classes = list(preds['labels'].detach().cpu().numpy())
        conf = list(preds['scores'].detach().cpu().numpy())
        for box, box_class, box_confidence in zip(boxes, classes, conf):
            coords = tuple(box)
            b = BoundingBox(image_name=image_name,
                            class_id=box_class,
                            coordinates=coords,
                            confidence=box_confidence,
                            type_coordinates=CoordinatesType.ABSOLUTE,
                            bb_type=BBType.DETECTED,
                            format=BBFormat.XYX2Y2)
            pred_parsed_boxes.append(b)

    return gt_parsed_boxes, pred_parsed_boxes
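# Minimal usage sketch with made-up tensors in the torchvision detection
# format the function consumes: one image, one ground-truth box, one detection.
import torch
gt = [{'boxes': torch.tensor([[0., 0., 10., 10.]]),
       'labels': torch.tensor([1])}]
pred = [{'boxes': torch.tensor([[1., 1., 9., 9.]]),
         'labels': torch.tensor([1]),
         'scores': torch.tensor([0.9])}]
gt_parsed, pred_parsed = to_bounding_box(gt, pred)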
def add_noise(annotations, noise_params, num_frames):
    # to generate a BB randomly in an image, we use the mean width and
    # height of the annotated BBs so that they have similar statistics
    mean_w = 0
    mean_h = 0
    noisy_annotations = []

    # add noise to existing BBs (annotations)
    for box in annotations:
        # remove BB
        if np.random.random() <= noise_params['drop']:
            continue

        # generate BB close to an existing one
        if np.random.random() <= noise_params['generate_close']:
            new_box = deepcopy(box)
            # `noise` is a module-level dict mapping noise type to a function
            noise['gaussian'](new_box, noise_params)
            noisy_annotations.append(new_box)

        b = deepcopy(box)
        # add noise to existing BB
        if noise_params['type'] is not None:
            noise[noise_params['type']](b, noise_params)
        noisy_annotations.append(b)

        # note: dropped boxes skip this accumulation, yet the division below
        # uses the full annotation count
        mean_w += box.width
        mean_h += box.height

    mean_w /= len(annotations)
    mean_h /= len(annotations)

    # generate random BBs in random frames
    for frame_id in range(num_frames):
        if np.random.random() <= noise_params['generate_random']:
            # center of the BB (cx, cy), width and height (w, h);
            # `img_shape` is assumed to be a module-level (height, width) tuple
            cx = np.random.randint(mean_w // 2, img_shape[1] - mean_w // 2)
            cy = np.random.randint(mean_h // 2, img_shape[0] - mean_h // 2)
            w = np.random.normal(mean_w, 10)
            h = np.random.normal(mean_h, 10)
            noisy_annotations.append(
                BoundingBox(id=-1, label='car', frame=frame_id,
                            xtl=cx - w / 2, ytl=cy - h / 2,
                            xbr=cx + w / 2, ybr=cy + h / 2))

    return noisy_annotations
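# A hypothetical noise_params dict matching the keys read by add_noise (the
# values are illustrative): drop 5% of the boxes, spawn a jittered copy next
# to 10% of them, apply gaussian noise to every kept box, and add a random
# box to 5% of the frames.
noise_params = {
    'drop': 0.05,
    'generate_close': 0.1,
    'type': 'gaussian',
    'generate_random': 0.05,
}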
def detect_russian_word(self, color_image, depth_image):
    """
    Detect words in given image.

    Returns
    -------
    A list of box objects that contain desired text
    """
    # filter image
    _, filter_image = cv2.threshold(np.mean(color_image, axis=2),
                                    185, 255, cv2.THRESH_BINARY)
    # shows what the filtered image looks like
    # cv2.imshow('img', filter_image)
    # cv2.waitKey(0)

    ## only return boxes that have text in them
    ## eg. find a way to check if boxes are repetitive or do not contain text
    d = pytesseract.image_to_data(filter_image,
                                  output_type=pytesseract.Output.DICT,
                                  lang="uzb_cyrl")
    n_boxes = len(d['level'])
    box_obs = []
    contents = d['text']
    # print(contents)
    for i in range(n_boxes):
        if not contents[i]:
            continue
        for j in contents[i]:
            if j in self.text:
                # print(contents[i])
                (x, y, w, h) = (d['left'][i], d['top'][i],
                                d['width'][i], d['height'][i])
                # cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
                verts = [(x, y), (x + w, y), (x, y + h), (x + w, y + h)]
                cv2.rectangle(filter_image, verts[0], verts[-1], (0, 255, 0), 2)
                box = BoundingBox(verts, ObjectType('text'))
                box_obs.append(box)
                break

    # cv2.imshow('img', filter_image)
    # cv2.waitKey(0)
    return box_obs
def match_sift_features(tiles_file, features_file, out_fname, jar_file, conf=None):
    tilespecs, feature_indices = load_data_files(tiles_file, features_file)
    for k, v in feature_indices.items():
        print(k, v)

    # TODO: add all tiles to a kd-tree so it will be faster to find overlap between tiles
    # TODO: limit searches for matches to overlap area of bounding boxes
    # Iterate over the tiles; for each tile, find the tiles whose bounding
    # boxes overlap it, and match their features.
    # Nested loop:
    #     for each tile_i in [0..N):
    #         for each tile_j in [tile_i+1..N):
    indices = []
    for pair in itertools.combinations(tilespecs, 2):
        # if the two tiles intersect, match them
        bbox1 = BoundingBox.fromList(pair[0]["bbox"])
        bbox2 = BoundingBox.fromList(pair[1]["bbox"])
        if bbox1.overlap(bbox2):
            imageUrl1 = pair[0]["mipmapLevels"]["0"]["imageUrl"]
            imageUrl2 = pair[1]["mipmapLevels"]["0"]["imageUrl"]
            print("Matching sift of tiles: {0} and {1}".format(imageUrl1, imageUrl2))
            idx1 = feature_indices[imageUrl1]
            idx2 = feature_indices[imageUrl2]
            indices.append((idx1, idx2))

    conf_args = utils.conf_args(conf, 'MatchSiftFeatures')
    match_multiple_sift_features(tiles_file, features_file, indices, jar_file,
                                 out_fname, conf_args)
def execute_bounding_box(self, goal):
    self.bb_active = True
    mybb = BoundingBox(self.bb_done_callback, self.im_server, goal.image)
    rr = rospy.Rate(self.loop_rate)
    while (self.bb_active
           and not self.bb_server.is_preempt_requested()
           and not rospy.is_shutdown()):
        rr.sleep()

    if self.bb_server.is_preempt_requested():
        mybb.cancel()
        self.bb_server.set_preempted()
    else:
        result = mybb.get_result()
        if result is None:
            self.bb_server.set_aborted()
        else:
            (row1, row2, col1, col2) = result
            resp = BoundingBoxResult()
            resp.min_row.data = row1
            resp.max_row.data = row2
            resp.min_col.data = col1
            resp.max_col.data = col2
            self.bb_server.set_succeeded(resp)
def split_heights(bounding_boxes, evaled_avg_line_height):
    idx = 0
    splitted = []
    while idx < len(bounding_boxes) - 1:
        box = bounding_boxes[idx]
        height = box.h
        n = int(height / evaled_avg_line_height)
        if n > 1:
            # replace the box with n stacked boxes of roughly equal height
            new_height = int(round(height / n))
            bounding_boxes.pop(idx)
            splitted += [BoundingBox((box.x, box.y + incr, box.w, new_height))
                         for incr in range(0, int(new_height * n), new_height)]
        else:
            idx += 1
    return sorted(bounding_boxes + splitted, key=lambda x: x.y)
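# Worked example with made-up numbers, assuming BoundingBox accepts an
# (x, y, w, h) tuple and exposes .x/.y/.w/.h as the function above does:
# a box of height 62 against an average line height of 20 gives n = 3, so it
# is replaced by three boxes of height round(62 / 3) == 21 at y, y+21, y+42.
boxes = [BoundingBox((0, 10, 40, 62)), BoundingBox((0, 80, 40, 19))]
split = split_heights(boxes, evaled_avg_line_height=20)
# -> y positions [10, 31, 52, 80] with heights [21, 21, 21, 19]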
def detections_from_csv(csv_filepath: str) -> List[Detection]:
    """
    reads a detection list from csv file

    :param csv_filepath: csv file path
    :return: detections list
    """
    detections = list()
    with open(csv_filepath, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            top_right_coord = Coordinate(int(row[X_TOP_RIGHT_KEY]),
                                         int(row[Y_TOP_RIGHT_KEY]))
            bb = BoundingBox(top_right_coord, int(row[BB_W]), int(row[BB_H]))
            detection = Detection(bb, float(row[SCORE]))
            detections.append(detection)
    return detections
def detect(self, image):
    startTime = time.monotonic()
    bodies, _, weights = self._faceCascade.detectMultiScale3(
        image, 1.5, 5, outputRejectLevels=True)
    elapsedTime = (time.monotonic() - startTime) * 1000
    logging.info("%.0f ms", elapsedTime)

    if len(bodies):
        logging.debug("Bodies: %s / Weights: %s", bodies, weights)
    else:
        return []

    results = []
    for x, y, w, h in bodies:
        results.append(BoundingBox(x, y, x + w, y + h))
    return results
def split_widths(bounding_boxes, evaled_avg_letter_width):
    idx = 0
    splitted = []
    while idx < len(bounding_boxes) - 1:
        box = bounding_boxes[idx]
        width = box.w
        n = round(width / evaled_avg_letter_width)
        if n > 1:
            # replace the box with n side-by-side boxes of roughly equal width
            new_width = int(round(width / n))
            bounding_boxes.pop(idx)
            splitted += [
                BoundingBox((box.x + incr, 0, new_width, box.h))
                for incr in range(0, int(n * new_width), int(new_width))
            ]
        else:
            idx += 1
    return sorted(bounding_boxes + splitted, key=lambda x: x.x)
def read_kitti_data(image_id, image_path, label_path, stride):
    """Reads groundtruth data from a Kitti annotation file."""
    with open(label_path) as file:
        data = file.read()

    bboxes = []
    lines = data.splitlines()
    for line in lines:
        line = line.split()
        if line[0] in LABEL_IDS:
            # fields 4-7 are the 2D bbox coordinates; cast the strings to float
            bboxes.append(BoundingBox(float(line[4]), float(line[5]),
                                      float(line[6]), float(line[7]),
                                      LABEL_IDS[line[0]]))
    return SSDImage(image_id, image_path, stride, len(LABELS), bboxes=bboxes)
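# A standard Kitti label line for reference; zero-indexed fields 4-7 are the
# 2D bbox left, top, right and bottom read above:
# "Car 0.00 0 -1.58 587.01 173.33 614.12 200.12 1.65 1.67 3.64 -0.65 1.71 46.70 -1.59"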
def combine_horizontally(bounding_boxes, evaled_letter_width):
    # Sort by position
    sorted_boxes = sorted(bounding_boxes, key=lambda x: x.x, reverse=False)
    idx = 0
    while idx < len(sorted_boxes) - 1:
        box = sorted_boxes[idx]
        next_box = sorted_boxes[idx + 1]
        combined = BoundingBox.combine(box, next_box)
        combined_max_width = evaled_letter_width * 1.2
        if combined.w < combined_max_width:
            # merge the pair, then re-check the merged box against its new neighbor
            sorted_boxes.pop(idx)
            sorted_boxes.pop(idx)
            sorted_boxes.insert(idx, combined)
        else:
            idx += 1
    return sorted_boxes
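# Worked example with made-up numbers, assuming BoundingBox.combine returns
# the union of the two boxes: with evaled_letter_width = 10 the merge
# threshold is 12, so fragments spanning x 0..4 and x 5..9 (union width 9)
# are merged into one letter box, while a box starting at x = 30 stays
# separate because any union containing it would be wider than 12.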