def _kwimage_to_kwiver_detections(detections):
    """
    Convert kwimage detections to kwiver detected object sets

    Args:
        detections (kwimage.Detections)

    Returns:
        kwiver.vital.types.DetectedObjectSet
    """
    # convert segmentation masks
    if 'segmentations' in detections.data:
        print("Warning: segmentations not implemented")

    boxes = detections.boxes.to_tlbr()
    scores = detections.scores
    class_idxs = detections.class_idxs

    # convert to kwiver format, apply threshold
    detected_objects = DetectedObjectSet()

    for tlbr, score, cidx in zip(boxes.data, scores, class_idxs):
        class_name = detections.classes[cidx]

        bbox_int = np.round(tlbr).astype(np.int32)
        bounding_box = BoundingBox(
            bbox_int[0], bbox_int[1], bbox_int[2], bbox_int[3])

        detected_object_type = DetectedObjectType(class_name, score)
        detected_object = DetectedObject(
            bounding_box, score, detected_object_type)
        detected_objects.add(detected_object)

    return detected_objects
def _classification_to_kwiver_detections(classification, w, h):
    """
    Convert kwarray classifications to kwiver detected object sets

    Args:
        classification (bioharn.clf_predict.Classification)
        w (int): width of image
        h (int): height of image

    Returns:
        kwiver.vital.types.DetectedObjectSet
    """
    detected_objects = DetectedObjectSet()

    if classification.data.get('prob', None) is not None:
        # If we have a probability for each class, use that
        class_names = list(classification.classes)
        class_prob = classification.prob
        detected_object_type = DetectedObjectType(class_names, class_prob)
    else:
        # Otherwise we only have the score for the predicted class
        class_name = classification.classes[classification.cidx]
        class_score = classification.conf
        detected_object_type = DetectedObjectType(class_name, class_score)

    bounding_box = BoundingBoxD(0, 0, w, h)
    detected_object = DetectedObject(
        bounding_box, classification.conf, detected_object_type)
    detected_objects.add(detected_object)

    return detected_objects
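# Hedged usage sketch (not part of the original source): illustrates how a
# DetectedObjectType built from parallel name/score lists, as in the converter
# above, can be queried back out.  It only relies on calls that already appear
# elsewhere in this file (DetectedObjectType(names, scores),
# get_most_likely_class(), score()); the class names and probabilities below
# are illustrative placeholders.
def _example_full_frame_type():
    class_names = ['fish', 'rock', 'background']
    class_probs = [0.7, 0.2, 0.1]
    detected_object_type = DetectedObjectType(class_names, class_probs)

    # Query the most likely class and its score back out of the type object
    best = detected_object_type.get_most_likely_class()
    return best, detected_object_type.score(best)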
def filter_truth(self, init_truth, categories):
    filtered_truth = DetectedObjectSet()
    use_frame = True
    max_length = int(self._max_scale_wrt_chip * float(self._chip_width))

    for i, item in enumerate(init_truth):
        if item.type is None:
            continue
        class_lbl = item.type.get_most_likely_class()
        if categories is not None and not categories.has_class_name(class_lbl):
            if self._mode == "detection_refiner":
                class_lbl = self._negative_category
            else:
                continue
        if categories is not None:
            class_lbl = categories.get_class_name(class_lbl)
        elif class_lbl not in self._categories:
            self._categories.append(class_lbl)
        item.type = DetectedObjectType(class_lbl, 1.0)

        if self._mode == "detector" and \
           (item.bounding_box.width() > max_length or
                item.bounding_box.height() > max_length):
            use_frame = False
            break

        filtered_truth.add(item)

    if self._gt_frames_only and len(init_truth) == 0:
        use_frame = False

    return filtered_truth, use_frame
def _step(self):
    try:
        # Grab image container from port using traits
        in_img_c = self.grab_input_using_trait('image')
        timestamp = self.grab_input_using_trait('timestamp')
        dos_ptr = self.grab_input_using_trait('detected_object_set')
        print('timestamp = {!r}'.format(timestamp))

        # Get current frame and give it to app feature extractor
        im = get_pil_image(in_img_c.image())
        self._app_feature_extractor.frame = im

        bbox_num = 0

        # Get detection bbox
        dos = dos_ptr.select(self._select_threshold)
        bbox_num = dos.size()

        det_obj_set = DetectedObjectSet()
        if bbox_num == 0:
            print('!!! No bbox is provided on this frame; skipping this frame !!!')
        else:
            # appearance features (format: pytorch tensor)
            app_f_begin = timer()
            pt_app_features = self._app_feature_extractor(dos, False)
            app_f_end = timer()
            print('%%%app feature elapsed time: {}'.format(app_f_end - app_f_begin))

            # get new track state from new frame and detections
            for idx, item in enumerate(dos):
                bbox = item.bounding_box()
                fid = timestamp.get_frame()
                ts = timestamp.get_time_usec()
                d_obj = item

                # store app feature to detectedObject
                app_f = new_descriptor(pt_app_features[idx].numpy().size)
                app_f[:] = pt_app_features[idx].numpy()
                # print( pt_app_features[idx].numpy() )
                d_obj.set_descriptor(app_f)
                det_obj_set.add(d_obj)

        # push track set to output port
        self.push_to_port_using_trait('detected_object_set', det_obj_set)

        self._base_step()

    except BaseException as e:
        print(repr(e))
        import traceback
        print(traceback.format_exc())
        sys.stdout.flush()
        raise
def _create_detected_object_set():
    from kwiver.vital.types import DetectedObject, DetectedObjectSet, BoundingBox
    dos = DetectedObjectSet()
    bbox = BoundingBox(0, 10, 100, 50)
    dos.add(DetectedObject(bbox, 0.2))
    dos.add(DetectedObject(bbox, 0.5))
    dos.add(DetectedObject(bbox, 0.4))
    return dos
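# Hedged usage sketch (not part of the original source): shows how the helper
# above might be combined with DetectedObjectSet.select(), the same
# confidence-thresholding call the _step() implementations in this file use
# on their input port.  The 0.3 threshold is illustrative only.
def _example_select_by_confidence():
    dos = _create_detected_object_set()
    # Keep only detections with confidence >= 0.3 (drops the 0.2 entry above)
    confident = dos.select(0.3)
    return confident.size()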
def detect(self, in_img_c):
    import tensorflow as tf
    import humanfriendly

    image_height = in_img_c.height()
    image_width = in_img_c.width()

    if (self.norm_image_type and self.norm_image_type != "none"):
        print("Normalizing input image")

        in_img = in_img_c.image().asarray().astype("uint16")

        bottom, top = self.get_scaling_values(self.norm_image_type, in_img,
                                              image_height)
        in_img = self.lin_normalize_image(in_img, bottom, top)

        in_img = np.tile(in_img, (1, 1, 3))
    else:
        in_img = np.array(get_pil_image(in_img_c.image()).convert("RGB"))

    start_time = time.time()
    boxes, scores, classes = self.generate_detection(
        self.detection_graph, in_img)
    elapsed = time.time() - start_time
    print("Done running detector in {}".format(
        humanfriendly.format_timespan(elapsed)))

    good_boxes = []
    detections = DetectedObjectSet()

    for i in range(0, len(scores)):
        if (scores[i] >= self.confidence_thresh):
            bbox = boxes[i]
            good_boxes.append(bbox)

            top_rel = bbox[0]
            left_rel = bbox[1]
            bottom_rel = bbox[2]
            right_rel = bbox[3]

            xmin = left_rel * image_width
            ymin = top_rel * image_height
            xmax = right_rel * image_width
            ymax = bottom_rel * image_height

            dot = DetectedObjectType(self.category_name, scores[i])
            obj = DetectedObject(BoundingBoxD(xmin, ymin, xmax, ymax),
                                 scores[i], dot)
            detections.add(obj)

    print("Detected {}".format(len(good_boxes)))
    return detections
def _dowork(self, img_container):
    """
    Helper to decouple the algorithm and pipeline logic

    CommandLine:
        xdoctest viame.processes.camtrawl.processes CamtrawlDetectFishProcess._dowork

    Example:
        >>> from viame.processes.camtrawl.processes import *
        >>> from kwiver.vital.types import ImageContainer
        >>> import kwiver.sprokit.pipeline.config
        >>> # construct dummy process instance
        >>> conf = kwiver.sprokit.pipeline.config.empty_config()
        >>> self = CamtrawlDetectFishProcess(conf)
        >>> self._configure()
        >>> # construct test data
        >>> from vital.util import VitalPIL
        >>> from PIL import Image as PILImage
        >>> pil_img = PILImage.open(ub.grabdata('https://i.imgur.com/Jno2da3.png'))
        >>> pil_img = PILImage.fromarray(np.zeros((512, 512, 3), dtype=np.uint8))
        >>> img_container = ImageContainer(VitalPIL.from_pil(pil_img))
        >>> # Initialize the background detector by sending 10 black frames
        >>> for i in range(10):
        >>>     empty_set = self._dowork(img_container)
        >>> # now add a white box that should be detected
        >>> np_img = np.zeros((512, 512, 3), dtype=np.uint8)
        >>> np_img[300:340, 220:380] = 255
        >>> img_container = ImageContainer.fromarray(np_img)
        >>> detection_set = self._dowork(img_container)
        >>> assert len(detection_set) == 1
        >>> obj = detection_set[0]
    """
    # This should be read as np.uint8
    np_img = img_container.asarray()

    detection_set = DetectedObjectSet()
    ct_detections = self.detector.detect(np_img)

    for detection in ct_detections:
        bbox = BoundingBoxD(*detection.bbox.coords)
        mask = detection.mask.astype(np.uint8)
        vital_mask = ImageContainer.fromarray(mask)
        dot = DetectedObjectType("Motion", 1.0)
        obj = DetectedObject(bbox, 1.0, dot, mask=vital_mask)
        detection_set.add(obj)

    return detection_set
def _step(self):
    image_container = self.grab_input_using_trait("image")
    timestamp = self.grab_input_using_trait("timestamp")
    file_name = self.grab_input_using_trait("file_name")

    image = image_container.asarray()
    h, w, _ = image.shape
    bbox_x = w // 2
    bbox_y = h // 2

    bbox = BoundingBox(
        bbox_x - int(self.config_value("bbox_width")) // 2,
        bbox_y - int(self.config_value("bbox_height")) // 2,
        bbox_x + int(self.config_value("bbox_width")) // 2,
        bbox_y + int(self.config_value("bbox_height")) // 2)

    dot = DetectedObjectType("Test", 1.0)
    do = DetectedObject(bbox, 1.0, dot)
    dos = DetectedObjectSet()
    dos.add(do)

    self.push_to_port_using_trait("detected_object_set", dos)
def merge(self, det_sets):
    # Get detection HL info in a list
    pred_sets = []

    for det_set in det_sets:
        pred_set = []
        for det in det_set:
            # Extract box info for this det
            bbox = det.bounding_box

            bbox_min_x = int(bbox.min_x())
            bbox_max_x = int(bbox.max_x())
            bbox_min_y = int(bbox.min_y())
            bbox_max_y = int(bbox.max_y())

            # Extract type info for this det
            if det.type is None:
                continue

            #class_names = list( det.type.class_names() )
            #class_scores = [ det.type.score( n ) for n in class_names ]

            class_name = det.type.get_most_likely_class()
            class_score = det.type.score(class_name)

            pred_set.append([bbox_min_x, bbox_min_y, bbox_max_x, bbox_max_y,
                             class_name, class_score])
        pred_sets.append(pred_set)

    # Run merging algorithm
    #ensemble_preds = ensemble_box( pred_sets, self._fusion_weights,
    #  self._iou_thr, self._skip_box_thr, self._sigma, self._fusion_type )
    ensemble_preds = []

    # Compile output detections
    output = DetectedObjectSet()

    for pred in ensemble_preds:
        score = pred[5]
        bbox = BoundingBoxD(pred[0], pred[1], pred[2], pred[3])
        dot = DetectedObjectType(pred[4], score)
        det = DetectedObject(bbox, score, dot)
        output.add(det)

    return output
def _kwimage_to_kwiver_detections(detections):
    """
    Convert kwimage detections to kwiver detected object sets

    Args:
        detections (kwimage.Detections)

    Returns:
        kwiver.vital.types.DetectedObjectSet
    """
    from kwiver.vital.types.types import ImageContainer, Image

    segmentations = None
    # convert segmentation masks
    if 'segmentations' in detections.data:
        segmentations = detections.data['segmentations']

    boxes = detections.boxes.to_tlbr()
    scores = detections.scores
    class_idxs = detections.class_idxs

    if not segmentations:
        # Placeholders
        segmentations = (None,) * len(boxes)

    # convert to kwiver format, apply threshold
    detected_objects = DetectedObjectSet()

    for tlbr, score, cidx, seg in zip(boxes.data, scores, class_idxs,
                                      segmentations):
        class_name = detections.classes[cidx]

        bbox_int = np.round(tlbr).astype(np.int32)
        bounding_box = BoundingBoxD(
            bbox_int[0], bbox_int[1], bbox_int[2], bbox_int[3])

        detected_object_type = DetectedObjectType(class_name, score)
        detected_object = DetectedObject(
            bounding_box, score, detected_object_type)
        if seg:
            mask = seg.to_relative_mask().numpy().data
            detected_object.mask = ImageContainer(Image(mask))

        detected_objects.add(detected_object)

    return detected_objects
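# Hedged usage sketch (not part of the original source): feeds randomly
# generated kwimage detections through the converter above.  It assumes
# kwimage is installed and that kwimage.Detections.random() and
# Detections.scale() behave as in recent kwimage releases; the argument
# values are illustrative only.
def _example_convert_random_detections():
    import kwimage
    # Random detections come with normalized boxes; scale to a nominal image size
    dets = kwimage.Detections.random(num=5).scale(512)
    kwiver_dets = _kwimage_to_kwiver_detections(dets)
    return kwiver_dets.size()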
def detect(self, image_data):
    input_image = image_data.asarray().astype('uint8')

    if self._rgb_to_bgr:
        input_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)

    from mmdet.apis import inference_detector
    detections = inference_detector(self._model, input_image)

    if isinstance(detections, tuple):
        bbox_result, segm_result = detections
    else:
        bbox_result, segm_result = detections, None

    if np.size(bbox_result) > 0:
        bboxes = np.vstack(bbox_result)
    else:
        bboxes = []

    # convert segmentation masks
    masks = []
    if segm_result is not None:
        segms = mmcv.concat_list(segm_result)
        inds = np.where(bboxes[:, -1] > self._thresh)[0]
        for i in inds:
            masks.append(maskUtils.decode(segms[i]).astype(bool))

    # collect labels
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_result)
    ]

    if np.size(labels) > 0:
        labels = np.concatenate(labels)
    else:
        labels = []

    # convert to kwiver format, apply threshold
    output = DetectedObjectSet()

    for bbox, label in zip(bboxes, labels):
        class_confidence = float(bbox[-1])
        if class_confidence < self._thresh:
            continue

        bbox_int = bbox.astype(np.int32)
        bounding_box = BoundingBoxD(
            bbox_int[0], bbox_int[1], bbox_int[2], bbox_int[3])

        class_name = self._labels[label]
        detected_object_type = DetectedObjectType(class_name, class_confidence)

        detected_object = DetectedObject(
            bounding_box, class_confidence, detected_object_type)
        output.add(detected_object)

    if np.size(labels) > 0 and self._display_detections:
        mmcv.imshow_det_bboxes(
            input_image, bboxes, labels,
            class_names=self._labels,
            score_thr=self._thresh, show=True)

    return output
def refine(self, image_data, detections):
    if len(detections) == 0:
        return detections

    img = image_data.asarray().astype('uint8')
    predictor = self.predictor

    scale = 1.0
    img_max_x = np.shape(img)[1]
    img_max_y = np.shape(img)[0]

    if self._target_type_scales:
        scale = self.compute_scale_factor(detections)
        if scale != 1.0:
            img_max_x = int(img_max_x * scale)
            img_max_y = int(img_max_y * scale)
            img = cv2.resize(img, (img_max_x, img_max_y))

    # Extract patches for ROIs
    image_chips = []
    detection_ids = []

    for i, det in enumerate(detections):
        # Extract chip for this detection
        bbox = det.bounding_box

        bbox_min_x = int(bbox.min_x() * scale)
        bbox_max_x = int(bbox.max_x() * scale)
        bbox_min_y = int(bbox.min_y() * scale)
        bbox_max_y = int(bbox.max_y() * scale)

        if self._kwiver_config['chip_method'] == "fixed_width":
            chip_width = int(self._kwiver_config['chip_width'])
            half_width = int(chip_width / 2)

            bbox_min_x = int((bbox_min_x + bbox_max_x) / 2) - half_width
            bbox_min_y = int((bbox_min_y + bbox_max_y) / 2) - half_width
            bbox_max_x = bbox_min_x + chip_width
            bbox_max_y = bbox_min_y + chip_width

        if self._border_exclude > 0:
            if bbox_min_x <= self._border_exclude:
                continue
            if bbox_min_y <= self._border_exclude:
                continue
            if bbox_max_x >= img_max_x - self._border_exclude:
                continue
            if bbox_max_y >= img_max_y - self._border_exclude:
                continue
        else:
            if bbox_min_x < 0:
                bbox_min_x = 0
            if bbox_min_y < 0:
                bbox_min_y = 0
            if bbox_max_x > img_max_x:
                bbox_max_x = img_max_x
            if bbox_max_y > img_max_y:
                bbox_max_y = img_max_y

        bbox_area = (bbox_max_x - bbox_min_x) * (bbox_max_y - bbox_min_y)

        if self._area_lower_bound > 0 and bbox_area < self._area_lower_bound:
            continue
        if self._area_upper_bound > 0 and bbox_area > self._area_upper_bound:
            continue

        crop = img[bbox_min_y:bbox_max_y, bbox_min_x:bbox_max_x]

        image_chips.append(crop)
        detection_ids.append(i)

    # Run classifier on ROIs
    classifications = list(predictor.predict(image_chips))

    # Put classifications back into detections
    output = DetectedObjectSet()

    for i, det in enumerate(detections):
        if len(detection_ids) == 0 or i != detection_ids[0]:
            output.add(det)
            continue

        new_class = classifications[0]

        if new_class.data.get('prob', None) is not None:
            # If we have a probability for each class, use that
            class_names = list(new_class.classes)
            class_scores = list(new_class.prob)
        else:
            # Otherwise we only have the score for the predicted class
            class_names = [new_class.classes[new_class.cidx]]
            class_scores = [new_class.conf]

        if self._average_prior and det.type is not None:
            priors = det.type
            prior_names = priors.class_names()
            for name in prior_names:
                if name in class_names:
                    class_scores[class_names.index(name)] += priors.score(name)
                else:
                    class_names.append(name)
                    class_scores.append(priors.score(name))
            for i in range(len(class_scores)):
                class_scores[i] = class_scores[i] * 0.5

        detected_object_type = DetectedObjectType(class_names, class_scores)
        det.type = detected_object_type
        output.add(det)

        detection_ids.pop(0)
        classifications.pop(0)

    return output
def create_detected_object_set():
    dos = DetectedObjectSet()
    dos.add(DetectedObject(create_bounding_box()))
    return dos
def _step(self):
    try:
        def timing(desc, f):
            """Return f(), printing a message about how long it took"""
            start = timer()
            result = f()
            end = timer()
            print('%%%', desc, ' elapsed time: ', end - start, sep='')
            return result

        print('step', self._step_id)

        # grab image container from port using traits
        in_img_c = self.grab_input_using_trait('image')
        timestamp = self.grab_input_using_trait('timestamp')
        dos_ptr = self.grab_input_using_trait('detected_object_set')
        print('timestamp =', repr(timestamp))

        # Get current frame
        im = get_pil_image(in_img_c.image()).convert('RGB')

        # Get detection bbox
        if self._gtbbox_flag:
            dos = self._m_bbox[self._step_id]
            bbox_num = len(dos)
        else:
            dos = dos_ptr.select(self._select_threshold)
            bbox_num = dos.size()
        #print('bbox list len is', dos.size())

        det_obj_set = DetectedObjectSet()
        if bbox_num == 0:
            print('!!! No bbox is provided on this frame. Skipping this frame !!!')
        else:
            # interaction features
            grid_feature_list = timing('grid feature', lambda: self._grid(
                im.size, dos, self._gtbbox_flag))

            # appearance features (format: pytorch tensor)
            pt_app_features = timing('app feature', lambda: self._app_feature_extractor(
                im, dos, self._gtbbox_flag))

            track_state_list = []
            next_track_id = int(self._track_set.get_max_track_id()) + 1

            # get new track state from new frame and detections
            for idx, item in enumerate(dos):
                if self._gtbbox_flag:
                    bbox = item
                    fid = self._step_id
                    ts = self._step_id
                    d_obj = DetectedObject(bbox=item, confidence=1.0)
                else:
                    bbox = item.bounding_box()
                    fid = timestamp.get_frame()
                    ts = timestamp.get_time_usec()
                    d_obj = item

                if self._add_features_to_detections:
                    # store app feature to detected_object
                    app_f = new_descriptor(g_config.A_F_num)
                    app_f[:] = pt_app_features[idx].numpy()
                    d_obj.set_descriptor(app_f)
                det_obj_set.add(d_obj)

                # build track state for current bbox for matching
                cur_ts = track_state(
                    frame_id=self._step_id,
                    bbox_center=bbox.center(),
                    interaction_feature=grid_feature_list[idx],
                    app_feature=pt_app_features[idx],
                    bbox=[int(bbox.min_x()), int(bbox.min_y()),
                          int(bbox.width()), int(bbox.height())],
                    detected_object=d_obj,
                    sys_frame_id=fid, sys_frame_time=ts)
                track_state_list.append(cur_ts)

            # if there are no tracks, generate new tracks from the track_state_list
            if not self._track_flag:
                next_track_id = self._track_set.add_new_track_state_list(
                    next_track_id, track_state_list,
                    self._track_initialization_threshold)
                self._track_flag = True
            else:
                # check whether we need to terminate a track
                for track in list(self._track_set.iter_active()):
                    # terminating a track based on readin_frame_id or original_frame_id gap
                    if (self._step_id - track[-1].frame_id > self._terminate_track_threshold
                            or fid - track[-1].sys_frame_id > self._sys_terminate_track_threshold):
                        self._track_set.deactivate_track(track)

                # call IOU tracker
                if self._IOU_flag:
                    self._track_set, track_state_list = timing('IOU tracking', lambda: (
                        self._iou_tracker(self._track_set, track_state_list)))

                #print('***track_set len', len(self._track_set))
                #print('***track_state_list len', len(track_state_list))

                # estimate similarity matrix
                similarity_mat, track_idx_list = timing('SRNN association', lambda: (
                    self._srnn_matching(self._track_set, track_state_list,
                                        self._ts_threshold)))

                # reset update_flag
                self._track_set.reset_updated_flag()

                # Hungarian algorithm
                row_idx_list, col_idx_list = timing('Hungarian algorithm', lambda: (
                    sp.optimize.linear_sum_assignment(similarity_mat)))

                for i in range(len(row_idx_list)):
                    r = row_idx_list[i]
                    c = col_idx_list[i]

                    if -similarity_mat[r, c] < self._similarity_threshold:
                        # initialize a new track
                        if (track_state_list[c].detected_object.confidence()
                                >= self._track_initialization_threshold):
                            self._track_set.add_new_track_state(
                                next_track_id, track_state_list[c])
                            next_track_id += 1
                    else:
                        # add to existing track
                        self._track_set.update_track(track_idx_list[r],
                                                     track_state_list[c])

                # for the remaining unmatched track states, we initialize new tracks
                if len(track_state_list) - len(col_idx_list) > 0:
                    for i in range(len(track_state_list)):
                        if (i not in col_idx_list
                                and (track_state_list[i].detected_object.confidence()
                                     >= self._track_initialization_threshold)):
                            self._track_set.add_new_track_state(
                                next_track_id, track_state_list[i])
                            next_track_id += 1

            print('total tracks', len(self._track_set))

        # push track set to output port
        ot_list = ts2ot_list(self._track_set)
        ots = ObjectTrackSet(ot_list)

        self.push_to_port_using_trait('object_track_set', ots)
        self.push_to_port_using_trait('detected_object_set', det_obj_set)

        self._step_id += 1

        self._base_step()

    except BaseException as e:
        print(repr(e))
        import traceback
        print(traceback.format_exc())
        sys.stdout.flush()
        raise
def extract_chips_for_dets(self, image_files, truth_sets):
    import cv2
    output_files = []
    output_dets = []

    for i in range(len(image_files)):
        filename = image_files[i]
        groundtruth = truth_sets[i]
        detections = []
        scale = 1.0

        if self._target_type_scales:
            scale = self.compute_scale_factor(groundtruth)

        if len(groundtruth) > 0:
            img = cv2.imread(filename)

            if len(np.shape(img)) < 2:
                continue

            img_max_x = np.shape(img)[1]
            img_max_y = np.shape(img)[0]

            # Optionally scale image
            if scale != 1.0:
                img_max_x = int(scale * img_max_x)
                img_max_y = int(scale * img_max_y)
                img = cv2.resize(img, (img_max_x, img_max_y))

            # Run optional background detector on data
            if self._detector_model:
                kw_image = Image(img)
                kw_image_container = ImageContainer(kw_image)
                detections = self._detector.detect(kw_image_container)

        if len(groundtruth) == 0 and len(detections) == 0:
            continue

        overlaps = np.zeros((len(detections), len(groundtruth)))
        det_boxes = []

        for det in detections:
            bbox = det.bounding_box
            det_boxes.append((int(bbox.min_x()), int(bbox.min_y()),
                              int(bbox.width()), int(bbox.height())))

        for i, gt in enumerate(groundtruth):
            # Extract chip for this detection
            bbox = gt.bounding_box

            bbox_min_x = int(bbox.min_x() * scale)
            bbox_max_x = int(bbox.max_x() * scale)
            bbox_min_y = int(bbox.min_y() * scale)
            bbox_max_y = int(bbox.max_y() * scale)

            bbox_width = bbox_max_x - bbox_min_x
            bbox_height = bbox_max_y - bbox_min_y

            max_overlap = 0.0

            for j, det in enumerate(det_boxes):
                # Compute overlap between detection and truth
                (det_min_x, det_min_y, det_width, det_height) = det

                # Get the overlap rectangle
                overlap_x0 = max(bbox_min_x, det_min_x)
                overlap_y0 = max(bbox_min_y, det_min_y)
                overlap_x1 = min(bbox_max_x, det_min_x + det_width)
                overlap_y1 = min(bbox_max_y, det_min_y + det_height)

                # Check if there is an overlap
                if overlap_x1 - overlap_x0 <= 0 or overlap_y1 - overlap_y0 <= 0:
                    continue

                # If yes, calculate the ratio of the overlap
                det_area = float(det_width * det_height)
                gt_area = float(bbox_width * bbox_height)
                int_area = float(
                    (overlap_x1 - overlap_x0) * (overlap_y1 - overlap_y0))
                overlap = min(int_area / det_area, int_area / gt_area)

                overlaps[j, i] = overlap

                if overlap >= self._min_overlap_for_association and overlap > max_overlap:
                    max_overlap = overlap

                    bbox_min_x = det_min_x
                    bbox_min_y = det_min_y
                    bbox_max_x = det_min_x + det_width
                    bbox_max_y = det_min_y + det_height

                    bbox_width = det_width
                    bbox_height = det_height

            if self._chip_method == "fixed_width":
                chip_width = int(self._chip_width)
                half_width = int(chip_width / 2)

                bbox_min_x = int((bbox_min_x + bbox_max_x) / 2) - half_width
                bbox_min_y = int((bbox_min_y + bbox_max_y) / 2) - half_width
                bbox_max_x = bbox_min_x + chip_width
                bbox_max_y = bbox_min_y + chip_width

                bbox_width = chip_width
                bbox_height = chip_width

            bbox_area = bbox_width * bbox_height

            if self._area_lower_bound > 0 and bbox_area < self._area_lower_bound:
                continue
            if self._area_upper_bound > 0 and bbox_area > self._area_upper_bound:
                continue

            if self._reduce_category and gt.type() and \
               gt.type().get_most_likely_class() == self._reduce_category and \
               random.uniform(0, 1) < 0.90:
                continue

            if self._border_exclude > 0:
                if bbox_min_x <= self._border_exclude:
                    continue
                if bbox_min_y <= self._border_exclude:
                    continue
                if bbox_max_x >= img_max_x - self._border_exclude:
                    continue
                if bbox_max_y >= img_max_y - self._border_exclude:
                    continue

            crop = img[bbox_min_y:bbox_max_y, bbox_min_x:bbox_max_x]
            self._sample_count = self._sample_count + 1
            crop_str = ('%09d' % self._sample_count) + ".png"
            new_file = os.path.join(self._chip_directory, crop_str)
            cv2.imwrite(new_file, crop)

            # Set new box size for this detection
            gt.bounding_box = BoundingBoxD(0, 0, np.shape(crop)[1],
                                           np.shape(crop)[0])
            new_set = DetectedObjectSet()
            new_set.add(gt)

            output_files.append(new_file)
            output_dets.append(new_set)

        neg_count = 0

        for j, det in enumerate(detections):
            if max(overlaps[j]) >= self._max_overlap_for_negative:
                continue

            bbox = det.bounding_box

            bbox_min_x = int(bbox.min_x())
            bbox_max_x = int(bbox.max_x())
            bbox_min_y = int(bbox.min_y())
            bbox_max_y = int(bbox.max_y())

            bbox_width = bbox_max_x - bbox_min_x
            bbox_height = bbox_max_y - bbox_min_y

            bbox_area = bbox_width * bbox_height

            if self._chip_method == "fixed_width":
                chip_width = int(self._chip_width)
                half_width = int(chip_width / 2)

                bbox_min_x = int((bbox_min_x + bbox_max_x) / 2) - half_width
                bbox_min_y = int((bbox_min_y + bbox_max_y) / 2) - half_width
                bbox_max_x = bbox_min_x + chip_width
                bbox_max_y = bbox_min_y + chip_width

                bbox_width = chip_width
                bbox_height = chip_width

            if self._area_lower_bound > 0 and bbox_area < self._area_lower_bound:
                continue
            if self._area_upper_bound > 0 and bbox_area > self._area_upper_bound:
                continue

            if self._border_exclude > 0:
                if bbox_min_x <= self._border_exclude:
                    continue
                if bbox_min_y <= self._border_exclude:
                    continue
                if bbox_max_x >= img_max_x - self._border_exclude:
                    continue
                if bbox_max_y >= img_max_y - self._border_exclude:
                    continue

            # Handle random factor
            if self._max_neg_per_frame < 1.0 and random.uniform(0, 1) > self._max_neg_per_frame:
                break

            crop = img[bbox_min_y:bbox_max_y, bbox_min_x:bbox_max_x]
            self._sample_count = self._sample_count + 1
            crop_str = ('%09d' % self._sample_count) + ".png"
            new_file = os.path.join(self._chip_directory, crop_str)
            cv2.imwrite(new_file, crop)

            # Set new box size for this detection
            det.bounding_box = BoundingBoxD(0, 0, np.shape(crop)[1],
                                            np.shape(crop)[0])
            det.type = DetectedObjectType(self._negative_category, 1.0)
            new_set = DetectedObjectSet()
            new_set.add(det)

            output_files.append(new_file)
            output_dets.append(new_set)

            # Check maximum negative count
            neg_count = neg_count + 1
            if neg_count > self._max_neg_per_frame:
                break

    return [output_files, output_dets]