def _classification_to_kwiver_detections(classification, w, h):
    """
    Convert kwarray classifications to kwiver detected object sets

    Args:
        classification (bioharn.clf_predict.Classification)
        w (int): width of image
        h (int): height of image

    Returns:
        kwiver.vital.types.DetectedObjectSet
    """
    detected_objects = DetectedObjectSet()

    if classification.data.get('prob', None) is not None:
        # If we have a probability for each class, use that
        class_names = list(classification.classes)
        class_prob = classification.prob
        detected_object_type = DetectedObjectType(class_names, class_prob)
    else:
        # Otherwise we only have the score for the predicted class
        class_name = classification.classes[classification.cidx]
        class_score = classification.conf
        detected_object_type = DetectedObjectType(class_name, class_score)

    bounding_box = BoundingBoxD(0, 0, w, h)
    detected_object = DetectedObject(
        bounding_box, classification.conf, detected_object_type)
    detected_objects.add(detected_object)
    return detected_objects
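

# A minimal, hypothetical usage sketch for the converter above. The stand-in
# object is NOT a real bioharn.clf_predict.Classification; it only mimics the
# attributes the converter reads (data, classes, cidx, conf).
def _demo_classification_to_kwiver_detections():
    import types
    fake_clf = types.SimpleNamespace(
        data={},                           # no per-class probability vector
        classes=['flatfish', 'rockfish'],  # assumed class names
        cidx=1,                            # index of the predicted class
        conf=0.87,                         # confidence of the predicted class
    )
    dets = _classification_to_kwiver_detections(fake_clf, w=640, h=480)
    # The classification applies to the whole image, so exactly one
    # full-frame detection is produced.
    assert len(dets) == 1
    return dets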
def detect(self, in_img_c):
    import tensorflow as tf
    import humanfriendly

    image_height = in_img_c.height()
    image_width = in_img_c.width()

    if (self.norm_image_type and self.norm_image_type != "none"):
        print("Normalizing input image")

        in_img = in_img_c.image().asarray().astype("uint16")

        bottom, top = self.get_scaling_values(self.norm_image_type,
                                              in_img, image_height)
        in_img = self.lin_normalize_image(in_img, bottom, top)

        # Replicate the normalized single-channel image across 3 channels
        in_img = np.tile(in_img, (1, 1, 3))
    else:
        in_img = np.array(get_pil_image(in_img_c.image()).convert("RGB"))

    start_time = time.time()
    boxes, scores, classes = self.generate_detection(
        self.detection_graph, in_img)
    elapsed = time.time() - start_time
    print("Done running detector in {}".format(
        humanfriendly.format_timespan(elapsed)))

    good_boxes = []
    detections = DetectedObjectSet()

    for i in range(0, len(scores)):
        if (scores[i] >= self.confidence_thresh):
            bbox = boxes[i]
            good_boxes.append(bbox)

            # Boxes are relative [top, left, bottom, right]; convert to
            # absolute pixel coordinates
            top_rel = bbox[0]
            left_rel = bbox[1]
            bottom_rel = bbox[2]
            right_rel = bbox[3]

            xmin = left_rel * image_width
            ymin = top_rel * image_height
            xmax = right_rel * image_width
            ymax = bottom_rel * image_height

            dot = DetectedObjectType(self.category_name, scores[i])
            obj = DetectedObject(BoundingBoxD(xmin, ymin, xmax, ymax),
                                 scores[i], dot)
            detections.add(obj)

    print("Detected {}".format(len(good_boxes)))
    return detections
def _create_detected_object_set():
    from kwiver.vital.types import (
        DetectedObject,
        DetectedObjectSet,
        BoundingBoxD,
    )
    dos = DetectedObjectSet()
    bbox = BoundingBoxD(0, 10, 100, 50)
    dos.add(DetectedObject(bbox, 0.2))
    dos.add(DetectedObject(bbox, 0.5))
    dos.add(DetectedObject(bbox, 0.4))
    return dos
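

# A short, hypothetical check of the fixture above. It relies only on
# behaviors the rest of this module already uses: DetectedObjectSet supports
# len() and iteration, and each entry exposes a bounding_box property.
def _demo_detected_object_set_fixture():
    dos = _create_detected_object_set()
    assert len(dos) == 3
    for det in dos:
        # every detection in the fixture shares the same 100x40 box
        assert det.bounding_box.width() == 100.0
        assert det.bounding_box.height() == 40.0
    return dos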
def _dowork(self, img_container):
    """
    Helper to decouple the algorithm and pipeline logic

    CommandLine:
        xdoctest viame.processes.camtrawl.processes CamtrawlDetectFishProcess._dowork

    Example:
        >>> from viame.processes.camtrawl.processes import *
        >>> from kwiver.vital.types import ImageContainer
        >>> import kwiver.sprokit.pipeline.config
        >>> # construct dummy process instance
        >>> conf = kwiver.sprokit.pipeline.config.empty_config()
        >>> self = CamtrawlDetectFishProcess(conf)
        >>> self._configure()
        >>> # construct test data
        >>> from vital.util import VitalPIL
        >>> from PIL import Image as PILImage
        >>> pil_img = PILImage.open(ub.grabdata('https://i.imgur.com/Jno2da3.png'))
        >>> pil_img = PILImage.fromarray(np.zeros((512, 512, 3), dtype=np.uint8))
        >>> img_container = ImageContainer(VitalPIL.from_pil(pil_img))
        >>> # Initialize the background detector by sending 10 black frames
        >>> for i in range(10):
        >>>     empty_set = self._dowork(img_container)
        >>> # now add a white box that should be detected
        >>> np_img = np.zeros((512, 512, 3), dtype=np.uint8)
        >>> np_img[300:340, 220:380] = 255
        >>> img_container = ImageContainer.fromarray(np_img)
        >>> detection_set = self._dowork(img_container)
        >>> assert len(detection_set) == 1
        >>> obj = detection_set[0]
    """
    # This should be read as np.uint8
    np_img = img_container.asarray()

    detection_set = DetectedObjectSet()
    ct_detections = self.detector.detect(np_img)

    for detection in ct_detections:
        bbox = BoundingBoxD(*detection.bbox.coords)
        mask = detection.mask.astype(np.uint8)
        vital_mask = ImageContainer.fromarray(mask)
        dot = DetectedObjectType("Motion", 1.0)
        obj = DetectedObject(bbox, 1.0, dot, mask=vital_mask)
        detection_set.add(obj)
    return detection_set
def merge(self, det_sets):
    # Get detection HL info in a list
    pred_sets = []
    for det_set in det_sets:
        pred_set = []
        for det in det_set:
            # Extract box info for this det
            bbox = det.bounding_box
            bbox_min_x = int(bbox.min_x())
            bbox_max_x = int(bbox.max_x())
            bbox_min_y = int(bbox.min_y())
            bbox_max_y = int(bbox.max_y())

            # Extract type info for this det
            if det.type is None:
                continue

            #class_names = list( det.type.class_names() )
            #class_scores = [ det.type.score( n ) for n in class_names ]

            class_name = det.type.get_most_likely_class()
            class_score = det.type.score(class_name)

            pred_set.append([bbox_min_x, bbox_min_y,
                             bbox_max_x, bbox_max_y,
                             class_name, class_score])
        pred_sets.append(pred_set)

    # Run merging algorithm
    #ensemble_preds = ensemble_box( preds_set, self._fusion_weights,
    #  self._iou_thr, self._skip_box_thr, self._sigma, self._fusion_type )
    ensemble_preds = []

    # Compile output detections
    output = DetectedObjectSet()

    for pred in ensemble_preds:
        score = pred[5]
        bbox = BoundingBoxD(pred[0], pred[1], pred[2], pred[3])
        dot = DetectedObjectType(pred[4], score)
        det = DetectedObject(bbox, score, dot)
        output.add(det)

    return output
def _kwimage_to_kwiver_detections(detections):
    """
    Convert kwimage detections to kwiver detected object sets

    Args:
        detections (kwimage.Detections)

    Returns:
        kwiver.vital.types.DetectedObjectSet
    """
    from kwiver.vital.types.types import ImageContainer, Image

    segmentations = None
    # convert segmentation masks
    if 'segmentations' in detections.data:
        segmentations = detections.data['segmentations']

    boxes = detections.boxes.to_tlbr()
    scores = detections.scores
    class_idxs = detections.class_idxs

    if not segmentations:
        # Placeholders
        segmentations = (None,) * len(boxes)

    # convert to kwiver format, apply threshold
    detected_objects = DetectedObjectSet()

    for tlbr, score, cidx, seg in zip(boxes.data, scores,
                                      class_idxs, segmentations):
        class_name = detections.classes[cidx]
        bbox_int = np.round(tlbr).astype(np.int32)
        bounding_box = BoundingBoxD(
            bbox_int[0], bbox_int[1], bbox_int[2], bbox_int[3])
        detected_object_type = DetectedObjectType(class_name, score)
        detected_object = DetectedObject(
            bounding_box, score, detected_object_type)
        if seg is not None:
            mask = seg.to_relative_mask().numpy().data
            detected_object.mask = ImageContainer(Image(mask))
        detected_objects.add(detected_object)
    return detected_objects
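

# A brief, hypothetical sketch of feeding a hand-built kwimage.Detections
# through the converter above. The class names ('fish', 'scallop') and box
# values are placeholders, and this assumes a kwimage version whose Boxes
# accepts the 'tlbr' (x1, y1, x2, y2) format that the converter itself uses.
def _demo_kwimage_to_kwiver_detections():
    import kwimage
    dets = kwimage.Detections(
        boxes=kwimage.Boxes(
            np.array([[10, 20, 110, 220]], dtype=np.float32), 'tlbr'),
        scores=np.array([0.9], dtype=np.float32),
        class_idxs=np.array([0]),
        classes=['fish', 'scallop'],
    )
    kwiver_dets = _kwimage_to_kwiver_detections(dets)
    # one input box with no segmentation yields one kwiver detection
    assert len(kwiver_dets) == 1
    return kwiver_dets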
def detect(self, image_data):
    input_image = image_data.asarray().astype('uint8')

    if self._rgb_to_bgr:
        input_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)

    from mmdet.apis import inference_detector
    detections = inference_detector(self._model, input_image)

    if isinstance(detections, tuple):
        bbox_result, segm_result = detections
    else:
        bbox_result, segm_result = detections, None

    if np.size(bbox_result) > 0:
        bboxes = np.vstack(bbox_result)
    else:
        bboxes = []

    # convert segmentation masks
    masks = []
    if segm_result is not None:
        segms = mmcv.concat_list(segm_result)
        # Only decode masks for boxes above the detector threshold
        inds = np.where(bboxes[:, -1] > self._thresh)[0]
        for i in inds:
            masks.append(maskUtils.decode(segms[i]).astype(bool))

    # collect labels
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_result)
    ]

    if np.size(labels) > 0:
        labels = np.concatenate(labels)
    else:
        labels = []

    # convert to kwiver format, apply threshold
    output = DetectedObjectSet()

    for bbox, label in zip(bboxes, labels):
        class_confidence = float(bbox[-1])
        if class_confidence < self._thresh:
            continue

        bbox_int = bbox.astype(np.int32)
        bounding_box = BoundingBoxD(
            bbox_int[0], bbox_int[1], bbox_int[2], bbox_int[3])

        class_name = self._labels[label]
        detected_object_type = DetectedObjectType(class_name, class_confidence)
        detected_object = DetectedObject(
            bounding_box, class_confidence, detected_object_type)
        output.add(detected_object)

    if np.size(labels) > 0 and self._display_detections:
        mmcv.imshow_det_bboxes(
            input_image, bboxes, labels,
            class_names=self._labels,
            score_thr=self._thresh,
            show=True)

    return output
def _step(self):
    # Get all inputs even ones we don't use
    in_img_c = self.grab_input_using_trait('image')
    timestamp = self.grab_input_using_trait('timestamp')

    if not timestamp.has_valid_frame():
        raise RuntimeError("Frame timestamps must contain frame IDs")

    frame_id = timestamp.get_frame()

    if self.has_input_port_edge_using_trait('detected_object_set'):
        detections = self.grab_input_using_trait('detected_object_set')
    else:
        detections = DetectedObjectSet()
    if self.has_input_port_edge_using_trait('initializations'):
        initializations = self.grab_input_using_trait('initializations')
    else:
        initializations = ObjectTrackSet()
    if self.has_input_port_edge_using_trait('recommendations'):
        recommendations = self.grab_input_using_trait('recommendations')
    else:
        recommendations = ObjectTrackSet()
    if self.has_input_port_edge_using_trait('evaluation_requests'):
        requests = self.grab_input_using_trait('evaluation_requests')
    else:
        requests = DetectedObjectSet()

    print('mdnet tracker timestamp = {!r}'.format(timestamp))

    # Handle new track external initialization
    init_track_pool = initializations.tracks()
    recc_track_pool = recommendations.tracks()
    init_track_ids = []
    img_used = False

    if len(init_track_pool) != 0 or len(self._trackers) != 0:
        img_npy = self.format_image(in_img_c)
        img_used = True

    for trk in init_track_pool:
        # Special case, initialize a track on a previous frame
        if trk[trk.last_frame].frame_id == self._last_frame_id and \
                (trk.id not in self._track_init_frames or
                 self._track_init_frames[trk.id] < self._last_frame_id):
            tid = trk.id
            cbox = trk[trk.last_frame].detection().bounding_box()
            bbox = [cbox.min_x(), cbox.min_y(), cbox.width(), cbox.height()]
            self._last_frame = self.format_image(self._last_frame)
            self._trackers[tid] = mdnet.MDNetTracker(self._last_frame, bbox)
            self._tracks[tid] = [ObjectTrackState(timestamp, cbox, 1.0)]
            self._track_init_frames[tid] = self._last_frame_id
        # This track has an initialization signal for the current frame
        elif trk[trk.last_frame].frame_id == frame_id:
            tid = trk.id
            cbox = trk[trk.last_frame].detection().bounding_box()
            bbox = [cbox.min_x(), cbox.min_y(), cbox.width(), cbox.height()]
            self._trackers[tid] = mdnet.MDNetTracker(img_npy, bbox)
            self._tracks[tid] = [ObjectTrackState(timestamp, cbox, 1.0)]
            init_track_ids.append(tid)
            self._track_init_frames[tid] = frame_id

    # Update existing tracks
    for tid in self._trackers.keys():
        if tid in init_track_ids:
            continue  # Already processed (initialized) on frame
        # Check if there's a recommendation for the update
        recc_bbox = []
        for trk in recc_track_pool:
            if trk.id == tid and trk[trk.last_frame].frame_id == frame_id:
                cbox = trk[trk.last_frame].detection().bounding_box()
                recc_bbox = [cbox.min_x(), cbox.min_y(),
                             cbox.width(), cbox.height()]
                break
        bbox, score = self._trackers[tid].update(img_npy, likely_bbox=recc_bbox)
        if score > mdnet.opts['success_thr']:
            cbox = BoundingBoxD(
                bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3])
            new_state = ObjectTrackState(timestamp, cbox, score)
            self._tracks[tid].append(new_state)

    # Handle track termination
    # TODO: Remove old or dead tracks

    # Classify requested evaluations
    # TODO: Evaluate input detections
    output_evaluations = DetectedObjectSet()

    # Output results
    output_tracks = ObjectTrackSet(
        [Track(tid, trk) for tid, trk in self._tracks.items()])

    self.push_to_port_using_trait('timestamp', timestamp)
    self.push_to_port_using_trait('object_track_set', output_tracks)
    self.push_to_port_using_trait('evaluations', output_evaluations)

    self._last_frame_id = timestamp.get_frame()
    if img_used:
        self._last_frame = img_npy
    else:
        self._last_frame = in_img_c
    self._base_step()
def extract_chips_for_dets(self, image_files, truth_sets):
    import cv2
    output_files = []
    output_dets = []

    for i in range(len(image_files)):
        filename = image_files[i]
        groundtruth = truth_sets[i]
        detections = []
        scale = 1.0

        if self._target_type_scales:
            scale = self.compute_scale_factor(groundtruth)

        if len(groundtruth) > 0:
            img = cv2.imread(filename)

            if len(np.shape(img)) < 2:
                continue

            img_max_x = np.shape(img)[1]
            img_max_y = np.shape(img)[0]

            # Optionally scale image
            if scale != 1.0:
                img_max_x = int(scale * img_max_x)
                img_max_y = int(scale * img_max_y)
                img = cv2.resize(img, (img_max_x, img_max_y))

            # Run optional background detector on data
            if self._detector_model:
                kw_image = Image(img)
                kw_image_container = ImageContainer(kw_image)
                detections = self._detector.detect(kw_image_container)

        if len(groundtruth) == 0 and len(detections) == 0:
            continue

        overlaps = np.zeros((len(detections), len(groundtruth)))
        det_boxes = []

        for det in detections:
            bbox = det.bounding_box
            det_boxes.append((int(bbox.min_x()), int(bbox.min_y()),
                              int(bbox.width()), int(bbox.height())))

        for i, gt in enumerate(groundtruth):
            # Extract chip for this detection
            bbox = gt.bounding_box

            bbox_min_x = int(bbox.min_x() * scale)
            bbox_max_x = int(bbox.max_x() * scale)
            bbox_min_y = int(bbox.min_y() * scale)
            bbox_max_y = int(bbox.max_y() * scale)

            bbox_width = bbox_max_x - bbox_min_x
            bbox_height = bbox_max_y - bbox_min_y

            max_overlap = 0.0

            for j, det in enumerate(det_boxes):
                # Compute overlap between detection and truth
                (det_min_x, det_min_y, det_width, det_height) = det

                # Get the overlap rectangle
                overlap_x0 = max(bbox_min_x, det_min_x)
                overlap_y0 = max(bbox_min_y, det_min_y)
                overlap_x1 = min(bbox_max_x, det_min_x + det_width)
                overlap_y1 = min(bbox_max_y, det_min_y + det_height)

                # Check if there is an overlap
                if overlap_x1 - overlap_x0 <= 0 or overlap_y1 - overlap_y0 <= 0:
                    continue

                # If yes, calculate the ratio of the overlap
                det_area = float(det_width * det_height)
                gt_area = float(bbox_width * bbox_height)
                int_area = float(
                    (overlap_x1 - overlap_x0) * (overlap_y1 - overlap_y0))
                overlap = min(int_area / det_area, int_area / gt_area)
                overlaps[j, i] = overlap

                if overlap >= self._min_overlap_for_association and \
                   overlap > max_overlap:
                    max_overlap = overlap

                    bbox_min_x = det_min_x
                    bbox_min_y = det_min_y
                    bbox_max_x = det_min_x + det_width
                    bbox_max_y = det_min_y + det_height

                    bbox_width = det_width
                    bbox_height = det_height

            if self._chip_method == "fixed_width":
                chip_width = int(self._chip_width)
                half_width = int(chip_width / 2)

                bbox_min_x = int((bbox_min_x + bbox_max_x) / 2) - half_width
                bbox_min_y = int((bbox_min_y + bbox_max_y) / 2) - half_width
                bbox_max_x = bbox_min_x + chip_width
                bbox_max_y = bbox_min_y + chip_width

                bbox_width = chip_width
                bbox_height = chip_width

            bbox_area = bbox_width * bbox_height

            if self._area_lower_bound > 0 and bbox_area < self._area_lower_bound:
                continue
            if self._area_upper_bound > 0 and bbox_area > self._area_upper_bound:
                continue

            if self._reduce_category and gt.type() and \
               gt.type().get_most_likely_class() == self._reduce_category and \
               random.uniform(0, 1) < 0.90:
                continue

            if self._border_exclude > 0:
                if bbox_min_x <= self._border_exclude:
                    continue
                if bbox_min_y <= self._border_exclude:
                    continue
                if bbox_max_x >= img_max_x - self._border_exclude:
                    continue
                if bbox_max_y >= img_max_y - self._border_exclude:
                    continue

            crop = img[bbox_min_y:bbox_max_y, bbox_min_x:bbox_max_x]
            self._sample_count = self._sample_count + 1
            crop_str = ('%09d' % self._sample_count) + ".png"
            new_file = os.path.join(self._chip_directory, crop_str)
            cv2.imwrite(new_file, crop)

            # Set new box size for this detection
            gt.bounding_box = BoundingBoxD(
                0, 0, np.shape(crop)[1], np.shape(crop)[0])
            new_set = DetectedObjectSet()
            new_set.add(gt)

            output_files.append(new_file)
            output_dets.append(new_set)

        neg_count = 0

        for j, det in enumerate(detections):
            if max(overlaps[j]) >= self._max_overlap_for_negative:
                continue

            bbox = det.bounding_box

            bbox_min_x = int(bbox.min_x())
            bbox_max_x = int(bbox.max_x())
            bbox_min_y = int(bbox.min_y())
            bbox_max_y = int(bbox.max_y())

            bbox_width = bbox_max_x - bbox_min_x
            bbox_height = bbox_max_y - bbox_min_y
            bbox_area = bbox_width * bbox_height

            if self._chip_method == "fixed_width":
                chip_width = int(self._chip_width)
                half_width = int(chip_width / 2)

                bbox_min_x = int((bbox_min_x + bbox_max_x) / 2) - half_width
                bbox_min_y = int((bbox_min_y + bbox_max_y) / 2) - half_width
                bbox_max_x = bbox_min_x + chip_width
                bbox_max_y = bbox_min_y + chip_width

                bbox_width = chip_width
                bbox_height = chip_width

            if self._area_lower_bound > 0 and bbox_area < self._area_lower_bound:
                continue
            if self._area_upper_bound > 0 and bbox_area > self._area_upper_bound:
                continue

            if self._border_exclude > 0:
                if bbox_min_x <= self._border_exclude:
                    continue
                if bbox_min_y <= self._border_exclude:
                    continue
                if bbox_max_x >= img_max_x - self._border_exclude:
                    continue
                if bbox_max_y >= img_max_y - self._border_exclude:
                    continue

            # Handle random factor
            if self._max_neg_per_frame < 1.0 and \
               random.uniform(0, 1) > self._max_neg_per_frame:
                break

            crop = img[bbox_min_y:bbox_max_y, bbox_min_x:bbox_max_x]
            self._sample_count = self._sample_count + 1
            crop_str = ('%09d' % self._sample_count) + ".png"
            new_file = os.path.join(self._chip_directory, crop_str)
            cv2.imwrite(new_file, crop)

            # Set new box size for this detection
            det.bounding_box = BoundingBoxD(
                0, 0, np.shape(crop)[1], np.shape(crop)[0])
            det.type = DetectedObjectType(self._negative_category, 1.0)
            new_set = DetectedObjectSet()
            new_set.add(det)

            output_files.append(new_file)
            output_dets.append(new_set)

            # Check maximum negative count
            neg_count = neg_count + 1
            if neg_count > self._max_neg_per_frame:
                break

    return [output_files, output_dets]