def filter_truth(self, init_truth, categories):
    filtered_truth = DetectedObjectSet()
    use_frame = True
    max_length = int(self._max_scale_wrt_chip * float(self._chip_width))

    for i, item in enumerate(init_truth):
        if item.type() is None:
            continue
        class_lbl = item.type().get_most_likely_class()
        if categories is not None and not categories.has_class_id(class_lbl):
            continue
        if categories is not None:
            class_lbl = categories.get_class_name(class_lbl)
        elif class_lbl not in self._categories:
            self._categories.append(class_lbl)

        truth_type = DetectedObjectType(class_lbl, 1.0)
        item.set_type(truth_type)

        # In detector mode, skip the whole frame if any box is too large
        if self._mode == "detector" and \
           (item.bounding_box().width() > max_length or
            item.bounding_box().height() > max_length):
            use_frame = False
            break

        filtered_truth.add(item)

    if self._gt_frames_only and len(init_truth) == 0:
        use_frame = False

    return filtered_truth, use_frame
def detect( self, image_data ):
    # Convert image to an 8-bit numpy array
    input_image = image_data.asarray().astype( 'uint8' )

    # TODO: run a model over the numpy image to produce detections
    bboxes = []  # expected: one [x1, y1, x2, y2] array per detection
    labels = []  # expected: one class-name string per detection
    scores = []  # expected: one confidence float per detection

    # Convert detections to kwiver format
    output = DetectedObjectSet()

    for bbox, label, score in zip( bboxes, labels, scores ):
        bbox_int = bbox.astype( np.int32 )
        bounding_box = BoundingBox( bbox_int[0], bbox_int[1],
                                    bbox_int[2], bbox_int[3] )
        detected_object_type = DetectedObjectType( label, 1.0 )
        detected_object = DetectedObject( bounding_box,
                                          float( score ),
                                          detected_object_type )
        output.add( detected_object )

    return output
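# A minimal, self-contained sketch of the kind of output the TODO above
# might produce, run through the same conversion loop; the helper name
# _example_fill_detections and the box/label/score values are hypothetical,
# and the kwiver types are assumed to be in scope as in the function above.
def _example_fill_detections():
    example_bboxes = [ np.array( [ 10.0, 20.0, 50.0, 80.0 ] ) ]  # [x1, y1, x2, y2]
    example_labels = [ 'fish' ]
    example_scores = [ 0.9 ]

    output = DetectedObjectSet()
    for bbox, label, score in zip( example_bboxes, example_labels, example_scores ):
        bbox_int = bbox.astype( np.int32 )
        output.add( DetectedObject(
            BoundingBox( bbox_int[0], bbox_int[1], bbox_int[2], bbox_int[3] ),
            float( score ),
            DetectedObjectType( label, float( score ) ) ) )
    return output  # a one-detection DetectedObjectSet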
def _step(self): print("[DEBUG] ----- start step") face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt.xml') # grab image container from port using traits frame_c = self.grab_input_using_trait('image') # Get image from container frame_in = frame_c.image() #convert generic image to PIL pil_image = get_pil_image(frame_in) #convert to matrix frame = np.array(pil_image) detected_set = DetectedObjectSet() gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) gray_frame = cv2.equalizeHist(gray_frame) faces = face_cascade.detectMultiScale(gray_frame, 1.3, 5) for (x, y, w, h) in faces: bbox = BoundingBox(x, y, x + w, y + h) # get new image handle #new_ic = ImageContainer( frame ) dot = DetectedObjectType("face", 1.0) detected_set.add(DetectedObject(bbox, 1.0, dot)) # push object to output port self.push_to_port_using_trait('detected_object_set', detected_set) self._base_step()
def _step(self):
    print("[DEBUG] ----- start step")

    # grab image container from port using traits
    in_img_c = self.grab_input_using_trait("image")

    imageHeight = in_img_c.height()
    imageWidth = in_img_c.width()

    if self.normImageType:
        print("Normalize image")

        in_img = in_img_c.image().asarray().astype("uint16")

        bottom, top = self.get_scaling_values(self.normImageType, imageHeight)
        in_img = self.lin_normalize_image(in_img, bottom, top)
        in_img = np.tile(in_img, (1, 1, 3))
    else:
        in_img = np.array(get_pil_image(in_img_c.image()).convert("RGB"))

    self.push_to_port_using_trait("image_norm", ImageContainer(Image(in_img)))

    startTime = time.time()
    boxes, scores, classes = self.generate_detection(
        self.detection_graph, in_img)
    elapsed = time.time() - startTime
    print("Done running detector in {}".format(
        humanfriendly.format_timespan(elapsed)))

    goodBoxes = []
    detections = DetectedObjectSet()

    for i in range(0, len(scores)):
        if scores[i] >= self.confidenceThresh:
            bbox = boxes[i]
            goodBoxes.append(bbox)

            # Boxes are relative [top, left, bottom, right]; scale to pixels
            topRel = bbox[0]
            leftRel = bbox[1]
            bottomRel = bbox[2]
            rightRel = bbox[3]

            xmin = leftRel * imageWidth
            ymin = topRel * imageHeight
            xmax = rightRel * imageWidth
            ymax = bottomRel * imageHeight

            obj = DetectedObject(BoundingBox(xmin, ymin, xmax, ymax), scores[i])
            detections.add(obj)

    print("Detected {}".format(len(goodBoxes)))
    self.push_to_port_using_trait("detected_object_set", detections)
    self._base_step()
def detect(self, in_img_c):
    image_height = in_img_c.height()
    image_width = in_img_c.width()

    if self.norm_image_type and self.norm_image_type != "none":
        print("Normalizing input image")

        in_img = in_img_c.image().asarray().astype("uint16")

        bottom, top = self.get_scaling_values(self.norm_image_type,
                                              in_img, image_height)
        in_img = self.lin_normalize_image(in_img, bottom, top)
        in_img = np.tile(in_img, (1, 1, 3))
    else:
        in_img = np.array(get_pil_image(in_img_c.image()).convert("RGB"))

    start_time = time.time()
    boxes, scores, classes = self.generate_detection(
        self.detection_graph, in_img)
    elapsed = time.time() - start_time
    print("Done running detector in {}".format(
        humanfriendly.format_timespan(elapsed)))

    good_boxes = []
    detections = DetectedObjectSet()

    for i in range(0, len(scores)):
        if scores[i] >= self.confidence_thresh:
            bbox = boxes[i]
            good_boxes.append(bbox)

            # Boxes are relative [top, left, bottom, right]; scale to pixels
            top_rel = bbox[0]
            left_rel = bbox[1]
            bottom_rel = bbox[2]
            right_rel = bbox[3]

            xmin = left_rel * image_width
            ymin = top_rel * image_height
            xmax = right_rel * image_width
            ymax = bottom_rel * image_height

            dot = DetectedObjectType(self.category_name, scores[i])
            obj = DetectedObject(BoundingBox(xmin, ymin, xmax, ymax),
                                 scores[i], dot)
            detections.add(obj)

    print("Detected {}".format(len(good_boxes)))
    return detections
def detect( self, in_img_c ):
    import tensorflow as tf
    import humanfriendly

    image_height = in_img_c.height()
    image_width = in_img_c.width()

    if self.norm_image_type and self.norm_image_type != "none":
        print("Normalizing input image")

        in_img = in_img_c.image().asarray().astype("uint16")

        bottom, top = self.get_scaling_values(self.norm_image_type,
                                              in_img, image_height)
        in_img = self.lin_normalize_image(in_img, bottom, top)
        in_img = np.tile(in_img, (1, 1, 3))
    else:
        in_img = np.array(get_pil_image(in_img_c.image()).convert("RGB"))

    start_time = time.time()
    boxes, scores, classes = self.generate_detection(self.detection_graph, in_img)
    elapsed = time.time() - start_time
    print("Done running detector in {}".format(humanfriendly.format_timespan(elapsed)))

    good_boxes = []
    detections = DetectedObjectSet()

    for i in range(0, len(scores)):
        if scores[i] >= self.confidence_thresh:
            bbox = boxes[i]
            good_boxes.append(bbox)

            top_rel = bbox[0]
            left_rel = bbox[1]
            bottom_rel = bbox[2]
            right_rel = bbox[3]

            xmin = left_rel * image_width
            ymin = top_rel * image_height
            xmax = right_rel * image_width
            ymax = bottom_rel * image_height

            dot = DetectedObjectType(self.category_name, scores[i])
            obj = DetectedObject(BoundingBox(xmin, ymin, xmax, ymax), scores[i], dot)
            detections.add(obj)

    print("Detected {}".format(len(good_boxes)))
    return detections
def detect(self, image_c):
    cascade_classifier = cv2.CascadeClassifier(self.classifier_file)
    image = image_c.image().asarray().astype(np.uint8)
    detected_object_set = DetectedObjectSet()

    # NOTE: asarray() returns an RGB representation of the image
    gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    gray_image = cv2.equalizeHist(gray_image)

    faces = cascade_classifier.detectMultiScale(gray_image,
                                                self.scale_factor,
                                                self.min_neighbor)
    for (x, y, w, h) in faces:
        bbox = BoundingBox(x, y, x + w, y + h)
        dot = DetectedObjectType(self.classifier_name, 1.0)
        detected_object_set.add(DetectedObject(bbox, 1.0, dot))

    return detected_object_set
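# A minimal usage sketch for the cascade detector above; the OCVFaceDetector
# class name, the attribute values, and the image path are hypothetical, and
# ImageContainer/Image are assumed available here as they are in the _step
# normalization example earlier in this file.
def _example_run_cascade():
    import cv2
    detector = OCVFaceDetector()  # hypothetical class wrapping detect()
    detector.classifier_file = 'haarcascade_frontalface_alt.xml'
    detector.classifier_name = 'face'
    detector.scale_factor = 1.3
    detector.min_neighbor = 5

    # detect() expects RGB data inside the image container
    frame = cv2.cvtColor(cv2.imread('frame.png'), cv2.COLOR_BGR2RGB)
    faces = detector.detect(ImageContainer(Image(frame)))
    print('Detected {} faces'.format(len(faces)))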
def create_track_dict(self, object_track_set):
    if self.track_dict_initialized:
        return
    logger.debug("Initializing track dict")

    tracks = object_track_set.tracks()
    for i, track in enumerate(tracks):
        # Relevant API:
        #   track: all_frame_ids, append, back, find_state, first_frame,
        #          front, id, is_empty, last_frame, size
        #   state t: .timestamp, .track_point, .image_point, .detection()
        #   det: .bounding_box(), .type()
        #   bbox: .min_x(), .min_y(), .max_x(), .max_y()
        for t in track:
            det = t.detection()
            bbox = det.bounding_box()
            frame_id = t.frame_id
            logger.debug('Frame Id %d exists %s'
                         % (frame_id, str(frame_id in self.track_dict)))
            if frame_id not in self.track_dict:
                self.track_dict[frame_id] = DetectedObjectSet()
            # Pad the box by one pixel on each side
            new_bbox = BoundingBox(bbox.min_x() - 1, bbox.min_y() - 1,
                                   bbox.max_x() + 1, bbox.max_y() + 1)
            det.set_bounding_box(new_bbox)
            self.track_dict[frame_id].add(det)

    self.track_dict_initialized = True

    keys = sorted(list(self.track_dict.keys()))
    for frame_id in keys:
        logger.debug('Frame %d : %d input detections'
                     % (frame_id, len(self.track_dict[frame_id])))
    return
def detect( self, image_data ):
    input_image = image_data.asarray().astype( 'uint8' )

    from mmdet.apis import inference_detector
    gpu_string = 'cuda:' + str( self._gpu_index )
    detections = inference_detector( self._model, input_image,
                                     self._cfg, device=gpu_string )

    class_names = [ 'fish' ] * 10000

    if isinstance( detections, tuple ):
        bbox_result, segm_result = detections
    else:
        bbox_result, segm_result = detections, None

    if np.size( bbox_result ) > 0:
        bboxes = np.vstack( bbox_result )
    else:
        bboxes = []

    sys.stdout.write( "Detected " + str( len( bboxes ) ) + " objects" )
    sys.stdout.flush()

    # convert segmentation masks
    masks = []
    if segm_result is not None:
        segms = mmcv.concat_list( segm_result )
        inds = np.where( bboxes[:, -1] > self._thresh )[0]
        for i in inds:
            masks.append( maskUtils.decode( segms[i] ).astype( bool ) )

    # collect labels
    labels = [
        np.full( bbox.shape[0], i, dtype=np.int32 )
        for i, bbox in enumerate( bbox_result )
    ]

    if np.size( labels ) > 0:
        labels = np.concatenate( labels )
    else:
        labels = []

    # convert to kwiver format, apply threshold
    output = []
    for bbox, label in zip( bboxes, labels ):
        class_confidence = float( bbox[-1] )
        if class_confidence < self._thresh:
            continue
        bbox_int = bbox.astype( np.int32 )
        bounding_box = BoundingBox( bbox_int[0], bbox_int[1],
                                    bbox_int[2], bbox_int[3] )
        detected_object_type = DetectedObjectType( class_names[ label ],
                                                   class_confidence )
        output.append( DetectedObject( bounding_box, class_confidence,
                                       detected_object_type ) )

    if np.size( labels ) > 0:
        mmcv.imshow_det_bboxes(
            input_image, bboxes, labels,
            class_names=class_names,
            score_thr=-100.0, show=True)

    return DetectedObjectSet( output )
def detect(self, image_data):
    # Emit a single box that drifts by (m_dx, m_dy) each frame
    dot = DetectedObjectSet([
        DetectedObject(
            BoundingBox(
                self.m_center_x + self.frame_ct * self.m_dx - self.m_width / 2.0,
                self.m_center_y + self.frame_ct * self.m_dy - self.m_height / 2.0,
                self.m_center_x + self.frame_ct * self.m_dx + self.m_width / 2.0,
                self.m_center_y + self.frame_ct * self.m_dy + self.m_height / 2.0))
    ])
    self.frame_ct += 1
    return dot
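# A quick usage sketch for the moving-box dummy detector above (the
# SimpleMovingDetector name and its attribute values are hypothetical):
# each successive detect() call shifts the reported box by (m_dx, m_dy),
# which makes the detector handy for driving downstream stages without a
# real model.
def _example_drift():
    det = SimpleMovingDetector()  # hypothetical, with m_* attributes set
    for _ in range(3):
        dos = det.detect(None)  # the image argument is ignored here
        box = dos[0].bounding_box()
        print(box.min_x(), box.min_y(), box.max_x(), box.max_y())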
def _convert_detected_object_set_in(datum_ptr):
    """
    Convert datum as PyCapsule to detected_object_set opaque handle.
    """
    _VCL = find_vital_library.find_vital_type_converter_library()

    # Convert from datum to opaque handle.
    func = _VCL.vital_detected_object_set_from_datum
    func.argtypes = [ctypes.py_object]
    func.restype = DetectedObjectSet.C_TYPE_PTR

    # get opaque handle from the datum
    handle = func(datum_ptr)

    # convert handle to python object - from c-ptr
    return DetectedObjectSet(from_cptr=handle)
def _step(self):
    # grab image container from port using traits
    in_img_c = self.grab_input_using_trait('image')

    # Get python image from container (just for show)
    in_img = in_img_c.get_image()

    # Print out text to screen
    print("Text: " + str(self.text))

    # push dummy (empty) detections object to output port
    detections = DetectedObjectSet()
    self.push_to_port_using_trait('detected_object_set', detections)

    self._base_step()
def _step(self):
    # grab image container from port using traits
    in_img_c = self.grab_input_using_trait('image')

    # Get python image from container (just for show)
    in_img = in_img_c.image()

    # Print out example_param to screen
    print('Example parameter: ' + str(self.example_param))

    # push dummy (empty) detections object to output port
    detections = DetectedObjectSet()
    self.push_to_port_using_trait('detected_object_set', detections)

    self._base_step()
def _step(self):
    # grab timestamp and object track set from ports using traits
    ts = self.grab_input_using_trait('timestamp')
    object_track_set = self.grab_input_using_trait('object_track_set')
    current_frame_id = ts.get_frame()

    self.create_track_dict(object_track_set)
    logger.debug('=== Frame Id: %d ===' % current_frame_id)

    # return the detected object set for this frame to the port
    if current_frame_id in self.track_dict:
        detected_object_set = self.track_dict[current_frame_id]
        logger.debug("Detections: " + str(detected_object_set.size()))
        self.push_to_port_using_trait('detected_object_set', detected_object_set)
    else:
        logger.debug("No detections for frame " + str(current_frame_id))
        detections = DetectedObjectSet()
        self.push_to_port_using_trait('detected_object_set', detections)

    logger.debug("==================")
    self._base_step()
def extract_chips_for_dets( self, image_files, detections ):
    import cv2
    output_files = []
    output_dets = []

    # Run detector on image, TODO
    #if self._detector_model:
    #else
    #if len( train_dets ) == 0:
    #  continue
    #TODO use self._overlap_for_association = 0.90
    #TODO use self._max_negs_per_frame = 10

    for i in range( len( image_files ) ):
        filename = image_files[ i ]
        groundtruth = detections[ i ]

        if len( groundtruth ) > 0:
            img = cv2.imread( filename )
            img_max_x = np.shape( img )[1]
            img_max_y = np.shape( img )[0]

        if len( groundtruth ) == 0:
            continue

        pos_bboxs = []

        for det in groundtruth:
            # Extract chip for this detection
            bbox = det.bounding_box()

            bbox_min_x = int( bbox.min_x() )
            bbox_max_x = int( bbox.max_x() )
            bbox_min_y = int( bbox.min_y() )
            bbox_max_y = int( bbox.max_y() )

            bbox_width = bbox_max_x - bbox_min_x
            bbox_height = bbox_max_y - bbox_min_y
            bbox_area = bbox_width * bbox_height

            if self._area_lower_bound > 0 and bbox_area < self._area_lower_bound:
                continue
            if self._area_upper_bound > 0 and bbox_area > self._area_upper_bound:
                continue

            if self._border_exclude > 0:
                if bbox_min_x <= self._border_exclude:
                    continue
                if bbox_min_y <= self._border_exclude:
                    continue
                if bbox_max_x >= img_max_x - self._border_exclude:
                    continue
                if bbox_max_y >= img_max_y - self._border_exclude:
                    continue

            crop = img[ bbox_min_y:bbox_max_y, bbox_min_x:bbox_max_x ]

            self._sample_count = self._sample_count + 1
            crop_str = ( '%09d' % self._sample_count ) + ".png"
            new_file = os.path.join( self._chip_directory, crop_str )
            cv2.imwrite( new_file, crop )

            # Set new box size for this detection
            det.set_bounding_box( BoundingBox( 0, 0,
                                               np.shape( crop )[1],
                                               np.shape( crop )[0] ) )
            new_set = DetectedObjectSet()
            new_set.add( det )

            output_files.append( new_file )
            output_dets.append( new_set )

    return [ output_files, output_dets ]
def refine(self, image_data, detections):
    if len(detections) == 0:
        return detections

    img = image_data.asarray().astype('uint8')
    predictor = self.predictor

    img_max_x = np.shape(img)[1]
    img_max_y = np.shape(img)[0]

    # Extract patches for ROIs
    image_chips = []
    detection_ids = []

    for i, det in enumerate(detections):
        # Extract chip for this detection
        bbox = det.bounding_box()

        bbox_min_x = int(bbox.min_x())
        bbox_max_x = int(bbox.max_x())
        bbox_min_y = int(bbox.min_y())
        bbox_max_y = int(bbox.max_y())

        bbox_width = bbox_max_x - bbox_min_x
        bbox_height = bbox_max_y - bbox_min_y
        bbox_area = bbox_width * bbox_height

        if self._area_lower_bound > 0 and bbox_area < self._area_lower_bound:
            continue
        if self._area_upper_bound > 0 and bbox_area > self._area_upper_bound:
            continue

        if self._border_exclude > 0:
            if bbox_min_x <= self._border_exclude:
                continue
            if bbox_min_y <= self._border_exclude:
                continue
            if bbox_max_x >= img_max_x - self._border_exclude:
                continue
            if bbox_max_y >= img_max_y - self._border_exclude:
                continue

        crop = img[bbox_min_y:bbox_max_y, bbox_min_x:bbox_max_x]
        image_chips.append(crop)
        detection_ids.append(i)

    # Run classifier on ROIs
    classifications = list(predictor.predict(image_chips))

    # Put classifications back into detections
    output = DetectedObjectSet()

    for i, det in enumerate(detections):
        if len(detection_ids) == 0 or i != detection_ids[0]:
            output.add(det)
            continue

        new_class = classifications[0]

        if new_class.data.get('prob', None) is not None:
            # If we have a probability for each class, use that
            class_names = list(new_class.classes)
            class_scores = new_class.prob
        else:
            # Otherwise we only have the score for the predicted class
            class_names = [new_class.classes[new_class.cidx]]
            class_scores = [new_class.conf]

        if self._average_prior and det.type() is not None:
            # Average the new scores with the detection's prior scores
            priors = det.type()
            prior_names = priors.class_names()
            for name in prior_names:
                if name in class_names:
                    class_scores[class_names.index(name)] += priors.score(name)
                else:
                    class_names.append(name)
                    class_scores.append(priors.score(name))
            for j in range(len(class_scores)):
                class_scores[j] = class_scores[j] * 0.5

        detected_object_type = DetectedObjectType(class_names, class_scores)
        det.set_type(detected_object_type)
        output.add(det)

        detection_ids.pop(0)
        classifications.pop(0)

    return output
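# A small worked illustration of the prior-averaging branch in refine()
# above (class names and scores are made up): if the classifier returns
# {'fish': 0.8} and the detection's prior type holds {'fish': 0.4,
# 'rock': 0.2}, the merged scores are the halved sums
#   fish -> (0.8 + 0.4) * 0.5 = 0.6
#   rock -> 0.2 * 0.5         = 0.1
# so fresh classifications and priors are weighted equally, and classes
# present in only one source are simply halved.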
def detect(self, image_data):
    input_image = image_data.asarray().astype('uint8')

    from mmdet.apis import inference_detector
    detections = inference_detector(self._model, input_image)

    if isinstance(detections, tuple):
        bbox_result, segm_result = detections
    else:
        bbox_result, segm_result = detections, None

    if np.size(bbox_result) > 0:
        bboxes = np.vstack(bbox_result)
    else:
        bboxes = []

    # convert segmentation masks
    masks = []
    if segm_result is not None:
        segms = mmcv.concat_list(segm_result)
        inds = np.where(bboxes[:, -1] > self._thresh)[0]
        for i in inds:
            masks.append(maskUtils.decode(segms[i]).astype(bool))

    # collect labels
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_result)
    ]

    if np.size(labels) > 0:
        labels = np.concatenate(labels)
    else:
        labels = []

    # convert to kwiver format, apply threshold
    output = DetectedObjectSet()

    for bbox, label in zip(bboxes, labels):
        class_confidence = float(bbox[-1])
        if class_confidence < self._thresh:
            continue

        bbox_int = bbox.astype(np.int32)
        bounding_box = BoundingBox(bbox_int[0], bbox_int[1],
                                   bbox_int[2], bbox_int[3])
        class_name = self._labels[label]
        detected_object_type = DetectedObjectType(class_name, class_confidence)
        detected_object = DetectedObject(bounding_box, class_confidence,
                                         detected_object_type)
        output.add(detected_object)

    if np.size(labels) > 0 and self._display_detections:
        mmcv.imshow_det_bboxes(input_image, bboxes, labels,
                               class_names=self._labels,
                               score_thr=self._thresh, show=True)

    return output
def match_point2box(self, detected_object_set1, detected_object_set2):
    # Box centers of set 1 act as the points to match
    points = []
    for d in detected_object_set1:
        bb = d.bounding_box()
        points.append(bb.center())

    # Map each box in set 2 to the indices of the points it contains
    box2pt_dict = {}
    for idx, d in enumerate(detected_object_set2):
        bb = d.bounding_box()
        box2pt_dict[idx] = []
        for pt_idx, pt in enumerate(points):
            if bb_contains_pt(bb, pt):
                box2pt_dict[idx].append(pt_idx)

    # Invert the mapping: point index -> indices of boxes containing it
    pt2box_dict = {}
    for box_idx, pt_idxs in box2pt_dict.items():
        for pt_idx in pt_idxs:
            if pt_idx not in pt2box_dict:
                pt2box_dict[pt_idx] = []
            pt2box_dict[pt_idx].append(box_idx)

    print(box2pt_dict)
    print(pt2box_dict)

    # unmatched detections (new)
    unmatched_detections = []
    for box_idx, pt_matches in box2pt_dict.items():
        if len(pt_matches) == 0:
            unmatched_detections.append(box_idx)

    # unmatched points (not found): points never entered into pt2box_dict
    unmatched_points = []
    for pt_idx in range(len(points)):
        if pt_idx not in pt2box_dict:
            unmatched_points.append(pt_idx)

    # matched detections (found, success);
    # multiple matches are resolved by highest confidence
    point_detection_matches = {}  # pt_idx -> box_idx
    for pt_idx, box_matches in pt2box_dict.items():
        if len(box_matches) == 0:
            continue
        if len(box_matches) == 1:
            point_detection_matches[pt_idx] = box_matches[0]
        else:
            best_score = -1
            best_det = None
            for det_idx in box_matches:
                det = detected_object_set2[det_idx]
                if det.confidence() > best_score:
                    best_score = det.confidence()
                    best_det = det_idx
            point_detection_matches[pt_idx] = best_det

    matches = DetectedObjectSet()
    for pt_idx, det_idx in point_detection_matches.items():
        det = detected_object_set2[det_idx]
        dettype = det.type()
        new_type = DetectedObjectType(
            "matched_%s" % dettype.get_most_likely_class(), 1.)
        det.set_type(new_type)
        matches.add(det)

    new_detections = DetectedObjectSet()
    for det_idx in unmatched_detections:
        det = detected_object_set2[det_idx]
        dettype = det.type()
        new_type = DetectedObjectType(
            "new_%s" % dettype.get_most_likely_class(), det.confidence())
        det.set_type(new_type)
        new_detections.add(det)

    return matches, new_detections
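# bb_contains_pt is called by match_point2box but not defined in this
# snippet; a minimal sketch, assuming the BoundingBox accessors used
# elsewhere in this file and a point given as an (x, y) pair such as
# the one returned by bounding_box().center():
def bb_contains_pt(bb, pt):
    # True when the point lies inside or on the border of the box
    return (bb.min_x() <= pt[0] <= bb.max_x() and
            bb.min_y() <= pt[1] <= bb.max_y())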