def detect_one_image(img, model_func):
    """
    Detect objects in a single image with the given TF callable.

    The image is resized here before inference, and the predicted boxes are
    scaled back and clipped to the shape of the original image.

    Args:
        img: an image
        model_func: a callable from TF model, takes the resized image and
            returns four values, unpacked as (boxes, probs, labels, fv)

    Returns:
        [DetectionResult], one per predicted box, each built from
        (box, prob, label, fv entry)
    """
    orig_shape = img.shape[:2]
    resized = CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE,
                           cfg.PREPROC.MAX_SIZE).augment(img)

    # Geometric mean of the vertical and horizontal resize ratios.
    ratio_h = resized.shape[0] * 1.0 / img.shape[0]
    scale = np.sqrt(ratio_h * resized.shape[1] / img.shape[1])

    boxes, probs, labels, fv = model_func(resized)

    # The graph clips boxes to the resized image; dividing by a float scale
    # can push them slightly outside the original image, so clip once more.
    boxes = clip_boxes(boxes / scale, orig_shape)

    return [
        DetectionResult(box, prob, label, feat)
        for box, prob, label, feat in zip(boxes, probs, labels, fv)
    ]
def detect_one_image(img, model_func):
    """
    Detect objects in a single image with the given TF callable.

    Resizing is done here; predicted boxes are scaled back and clipped to the
    original image shape.

    Args:
        img: an image
        model_func: a callable from TF model, takes the resized image and
            returns (boxes, probs, labels, [masks])

    Returns:
        [DetectionResult], whose mask field is None when the model produced
        no mask output
    """
    orig_shape = img.shape[:2]
    resized = CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE).augment(img)

    # Arithmetic mean of the vertical and horizontal resize ratios.
    ratio_h = resized.shape[0] * 1.0 / img.shape[0]
    ratio_w = resized.shape[1] * 1.0 / img.shape[1]
    scale = (ratio_h + ratio_w) / 2

    boxes, probs, labels, *mask_out = model_func(resized)

    # Boxes were clipped in the graph, but float rescaling may move them out
    # of bounds again.
    boxes = clip_boxes(boxes / scale, orig_shape)

    if not mask_out:
        # No mask head: use None as the mask of every detection.
        final_masks = [None] * len(boxes)
    else:
        final_masks = [
            fill_full_mask(box, mask, orig_shape)
            for box, mask in zip(boxes, mask_out[0])
        ]

    return [
        DetectionResult(box, prob, label, mask)
        for box, prob, label, mask in zip(boxes, probs, labels, final_masks)
    ]
def fastrcnn_inference(image_shape2d, rcnn_boxes, rcnn_label_logits, rcnn_box_logits):
    """
    Build the inference outputs of the Fast R-CNN head: decode the per-class
    box regressions against the proposals, clip them to the image, and gather
    the predictions selected by `fastrcnn_predictions`.

    Args:
        image_shape2d: h, w
        rcnn_boxes (nx4): the proposal boxes
        rcnn_label_logits: label logits of the proposals; softmax is applied
            here (presumably n x #class -- TODO confirm against the caller)
        rcnn_box_logits (nx #class x 4): box regression logits; index 0 of
            the class axis is dropped before decoding

    Returns:
        final_boxes (mx4):
        final_labels (m): each >= 1
        final_probs: the probabilities returned by `fastrcnn_predictions`
            for the selected predictions
    """
    # Drop index 0 of the class axis; the remaining axis size is declared
    # to be cfg.DATA.NUM_CATEGORY.
    rcnn_box_logits = rcnn_box_logits[:, 1:, :]
    rcnn_box_logits.set_shape([None, cfg.DATA.NUM_CATEGORY, None])
    label_probs = tf.nn.softmax(
        rcnn_label_logits, name='fastrcnn_all_probs')  # #proposal x #Class
    # Repeat every proposal once per category so it lines up with the logits.
    anchors = tf.tile(tf.expand_dims(rcnn_boxes, 1),
                      [1, cfg.DATA.NUM_CATEGORY, 1])  # #proposal x #Cat x 4
    # The logits are divided by cfg.FRCNN.BBOX_REG_WEIGHTS before decoding.
    decoded_boxes = decode_bbox_target(
        rcnn_box_logits /
        tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32), anchors)
    decoded_boxes = clip_boxes(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes')

    # indices: Nx2. Each index into (#proposal, #category)
    pred_indices, final_probs = fastrcnn_predictions(decoded_boxes, label_probs)
    final_probs = tf.identity(final_probs, 'final_probs')
    final_boxes = tf.gather_nd(decoded_boxes, pred_indices, name='final_boxes')
    # Category indices start at 0 after the slice above; add 1 to get labels.
    final_labels = tf.add(pred_indices[:, 1], 1, name='final_labels')
    return final_boxes, final_labels, final_probs
def predict_image_track_with_precomputed_ref_features(img, ref_features, model_func):
    """
    Run the model on one image together with precomputed reference features.

    Args:
        img: an image
        ref_features: passed unchanged to `model_func` as its second argument
        model_func: a callable taking (resized image, ref_features) and
            returning (boxes, probs, labels, [masks])

    Returns:
        [DetectionResult], whose mask field is None when the model produced
        no mask output
    """
    orig_shape = img.shape[:2]
    resized = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE,
                           cfg.PREPROC.MAX_SIZE).augment(img)

    # Geometric mean of the vertical and horizontal resize ratios.
    ratio_h = resized.shape[0] * 1.0 / img.shape[0]
    scale = np.sqrt(ratio_h * resized.shape[1] / img.shape[1])

    boxes, probs, labels, *mask_out = model_func(resized, ref_features)

    # Rescaling by a float can move graph-clipped boxes out of bounds again.
    boxes = clip_boxes(boxes / scale, orig_shape)

    if not mask_out:
        # No mask output: every detection gets None.
        final_masks = [None] * len(boxes)
    else:
        final_masks = [
            _paste_mask(box, mask, orig_shape)
            for box, mask in zip(boxes, mask_out[0])
        ]

    return [
        DetectionResult(box, prob, label, mask)
        for box, prob, label, mask in zip(boxes, probs, labels, final_masks)
    ]
def predict_resized_image(resized_img, orig_shape, scale, model_func):
    """
    Run detection on an image that the caller has already resized.

    Args:
        resized_img: the resized image, fed to `model_func` as is
        orig_shape: shape of the original image, used to clip the boxes and
            passed to `_paste_mask`
        scale: the factor the predicted boxes are divided by
        model_func: a callable from the TF model. It takes image and returns
            (boxes, probs, labels, [masks])

    Returns:
        [DetectionResult]
    """
    boxes, probs, labels, *mask_out = model_func(resized_img)

    # Numpy postprocessing. The graph already clipped the boxes, but the
    # float division may place them outside the original image again.
    boxes = clip_boxes(boxes / scale, orig_shape)

    if not mask_out:
        # No mask output: every detection gets None.
        final_masks = [None] * len(boxes)
    else:
        final_masks = [
            _paste_mask(box, mask, orig_shape)
            for box, mask in zip(boxes, mask_out[0])
        ]

    return [
        DetectionResult(box, prob, label, mask)
        for box, prob, label, mask in zip(boxes, probs, labels.tolist(), final_masks)
    ]
def predict_image(img, model_func):
    """
    Detect objects in a single image with the given TF callable.

    Resizing is done here; predicted boxes are scaled back and clipped to the
    original image shape.

    Args:
        img: an image
        model_func: a callable from the TF model.
            It takes image and returns (boxes, probs, labels, [masks])

    Returns:
        [DetectionResult]
    """
    orig_shape = img.shape[:2]
    resized = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE,
                           cfg.PREPROC.MAX_SIZE).augment(img)

    # Geometric mean of the vertical and horizontal resize ratios.
    ratio_h = resized.shape[0] * 1.0 / img.shape[0]
    scale = np.sqrt(ratio_h * resized.shape[1] / img.shape[1])

    boxes, probs, labels, *mask_out = model_func(resized)

    # Numpy postprocessing. The graph already clipped the boxes, but the
    # float division may place them outside the original image again.
    boxes = clip_boxes(boxes / scale, orig_shape)

    if not mask_out:
        # No mask output: every detection gets None.
        final_masks = [None] * len(boxes)
    else:
        final_masks = [
            _paste_mask(box, mask, orig_shape)
            for box, mask in zip(boxes, mask_out[0])
        ]

    return [
        DetectionResult(box, prob, label, mask)
        for box, prob, label, mask in zip(boxes, probs, labels.tolist(), final_masks)
    ]
def detect_one_image_scale(img, model_func):
    """
    Run multi-scale detection on one image, using the TF callable.

    The image is resized once per entry of cfg.TEST.BBOX_AUG_SCALES, the
    model is run on every resized copy, and the detections of all scales are
    merged by taking their union.

    NOTE(review): no NMS is applied across scales, so the same object may be
    reported once per scale -- confirm whether the caller deduplicates.

    Args:
        img: an image
        model_func: a callable from TF model,
            takes image and returns (boxes, probs, labels, [masks])

    Returns:
        [DetectionResult] covering the detections of all scales; an empty
        list when cfg.TEST.BBOX_AUG_SCALES is empty

    Raises:
        NotImplementedError: if cfg.TEST.BBOX_AUG_COORD_HEUR is not 'UNION',
            the only merge heuristic implemented here.
    """
    if cfg.TEST.BBOX_AUG_COORD_HEUR != 'UNION':
        # Fail before running the model instead of tripping over undefined
        # names after all the inference work has been done.
        raise NotImplementedError(
            "detect_one_image_scale only supports "
            "cfg.TEST.BBOX_AUG_COORD_HEUR == 'UNION', got {!r}".format(
                cfg.TEST.BBOX_AUG_COORD_HEUR))

    orig_shape = img.shape[:2]
    boxes_ts = []
    scores_ts = []
    labels_ts = []
    masks_ts = []

    for bbox_aug_scale in cfg.TEST.BBOX_AUG_SCALES:
        resizer = CustomResize(bbox_aug_scale, cfg.TEST.BBOX_AUG_MAX_SIZE)
        resized_img = resizer.augment(img)
        scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] *
                        resized_img.shape[1] / img.shape[1])
        boxes, probs, labels, *masks = model_func(resized_img)
        boxes = boxes / scale
        # boxes are already clipped inside the graph, but after the floating
        # point scaling, this may not be true any more.
        boxes = clip_boxes(boxes, orig_shape)

        boxes_ts.append(boxes)
        scores_ts.append(probs)
        labels_ts.append(labels)
        if masks:
            # Keep the per-box mask array itself, not the one-element list
            # produced by the star-unpacking.
            masks_ts.append(masks[0])

    if not boxes_ts:
        return []

    # Concatenate along the detection axis so that entry k of every array
    # describes the same detection. Stacking the 1-D score/label arrays as
    # rows would instead produce one row per scale.
    boxes_c = np.concatenate(boxes_ts, axis=0)
    scores_c = np.concatenate(scores_ts, axis=0)
    labels_c = np.concatenate(labels_ts, axis=0)

    logger.info("detect_one_image_scale...")
    logger.info(boxes_c)
    logger.info(scores_c)

    if masks_ts:
        # has mask
        masks_c = np.concatenate(masks_ts, axis=0)
        full_masks = [
            fill_full_mask(box, mask, orig_shape)
            for box, mask in zip(boxes_c, masks_c)
        ]
    else:
        # fill with none
        full_masks = [None] * len(boxes_c)

    return [
        DetectionResult(*args)
        for args in zip(boxes_c, scores_c, labels_c, full_masks)
    ]
def predict_image(img, model_func):
    """
    Detect objects in a single image and report the running mean inference time.

    Resizing is done here. Every call adds the duration of the `model_func`
    call to the module-level `total_time`, increments the module-level `cnt`,
    and prints `total_time / cnt`.

    Args:
        img: an image
        model_func: a callable from the TF model.
            It takes image and returns (boxes, probs, labels, [masks])

    Returns:
        [DetectionResult]
    """
    global total_time
    global cnt

    print("predict_image")

    orig_shape = img.shape[:2]
    resized = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE,
                           cfg.PREPROC.MAX_SIZE).augment(img)

    # Geometric mean of the vertical and horizontal resize ratios.
    ratio_h = resized.shape[0] * 1.0 / img.shape[0]
    scale = np.sqrt(ratio_h * resized.shape[1] / img.shape[1])

    # Only the model call is timed, not the pre/post-processing.
    start_time = time.time()
    boxes, probs, labels, *mask_out = model_func(resized)
    end_time = time.time()

    cnt += 1
    total_time += end_time - start_time
    print(
        f"--------- Inference time : {total_time / cnt}seconds -----------------"
    )

    # Numpy postprocessing. The graph already clipped the boxes, but the
    # float division may place them outside the original image again.
    boxes = clip_boxes(boxes / scale, orig_shape)

    if not mask_out:
        # No mask output: every detection gets None.
        final_masks = [None] * len(boxes)
    else:
        final_masks = [
            _paste_mask(box, mask, orig_shape)
            for box, mask in zip(boxes, mask_out[0])
        ]

    return [
        DetectionResult(box, prob, label, mask)
        for box, prob, label, mask in zip(boxes, probs, labels.tolist(), final_masks)
    ]
def predict_crop(self, img, debug_id=None):
    """
    Run the session on one crop and return detections with mask polygons.

    The crop is resized with `self.resizer`, fed to `self.sess`, and the
    predicted boxes are scaled back and clipped to the crop's original shape.
    For every detection a polygon is estimated from the largest contour of
    its pasted mask.

    Args:
        img: the crop to run detection on
        debug_id: name of the sub-directory under './debugs/' used when
            `self.debug` is set; a random UUID is used when None

    Returns:
        [DetectionResult] built from (box, prob, label, mask, polygon).
        The polygon is None when the model produced no mask output or when
        no contour was found in the mask.
        When `self.debug` is truthy, a tuple (results, debug_path) is
        returned instead, and a visualisation is written to
        '<debug_path>/prediction.png'.
    """
    start_time = time.time()
    orig_shape = img.shape[:2]
    resized_img = self.resizer.augment(img)
    scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] *
                    resized_img.shape[1] / img.shape[1])
    boxes, probs, labels, *masks = self.sess.run(
        self.outputs_tensor, feed_dict={self.input_tensor: resized_img})

    # Some slow numpy postprocessing:
    boxes = boxes / scale
    # boxes are already clipped inside the graph, but after the floating
    # point scaling, this may not be true any more.
    boxes = clip_boxes(boxes, orig_shape)

    if masks:
        masks = [
            self._paste_mask(box, mask, orig_shape)
            for box, mask in zip(boxes, masks[0])
        ]
    else:
        # fill with none
        masks = [None] * len(boxes)

    # Estimate polygon based on the mask right here
    polygons = []
    for mask in masks:
        if mask is None:
            # No mask output from the model: nothing to trace.
            polygons.append(None)
            continue
        temp_mask = np.expand_dims(mask, axis=-1) * 255
        cnts = cv2.findContours(temp_mask, cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)
        if len(cnts) == 0:
            # An all-zero mask has no contour; max() would raise on it.
            polygons.append(None)
            continue
        # Approximate the largest contour with a tolerance of 2% of its
        # perimeter.
        cnt = max(cnts, key=cv2.contourArea)
        peri = cv2.arcLength(cnt, True)
        polygons.append(cv2.approxPolyDP(cnt, 0.02 * peri, True))

    results = [
        DetectionResult(*args)
        for args in zip(boxes, probs, labels.tolist(), masks, polygons)
    ]

    if self.debug:
        print('Crop tooks {} secs.'.format(time.time() - start_time))
        debug_id = str(uuid.uuid4()) if debug_id is None else debug_id
        debug_path = os.path.join('./debugs/', debug_id)
        os.makedirs(debug_path, exist_ok=True)
        final = draw_final_outputs_blackwhite(img, results)
        cv2.imwrite(os.path.join(debug_path, 'prediction.png'), final)
        return results, debug_path
    return results
def detect_one_image_cls(img, model_func):
    """
    Detect objects in a single image and also return an image-level label.

    Resizing is done here; predicted boxes are scaled back and clipped to the
    original image shape.

    Args:
        img: an image
        model_func: a callable from TF model, takes the resized image and
            returns (boxes, probs, labels, ious, img_level_label,
            img_level_label_score, [masks])

    Returns:
        A tuple ([DetectionResult], img_level_label[0]). Each DetectionResult
        is built from (box, prob, label, iou, mask). The image-level label
        score is unpacked but not returned.
    """
    orig_shape = img.shape[:2]
    resized = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE,
                           cfg.PREPROC.MAX_SIZE).augment(img)

    # Geometric mean of the vertical and horizontal resize ratios.
    ratio_h = resized.shape[0] * 1.0 / img.shape[0]
    scale = np.sqrt(ratio_h * resized.shape[1] / img.shape[1])

    (boxes, probs, labels, ious, img_level_label,
     img_level_label_score, *mask_out) = model_func(resized)

    # Rescaling by a float can move graph-clipped boxes out of bounds again.
    boxes = clip_boxes(boxes / scale, orig_shape)

    if not mask_out:
        # No mask output: every detection gets None.
        final_masks = [None] * len(boxes)
    else:
        final_masks = [
            fill_full_mask(box, mask, orig_shape)
            for box, mask in zip(boxes, mask_out[0])
        ]

    detections = [
        DetectionResult(box, prob, label, iou, mask)
        for box, prob, label, iou, mask in zip(boxes, probs, labels, ious, final_masks)
    ]
    return detections, img_level_label[0]
def predict_image_batch(img_batch, model_func, resized_sizes, scales, orig_sizes):
    """
    Run detection on a batch of already-resized images, using the TF callable.

    Args:
        img_batch: the batch of resized images, fed to `model_func` as is
        model_func: a callable from the TF model. It takes
            (img_batch, resized_sizes_in) and returns
            (indices, boxes, probs, labels, [masks]), where `indices` holds,
            for every detection, the position of its image in the batch
        resized_sizes: per-image sizes of the resized images; a column of 3s
            is appended before they are passed to the model (presumably the
            channel count -- TODO confirm)
        scales: per-image factor the predicted boxes are divided by
        orig_sizes: per-image original shapes, used to clip boxes and paste
            masks

    Returns:
        A list with one entry per element of `scales`; entry i is the
        [DetectionResult] of image i (empty when it has no detections).
    """
    resized_sizes = np.stack(resized_sizes)
    resized_sizes_in = np.concatenate(
        (resized_sizes, 3 * np.ones((resized_sizes.shape[0], 1))), axis=1)
    indices, boxes, probs, labels, *masks = model_func(img_batch, resized_sizes_in)
    indices = indices.astype(np.int32)

    results = []
    for i, scale in enumerate(scales):
        ind = np.where(indices == i)[0]
        if len(ind) == 0:
            results.append([])
            continue

        # Work on this image's rows only. `masks` (the model output) must
        # stay untouched, because later iterations still index into it.
        img_boxes = boxes[ind, :] / scale
        # boxes are already clipped inside the graph, but after the floating
        # point scaling, this may not be true any more.
        img_boxes = clip_boxes(img_boxes, orig_sizes[i])

        if masks:
            # has mask
            img_masks = [
                _paste_mask(box, mask, orig_sizes[i])
                for box, mask in zip(img_boxes, masks[0][ind, :])
            ]
        else:
            # fill with none
            img_masks = [None] * len(img_boxes)

        results.append([
            DetectionResult(*args)
            for args in zip(img_boxes, probs[ind], labels[ind], img_masks)
        ])
    return results
def visualize(model_path, nr_visualize=50, output_dir='output'):
    """
    Write visualisations of intermediate and final predictions on training
    data to `output_dir`.

    For each of the first `nr_visualize` datapoints of the training dataflow,
    five images are stacked into one patch grid: ground truth, proposal
    recall, scores of the "good" proposals, foreground predictions, and the
    final outputs after NMS. The grid is saved as '<output_dir>/<idx>.png'.

    Args:
        model_path: passed to `get_model_loader` to initialise the session
        nr_visualize: number of datapoints to visualise
        output_dir: destination directory. It is DELETED first if it already
            exists, then recreated.
    """
    pred = OfflinePredictor(
        PredictConfig(model=Model(),
                      session_init=get_model_loader(model_path),
                      input_names=['image', 'gt_boxes', 'gt_labels'],
                      output_names=[
                          'generate_rpn_proposals/boxes',
                          'generate_rpn_proposals/probs',
                          'fastrcnn_all_probs',
                          'fastrcnn_fg_probs',
                          'fastrcnn_fg_boxes',
                      ]))
    df = get_train_dataflow()
    df.reset_state()

    # Start from an empty output directory.
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)
    utils.fs.mkdir_p(output_dir)
    with tqdm.tqdm(total=nr_visualize) as pbar:
        for idx, dp in itertools.islice(enumerate(df.get_data()), nr_visualize):
            # The second and third components of the datapoint are not used here.
            img, _, _, gt_boxes, gt_labels = dp

            rpn_boxes, rpn_scores, all_probs, fg_probs, fg_boxes = pred(
                img, gt_boxes, gt_labels)

            gt_viz = draw_annotation(img, gt_boxes, gt_labels)
            proposal_viz, good_proposals_ind = draw_proposal_recall(
                img, rpn_boxes, rpn_scores, gt_boxes)
            score_viz = draw_predictions(img, rpn_boxes[good_proposals_ind],
                                         all_probs[good_proposals_ind])

            # Clip the foreground boxes to the image before drawing them
            # and before NMS.
            fg_boxes = clip_boxes(fg_boxes, img.shape[:2])
            fg_viz = draw_predictions(img, fg_boxes, fg_probs)

            results = nms_fastrcnn_results(fg_boxes, fg_probs)
            final_viz = draw_final_outputs(img, results)

            viz = tpviz.stack_patches(
                [gt_viz, proposal_viz, score_viz, fg_viz, final_viz], 2, 3)

            # Show interactively only when the DISPLAY variable is set.
            if os.environ.get('DISPLAY', None):
                tpviz.interactive_imshow(viz)
            cv2.imwrite("{}/{:03d}.png".format(output_dir, idx), viz)
            pbar.update()
def detect_one_image(img, model_func):
    """
    Detect objects in a single image with the given TF callable.

    Resizing is done here; the foreground boxes are scaled back, clipped to
    the original image shape, and handed to `nms_fastrcnn_results`.

    Args:
        img: an image
        model_func: a callable from TF model, takes [image] and returns
            (probs, boxes)

    Returns:
        [DetectionResult], as returned by `nms_fastrcnn_results`
    """
    resized = CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE).augment(img)

    # Arithmetic mean of the vertical and horizontal resize ratios.
    ratio_h = resized.shape[0] * 1.0 / img.shape[0]
    ratio_w = resized.shape[1] * 1.0 / img.shape[1]
    scale = (ratio_h + ratio_w) / 2

    fg_probs, fg_boxes = model_func(resized)

    # Map the boxes back to the original image and keep them inside it.
    rescaled_boxes = clip_boxes(fg_boxes / scale, img.shape[:2])
    return nms_fastrcnn_results(rescaled_boxes, fg_probs)
def post_processing_inference(boxes, masks, labels, probs, scale, orig_shape):
    """
    Turn raw model outputs into a list of DetectionResult.

    Args:
        boxes: predicted boxes in the coordinates of the resized image
        masks: the optional mask output; when truthy, its first element is
            iterated in lockstep with `boxes`
        labels: predicted labels, converted with `.tolist()`
        probs: predicted scores
        scale: the factor the boxes are divided by
        orig_shape: shape of the original image, used to clip the boxes and
            passed to `_paste_mask`

    Returns:
        [DetectionResult], whose mask field is None when `masks` is falsy
    """
    # Numpy postprocessing. The graph already clipped the boxes, but the
    # float division may place them outside the original image again.
    rescaled = clip_boxes(boxes / scale, orig_shape)

    if not masks:
        # No mask output: every detection gets None.
        final_masks = [None] * len(rescaled)
    else:
        final_masks = [
            _paste_mask(box, mask, orig_shape)
            for box, mask in zip(rescaled, masks[0])
        ]

    return [
        DetectionResult(box, prob, label, mask)
        for box, prob, label, mask in zip(rescaled, probs, labels.tolist(), final_masks)
    ]
def detect_one_image(img, model_func, *args):
    """
    Detect objects in a single image with the given TF callable.

    Resizing is done here. The outputs expected from `model_func` depend on
    config.USE_SECOND_HEAD and config.EXTRACT_FEATURES.

    Args:
        img: an image
        model_func: a callable from TF model, takes the resized image and
            returns (boxes, probs, labels, posteriors,
            [second_labels, second_posteriors,] [masks] [, features])
        *args: extra positional arguments; forwarded to `model_func` only
            when features are extracted without the second head

    Returns:
        [SecondDetectionResult] when config.USE_SECOND_HEAD is set,
        otherwise [DetectionResult]. The feature field is None when
        config.EXTRACT_FEATURES is off.
    """
    orig_shape = img.shape[:2]
    resized = CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE).augment(img)

    # Arithmetic mean of the vertical and horizontal resize ratios.
    ratio_h = resized.shape[0] * 1.0 / img.shape[0]
    ratio_w = resized.shape[1] * 1.0 / img.shape[1]
    scale = (ratio_h + ratio_w) / 2

    if config.USE_SECOND_HEAD and config.EXTRACT_FEATURES:
        (boxes, probs, labels, posteriors, second_labels,
         second_posteriors, raw_masks, features) = model_func(resized)
        # Wrap in a list so it looks like the star-unpacked output below.
        masks = [raw_masks]
    elif config.USE_SECOND_HEAD:
        (boxes, probs, labels, posteriors, second_labels,
         second_posteriors, *masks) = model_func(resized)
        features = [None] * labels.size
    elif config.EXTRACT_FEATURES:
        boxes, probs, labels, posteriors, raw_masks, features = model_func(
            resized, *args)
        masks = [raw_masks]
    else:
        boxes, probs, labels, posteriors, *masks = model_func(resized)
        features = [None] * labels.size

    # Map the boxes back to the original image and keep them inside it.
    boxes = clip_boxes(boxes / scale, orig_shape)

    if not masks:
        # No mask output: every detection gets None.
        final_masks = [None] * len(boxes)
    else:
        final_masks = [
            fill_full_mask(box, mask, orig_shape)
            for box, mask in zip(boxes, masks[0])
        ]

    if config.USE_SECOND_HEAD:
        return [
            SecondDetectionResult(*fields)
            for fields in zip(boxes, probs, labels, posteriors, final_masks,
                              second_labels, second_posteriors, features)
        ]
    return [
        DetectionResult(*fields)
        for fields in zip(boxes, probs, labels, posteriors, final_masks, features)
    ]
max_iters = df.size() save_folder = '/media/ayan/Drive/All_Object/tensorpack-master/Faster_RCNN_Test/Object-Detection-Metrics-master_2/' while iter < max_iters: iter = iter + 1 print(iter) try: batch_image, batch_anchor_labels, batch_anchor_boxes, batch_gt_boxes, batch_gt_labels = next( data_generator) except StopIteration: break orig_shape = batch_image.shape[:2] feed_dict = {image_P: batch_image} final_boxes_, final_labels_, final_probs_ = sess.run( [final_boxes, final_labels, final_probs], feed_dict) final_boxes_ = clip_boxes(final_boxes_, orig_shape) final_boxes_ = sess.run(final_boxes_) final_boxes_ = final_boxes_.astype('int32') if np.any(final_boxes_): tags = [ "{},{:.2f}".format(cfg.DATA.CLASS_NAMES[lb], score) for lb, score in zip(final_labels_, final_probs_) ] final_viz = viz.draw_boxes(batch_image, final_boxes_, tags) gt_viz = draw_annotation(batch_image, batch_gt_boxes, batch_gt_labels) img_out = np.hstack((final_viz, gt_viz)) imageio.imwrite(os.path.join(save_path, str(iter) + ".jpg"), img_out) Detection = [] for ik in range(final_boxes_.shape[0]):