def predict(self, image):
    """Detect objects in an image and return their predicted class names.

    Args:
        image: input image in RGB channel order (anything accepted by
            ``np.array``, e.g. a PIL image).

    Returns:
        list[str]: class names for the kept detections, or an empty list
        if inference fails for any reason (best-effort contract).
    """
    try:
        # Convert to OpenCV layout: RGB -> BGR.
        x = np.array(image)
        x = x[:, :, ::-1].copy()
        dataset_dict = get_image_blob(x, self._cfg.MODEL.PIXEL_MEAN)
        with torch.no_grad():
            boxes, scores, features_pooled, attr_scores = self._model(
                [dataset_dict])
        # Undo the resize applied by get_image_blob so boxes are in the
        # original image's pixel space.
        dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
        scores = scores[0].cpu()

        # For every box, keep its best class score that survives
        # per-class NMS (class 0 is background and is skipped).
        max_conf = torch.zeros((scores.shape[0])).to(scores.device)
        for cls_ind in range(1, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            keep = nms(dets, cls_scores, 0.3)
            max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                         cls_scores[keep], max_conf[keep])

        # Threshold, then clamp the detection count to
        # [self._min_boxes, self._max_boxes].
        keep_boxes = torch.nonzero(max_conf >= self._threshold).flatten()
        if len(keep_boxes) < self._min_boxes:
            keep_boxes = torch.argsort(max_conf,
                                       descending=True)[:self._min_boxes]
        elif len(keep_boxes) > self._max_boxes:
            keep_boxes = torch.argsort(max_conf,
                                       descending=True)[:self._max_boxes]

        # argmax over [:, 1:] drops the background column, so +1 maps the
        # index back into the full class vocabulary.
        objects = np.argmax(scores[keep_boxes].numpy()[:, 1:], axis=1)
        return [self._classes[obj + 1] for obj in objects]
    except Exception as e:
        # Deliberate best-effort behavior: log and return no detections
        # rather than propagate.
        print(e, flush=True)
        return []
def get_detections_from_im(
    record, model: nn.Module, im: np.ndarray, cfg, conf_thresh=CONF_THRESH
):
    """Detect objects in one panorama view and append them to ``record``.

    Runs the BUA detector on ``im``, keeps the best class-wise-NMS
    detections, rejects boxes better centered in a neighboring view, and
    appends boxes/scores/features plus per-box heading/elevation to the
    panorama-level ``record`` dict (mutated in place).

    Args:
        record: accumulator dict for the whole panorama; on the first call it
            has no "features" key, afterwards arrays are vstacked onto it.
            Also read for "viewHeading"/"viewElevation" (indexed by view).
        model: detection model returning (boxes, scores, features, attrs)
            for a list of dataset dicts.
        im: BGR image array (as produced by cv2.imread upstream).
        cfg: config node; only MODEL.PIXEL_MEAN is read here.
        conf_thresh: minimum surviving class score for a box to be kept.

    Returns:
        None. All output is written into ``record``.
    """
    dataset_dict = get_image_blob(im, cfg.MODEL.PIXEL_MEAN)
    # extract roi features
    boxes, scores, features_pooled, attr_scores = model([dataset_dict])
    rois = boxes[0].tensor.cpu()
    # unscale back to raw image space
    cls_boxes = rois / dataset_dict["im_scale"]
    scores = scores[0].cpu()
    pool5 = features_pooled[0].cpu()
    attr_prob = attr_scores[0].cpu()
    # ix = index of this view within the panorama sweep, derived from how
    # many views have already contributed detections.
    if "features" not in record:
        ix = 0  # First view in the pano
    elif record["featureViewIndex"].shape[0] == 0:
        ix = 0  # No detections in pano so far
    else:
        ix = int(record["featureViewIndex"][-1]) + 1
    # Keep only the best detections: per-box max class score surviving
    # class-wise NMS (class 0 is background and is skipped).
    max_conf = torch.zeros(rois.shape[0])
    for cls_ind in range(1, scores.shape[1]):
        cls_scores = scores[:, cls_ind]
        keep = nms(cls_boxes, cls_scores, NMS_THRESH)
        max_conf[keep] = torch.tensor(
            np.where(
                cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep]
            )
        )
    # Threshold, then clamp count to [MIN_LOCAL_BOXES, MAX_LOCAL_BOXES].
    keep_boxes = np.where(max_conf >= conf_thresh)[0]
    if len(keep_boxes) < MIN_LOCAL_BOXES:
        keep_boxes = np.argsort(max_conf.numpy())[::-1][:MIN_LOCAL_BOXES]
    elif len(keep_boxes) > MAX_LOCAL_BOXES:
        keep_boxes = np.argsort(max_conf.numpy())[::-1][:MAX_LOCAL_BOXES]
    # Discard any box that would be better centered in another image.
    # hor/vert_thresh are pixel distances from the image center
    # corresponding to half the angular increment plus a margin.
    hor_thresh = FOC * math.tan(math.radians(HEADING_INC / 2 + ANGLE_MARGIN))
    vert_thresh = FOC * math.tan(math.radians(ELEVATION_INC / 2 + ANGLE_MARGIN))
    center_x = 0.5 * (cls_boxes[:, 0] + cls_boxes[:, 2])
    center_y = 0.5 * (cls_boxes[:, 1] + cls_boxes[:, 3])
    reject = (center_x < WIDTH / 2 - hor_thresh) | (center_x > WIDTH / 2 + hor_thresh)
    heading = record["viewHeading"][ix]
    elevation = record["viewElevation"][ix]
    # Vertical rejection only applies where an adjacent sweep exists.
    if ix >= VIEWS_PER_SWEEP:  # Not lowest sweep
        reject |= center_y > HEIGHT / 2 + vert_thresh
    if ix < VIEWPOINT_SIZE - VIEWS_PER_SWEEP:  # Not highest sweep
        reject |= center_y < HEIGHT / 2 - vert_thresh
    keep_boxes = np.setdiff1d(keep_boxes, np.argwhere(reject))
    # Calculate the heading and elevation of the center of each observation
    featureHeading = heading + np.arctan2(center_x[keep_boxes] - WIDTH / 2, FOC)
    # normalize featureHeading
    featureHeading = np.mod(featureHeading, math.pi * 2)
    # force it to be the positive remainder, so that 0 <= angle < 360
    featureHeading = np.expand_dims(
        np.mod(featureHeading + math.pi * 2, math.pi * 2), axis=1
    )
    # force into the minimum absolute value residue class, so that -180 < angle <= 180
    featureHeading = np.where(
        featureHeading > math.pi, featureHeading - math.pi * 2, featureHeading
    )
    featureElevation = np.expand_dims(
        elevation + np.arctan2(-center_y[keep_boxes] + HEIGHT / 2, FOC), axis=1
    )
    # Save features, etc: initialize record arrays on the first
    # contributing view, otherwise stack onto the existing ones.
    if "features" not in record:
        record["boxes"] = cls_boxes[keep_boxes]
        record["cls_prob"] = scores[keep_boxes]
        record["attr_prob"] = attr_prob[keep_boxes]
        record["features"] = pool5[keep_boxes]
        record["featureViewIndex"] = (
            np.ones((len(keep_boxes), 1), dtype=np.float32) * ix
        )
        record["featureHeading"] = featureHeading
        record["featureElevation"] = featureElevation
    else:
        record["boxes"] = np.vstack([record["boxes"], cls_boxes[keep_boxes]])
        record["cls_prob"] = np.vstack([record["cls_prob"], scores[keep_boxes]])
        record["attr_prob"] = np.vstack([record["attr_prob"], attr_prob[keep_boxes]])
        record["features"] = np.vstack([record["features"], pool5[keep_boxes]])
        record["featureViewIndex"] = np.vstack(
            [
                record["featureViewIndex"],
                np.ones((len(keep_boxes), 1), dtype=np.float32) * ix,
            ]
        )
        record["featureHeading"] = np.vstack([record["featureHeading"], featureHeading])
        record["featureElevation"] = np.vstack(
            [record["featureElevation"], featureElevation]
        )
    return
def extract_feat(split_idx, img_list, cfg, args, actor: ActorHandle):
    """Extract BUA features for one shard of images, saving them via ray.

    Builds the detection model, then for each image dispatches a
    ``generate_npz`` ray task according to ``cfg.MODEL.BUA.EXTRACTOR.MODE``:
    1 = ROI features, 2 = bounding boxes only, 3 = ROI features for
    precomputed boxes. Progress is reported through ``actor.update``.

    Args:
        split_idx: index of this shard (used for logging only).
        img_list: image file names belonging to this shard.
        cfg: detectron2 config node.
        args: parsed CLI args (image_dir, output_dir, bbox_dir, resume).
        actor: ray progress-bar actor handle.
    """
    num_images = len(img_list)
    print('Number of images on split{}: {}.'.format(split_idx, num_images))

    model = DefaultTrainer.build_model(cfg)
    DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
        cfg.MODEL.WEIGHTS, resume=args.resume)
    model.eval()

    generate_npz_list = []
    for im_file in img_list:
        npz_name = im_file.split('.')[0] + '.npz'
        # Skip images whose features were already extracted.
        if os.path.exists(os.path.join(args.output_dir, npz_name)):
            actor.update.remote(1)
            continue
        im = cv2.imread(os.path.join(args.image_dir, im_file))
        if im is None:
            print(os.path.join(args.image_dir, im_file), "is illegal!")
            actor.update.remote(1)
            continue
        dataset_dict = get_image_blob(im, cfg.MODEL.PIXEL_MEAN)

        # extract roi features
        if cfg.MODEL.BUA.EXTRACTOR.MODE == 1:
            attr_scores = None
            with torch.no_grad():
                if cfg.MODEL.BUA.ATTRIBUTE_ON:
                    boxes, scores, features_pooled, attr_scores = model(
                        [dataset_dict])
                else:
                    boxes, scores, features_pooled = model([dataset_dict])
            boxes = [box.tensor.cpu() for box in boxes]
            scores = [score.cpu() for score in scores]
            features_pooled = [feat.cpu() for feat in features_pooled]
            if attr_scores is not None:
                attr_scores = [attr_score.cpu() for attr_score in attr_scores]
            generate_npz_list.append(
                generate_npz.remote(1, actor, args, cfg, im_file, im,
                                    dataset_dict, boxes, scores,
                                    features_pooled, attr_scores))
        # extract bbox only
        elif cfg.MODEL.BUA.EXTRACTOR.MODE == 2:
            with torch.no_grad():
                boxes, scores = model([dataset_dict])
            boxes = [box.cpu() for box in boxes]
            scores = [score.cpu() for score in scores]
            generate_npz_list.append(
                generate_npz.remote(2, actor, args, cfg, im_file, im,
                                    dataset_dict, boxes, scores))
        # extract roi features by bbox
        elif cfg.MODEL.BUA.EXTRACTOR.MODE == 3:
            bbox_path = os.path.join(args.bbox_dir, npz_name)
            if not os.path.exists(bbox_path):
                actor.update.remote(1)
                continue
            # Boxes on disk are in original-image space; rescale them to
            # the resized network input.
            bbox = torch.from_numpy(
                np.load(bbox_path)['bbox']) * dataset_dict['im_scale']
            proposals = Instances(dataset_dict['image'].shape[-2:])
            proposals.proposal_boxes = BUABoxes(bbox)
            dataset_dict['proposals'] = proposals

            attr_scores = None
            with torch.no_grad():
                if cfg.MODEL.BUA.ATTRIBUTE_ON:
                    boxes, scores, features_pooled, attr_scores = model(
                        [dataset_dict])
                else:
                    boxes, scores, features_pooled = model([dataset_dict])
            boxes = [box.tensor.cpu() for box in boxes]
            scores = [score.cpu() for score in scores]
            features_pooled = [feat.cpu() for feat in features_pooled]
            if attr_scores is not None:
                attr_scores = [
                    attr_score.data.cpu() for attr_score in attr_scores
                ]
            generate_npz_list.append(
                generate_npz.remote(3, actor, args, cfg, im_file, im,
                                    dataset_dict, boxes, scores,
                                    features_pooled, attr_scores))

    # Block until every dispatched save task has finished.
    ray.get(generate_npz_list)
def main():
    """CLI entry point: extract BUA ROI features for every image in a
    directory and save one compressed ``.npz`` per image.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection2 Inference")
    parser.add_argument(
        "--config-file",
        default="configs/bua-caffe/extract-bua-caffe-r101.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--mode", default="caffe", type=str,
                        help="bua_caffe, ...")
    parser.add_argument('--out-dir',
                        dest='output_dir',
                        help='output directory for features',
                        default="features")
    parser.add_argument('--image-dir',
                        dest='image_dir',
                        help='directory with images',
                        default="image")
    parser.add_argument(
        "--resume",
        action="store_true",
        help="whether to attempt to resume from the checkpoint directory",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    cfg = setup(args)

    # Extraction hyper-parameters (this script hard-codes them).
    MIN_BOXES = 10
    MAX_BOXES = 100
    CONF_THRESH = 0.2

    model = DefaultTrainer.build_model(cfg)
    DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
        cfg.MODEL.WEIGHTS, resume=args.resume)
    model.eval()

    # Extract features.
    imglist = os.listdir(args.image_dir)
    print('Number of images: {}.'.format(len(imglist)))

    for im_file in tqdm.tqdm(imglist):
        im = cv2.imread(os.path.join(args.image_dir, im_file))
        if im is None:
            # Skip unreadable/corrupt files instead of crashing mid-run.
            print(os.path.join(args.image_dir, im_file), "is illegal!")
            continue
        # NOTE(review): the other extractors in this project pass
        # cfg.MODEL.PIXEL_MEAN here — confirm get_image_blob's default
        # mean matches this model.
        dataset_dict = get_image_blob(im)
        attr_scores = None
        with torch.no_grad():
            if cfg.MODEL.BUA.ATTRIBUTE_ON:
                boxes, scores, features_pooled, attr_scores = model(
                    [dataset_dict])
            else:
                boxes, scores, features_pooled = model([dataset_dict])
        # Boxes back in original-image coordinates.
        dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
        scores = scores[0].cpu()
        feats = features_pooled[0].cpu()

        # Per-box best class score surviving class-wise NMS
        # (class 0 is background and is skipped).
        max_conf = torch.zeros((scores.shape[0])).to(scores.device)
        for cls_ind in range(1, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            keep = nms(dets, cls_scores, 0.3)
            max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                         cls_scores[keep], max_conf[keep])

        # Threshold, then clamp count to [MIN_BOXES, MAX_BOXES].
        keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten()
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES]

        image_feat = feats[keep_boxes]
        image_bboxes = dets[keep_boxes]
        image_objects_conf = np.max(scores[keep_boxes].numpy(), axis=1)
        image_objects = np.argmax(scores[keep_boxes].numpy(), axis=1)
        info = {
            'image_id': im_file.split('.')[0],
            'image_h': np.size(im, 0),
            'image_w': np.size(im, 1),
            'num_boxes': len(keep_boxes),
            'objects_id': image_objects,
            'objects_conf': image_objects_conf,
        }
        if cfg.MODEL.BUA.ATTRIBUTE_ON:
            attr_scores = attr_scores[0].cpu()
            info['attrs_id'] = np.argmax(attr_scores[keep_boxes].numpy(),
                                         axis=1)
            info['attrs_conf'] = np.max(attr_scores[keep_boxes].numpy(),
                                        axis=1)

        output_file = os.path.join(args.output_dir, im_file.split('.')[0])
        np.savez_compressed(output_file,
                            x=image_feat,
                            bbox=image_bboxes,
                            num_bbox=len(keep_boxes),
                            image_h=np.size(im, 0),
                            image_w=np.size(im, 1),
                            info=info)
def extract_feat(image_path):
    """Extract BUA ROI features for a single image file.

    Uses the module-level ``cfg`` and ``net_img`` detection model.

    Args:
        image_path: path to the image on disk.

    Returns:
        tuple: ``(image_feat, image_bboxes, (height, width))`` where
        ``image_feat`` are the pooled ROI features and ``image_bboxes``
        the kept boxes in original-image coordinates.

    Raises:
        ValueError: if the image cannot be read.
    """
    MIN_BOXES = 10
    MAX_BOXES = 100
    CONF_THRESH = 0.2

    im = cv2.imread(image_path)
    if im is None:
        raise ValueError('Failed to read image: {}'.format(image_path))
    print('image shape:', im.shape)

    dataset_dict = get_image_blob(im)
    with torch.no_grad():
        if cfg.MODEL.BUA.ATTRIBUTE_ON:
            boxes, scores, features_pooled, attr_scores = net_img(
                [dataset_dict])
        else:
            boxes, scores, features_pooled = net_img([dataset_dict])
    # Boxes back in original-image coordinates.
    dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
    scores = scores[0].cpu()
    feats = features_pooled[0].cpu()

    # Per-box best class score surviving class-wise NMS
    # (class 0 is background and is skipped).
    max_conf = torch.zeros((scores.shape[0])).to(scores.device)
    for cls_ind in range(1, scores.shape[1]):
        cls_scores = scores[:, cls_ind]
        keep = nms(dets, cls_scores, 0.3)
        max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                     cls_scores[keep], max_conf[keep])

    # Threshold, then clamp count to [MIN_BOXES, MAX_BOXES].
    keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten()
    if len(keep_boxes) < MIN_BOXES:
        keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES]
    elif len(keep_boxes) > MAX_BOXES:
        keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES]

    # Only features, boxes and the image size are returned; the original
    # also built an unused per-box info dict, which has been dropped.
    image_feat = feats[keep_boxes]
    image_bboxes = dets[keep_boxes]
    return image_feat, image_bboxes, im.shape[:2]
def main():
    """CLI entry point: extract features/boxes per the configured mode.

    ``cfg.MODEL.BUA.EXTRACTOR.MODE`` selects the output: 1 = ROI features,
    2 = bounding boxes only, 3 = ROI features for ground-truth boxes.
    Results are saved by the ``save_*`` helpers, one ``.npz`` per image.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection2 Inference")
    parser.add_argument(
        "--config-file",
        default="configs/bua-caffe/extract-bua-caffe-r101.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--mode", default="caffe", type=str,
                        help="bua_caffe, ...")
    parser.add_argument('--out-dir',
                        dest='output_dir',
                        help='output directory for features',
                        default="data/features")
    parser.add_argument('--image-dir',
                        dest='image_dir',
                        help='directory with images',
                        default="data/coco_img")
    parser.add_argument('--gt-bbox-dir',
                        dest='gt_bbox_dir',
                        help='directory with gt-bbox',
                        default="data/bbox")
    parser.add_argument(
        "--resume",
        action="store_true",
        help="whether to attempt to resume from the checkpoint directory",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    cfg = setup(args)

    # (MIN/MAX_BOXES and CONF_THRESH from cfg were never used here — the
    # save_* helpers read them from cfg themselves — so they are removed.)
    model = DefaultTrainer.build_model(cfg)
    DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
        cfg.MODEL.WEIGHTS, resume=args.resume)
    model.eval()

    # Extract features.
    imglist = os.listdir(args.image_dir)
    print('Number of images: {}.'.format(len(imglist)))

    for im_file in tqdm.tqdm(imglist):
        npz_name = im_file.split('.')[0] + '.npz'
        # Skip images whose output already exists (supports re-runs).
        if os.path.exists(os.path.join(args.output_dir, npz_name)):
            continue
        im = cv2.imread(os.path.join(args.image_dir, im_file))
        if im is None:
            print(os.path.join(args.image_dir, im_file), "is illegal!")
            continue
        dataset_dict = get_image_blob(im, cfg.MODEL.PIXEL_MEAN)

        # extract roi features
        if cfg.MODEL.BUA.EXTRACTOR.MODE == 1:
            attr_scores = None
            with torch.no_grad():
                if cfg.MODEL.BUA.ATTRIBUTE_ON:
                    boxes, scores, features_pooled, attr_scores = model(
                        [dataset_dict])
                else:
                    boxes, scores, features_pooled = model([dataset_dict])
            save_roi_features(args, cfg, im_file, im, dataset_dict, boxes,
                              scores, features_pooled, attr_scores)
        # extract bbox only
        elif cfg.MODEL.BUA.EXTRACTOR.MODE == 2:
            with torch.no_grad():
                boxes, scores = model([dataset_dict])
            save_bbox(args, cfg, im_file, im, dataset_dict, boxes, scores)
        # extract roi features by gt bbox
        elif cfg.MODEL.BUA.EXTRACTOR.MODE == 3:
            gt_bbox_path = os.path.join(args.gt_bbox_dir, npz_name)
            if not os.path.exists(gt_bbox_path):
                continue
            # Ground-truth boxes are in original-image space; rescale them
            # to the resized network input.
            bbox = torch.from_numpy(
                np.load(gt_bbox_path)['bbox']) * dataset_dict['im_scale']
            proposals = Instances(dataset_dict['image'].shape[-2:])
            proposals.proposal_boxes = BUABoxes(bbox)
            dataset_dict['proposals'] = proposals

            attr_scores = None
            with torch.no_grad():
                if cfg.MODEL.BUA.ATTRIBUTE_ON:
                    boxes, scores, features_pooled, attr_scores = model(
                        [dataset_dict])
                else:
                    boxes, scores, features_pooled = model([dataset_dict])
            save_roi_features_by_gt_bbox(args, cfg, im_file, im, dataset_dict,
                                         boxes, scores, features_pooled,
                                         attr_scores)
def _load_vocab(path):
    """Load a vocab file (one comma-separated synonym list per line) and
    return an array of each line's first synonym, lowercased."""
    vocab = []
    with open(path) as f:
        for line in f:
            names = [n.lower().strip() for n in line.split(',')]
            vocab.append(names[0])
    return np.array(vocab)


def main():
    """CLI entry point: extract BUA features for demo images and write
    ``*_fc.h5`` (mean-pooled), ``*_att.h5`` (per-box), ``*_box.h5``
    (boxes) plus a ``*_info.pkl`` with per-image metadata.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection2 Inference")
    parser.add_argument(
        "--config-file",
        default="configs/bua-caffe/extract-bua-caffe-r101-fix36.yaml",
        metavar="FILE",
        help="path to config file",
    )
    # --image-dir or --image
    parser.add_argument('--image-dir',
                        dest='image_dir',
                        help='directory with images',
                        default="datasets/demos")
    parser.add_argument(
        '--image', dest='image',
        help='image')  # e.g. datasets/demos/COCO_val2014_000000060623.jpg
    parser.add_argument("--mode", default="caffe", type=str,
                        help="bua_caffe, ...")
    parser.add_argument('--out-dir',
                        dest='output_dir',
                        help='output directory for features',
                        default="features")
    parser.add_argument('--out-name',
                        dest='output_name',
                        help='output file name for features',
                        default="demos")
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    cfg = setup(args)

    model = DefaultTrainer.build_model(cfg)
    DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
        cfg.MODEL.WEIGHTS, resume=True)
    model.eval()

    # Extract features: a single image, or every file in the directory.
    if args.image:
        imglist = [args.image]
    else:
        imglist = [
            os.path.join(args.image_dir, fn)
            for fn in os.listdir(args.image_dir)
        ]
    print('Number of images: {}.'.format(len(imglist)))
    imglist.sort()

    MIN_BOXES = cfg.MODEL.BUA.EXTRACTOR.MIN_BOXES
    MAX_BOXES = cfg.MODEL.BUA.EXTRACTOR.MAX_BOXES
    CONF_THRESH = cfg.MODEL.BUA.EXTRACTOR.CONF_THRESH

    classes = _load_vocab(os.path.join('evaluation/objects_vocab.txt'))
    attributes = _load_vocab(os.path.join('evaluation/attributes_vocab.txt'))

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    informations = {}
    # 'a' mode lets a re-run append to existing HDF5 outputs; the with
    # statement closes all three files even if extraction fails mid-loop.
    with h5py.File(os.path.join(args.output_dir, '%s_fc.h5' % args.output_name), 'a') as file_fc, \
         h5py.File(os.path.join(args.output_dir, '%s_att.h5' % args.output_name), 'a') as file_att, \
         h5py.File(os.path.join(args.output_dir, '%s_box.h5' % args.output_name), 'a') as file_box:
        for im_file in tqdm.tqdm(imglist):
            img_nm = os.path.basename(im_file)
            im = cv2.imread(im_file)
            if im is None:
                print(im_file, "is illegal!")
                continue
            dataset_dict = get_image_blob(im, cfg.MODEL.PIXEL_MEAN)

            # extract roi features
            attr_scores = None
            with torch.no_grad():
                if cfg.MODEL.BUA.ATTRIBUTE_ON:
                    boxes, scores, features_pooled, attr_scores = model(
                        [dataset_dict])
                else:
                    boxes, scores, features_pooled = model([dataset_dict])
            # Boxes back in original-image coordinates.
            dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
            scores = scores[0].cpu()
            feats = features_pooled[0].cpu()

            # Per-box best class score surviving class-wise NMS
            # (class 0 is background and is skipped).
            max_conf = torch.zeros((scores.shape[0])).to(scores.device)
            for cls_ind in range(1, scores.shape[1]):
                cls_scores = scores[:, cls_ind]
                keep = nms(dets, cls_scores, 0.3)
                max_conf[keep] = torch.where(
                    cls_scores[keep] > max_conf[keep], cls_scores[keep],
                    max_conf[keep])

            # Threshold, then clamp count to [MIN_BOXES, MAX_BOXES].
            keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten()
            if len(keep_boxes) < MIN_BOXES:
                keep_boxes = torch.argsort(max_conf,
                                           descending=True)[:MIN_BOXES]
            elif len(keep_boxes) > MAX_BOXES:
                keep_boxes = torch.argsort(max_conf,
                                           descending=True)[:MAX_BOXES]

            image_feat = feats[keep_boxes].numpy()
            image_bboxes = dets[keep_boxes].numpy()
            # [:, 1:] drops the background column before the argmax.
            image_objects_conf = np.max(scores[keep_boxes].numpy()[:, 1:],
                                        axis=1)
            image_objects = classes[np.argmax(
                scores[keep_boxes].numpy()[:, 1:], axis=1)]
            info = {
                'image_name': img_nm,
                'image_h': np.size(im, 0),
                'image_w': np.size(im, 1),
                'num_boxes': len(keep_boxes),
                'objects': image_objects,
                'objects_conf': image_objects_conf
            }
            if attr_scores is not None:
                attr_scores = attr_scores[0].cpu()
                info['attrs'] = attributes[np.argmax(
                    attr_scores[keep_boxes].numpy()[:, 1:], axis=1)]
                info['attrs_conf'] = np.max(
                    attr_scores[keep_boxes].numpy()[:, 1:], axis=1)

            file_fc.create_dataset(img_nm, data=image_feat.mean(0))
            file_att.create_dataset(img_nm, data=image_feat)
            file_box.create_dataset(img_nm, data=image_bboxes)
            informations[img_nm] = info

    # Dump metadata only after all HDF5 files are flushed and closed.
    with open(
            os.path.join(args.output_dir,
                         '%s_info.pkl' % args.output_name), 'wb') as pkl_f:
        pickle.dump(informations, pkl_f)
    print(
        '--------------------------------------------------------------------'
    )