# --- Fragment: per-image detection + feature selection (duplicates the tail of
# extract_feature below; relies on fasterRCNN/im_data/args/im_file etc. being
# bound by the enclosing scope, which is not visible here). ---

# Forward pass with autograd disabled (inference only).
with torch.set_grad_enabled(False):
    rois, cls_prob, _, _, _, _, _, _, \
        pooled_feat = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

# ROI columns 1:5 are (x1, y1, x2, y2); column 0 is the batch index.
boxes = rois.data.cpu().numpy()[:, :, 1:5].squeeze()
# Undo the test-time rescaling so boxes are in original-image coordinates.
boxes /= im_scales[0]
cls_prob = cls_prob.data.cpu().numpy().squeeze()
pooled_feat = pooled_feat.data.cpu().numpy()

# Keep only the best detections.
# For every box, track the highest class score that survives per-class NMS.
max_conf = np.zeros((boxes.shape[0]))
for cls_ind in range(1, cls_prob.shape[1]):  # class 0 is background
    cls_scores = cls_prob[:, cls_ind]
    dets = np.hstack((boxes, cls_scores[:, np.newaxis])).astype(np.float32)
    keep = np.array(cpu_nms(dets, cfg.TEST.NMS))
    max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep],
                              cls_scores[keep], max_conf[keep])

# Confidence-thresholded selection, clamped into [MIN_BOXES, MAX_BOXES] by
# falling back to the top-scoring boxes overall.
keep_boxes = np.where(max_conf >= CONF_THRESH)[0]
if len(keep_boxes) < MIN_BOXES:
    keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
elif len(keep_boxes) > MAX_BOXES:
    keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]
image_feat = pooled_feat[keep_boxes]

if args.save_boxes:
    image_bboxes = boxes[keep_boxes]
else:
    image_bboxes = None

# NOTE(review): split('.')[0] truncates filenames containing extra dots
# (e.g. 'img.v2.jpg' -> 'img'); os.path.splitext would be safer — confirm
# against the dataset's naming scheme.
output_file = os.path.join(args.output_dir, im_file.split('.')[0]+'.npy')
def forward(self, input):
    """Turn RPN outputs into scored region proposals.

    Algorithm, per (H, W) location i:
      - generate A anchor boxes centered on cell i
      - apply predicted bbox deltas at cell i to each of the A anchors
      - clip predicted boxes to the image
      - drop boxes with height or width below the scaled min-size threshold
      - sort remaining (proposal, score) pairs by score, descending
      - take top pre_nms_topN proposals, apply NMS, take top post_nms_topN

    Args:
        input: 4-tuple of
            [0] RPN class scores, shape (N, 2*A, H, W) — first A channels
                are background probabilities, second A are foreground;
            [1] RPN bbox deltas, shape (N, 4*A, H, W);
            [2] im_info, per-image (height, width, scale);
            [3] cfg_key, 'TRAIN' or 'TEST', selecting the cfg section.

    Returns:
        Tensor of shape (N, max_kept, 5): column 0 is the batch index,
        columns 1:5 are (x1, y1, x2, y2). Rows past an image's own
        proposal count are zero-padded.
    """
    # Keep only the foreground probabilities.
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_deltas = input[1]
    im_info = input[2]
    cfg_key = input[3]

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    batch_size = bbox_deltas.size(0)

    # Enumerate every feature-map cell as an (x1, y1, x2, y2) shift in
    # input-image coordinates.
    feat_height, feat_width = scores.size(2), scores.size(3)
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(
        np.vstack((shift_x.ravel(), shift_y.ravel(),
                   shift_x.ravel(), shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    # Broadcast the A base anchors over all K cells: (1, K*A, 4) per image.
    self._anchors = self._anchors.type_as(scores)
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors.
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

    # Same story for the scores.
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    # Convert anchors into proposals via bbox transformations, then clip
    # to the image boundary.
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
    proposals = clip_boxes(proposals, im_info, batch_size)

    # Drop (mask out) boxes smaller than min_size scaled to input resolution.
    # NOTE(review): the masked_select/.view(batch_size, -1) reshape assumes
    # every image in the batch keeps the same number of boxes — confirm
    # _filter_boxes guarantees that.
    keep = self._filter_boxes(proposals, min_size * im_info[:, 2])
    scores_keep = torch.masked_select(scores, keep).view(batch_size, -1)
    proposals_keep = torch.masked_select(proposals, keep[:, :, None]).view(
        batch_size, -1, proposals.size(2))

    # NOTE: sort on cuda tensor works differently, so sort on CPU.
    _, order = torch.sort(scores_keep.cpu(), 1, True)

    kept_per_image = []
    for i in range(batch_size):
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]

        # Take the top pre_nms_topN (e.g. 6000) highest-scoring proposals.
        order_single = order[i]
        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]

        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)

        # NMS, then keep at most post_nms_topN survivors.
        keep_idx_i = cpu_nms(np.hstack((proposals_single, scores_single)),
                             nms_thresh)
        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        kept_per_image.append(proposals_single)

    # BUG FIX: the output tensor used to be re-allocated inside the loop
    # above, sized to the CURRENT image's proposal count — for batch_size > 1
    # that discarded every earlier image's proposals and mis-sized the
    # result. Allocate once, sized to the largest per-image count, and
    # zero-pad the shorter images. For batch_size == 1 the shape is
    # unchanged from the original behavior.
    max_num_proposal = max(p.size(0) for p in kept_per_image)
    output = scores.new(batch_size, max_num_proposal, 5).zero_()
    for i, proposals_single in enumerate(kept_per_image):
        output[i, :, 0] = i
        output[i, :proposals_single.size(0), 1:] = proposals_single

    return output
def get_features():
    """Extract bottom-up-attention region features for every image in
    ``../images``.

    Builds a ResNet-101 Faster R-CNN with pretrained bottom-up weights,
    runs it on each image, keeps exactly 36 regions per image (confidence
    threshold, padded/truncated via the top overall scores), and collects
    their pooled features.

    Returns:
        tuple: ``(images, features)`` — the raw BGR images as read by
        OpenCV, and one feature array per image indexed by kept region.
    """
    # Fixed extraction settings: exactly 36 regions, 1601 detector classes.
    MIN_BOXES = 36
    MAX_BOXES = 36
    N_CLASSES = 1601
    CONF_THRESH = 0.2
    # NOTE(review): cfg_file is assigned but never loaded via cfg_from_file
    # here — kept for parity with the original code; verify intent.
    cfg_file = 'cfgs/faster_rcnn_resnet101.yml'
    model_file = 'models/bottomup_pretrained_10_100.pth'
    image_dir = '../images'

    use_cuda = torch.cuda.is_available()
    assert use_cuda, 'Works only with CUDA'
    device = torch.device('cuda') if use_cuda else torch.device('cpu')
    cfg.CUDA = use_cuda
    np.random.seed(cfg.RNG_SEED)

    # Build the detector and restore the pretrained weights.
    detector = resnet(N_CLASSES, 101, pretrained=False)
    detector.create_architecture()
    detector.load_state_dict(torch.load(model_file))
    detector.to(device)
    detector.eval()
    print('Model is loaded.')

    # Enumerate the input images.
    filenames = os.listdir(image_dir)
    num_images = len(filenames)
    print('Number of images: {}.'.format(num_images))

    images = []
    features = []

    for im_file in filenames:
        frame = cv2.imread(os.path.join(image_dir, im_file))
        images.append(frame)

        blob, scales = get_image_blob(frame)
        assert len(scales) == 1, 'Only single-image batch is implemented'

        # Pack the blob into the network's expected inputs (NCHW tensor,
        # image metadata, empty ground truth).
        net_input = torch.from_numpy(blob).permute(0, 3, 1, 2).to(device)
        meta = torch.tensor(
            [[blob.shape[1], blob.shape[2], scales[0]]]).to(device)
        dummy_gt = torch.zeros(1, 1, 5).to(device)
        dummy_count = torch.zeros(1).to(device)

        with torch.no_grad():
            rois, cls_prob, _, _, _, _, _, _, \
                pooled_feat = detector(net_input, meta, dummy_gt, dummy_count)

        # ROI columns 1:5 are box coordinates; rescale back to the
        # original image resolution.
        region_boxes = rois.data.cpu().numpy()[:, :, 1:5].squeeze()
        region_boxes /= scales[0]
        class_probs = cls_prob.data.cpu().numpy().squeeze()
        region_feats = pooled_feat.data.cpu().numpy()

        # For each box, record the best class score surviving per-class NMS
        # (class 0 is background and is skipped).
        best_score = np.zeros((region_boxes.shape[0]))
        for cls_ind in range(1, class_probs.shape[1]):
            scores_c = class_probs[:, cls_ind]
            dets = np.hstack(
                (region_boxes, scores_c[:, np.newaxis])).astype(np.float32)
            survivors = np.array(cpu_nms(dets, cfg.TEST.NMS))
            best_score[survivors] = np.where(
                scores_c[survivors] > best_score[survivors],
                scores_c[survivors], best_score[survivors])

        # Threshold on confidence, then clamp the count into
        # [MIN_BOXES, MAX_BOXES] using the overall score ranking.
        selected = np.where(best_score >= CONF_THRESH)[0]
        ranked = np.argsort(best_score)[::-1]
        if len(selected) < MIN_BOXES:
            selected = ranked[:MIN_BOXES]
        elif len(selected) > MAX_BOXES:
            selected = ranked[:MAX_BOXES]

        features.append(region_feats[selected])

    return images, features
def extract_feature():
    """Extract bottom-up-attention features for every image in
    ``args.image_dir`` and save one ``.npy`` file per image into
    ``args.output_dir``.

    Loads a ResNet-101 Faster R-CNN with the weights from
    ``args.model_file``, runs it on each image, keeps between 10 and 100
    regions per image (confidence threshold, padded/truncated via the top
    overall scores), and writes the pooled features (and optionally the
    boxes, when ``args.save_boxes``) with ``save_features``.
    """
    MIN_BOXES = 10
    MAX_BOXES = 100
    N_CLASSES = 1601
    CONF_THRESH = 0.2

    args = parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    os.makedirs(args.output_dir, exist_ok=True)

    use_cuda = torch.cuda.is_available()
    assert use_cuda, 'Works only with CUDA'
    device = torch.device('cuda') if use_cuda else torch.device('cpu')
    cfg.CUDA = use_cuda
    np.random.seed(cfg.RNG_SEED)

    # Load the model.
    fasterRCNN = resnet(N_CLASSES, 101, pretrained=False)
    fasterRCNN.create_architecture()
    fasterRCNN.load_state_dict(torch.load(args.model_file))
    fasterRCNN.to(device)
    fasterRCNN.eval()
    print('Model is loaded.')

    # Load images.
    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)
    print('Number of images: {}.'.format(num_images))

    # Extract features.
    for im_file in tqdm(imglist):
        # NOTE(review): cv2.imread returns None for unreadable files; that
        # would fail inside get_image_blob — confirm the directory only
        # contains valid images.
        im = cv2.imread(os.path.join(args.image_dir, im_file))
        blobs, im_scales = get_image_blob(im)
        assert len(im_scales) == 1, 'Only single-image batch is implemented'

        # Pack the network inputs: NCHW image tensor, (h, w, scale)
        # metadata, and empty ground-truth placeholders.
        im_data = torch.from_numpy(blobs).permute(0, 3, 1, 2).to(device)
        im_info = torch.tensor(
            [[blobs.shape[1], blobs.shape[2], im_scales[0]]]).to(device)
        gt_boxes = torch.zeros(1, 1, 5).to(device)
        num_boxes = torch.zeros(1).to(device)

        # Inference only — no autograd bookkeeping.
        with torch.set_grad_enabled(False):
            rois, cls_prob, _, _, _, _, _, _, \
                pooled_feat = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        # ROI columns 1:5 are (x1, y1, x2, y2); rescale boxes back to
        # original image coordinates.
        boxes = rois.data.cpu().numpy()[:, :, 1:5].squeeze()
        boxes /= im_scales[0]
        cls_prob = cls_prob.data.cpu().numpy().squeeze()
        pooled_feat = pooled_feat.data.cpu().numpy()

        # Keep only the best detections: for each box track the highest
        # class score that survives per-class NMS (class 0 is background).
        max_conf = np.zeros((boxes.shape[0]))
        for cls_ind in range(1, cls_prob.shape[1]):
            cls_scores = cls_prob[:, cls_ind]
            dets = np.hstack(
                (boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(cpu_nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep],
                                      cls_scores[keep], max_conf[keep])

        # Confidence-thresholded selection, clamped into
        # [MIN_BOXES, MAX_BOXES] via the overall score ranking.
        keep_boxes = np.where(max_conf >= CONF_THRESH)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]
        image_feat = pooled_feat[keep_boxes]

        if args.save_boxes:
            image_bboxes = boxes[keep_boxes]
        else:
            image_bboxes = None

        # BUG FIX: was im_file.split('.')[0], which truncates filenames
        # containing extra dots (e.g. 'img.v2.jpg' -> 'img.npy') and can
        # silently overwrite other images' features. splitext strips only
        # the real extension.
        output_file = os.path.join(args.output_dir,
                                   os.path.splitext(im_file)[0] + '.npy')
        save_features(output_file, image_feat, image_bboxes)