def _add_proposals_from_file(
    self, roidb, proposal_file, min_proposal_size, top_k, crowd_thresh
):
    """Add proposals from a proposals file to an roidb."""
    logger.info('Loading proposals from: {}'.format(proposal_file))
    with open(proposal_file, 'rb') as f:  # binary mode: pickle data
        proposals = pickle.load(f)
    id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix
    _sort_proposals(proposals, id_field)
    box_list = []
    for i, entry in enumerate(roidb):
        if i % 2500 == 0:
            logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
        boxes = proposals['boxes'][i]
        # Sanity check that these boxes are for the correct image id
        assert entry['id'] == proposals[id_field][i]
        # Remove duplicate boxes and very small boxes and then take top k
        boxes = box_utils.clip_boxes_to_image(
            boxes, entry['height'], entry['width']
        )
        keep = box_utils.unique_boxes(boxes)
        boxes = boxes[keep, :]
        keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
        boxes = boxes[keep, :]
        if top_k > 0:
            boxes = boxes[:top_k, :]
        box_list.append(boxes)
    _merge_proposal_boxes_into_roidb(roidb, box_list)
    if crowd_thresh > 0:
        _filter_crowd_proposals(roidb, crowd_thresh)
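# NOTE (added for context): the alignment between roidb[i] and
# proposals['boxes'][i] above relies on _sort_proposals having reordered
# every per-image field by image id. A minimal sketch of what that helper
# is assumed to do (the exact set of per-image fields is an assumption):
import numpy as np

def _sort_proposals(proposals, id_field):
    """Sort proposal fields in place by image id (sketch)."""
    order = np.argsort(proposals[id_field])
    for k in ('boxes', 'scores', id_field):
        if k in proposals:
            proposals[k] = [proposals[k][i] for i in order]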
def forward(self, inputs, outputs):
    up_scale = self.up_scale
    M = self.resolution
    for lvl in range(self.num_fpn_lvls):
        feat = inputs[2 * lvl].data
        rois = inputs[2 * lvl + 1].data
        num_rois = rois.shape[0]
        spatial_scale = self.spatial_scales[lvl]
        # convert from NCHW to NHWC
        feat = feat.transpose((0, 2, 3, 1))
        feat_h, feat_w = feat.shape[1], feat.shape[2]
        # pad the rois and clip them to the feature map boundary
        pad_rois = box_utils.expand_boxes(rois[:, 1:5], up_scale)
        pad_rois = pad_rois * spatial_scale
        pad_rois = box_utils.clip_boxes_to_image(pad_rois, feat_h, feat_w)
        # extract features from the pad_rois
        pad_roi_feats = np.zeros((num_rois, M, M, feat.shape[3]))
        batch_idx = rois[:, 0]
        for i in range(num_rois):
            batch_id = int(batch_idx[i])
            # cast to integer pixel indices before slicing
            pad_roi = pad_rois[i].astype(np.int32)
            pad_roi_feat = feat[
                batch_id, pad_roi[1]:pad_roi[3] + 1, pad_roi[0]:pad_roi[2] + 1, :
            ]
            pad_roi_feat_resize = cv2.resize(pad_roi_feat, (M, M))
            pad_roi_feats[i] = pad_roi_feat_resize
        pad_roi_feats = pad_roi_feats.transpose((0, 3, 1, 2))
        outputs[lvl].reshape(pad_roi_feats.shape)
        outputs[lvl].data[...] = pad_roi_feats
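# NOTE (added for context): the pad-and-clip steps above lean on
# box_utils.expand_boxes. A minimal sketch of the assumed Detectron-style
# helper, which scales (x1, y1, x2, y2) boxes about their centers:
import numpy as np

def expand_boxes(boxes, scale):
    """Expand an array of boxes by a scale factor about their centers."""
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5 * scale
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5 * scale
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5
    boxes_exp = np.zeros(boxes.shape)
    boxes_exp[:, 0] = x_c - w_half
    boxes_exp[:, 1] = y_c - h_half
    boxes_exp[:, 2] = x_c + w_half
    boxes_exp[:, 3] = y_c + h_half
    return boxes_exp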
def _add_proposals_from_file(self, roidb, proposal_file, min_proposal_size,
                             top_k):
    """Add proposals from a proposals file to an roidb."""
    logger.info('Loading proposals from: {}'.format(proposal_file))
    with open(proposal_file, 'rb') as f:  # binary mode: pickle data
        proposals = pickle.load(f)  # indexed as proposals['boxes'][seq_idx][idx]
    box_list = []
    for i, entry in enumerate(roidb):
        if i % 500 == 0:
            logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
        seq_idx = entry['seq_idx']
        idx = entry['idx']
        boxes = proposals['boxes'][seq_idx][idx]
        # Remove duplicate boxes and very small boxes and then take top k
        boxes = box_utils.clip_boxes_to_image(
            boxes, entry['height'], entry['width']
        )
        keep = box_utils.unique_boxes(boxes)
        boxes = boxes[keep, :]
        keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
        boxes = boxes[keep, :]
        if top_k > 0:
            boxes = boxes[:top_k, :]
        box_list.append(boxes)
    _merge_proposal_boxes_into_roidb(roidb, box_list)
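# NOTE (illustrative, inferred from the indexing above; not a documented
# format): this sequence variant nests boxes by sequence and frame, e.g.
import numpy as np

proposals = {
    'boxes': {
        0: {0: np.zeros((300, 4)), 1: np.zeros((300, 4))},  # seq 0, frames 0-1
    },
}
boxes = proposals['boxes'][0][1]  # (N, 4) array of (x1, y1, x2, y2) rows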
def get_context_rois(self, rois, im_info, zoom_ratio=1.1):
    """Return the rois with more context.

    - 'im_info': 2D tensor of shape (N, 3) where the three columns encode
      the input image's [height, width, scale]. Height and width are for
      the input to the network, not the original image; scale is the scale
      factor used to scale the original image to the network input size.

    - 'rpn_rois': 2D tensor of shape (R, 5), for R RPN proposals where the
      five columns encode [batch ind, x1, y1, x2, y2]. The boxes are w.r.t.
      the network input, which is a *scaled* version of the original image;
      these proposals must be scaled by 1 / scale (where scale comes from
      im_info; see above) to transform them back to the original input
      image coordinate system.
    """
    if zoom_ratio > 0:
        roi_boxes = rois[:, 1:]
        batch_indices = rois[:, [0]]  # keep 2D for the concatenate below
        roi_boxes = boxes_utils.expand_boxes(roi_boxes, zoom_ratio)
        roi_boxes = boxes_utils.clip_boxes_to_image(
            roi_boxes, im_info[0][0], im_info[0][1]
        )
        rois = np.concatenate((batch_indices, roi_boxes), axis=1)
    else:
        # Add global context, i.e. roi-pooling on the whole feature map.
        # NOTE: the image size is hardcoded to 800 here; the commented-out
        # lines below show the im_info-based alternative.
        rois[:, 1] = 0
        rois[:, 2] = 0
        rois[:, 3] = 800 - 1
        rois[:, 4] = 800 - 1
        # rois[:, 3] = im_info[0][1] - 1
        # rois[:, 4] = im_info[0][0] - 1
    return rois
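# NOTE (added for context): a minimal sketch of the assumed
# clip_boxes_to_image helper used throughout this file (Detectron-style
# code clips to the inclusive [0, width - 1] x [0, height - 1] range):
import numpy as np

def clip_boxes_to_image(boxes, height, width):
    """Clip (x1, y1, x2, y2) boxes to lie inside the image bounds."""
    boxes[:, [0, 2]] = np.minimum(width - 1.0, np.maximum(0.0, boxes[:, [0, 2]]))
    boxes[:, [1, 3]] = np.minimum(height - 1.0, np.maximum(0.0, boxes[:, [1, 3]]))
    return boxes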
def forward(self, inputs, outputs):
    data = inputs[0].data
    k_min = self.k_min
    k_max = self.k_max
    up_scale = self.up_scale
    height, width = data.shape[2], data.shape[3]
    for lvl in range(k_min, k_max + 1):
        rois = inputs[1 + lvl - k_min].data  # skip the 'data' blob
        bboxes = rois[:, 1:5]
        batch_ids = rois[:, [0]]
        # up-scale the bboxes and clip to the image boundary
        pad_bboxes = box_utils.expand_boxes(bboxes, up_scale)
        pad_bboxes = box_utils.clip_boxes_to_image(pad_bboxes, height, width)
        # add the batch_ids back to the rois
        pad_rois = np.hstack((batch_ids, pad_bboxes))
        outputs[lvl - k_min].reshape(pad_rois.shape)
        outputs[lvl - k_min].data[...] = pad_rois
    # Copy rois_idx_restore_int32 to the output; a little surgery is needed
    # to satisfy the int32 type requirement.
    rois_idx_restore_int32 = inputs[-1].data
    outputs[-1].init(
        list(rois_idx_restore_int32.shape), caffe2_pb2.TensorProto.INT32
    )
    outputs[-1].reshape(rois_idx_restore_int32.shape)
    outputs[-1].data[...] = rois_idx_restore_int32.astype(np.int32)
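# Illustrative check of the pad-then-clip step (values chosen for this
# example only), using the expand_boxes / clip_boxes_to_image sketches
# above with up_scale = 2.0 on a 100 x 100 feature map:
import numpy as np

bboxes = np.array([[40.0, 40.0, 60.0, 60.0]])   # one 20 x 20 box
pad = expand_boxes(bboxes, 2.0)                  # -> [[30., 30., 70., 70.]]
pad = clip_boxes_to_image(pad, 100, 100)         # unchanged: fully inside
pad_rois = np.hstack((np.zeros((1, 1)), pad))    # prepend batch id 0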
def add_refine_keypoints_blobs_gaussian(
    blobs, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx, data
):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary."""
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    gt_keypoints = roidb['gt_keypoints']
    # Load the kp_fg_inds generated by keypoint_rcnn.py, so we avoid the
    # issue of mismatched keypoint_rois and refined_keypoint_rois, which
    # causes big problems for training.
    kp_fg_inds = blobs['keypoint_fg_inds']
    if kp_fg_inds.shape[0] > 0:
        sampled_fg_rois = roidb['boxes'][kp_fg_inds]
        box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

        # Expand the rois
        up_scale = cfg.REFINENET.UP_SCALE
        inp_h, inp_w = data.shape[2], data.shape[3]
        pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale
        pad_fg_rois = box_utils.expand_boxes(sampled_fg_rois, up_scale)
        pad_fg_rois = box_utils.clip_boxes_to_image(
            pad_fg_rois, pad_img_h, pad_img_w
        )

        num_keypoints = gt_keypoints.shape[2]
        sampled_keypoints = -np.ones(
            (len(pad_fg_rois), gt_keypoints.shape[1], num_keypoints),
            dtype=gt_keypoints.dtype
        )
        for ii in range(len(pad_fg_rois)):
            ind = box_to_gt_ind_map[ii]
            if ind >= 0:
                sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
                assert np.sum(sampled_keypoints[ii, 2, :]) > 0
        heats, weights = keypoint_utils.keypoints_to_gaussian_heatmap_labels(
            sampled_keypoints, pad_fg_rois, M=cfg.REFINENET.KRCNN.HEATMAP_SIZE
        )
    else:
        # If there are no fg keypoint rois (it does happen). The network
        # cannot handle empty blobs, so we must provide a heatmap. We simply
        # take the first bg roi, give it an all-zero heatmap, and set its
        # weights to zero (ignore label).
        roi_inds = np.where(roidb['gt_classes'] == 0)[0]
        # pad_fg_rois is actually one random bg roi, but that's ok because
        # its weights are zeroed out below
        pad_fg_rois = roidb['boxes'][roi_inds[0]].reshape((1, -1))
        # Give it an all-zero heatmap
        M = cfg.REFINENET.KRCNN.HEATMAP_SIZE
        heats = blob_utils.zeros((1, cfg.KRCNN.NUM_KEYPOINTS, M, M))
        # Set weights to 0 (ignore label)
        weights = blob_utils.zeros((1, cfg.KRCNN.NUM_KEYPOINTS, 1))

    pad_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((pad_fg_rois.shape[0], 1))
    pad_fg_rois = np.hstack((repeated_batch_idx, pad_fg_rois))
    blobs['refined_keypoint_rois'] = pad_fg_rois
    blobs['refined_keypoint_heatmaps'] = heats
    blobs['refined_keypoint_weights'] = weights
def forward(self, inputs, outputs):
    data = inputs[0].data
    mask_probs = inputs[1].data
    mask_rois = inputs[2].data

    # whether to use a binary threshold for the indicator
    if cfg.REFINENET.USE_PERCENTTHRES_INDICATOR:
        mask_probs_reshape = mask_probs.reshape(
            mask_probs.shape[0], mask_probs.shape[1], -1
        )
        thres = int(
            cfg.REFINENET.PERCENTINDICATOR_THRES * mask_probs_reshape.shape[2]
        )
        mask_probs_sort = np.argsort(-mask_probs_reshape, axis=2)[:, :, :thres]
        mask_binary = np.zeros(mask_probs_reshape.shape, dtype=np.float32)
        for i in range(mask_probs_sort.shape[0]):
            for j in range(mask_probs_sort.shape[1]):
                mask_binary[i, j, mask_probs_sort[i, j]] = 1.
        mask_binary = mask_binary.reshape(mask_probs.shape)
        mask_probs *= mask_binary
    elif cfg.REFINENET.USE_THRES_INDICATOR:
        mask_binary = np.array(
            mask_probs > cfg.REFINENET.INDICATOR_THRES, dtype=np.float32
        )
        mask_probs *= mask_binary

    # output indicator resolution
    M = self.resolution
    up_scale = self.up_scale
    num_cls = mask_probs.shape[1]
    num_rois = mask_rois.shape[0]
    mask_indicators = np.zeros((num_rois, M, M, num_cls), dtype=np.float32)

    # prepare the data
    height, width = data.shape[2], data.shape[3]
    mask_probs_NHWC = mask_probs.transpose((0, 2, 3, 1))
    rois = mask_rois[:, 1:5]  # ignore batch_id
    pad_rois = box_utils.expand_boxes(rois, up_scale)
    pad_rois = box_utils.clip_boxes_to_image(pad_rois, height, width)
    # calculate converted coordinates
    converted_coords = box_utils.convert_coordinate(rois, pad_rois, M)
    for i in range(num_rois):
        mask_prob = mask_probs_NHWC[i]
        coords = converted_coords[i]
        shape = (coords[2] - coords[0] + 1, coords[3] - coords[1] + 1)  # (w, h)
        if shape[0] < 1 or shape[1] < 1:
            continue
        mask_prob_resize = cv2.resize(mask_prob, shape)
        if mask_prob_resize.ndim == 2:  # cv2.resize drops a singleton channel
            mask_prob_resize = mask_prob_resize[:, :, np.newaxis]
        mask_indicators[i, coords[1]:coords[3] + 1, coords[0]:coords[2] + 1] = \
            mask_prob_resize

    mask_indicators = mask_indicators.transpose((0, 3, 1, 2))
    outputs[0].reshape(mask_indicators.shape)
    outputs[0].data[...] = mask_indicators
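# NOTE (optional rewrite, not part of the original op): the nested loops
# that build mask_binary above can be vectorized with np.put_along_axis,
# keeping the top `thres` activations per (roi, class) map:
import numpy as np

def topk_indicator(mask_probs_reshape, thres):
    """Return a 0/1 array marking the `thres` largest entries along axis 2."""
    idx = np.argsort(-mask_probs_reshape, axis=2)[:, :, :thres]
    mask_binary = np.zeros(mask_probs_reshape.shape, dtype=np.float32)
    np.put_along_axis(mask_binary, idx, 1.0, axis=2)
    return mask_binary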
def _add_proposals_from_file(
    self, roidb, proposal_file, min_proposal_size, top_k, crowd_thresh
):
    """Add proposals from a proposals file to an roidb (Python 3 port)."""
    logger.info('Loading proposals from: {}'.format(proposal_file))
    # Pickles written by Python 2 need encoding='bytes'; decode the
    # byte-string keys ([b'cfg', b'boxes', b'scores', b'ids', b'names'])
    # back to str.
    with open(proposal_file, 'rb') as f:
        proposals_raw = pickle.load(f, encoding='bytes')
    proposals = {k.decode(): v for k, v in proposals_raw.items()}
    id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix
    _sort_proposals(proposals, id_field)
    box_list = []
    for i, entry in enumerate(roidb):
        if i % 2500 == 0:
            logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
        boxes = proposals['boxes'][i]
        # Sanity check that these boxes are for the correct image id
        assert entry['id'] == proposals[id_field][i]
        boxes = box_utils.clip_boxes_to_image(
            boxes, entry['height'], entry['width']
        )
        # NOTE: deduplication, small-box filtering and the top-k cut are
        # disabled in this variant; all clipped proposals are kept.
        # keep = box_utils.unique_boxes(boxes)
        # boxes = boxes[keep, :]
        # keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
        # boxes = boxes[keep, :]
        # if top_k > 0:
        #     boxes = boxes[:top_k, :]
        box_list.append(boxes)
    _merge_proposal_boxes_into_roidb(roidb, box_list)
    if crowd_thresh > 0:
        _filter_crowd_proposals(roidb, crowd_thresh)
def forward(self, inputs, outputs):
    data = inputs[0].data
    rois = inputs[1].data
    up_scale = self.up_scale
    height, width = data.shape[2], data.shape[3]
    bboxes = rois[:, 1:5]
    batch_ids = rois[:, [0]]
    # up-scale the bboxes and clip to the image boundary
    pad_bboxes = box_utils.expand_boxes(bboxes, up_scale)
    pad_bboxes = box_utils.clip_boxes_to_image(pad_bboxes, height, width)
    # add the batch_ids back to the rois
    pad_rois = np.hstack((batch_ids, pad_bboxes))
    outputs[0].reshape(pad_rois.shape)
    outputs[0].data[...] = pad_rois
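# Illustrative standalone test of the op above with minimal stand-in blobs.
# The _Blob and _PadRoIsOp classes are test doubles invented for this
# sketch, not the Caffe2 API; box_utils helpers are assumed importable.
import numpy as np

class _Blob(object):
    """Minimal stand-in for a Caffe2 blob (test double)."""
    def __init__(self, data=None):
        self.data = data
    def reshape(self, shape):
        self.data = np.zeros(shape)

class _PadRoIsOp(object):
    """Hypothetical wrapper giving forward() its `self.up_scale`."""
    def __init__(self, up_scale):
        self.up_scale = up_scale

_PadRoIsOp.forward = forward  # reuse the function defined above

inputs = [_Blob(np.zeros((1, 3, 200, 200))),
          _Blob(np.array([[0.0, 50.0, 50.0, 150.0, 150.0]]))]
outputs = [_Blob()]
_PadRoIsOp(up_scale=1.5).forward(inputs, outputs)
print(outputs[0].data)  # [[0., 25., 25., 175., 175.]]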
def forward(self, inputs, outputs):
    data = inputs[0].data
    keypoint_probs = inputs[1].data
    keypoint_rois = inputs[2].data

    # output indicator resolution
    M = self.resolution
    up_scale = self.up_scale
    num_rois = keypoint_rois.shape[0]
    num_keypoints = keypoint_probs.shape[1]

    # first expand the keypoint rois
    height, width = data.shape[2], data.shape[3]
    pad_rois = box_utils.expand_boxes(keypoint_rois[:, 1:5], up_scale)
    pad_rois = box_utils.clip_boxes_to_image(pad_rois, height, width)

    # get keypoint predictions and their probs
    # output shape is (#rois, 3, #keypoints) and 3 means (x, y, prob)
    pred_rois = keypoint_utils.probs_to_keypoints(keypoint_probs, keypoint_rois)
    # map keypoint positions to the pad_rois
    # output shape is (#rois, #keypoints), with locations flattened out
    locations_on_pad_rois, _ = keypoint_utils.keypoints_to_heatmap_labels(
        pred_rois, pad_rois, M
    )
    locations_on_pad_rois = locations_on_pad_rois.astype(np.int32)

    # now generate the keypoint indicators
    keypoint_indicators = blob_utils.zeros((num_rois, num_keypoints, M**2))
    for i in range(num_rois):
        locations = locations_on_pad_rois[i]  # shape (#keypoints,)
        for k in range(num_keypoints):
            keypoint_indicators[i, k, locations[k]] = pred_rois[i, 2, k]
    # reshape to 4 dimensions
    keypoint_indicators = keypoint_indicators.reshape(
        (num_rois, num_keypoints, M, M)
    )
    outputs[0].reshape(keypoint_indicators.shape)
    outputs[0].data[...] = keypoint_indicators
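# Illustrative standalone version of the indicator construction above:
# scatter one flattened heatmap location per keypoint into an M x M grid,
# weighted by the keypoint's predicted probability (values invented):
import numpy as np

M, num_keypoints = 4, 2
locations = np.array([5, 10])      # flattened indices into the M*M grid
probs = np.array([0.9, 0.4])       # plays the role of pred_rois[i, 2, :]
indicator = np.zeros((num_keypoints, M * M))
indicator[np.arange(num_keypoints), locations] = probs
indicator = indicator.reshape((num_keypoints, M, M))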
def add_refine_local_mask_blobs(blobs, sampled_boxes, roidb, im_scale,
                                batch_idx, data):
    """Add RefineNet Mask specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.REFINENET.RESOLUTION
    up_scale = cfg.REFINENET.UP_SCALE
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    gt_classes = roidb['gt_classes'][polys_gt_inds]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    # Define size variables
    inp_h, inp_w = data.shape[2], data.shape[3]
    pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False)
        )
        # Map from each fg roi to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Expand the foreground rois by a factor of up_scale and
        # clip by the padded image boundary
        pad_rois_fg = box_utils.expand_boxes(rois_fg, up_scale)
        pad_rois_fg = box_utils.clip_boxes_to_image(
            pad_rois_fg, pad_img_h, pad_img_w
        )

        if cfg.REFINENET.ONLY_USE_CROWDED_SAMPLES:
            # Only use crowded samples to train the RefineNet
            THRES = cfg.REFINENET.OVERLAP_THRESHOLD
            for i in range(rois_fg.shape[0]):
                overlap = overlaps_bbfg_bbpolys[i]
                if np.sum(overlap > THRES) > 1:
                    # overlaps multiple instances: use it for training
                    fg_polys_ind = fg_polys_inds[i]
                    poly_gt = polys_gt[fg_polys_ind]
                    pad_roi_fg = pad_rois_fg[i]
                    # Rasterize the portion of the polygon mask within the
                    # given fg roi to an M x M binary image
                    mask = segm_utils.polys_to_mask_wrt_box(
                        poly_gt, pad_roi_fg, M
                    )
                    mask = np.array(mask > 0, dtype=np.int32)  # ensure binary
                    masks[i, :] = np.reshape(mask, M**2)
                else:
                    # Only one instance, so set the label to -1 (ignored)
                    masks[i, :] = -1
                    mask_class_labels[i] = 0
        elif cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
            loss_weights = blob_utils.ones((rois_fg.shape[0], ))
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                pad_roi_fg = pad_rois_fg[i]
                class_label = mask_class_labels[i]
                # Rasterize the portion of the polygon mask within the given
                # fg roi to an M x M binary image
                mask = segm_utils.polys_to_mask_wrt_box(poly_gt, pad_roi_fg, M)
                mask = np.array(mask > 0, dtype=np.int32)  # ensure binary
                masks[i, :] = np.reshape(mask, M**2)
                # Now determine the weight for each roi: if any other
                # instance of the same class falls inside this RoI, we
                # expect it to be a hard sample and assign it a larger weight
                for j in range(len(polys_gt)):
                    if j == fg_polys_ind:
                        continue
                    if gt_classes[j] == class_label:  # only same class counts
                        mask = segm_utils.polys_to_mask_wrt_box(
                            polys_gt[j], pad_roi_fg, M
                        )
                        # check if any part falls inside the bbox
                        is_inside_bbox = (np.sum(mask) > 0)
                        if is_inside_bbox:
                            loss_weights[i] = cfg.REFINENET.WEIGHT_LOSS_CROWDED
                            break  # early stop
        else:
            # add fg targets
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                pad_roi_fg = pad_rois_fg[i]
                # Rasterize the portion of the polygon mask within the given
                # fg roi to an M x M binary image
                mask = segm_utils.polys_to_mask_wrt_box(poly_gt, pad_roi_fg, M)
                mask = np.array(mask > 0, dtype=np.int32)  # ensure binary
                masks[i, :] = np.reshape(mask, M**2)
    else:
        # If there are no fg masks (it does happen). The network cannot
        # handle empty blobs, so we must provide a mask. We simply take the
        # first bg roi, give it an all -1's mask (ignore label), and label
        # it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # pad_rois_fg is actually one background roi, but that's ok because
        # its mask is all -1 (ignore label)
        pad_rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # Give it an all -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # Label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale pad_rois_fg and format as (batch_idx, x1, y1, x2, y2)
    pad_rois_fg = pad_rois_fg.astype(np.float32) * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((pad_rois_fg.shape[0], 1))
    pad_rois_fg = np.hstack((repeated_batch_idx, pad_rois_fg)).astype(np.int32)

    # Update blobs dict with Refine-Net blobs
    blobs['refined_mask_rois'] = pad_rois_fg
    blobs['roi_has_refined_mask_int32'] = roi_has_mask
    blobs['refined_masks_int32'] = masks
    if cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
        blobs['loss_weights'] = loss_weights
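# NOTE (added for context): segm_utils.polys_to_mask_wrt_box rasterizes
# polygons into an M x M grid in box-relative coordinates. A sketch of the
# assumed Detectron-style helper, built on pycocotools:
import numpy as np
import pycocotools.mask as mask_util

def polys_to_mask_wrt_box(polygons, box, M):
    """Rasterize `polygons` into an M x M binary mask w.r.t. `box`."""
    w = np.maximum(box[2] - box[0], 1)
    h = np.maximum(box[3] - box[1], 1)
    polygons_norm = []
    for poly in polygons:
        p = np.array(poly, dtype=np.float32)
        p[0::2] = (p[0::2] - box[0]) * M / w   # x -> [0, M)
        p[1::2] = (p[1::2] - box[1]) * M / h   # y -> [0, M)
        polygons_norm.append(p)
    rle = mask_util.frPyObjects(polygons_norm, M, M)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    mask = np.sum(mask, axis=2)                # union of polygon parts
    return np.array(mask > 0, dtype=np.float32)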
def __init__(self, root, year='2007', image_set='trainval', download=False,
             transforms=None):
    self.root = root
    self.transforms = transforms
    self.year = year
    if year == "2007" and image_set == "test":
        year = "2007-test"
    self.url = DATASET_YEAR_DICT[year]['url']
    self.filename = DATASET_YEAR_DICT[year]['filename']
    self.md5 = DATASET_YEAR_DICT[year]['md5']
    valid_sets = ["train", "trainval", "val"]
    if year == "2007-test":
        valid_sets.append("test")
    self.image_set = verify_str_arg(image_set, "image_set", valid_sets)

    base_dir = DATASET_YEAR_DICT[year]['base_dir']
    voc_root = os.path.join(self.root, base_dir)
    image_dir = os.path.join(voc_root, 'JPEGImages')
    annotation_dir = os.path.join(voc_root, 'Annotations')

    if download:
        download_extract(self.url, self.root, self.filename, self.md5)

    if not os.path.isdir(voc_root):
        raise RuntimeError('Dataset not found or corrupted. '
                           'You can use download=True to download it')

    splits_dir = os.path.join(voc_root, 'ImageSets/Main')
    split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')
    with open(split_f, "r") as f:
        file_names = [x.strip() for x in f.readlines()]

    self.class_labels = [
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
        'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
        'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
    ]
    self.num_classes = len(self.class_labels)

    self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
    annotations = [os.path.join(annotation_dir, x + ".xml") for x in file_names]
    assert len(self.images) == len(annotations)

    # load annotations
    self.annotations = []
    for i, ann_path in enumerate(annotations):
        raw_annot = self.parse_voc_xml(ET.parse(ann_path).getroot())['annotation']
        self.annotations.append({
            'img_key': int(raw_annot['filename'].replace('.jpg', '')),
            'width': int(raw_annot['size']['width']),
            'height': int(raw_annot['size']['height']),
            'object': raw_annot['object']
        })

    # TODO: make this path handling cleaner
    ss_data = os.path.join(
        self.root, 'selective_search_data',
        'voc_' + self.year + '_' + self.image_set + '.pkl'
    )
    with open(ss_data, 'rb') as f:
        proposals = pickle.load(f)
    sort_proposals(proposals, 'indexes')

    self.proposals = []
    for i, boxes in enumerate(proposals['boxes']):
        if i % 2500 == 0:
            logger.info(' {:d}/{:d}'.format(i + 1, len(proposals['boxes'])))
        annotation = self.annotations[i]
        assert annotation['img_key'] == proposals['indexes'][i]
        # Remove duplicate boxes and very small boxes and then take top k
        boxes = box_utils.clip_boxes_to_image(
            boxes, annotation['height'], annotation['width']
        )
        keep = box_utils.unique_boxes(boxes)
        boxes = boxes[keep, :]
        keep = box_utils.filter_small_boxes(boxes, cfg.FAST_RCNN.MIN_PROPOSAL_SIZE)
        boxes = boxes[keep, :]
        if cfg.FAST_RCNN.TOP_K > 0:
            boxes = boxes[:cfg.FAST_RCNN.TOP_K, :]
        self.proposals.append(boxes.astype(np.float64))
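# Illustrative usage of the dataset defined above (the enclosing class name
# `VOCDetection` and the on-disk layout are assumptions of this sketch):
#
#   dataset = VOCDetection(root='data/', year='2007', image_set='trainval')
#   print(dataset.num_classes)                             # 20
#   print(len(dataset.images) == len(dataset.proposals))   # True
#
# The constructor expects selective search proposals at
# <root>/selective_search_data/voc_<year>_<image_set>.pkl.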