def __getitem__(self, index):
    """Return a list of per-frame minibatch blobs for one video clip.

    Picks ``cfg.VIDEO_FRAME`` consecutive frame indices around the middle
    of the video, builds a minibatch for each frame, squeezes the leading
    batch dimension, and serializes each frame's roidb.
    """
    # Number of frames stored under this video key.
    video_len = self.key_len[self._roidb_keys[index]]
    if cfg.VIDEO_FRAME == 1:
        duration = [0]
    elif cfg.VIDEO_FRAME % 2 == 1:
        # Odd-length window of exactly VIDEO_FRAME frames.
        # NOTE(review): this window is shifted one frame left of the exact
        # center (starts at mid - k - 1) — confirm the offset is intentional.
        duration = range(video_len // 2 - cfg.VIDEO_FRAME // 2 - 1,
                         video_len // 2 + cfg.VIDEO_FRAME // 2)
    else:
        assert cfg.VIDEO_FRAME % 2 == 0
        # Even-length window, symmetric around the middle frame.
        duration = range(video_len // 2 - cfg.VIDEO_FRAME // 2,
                         video_len // 2 + cfg.VIDEO_FRAME // 2)
    assert len(duration) == cfg.VIDEO_FRAME
    blobs_list = []
    for i in duration:
        # Per-frame roidb entries are keyed as "<video_key>|<frame_index>".
        single_db = self._roidb[self._roidb_keys[index] + "|{}".format(i)]
        blobs, valid = get_minibatch(single_db)
        for key in blobs:
            if (key != 'roidb') and (key != 'dataset_name'):
                # Minibatch size is 1; drop the batch dimension.
                blobs[key] = blobs[key].squeeze(axis=0)
        blobs['roidb'] = blob_utils.serialize(blobs['roidb'])
        blobs_list.append(blobs)
    return blobs_list
def __getitem__(self, index_tuple):
    """Build training blobs for one roidb entry.

    ``index_tuple`` is ``(index, ratio)``: the roidb index plus the target
    aspect ratio used when the image needs cropping.
    """
    idx, aspect_ratio = index_tuple
    db_entry = self._roidb[idx]
    blobs, valid = get_minibatch([db_entry])
    # TODO: Check if minibatch is valid? If not, abandon it.
    # Need to change _worker_loop in torch.utils.data.dataloader.py.
    # Minibatch size is 1 here, so strip the leading batch dimension.
    for name in blobs:
        if name != 'roidb':
            blobs[name] = blobs[name].squeeze(axis=0)
    if db_entry['need_crop']:
        self.crop_data(blobs, aspect_ratio)
    # Drop degenerate ground-truth boxes (zero width or zero height) and
    # keep every per-box annotation array in sync.
    roi = blobs['roidb'][0]
    gt_boxes = roi['boxes']
    degenerate = (gt_boxes[:, 0] == gt_boxes[:, 2]) | \
                 (gt_boxes[:, 1] == gt_boxes[:, 3])
    keep = np.nonzero(~degenerate)[0]
    if len(keep) < len(gt_boxes):
        for name in ('boxes', 'precomp_keypoints', 'gt_classes', 'seg_areas',
                     'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map',
                     'gt_keypoints', 'gt_actions', 'gt_role_id'):
            if name in roi:
                roi[name] = roi[name][keep]
        roi['segms'] = [roi['segms'][k] for k in keep]
    # CHECK: maybe we can serialize in collate_fn
    blobs['roidb'] = blob_utils.serialize(blobs['roidb'])
    return blobs
def __getitem__(self, index_tuple):
    """Build training blobs for one roidb entry (``(index, ratio)`` tuple).

    The roidb is only serialized when neither semantic segmentation nor
    disparity heads are enabled.
    """
    index, ratio = index_tuple
    single_db = [self._roidb[index]]
    blobs, valid = get_minibatch(single_db)
    #cv2.imwrite('semseg.png',blobs['semseg_label_0'][0])
    #TODO: Check if minibatch is valid ? If not, abandon it.
    # Need to change _worker_loop in torch.utils.data.dataloader.py.
    # Squeeze batch dim ('roidb' and 'image_name' are not batched arrays).
    for key in blobs:
        if key != 'roidb' and key != 'image_name':
            blobs[key] = blobs[key].squeeze(axis=0)
    if self._roidb[index]['need_crop']:
        self.crop_data(blobs, ratio)
    # Check bounding box: drop boxes with zero width or zero height and keep
    # the per-box annotation arrays aligned.
    entry = blobs['roidb'][0]
    boxes = entry['boxes']
    invalid = (boxes[:, 0] == boxes[:, 2]) | (boxes[:, 1] == boxes[:, 3])
    valid_inds = np.nonzero(~invalid)[0]
    if len(valid_inds) < len(boxes):
        for key in [
                'boxes', 'gt_classes', 'seg_areas', 'gt_overlaps', 'is_crowd',
                'box_to_gt_ind_map', 'gt_keypoints'
        ]:
            if key in entry:
                entry[key] = entry[key][valid_inds]
        entry['segms'] = [entry['segms'][ind] for ind in valid_inds]
    # Downstream SEM/DISP code presumably needs the raw roidb, so only
    # serialize when both are off — TODO confirm against the consumers.
    if not cfg.SEM.SEM_ON and not cfg.DISP.DISP_ON:
        blobs['roidb'] = blob_utils.serialize(
            blobs['roidb'])  # CHECK: maybe we can serialize in collate_fn
    #print(blobs.keys())
    return blobs
def __getitem__(self, index_tuple):
    """Build training blobs for one roidb entry (``(index, ratio)`` tuple).

    Cropping and degenerate-box filtering are only applied when RPN is on.
    """
    # each time only one roidb go through this
    index, ratio = index_tuple
    single_db = [self._roidb[index]]
    blobs, valid = get_minibatch(single_db)
    #TODO: Check if minibatch is valid ? If not, abandon it.
    # Squeeze batch dim — guarded, so blobs without a singleton leading
    # dimension are left untouched.
    for key in blobs:
        if key != 'roidb':
            if blobs[key].shape[0] == 1:
                blobs[key] = blobs[key].squeeze(axis=0)
    if cfg.RPN.RPN_ON:
        if self._roidb[index]['need_crop']:
            self.crop_data(blobs, ratio)
        # Check bounding box: remove boxes with zero width or zero height
        # and keep the per-box annotation arrays aligned.
        entry = blobs['roidb'][0]
        boxes = entry['boxes']
        invalid = (boxes[:, 0] == boxes[:, 2]) | (boxes[:, 1] == boxes[:, 3])
        valid_inds = np.nonzero(~ invalid)[0]
        if len(valid_inds) < len(boxes):
            for key in ['boxes', 'gt_classes', 'seg_areas', 'gt_overlaps',
                        'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints']:
                if key in entry:
                    entry[key] = entry[key][valid_inds]
            entry['segms'] = [entry['segms'][ind] for ind in valid_inds]
    blobs['roidb'] = blob_utils.serialize(
        blobs['roidb'])  # CHECK: maybe we can serialize in collate_fn
    return blobs
def __getitem__(self, index_tuple):
    """Build training blobs for one roidb entry (``(index, ratio)`` tuple).

    This variant passes the dataset's transform and valid keys into
    ``get_minibatch`` and supports an optional random training crop.
    """
    index, ratio = index_tuple
    single_db = [self._roidb[index]]
    blobs, valid = get_minibatch(single_db, self.transform, self.valid_keys)
    # TODO: Check if minibatch is valid ? If not, abandon it.
    # Need to change _worker_loop in torch.utils.data.dataloader.py.
    # Squeeze batch dim (minibatch size is 1).
    for key in blobs:
        if key != 'roidb':
            blobs[key] = blobs[key].squeeze(axis=0)
    if self._roidb[index]['need_crop']:
        self.crop_data(blobs, ratio)
    # Check bounding box: drop boxes with zero width or zero height and keep
    # the per-box annotation arrays aligned.
    entry = blobs['roidb'][0]
    boxes = entry['boxes']
    invalid = (boxes[:, 0] == boxes[:, 2]) | (boxes[:, 1] == boxes[:, 3])
    valid_inds = np.nonzero(~invalid)[0]
    if len(valid_inds) < len(boxes):
        for key in [
                'boxes', 'gt_classes', 'seg_areas', 'gt_overlaps', 'is_crowd',
                'box_to_gt_ind_map', 'gt_keypoints'
        ]:
            if key in entry:
                entry[key] = entry[key][valid_inds]
        entry['segms'] = [entry['segms'][ind] for ind in valid_inds]
    if cfg.TRAIN.RANDOM_CROP > 0:
        # Preserve the uncropped segmentations once so repeated crops of the
        # same entry can start from the originals.
        if 'segms_origin' not in blobs['roidb'][0].keys():
            blobs['roidb'][0]['segms_origin'] = blobs['roidb'][0][
                'segms'].copy()
        self.crop_data_train(blobs)
        # Check bounding box, actually, it is not necessary...
        # entry = blobs['roidb'][0]
        # boxes = entry['boxes']
        # invalid = (boxes[:, 0] < 0) | (boxes[:, 2] < 0)
        # valid_inds = np.nonzero(~ invalid)[0]
        # if len(valid_inds) < len(boxes):
        #     for key in ['boxes', 'gt_classes', 'seg_areas', 'gt_overlaps', 'is_crowd']:
        #         if key in entry:
        #             entry[key] = entry[key][valid_inds]
        #     # entry['box_to_gt_ind_map'] = np.array(list(range(len(valid_inds)))).astype(int)
        #     entry['segms'] = [entry['segms'][ind] for ind in valid_inds]
    blobs['roidb'] = blob_utils.serialize(
        blobs['roidb'])  # CHECK: maybe we can serialize in collate_fn
    return blobs
def __getitem__(self, index_tuple):
    """Fetch minibatch blobs for one roidb entry.

    ``index_tuple`` is ``(index, scale)`` — the roidb index and the image
    scale passed through to ``get_minibatch``.
    """
    idx, target_scale = index_tuple
    blobs, valid = get_minibatch([self._roidb[idx]], target_scale)
    #TODO: Check if minibatch is valid ? If not, abandon it.
    # Need to change _worker_loop in torch.utils.data.dataloader.py.
    # Strip the singleton batch dimension from every array-valued blob;
    # 'roidb' and 'data_flow' are left as-is.
    unbatched = ('roidb', 'data_flow')
    for name in blobs:
        if name not in unbatched:
            blobs[name] = blobs[name].squeeze(axis=0)
    # CHECK: maybe we can serialize in collate_fn
    blobs['roidb'] = blob_utils.serialize(blobs['roidb'])
    return blobs
def add_rpn_blobs(blobs, im_scales, roidb): """Add blobs needed training RPN-only and end-to-end Faster R-CNN models.""" # Temporal dimensions of the output T = roidb[0]['boxes'].shape[-1] // 4 # Following vars are only used in FPN case, but keeping it out of the "if" # condition, so as to allow for _populate_rpn_blobs to work (it will pass # these dummy values and not use them) foas = [] k_max = cfg.FPN.RPN_MAX_LEVEL k_min = cfg.FPN.RPN_MIN_LEVEL if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN: # RPN applied to many feature levels, as in the FPN paper for lvl in range(k_min, k_max + 1): field_stride = 2.**lvl anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ) anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS foa = _get_field_of_anchors(field_stride, anchor_sizes, anchor_aspect_ratios, T) foas.append(foa) all_anchors = np.concatenate([f.field_of_anchors for f in foas]) else: foa = _get_field_of_anchors(cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS, T) all_anchors = foa.field_of_anchors for im_i, entry in enumerate(roidb): _populate_rpn_blobs(entry, im_scales[im_i], blobs, all_anchors, foas, foa, k_min, k_max) for k, v in blobs.items(): if isinstance(v, list) and len(v) > 0: blobs[k] = np.concatenate(v) valid_keys = [ 'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes', 'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints' ] minimal_roidb = [{} for _ in range(len(roidb))] for i, e in enumerate(roidb): for k in valid_keys: if k in e: minimal_roidb[i][k] = e[k] blobs['roidb'] = blob_utils.serialize(minimal_roidb) # Always return valid=True, since RPN minibatches are valid by design return True
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models."""
    # Temporal dimension: boxes store 4 coordinates per time step.
    T = roidb[0]['boxes'].shape[-1] // 4
    # k_min / k_max / foas are only meaningful for FPN, but are always bound
    # so _populate_rpn_blobs can receive them (as dummies in the non-FPN case).
    k_max = cfg.FPN.RPN_MAX_LEVEL
    k_min = cfg.FPN.RPN_MIN_LEVEL
    foas = []
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # Multi-level RPN (FPN paper): one field of anchors per level.
        for lvl in range(k_min, k_max + 1):
            stride = 2. ** lvl
            sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2. ** (lvl - k_min), )
            foas.append(
                _get_field_of_anchors(stride, sizes,
                                      cfg.FPN.RPN_ASPECT_RATIOS, T))
        # Bind `foa` to the top level's field of anchors, matching the value
        # the loop leaves behind; it is still handed to _populate_rpn_blobs.
        foa = foas[-1]
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        # Classical single-level RPN.
        foa = _get_field_of_anchors(cfg.RPN.STRIDE, cfg.RPN.SIZES,
                                    cfg.RPN.ASPECT_RATIOS, T)
        all_anchors = foa.field_of_anchors
    for im_i, entry in enumerate(roidb):
        _populate_rpn_blobs(entry, im_scales[im_i], blobs, all_anchors, foas,
                            foa, k_min, k_max)
    # Collapse per-image lists of arrays into single arrays.
    for blob_name, blob_val in blobs.items():
        if isinstance(blob_val, list) and blob_val:
            blobs[blob_name] = np.concatenate(blob_val)
    # Strip each roidb entry down to the fields consumed downstream.
    wanted = ('has_visible_keypoints', 'boxes', 'segms', 'seg_areas',
              'gt_classes', 'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map',
              'gt_keypoints')
    minimal_roidb = [{k: e[k] for k in wanted if k in e} for e in roidb]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)
    # RPN minibatches are valid by construction.
    return True
def __getitem__(self, index_tuple):
    """Build training blobs for one roidb entry (``(index, ratio)`` tuple).

    When RPN is disabled the raw roidb entry is returned unchanged.
    """
    index, ratio = index_tuple
    single_db = [self._roidb[index]]
    if cfg.RPN.RPN_ON:
        blobs, valid = get_minibatch(single_db)
        # Squeeze batch dim
        # for key in blobs:
        #     print (key, len(blobs[key]), '--------lala')
        for key in blobs:
            if key != 'roidb':
                blobs[key] = blobs[key].squeeze(axis=0)
        if self._roidb[index]['need_crop']:
            self.crop_data(blobs, ratio)
        # Check bounding box: drop boxes with zero width or zero height and
        # keep the per-box annotation arrays aligned.
        entry = blobs['roidb'][0]
        boxes = entry['boxes']
        invalid = (boxes[:, 0] == boxes[:, 2]) | (boxes[:, 1] == boxes[:, 3])
        valid_inds = np.nonzero(~invalid)[0]
        if len(valid_inds) < len(boxes):
            for key in [
                    'boxes', 'gt_classes', 'seg_areas', 'gt_overlaps',
                    'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
            ]:
                if key in entry:
                    entry[key] = entry[key][valid_inds]
            entry['segms'] = [
                entry['segms'][ind] for ind in valid_inds
            ]
        blobs['roidb'] = blob_utils.serialize(
            blobs['roidb'])  # CHECK: maybe we can serialize in collate_fn
        return blobs
    else:
        return self._roidb[index]
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            #field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            # NOTE(review): the stride is capped at 16 here instead of the
            # usual 2**lvl (see commented-out line above) — presumably a
            # deliberate experiment; confirm before reusing this variant.
            field_stride = min(16., 2.**lvl)
            #anchor_sizes = (min(128., cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min)), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors
    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        # Foreground, non-crowd ground-truth boxes only.
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        )[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        # TODO(rbg): gt_boxes is poorly named;
        # should be something like 'gt_rois_info'
        gt_boxes = blob_utils.zeros((len(gt_inds), 6))
        gt_boxes[:, 0] = im_i  # batch inds
        gt_boxes[:, 1:5] = gt_rois
        gt_boxes[:, 5] = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)
        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois
            )
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois
            )
            for k, v in rpn_blobs.items():
                blobs[k].append(v)
    # Collapse per-image lists of arrays into single concatenated arrays.
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)
    # Keep only the roidb fields consumed downstream.
    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)
    # Always return valid=True, since RPN minibatches are valid by design
    return True
def im_get_det_rels(model, im, dataset_name, target_scale, target_max_size,
                    boxes=None, roidb=None, use_gt_labels=False,
                    include_feat=False):
    """Run the relation-detection model on one image and unpack its outputs.

    Builds the input blobs (with optional box dedup and FPN multi-level
    rois), calls ``model``, and converts the subject/object/predicate
    predictions to numpy, rescaling boxes back to the original image size.
    Returns a dict of numpy arrays, or all-``None`` fields when the model
    produced no subject rois.
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Deduplicate rois by hashing quantized coordinates.
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]
    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')
    if cfg.PYTORCH_VERSION_LESS_THAN_040:
        # Legacy pre-0.4 PyTorch requires wrapping in volatile Variables.
        inputs['data'] = [
            Variable(torch.from_numpy(inputs['data']), volatile=True)
        ]
        inputs['im_info'] = [
            Variable(torch.from_numpy(inputs['im_info']), volatile=True)
        ]
    else:
        inputs['data'] = [torch.from_numpy(inputs['data'])]
        inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]
    if dataset_name is not None:
        inputs['dataset_name'] = [blob_utils.serialize(dataset_name)]
    if roidb is not None:
        inputs['roidb'] = [roidb]
    if use_gt_labels:
        inputs['use_gt_labels'] = [use_gt_labels]
    if include_feat:
        inputs['include_feat'] = [include_feat]
    return_dict = model(**inputs)
    return_dict2 = {}
    if return_dict['sbj_rois'] is not None:
        # Column 0 of the rois is the batch index; columns 1:5 are the box.
        # Divide by im_scale to map back to original image coordinates.
        sbj_boxes = return_dict['sbj_rois'].data.cpu().numpy()[:, 1:5] / im_scale
        # Labels are shifted by one: 0 is background inside the model.
        sbj_labels = return_dict['sbj_labels'].data.cpu().numpy() - 1
        sbj_scores = return_dict['sbj_scores'].data.cpu().numpy()
        obj_boxes = return_dict['obj_rois'].data.cpu().numpy()[:, 1:5] / im_scale
        obj_labels = return_dict['obj_labels'].data.cpu().numpy() - 1
        obj_scores = return_dict['obj_scores'].data.cpu().numpy()
        prd_scores = return_dict['prd_scores'].data.cpu().numpy()
        sbj_scores_out = return_dict['sbj_scores_out'].data.cpu().numpy()
        obj_scores_out = return_dict['obj_scores_out'].data.cpu().numpy()
        # att_all = return_dict["att_all"]
        if include_feat:
            sbj_feat = return_dict['sbj_feat'].data.cpu().numpy()
            obj_feat = return_dict['obj_feat'].data.cpu().numpy()
            prd_feat = return_dict['prd_feat'].data.cpu().numpy()
        if cfg.MODEL.USE_EMBED:
            prd_scores_embd = return_dict['prd_embd_scores'].data.cpu().numpy()
        return_dict2 = dict(sbj_boxes=sbj_boxes,
                            sbj_labels=sbj_labels.astype(np.int32, copy=False),
                            sbj_scores=sbj_scores,
                            obj_boxes=obj_boxes,
                            obj_labels=obj_labels.astype(np.int32, copy=False),
                            obj_scores=obj_scores,
                            prd_scores=prd_scores,
                            sbj_scores_out=sbj_scores_out,
                            obj_scores_out=obj_scores_out
                            # att_all = att_all
                            )
        if include_feat:
            return_dict2['sbj_feat'] = sbj_feat
            return_dict2['obj_feat'] = obj_feat
            return_dict2['prd_feat'] = prd_feat
        if cfg.MODEL.USE_EMBED:
            return_dict2['prd_scores_embd'] = prd_scores_embd
    else:
        # No detections: keep the same keys, all None, so callers can
        # handle both cases uniformly.
        return_dict2 = dict(sbj_boxes=None, sbj_labels=None, sbj_scores=None,
                            obj_boxes=None, obj_labels=None, obj_scores=None,
                            prd_scores=None, sbj_scores_out=None,
                            obj_scores_out=None)
    return return_dict2
def CollectAndDistributeFpnRpnProposalsRec(roidb, im_info):
    """Merge RPN proposals generated at multiple FPN levels and then
    distribute those proposals to their appropriate FPN levels. An anchor
    at one FPN level may predict an RoI that will map to another level,
    hence the need to redistribute the proposals.

    This function assumes standard blob names for input and output blobs.

    Input blobs: [rpn_rois_fpn<min>, ..., rpn_rois_fpn<max>,
                  rpn_roi_probs_fpn<min>, ..., rpn_roi_probs_fpn<max>]
    - rpn_rois_fpn<i> are the RPN proposals for FPN level i; see rpn_rois
      documentation from GenerateProposals.
    - rpn_roi_probs_fpn<i> are the RPN objectness probabilities for FPN
      level i; see rpn_roi_probs documentation from GenerateProposals.

    If used during training, then the input blobs will also include:
    [roidb, im_info] (see GenerateProposalLabels).

    Output blobs: [rois_fpn<min>, ..., rois_rpn<max>, rois,
                   rois_idx_restore]
    - rois_fpn<i> are the RPN proposals for FPN level i
    - rois_idx_restore is a permutation on the concatenation of all
      rois_fpn<i>, i=min...max, such that when applied the RPN RoIs are
      restored to their original order in the input blobs.

    If used during training, then the output blobs will also include:
    [labels, bbox_targets, bbox_inside_weights, bbox_outside_weights].
    """
    k_max = cfg.FPN.RPN_MAX_LEVEL
    k_min = cfg.FPN.RPN_MIN_LEVEL
    # Prepare input blobs
    rois_names = ['rpn_rois_fpn' + str(l) for l in range(k_min, k_max + 1)]
    score_names = [
        'rpn_roi_probs_fpn' + str(l) for l in range(k_min, k_max + 1)
    ]
    blobs_in = rois_names + score_names
    blobs_in += ['roidb', 'im_info']
    blobs_in = [core.ScopedBlobReference(b) for b in blobs_in]
    # NOTE(review): `name` is built but never used below.
    name = 'CollectAndDistributeFpnRpnProposalsRecOp:' + ','.join(
        [str(b) for b in blobs_in])
    # Prepare output blobs
    blobs_out = roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=True)
    blobs_out = [core.ScopedBlobReference(b) for b in blobs_out]
    # Build a one-op net around the Python op and run it once on synthetic
    # rois/scores — this looks like a test/debug harness for the op.
    workspace.ResetWorkspace()
    net = core.Net("tutorial")
    net.Python(CollectAndDistributeFpnRpnProposalsRecOp(True).forward)(
        blobs_in, blobs_out)
    # Synthetic input: 512 rois, each box (5, 5, 5, 5) with a unique index
    # in column 0, fed identically to every FPN level.
    rois = np.ones((512, 5)) * 5
    for i in range(512):
        rois[i, 0] = i
    for roi_name in rois_names:
        workspace.FeedBlob(roi_name, rois)
    for score_name in score_names:
        workspace.FeedBlob(score_name, np.ones((512, 1)) * 0.5)
    roidb_blob = blob_utils.serialize(roidb)
    workspace.FeedBlob('roidb', roidb_blob)
    workspace.FeedBlob('im_info', im_info)
    workspace.RunNetOnce(net)
def im_get_det_rels(model, im, dataset_name, target_scale, target_max_size,
                    boxes=None, do_vis=False, roidb=None, use_gt_labels=False):
    """Run the relation-detection model on one image and unpack its outputs.

    Builds the input blobs (with optional box dedup and FPN multi-level
    rois), calls ``model``, and converts subject/object/predicate
    predictions to numpy, rescaling boxes back to original image size.
    With ``do_vis`` the (channel-averaged) conv feature maps are also
    returned for visualization.
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Deduplicate rois by hashing quantized coordinates.
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]
    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')
    if cfg.PYTORCH_VERSION_LESS_THAN_040:
        # Legacy pre-0.4 PyTorch requires wrapping in volatile Variables.
        inputs['data'] = [Variable(torch.from_numpy(inputs['data']),
                                   volatile=True)]
        inputs['im_info'] = [Variable(torch.from_numpy(inputs['im_info']),
                                      volatile=True)]
    else:
        inputs['data'] = [torch.from_numpy(inputs['data'])]
        inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]
    if dataset_name is not None:
        inputs['dataset_name'] = [blob_utils.serialize(dataset_name)]
    inputs['do_vis'] = [do_vis]
    if roidb is not None:
        inputs['roidb'] = [roidb]
    if use_gt_labels:
        inputs['use_gt_labels'] = [use_gt_labels]
    return_dict = model(**inputs)
    return_dict2 = {}
    if return_dict['sbj_rois'] is not None:
        # Column 0 of the rois is the batch index; columns 1:5 are the box.
        # Divide by im_scale to map back to original image coordinates.
        sbj_boxes = return_dict['sbj_rois'].data.cpu().numpy()[:, 1:5] / im_scale
        # Labels are shifted by one: 0 is background inside the model.
        sbj_labels = return_dict['sbj_labels'].data.cpu().numpy() - 1
        sbj_scores = return_dict['sbj_scores'].data.cpu().numpy()
        obj_boxes = return_dict['obj_rois'].data.cpu().numpy()[:, 1:5] / im_scale
        obj_labels = return_dict['obj_labels'].data.cpu().numpy() - 1
        obj_scores = return_dict['obj_scores'].data.cpu().numpy()
        prd_scores = return_dict['prd_scores'].data.cpu().numpy()
        # vaild_boxes = return_dict['vaild_rois'].data.cpu().numpy()[: ,1:5] / im_scale
        # vaild_labels = return_dict['vaild_labels'].data.cpu().numpy() - 1
        if cfg.MODEL.USE_FREQ_BIAS and 'prd_scores_bias' in return_dict.keys():
            prd_scores_bias = return_dict['prd_scores_bias'].data.cpu().numpy()
        if cfg.MODEL.USE_SPATIAL_FEAT and 'prd_scores_spt' in return_dict.keys():
            prd_scores_spt = return_dict['prd_scores_spt'].data.cpu().numpy()
        if cfg.MODEL.ADD_SCORES_ALL and 'prd_ttl_scores' in return_dict.keys():
            prd_scores_ttl = return_dict['prd_ttl_scores'].data.cpu().numpy()
        return_dict2 = dict(sbj_boxes=sbj_boxes,
                            sbj_labels=sbj_labels.astype(np.int32, copy=False),
                            sbj_scores=sbj_scores,
                            obj_boxes=obj_boxes,
                            obj_labels=obj_labels.astype(np.int32, copy=False),
                            obj_scores=obj_scores,
                            prd_scores=prd_scores,)
                            # vaild_boxes=vaild_boxes,
                            # vaild_labels=vaild_labels)
        if cfg.MODEL.ADD_SCORES_ALL and 'prd_ttl_scores' in return_dict.keys():
            return_dict2['prd_scores_ttl'] = prd_scores_ttl
        if cfg.MODEL.USE_FREQ_BIAS and 'prd_scores_bias' in return_dict.keys():
            return_dict2['prd_scores_bias'] = prd_scores_bias
        if cfg.MODEL.USE_SPATIAL_FEAT and 'prd_scores_spt' in return_dict.keys():
            return_dict2['prd_scores_spt'] = prd_scores_spt
        if do_vis:
            # Average feature maps over the channel axis for visualization.
            if isinstance(return_dict['blob_conv'], list):
                blob_conv = [b.data.cpu().numpy().squeeze()
                             for b in return_dict['blob_conv']]
                blob_conv_prd = [b.data.cpu().numpy().squeeze()
                                 for b in return_dict['blob_conv_prd']]
                blob_conv = [b.mean(axis=0) for b in blob_conv]
                blob_conv_prd = [b.mean(axis=0) for b in blob_conv_prd]
                return_dict2['blob_conv'] = blob_conv
                return_dict2['blob_conv_prd'] = blob_conv_prd
            else:
                blob_conv = return_dict['blob_conv'].data.cpu().numpy().squeeze()
                blob_conv_prd = return_dict['blob_conv_prd'].data.cpu().numpy().squeeze()
                blob_conv = blob_conv.mean(axis=0)
                blob_conv_prd = blob_conv_prd.mean(axis=0)
                return_dict2['blob_conv'] = blob_conv
                return_dict2['blob_conv_prd'] = blob_conv_prd
    else:
        # No detections: same keys, all None.
        return_dict2 = dict(sbj_boxes=None, sbj_labels=None, sbj_scores=None,
                            obj_boxes=None, obj_labels=None, obj_scores=None,
                            prd_scores=None)
    return return_dict2
def __getitem__(self, index_tuple):
    """Build training blobs plus a query patch for one roidb entry.

    Few-shot style loader: in training mode a query is sampled as either a
    positive category crop, a background patch, or a negative category crop
    according to the configured rates; ``query_type`` is 1 for positive
    queries and 0 otherwise. In eval mode the query is cropped from the
    image itself.
    """
    index, ratio = index_tuple
    single_db = [self._roidb[index]]
    blobs, valid = get_minibatch(single_db)
    #TODO: Check if minibatch is valid ? If not, abandon it.
    # Need to change _worker_loop in torch.utils.data.dataloader.py.
    # Squeeze batch dim ('roidb', 'gt_cats' and 'binary_mask' are not
    # batched arrays).
    for key in blobs:
        if key != 'roidb' and key != 'gt_cats' and key != 'binary_mask':
            blobs[key] = blobs[key].squeeze(axis=0)
    # Keep only the categories this loader handles.
    blobs['gt_cats'] = [x for x in blobs['gt_cats'] if x in self.list]
    blobs['gt_cats'] = np.array(blobs['gt_cats'])
    # Resize the binary mask to the network input scale, then dilate it.
    scale = blobs['im_info'][-1]
    mask = cv2.resize(blobs['binary_mask'], None, None, fx=scale, fy=scale,
                      interpolation=cv2.INTER_NEAREST)
    kernel = np.ones((5, 5), np.uint8)
    mask = cv2.dilate(mask, kernel, iterations=1)
    blobs['binary_mask'] = mask
    query_type = 1
    if self.training:
        # Random choice query catgory
        positive_catgory = blobs['gt_cats']
        negative_catgory = np.array(
            list(set(self.cat_list) - set(positive_catgory)))
        r = random.random()
        if r <= cfg.TRAIN.QUERY_POSITIVE_RATE:
            # Positive query: pick one of the image's own categories,
            # weighted by how often each has been shown (self.show_time).
            query_type = 1
            cand = np.unique(positive_catgory)
            if len(cand) == 1:
                choice = cand[0]
            else:
                p = []
                for i in cand:
                    p.append(self.show_time[i])
                p = np.array(p)
                p /= p.sum()
                choice = np.random.choice(cand, 1, p=p)[0]
            query = self.load_query(choice)
        elif r > cfg.TRAIN.QUERY_POSITIVE_RATE and r <= cfg.TRAIN.QUERY_POSITIVE_RATE + cfg.TRAIN.QUERY_GLOBAL_NEGATIVE_RATE:
            # Global negative: try to sample background patches from the
            # image; fall back to a negative category if not enough found.
            query_type = 0
            im = blobs['data'].copy()
            binary_mask = blobs['binary_mask'].copy()
            patch = self.sample_bg(im, binary_mask)
            if len(patch) == self.shot:
                query = patch
            else:
                print("No bg, the number of bg is: ", len(patch))
                query_type = 0
                cand = negative_catgory
                choice = np.random.choice(cand, 1)[0]
                query = self.load_query(choice)
        else:
            # Negative query: a category not present in this image.
            query_type = 0
            cand = negative_catgory
            choice = np.random.choice(cand, 1)[0]
            query = self.load_query(choice)
    else:
        #query = self.load_query(index, single_db[0]['id'])
        query = self.crop_query(single_db, index, single_db[0]['id'])
    blobs['query'] = query
    blobs['query_type'] = query_type
    # These helper blobs must not reach the collate function.
    if 'gt_cats' in blobs:
        del blobs['gt_cats']
    if 'binary_mask' in blobs:
        del blobs['binary_mask']
    if self.training:
        if self._roidb[index]['need_crop']:
            self.crop_data(blobs, ratio)
        # Check bounding box: drop boxes with zero width or zero height and
        # keep the per-box annotation arrays aligned.
        entry = blobs['roidb'][0]
        boxes = entry['boxes']
        invalid = (boxes[:, 0] == boxes[:, 2]) | (boxes[:, 1] == boxes[:, 3])
        valid_inds = np.nonzero(~invalid)[0]
        if len(valid_inds) < len(boxes):
            for key in [
                    'boxes', 'gt_classes', 'seg_areas', 'gt_overlaps',
                    'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
            ]:
                if key in entry:
                    entry[key] = entry[key][valid_inds]
            entry['segms'] = [
                entry['segms'][ind] for ind in valid_inds
            ]
        blobs['roidb'] = blob_utils.serialize(
            blobs['roidb'])  # CHECK: maybe we can serialize in collate_fn
        return blobs
    else:
        blobs['roidb'] = blob_utils.serialize(blobs['roidb'])
        choice = self.cat_list[index]
        blobs['choice'] = choice
        return blobs
def run_eval(args, cfg, maskRCNN, dataloader_test, step, output_dir,
             test_stats, best_eval_result, eval_subset='test'):
    """Run evaluation over ``dataloader_test`` and score the predictions.

    Collects per-triplet object and human predictions from the (multi-GPU)
    model outputs, regroups them per file name, then runs ``vhico_eval`` in
    either recall or mAP mode. Returns ``(is_best, best_eval_result)`` and
    restores the model to training mode before returning.
    """
    is_best = False
    maskRCNN.eval()
    print(
        '------------------------------------------------------------------------------------------------------------'
    )
    print('eval %s: %s' % (eval_subset, 'mAP' if cfg.EVAL_MAP else 'recall'))
    print(
        '------------------------------------------------------------------------------------------------------------'
    )
    ### -------------------------------------------------------------------------------------------------------------------
    # get results
    ### -------------------------------------------------------------------------------------------------------------------
    # Accumulators, all keyed by triplet name.
    file_names = {}
    boxes = {}
    scores = {}
    scores_ori = {}
    human_file_names = {}
    human_boxes = {}
    human_scores = {}
    human_scores_ori = {}
    obj_gt_cls_names = {}
    prd_gt_cls_names = {}
    obj_gt_cls_ids = {}
    prd_gt_cls_ids = {}
    if cfg.BINARY_LOSS:
        binary_preds = {}
    for i, input_data in enumerate(dataloader_test):
        for key in input_data:
            if key != 'roidb':
                input_data[key] = list(map(Variable, input_data[key]))
        if len(input_data['im_info']) != cfg.NUM_GPUS:
            print(len(input_data['im_info']))
        net_outputs_dict = maskRCNN(**input_data)
        for triplet_name in net_outputs_dict.keys():
            # Work on a deep copy: the per-GPU unpacking below consumes the
            # serialized byte buffers in place.
            net_outputs = deepcopy(net_outputs_dict[triplet_name])
            for gpu_i in range(cfg.NUM_GPUS):
                # Lazily create the accumulator lists for a new triplet.
                if triplet_name not in boxes:
                    boxes[triplet_name] = []
                    scores[triplet_name] = []
                    scores_ori[triplet_name] = []
                    human_boxes[triplet_name] = []
                    human_scores[triplet_name] = []
                    human_scores_ori[triplet_name] = []
                    obj_gt_cls_names[triplet_name] = []
                    prd_gt_cls_names[triplet_name] = []
                    obj_gt_cls_ids[triplet_name] = []
                    prd_gt_cls_ids[triplet_name] = []
                    file_names[triplet_name] = []
                    human_file_names[triplet_name] = []
                    if cfg.BINARY_LOSS:
                        binary_preds[triplet_name] = []
                # Slice out this GPU's rows from the gathered predictions.
                boxes[triplet_name] += [
                    box[(gpu_i) * cfg.TRAIN.BATCH_SIZE_PER_IM:(gpu_i + 1) *
                        cfg.TRAIN.BATCH_SIZE_PER_IM, :]
                    for box in net_outputs['predictions']['box']
                ]
                scores[triplet_name] += [
                    score[(gpu_i) * cfg.TRAIN.BATCH_SIZE_PER_IM:(gpu_i + 1) *
                          cfg.TRAIN.BATCH_SIZE_PER_IM]
                    for score in net_outputs['predictions']['score']
                ]
                scores_ori[triplet_name] += [
                    score_ori[(gpu_i) * cfg.TRAIN.BATCH_SIZE_PER_IM:(gpu_i + 1) *
                              cfg.TRAIN.BATCH_SIZE_PER_IM]
                    for score_ori in net_outputs['predictions']['score_ori']
                ]
                assert len(net_outputs['predictions']['box']
                           [0]) == cfg.TRAIN.BATCH_SIZE_PER_IM * cfg.NUM_GPUS
                assert len(net_outputs['predictions']['score']
                           [0]) == cfg.TRAIN.BATCH_SIZE_PER_IM * cfg.NUM_GPUS
                assert len(net_outputs['predictions']['score_ori']
                           [0]) == cfg.TRAIN.BATCH_SIZE_PER_IM * cfg.NUM_GPUS
                # Deserialize this GPU's metadata from the front of each
                # concatenated byte buffer...
                file_name = blob_utils.deserialize(
                    net_outputs['predictions']['files'].numpy())
                obj_gt_cls_name = blob_utils.deserialize(
                    net_outputs['predictions']['obj_gt_cls_name'].numpy())
                prd_gt_cls_name = blob_utils.deserialize(
                    net_outputs['predictions']['prd_gt_cls_name'].numpy())
                obj_gt_cls = blob_utils.deserialize(
                    net_outputs['predictions']['obj_gt_cls'].numpy())
                prd_gt_cls = blob_utils.deserialize(
                    net_outputs['predictions']['prd_gt_cls'].numpy())
                file_names[triplet_name] += file_name
                obj_gt_cls_names[triplet_name] += obj_gt_cls_name
                prd_gt_cls_names[triplet_name] += prd_gt_cls_name
                obj_gt_cls_ids[triplet_name] += obj_gt_cls
                prd_gt_cls_ids[triplet_name] += prd_gt_cls
                # ...then advance each buffer past the bytes just consumed
                # (re-serializing the deserialized head to measure its
                # length) so the next gpu_i iteration reads its own chunk.
                len_gpu_i = len(
                    blob_utils.serialize(
                        blob_utils.deserialize(
                            net_outputs['predictions']['files'].numpy())))
                net_outputs['predictions']['files'] = net_outputs[
                    'predictions']['files'][len_gpu_i:]
                len_gpu_i = len(
                    blob_utils.serialize(
                        blob_utils.deserialize(net_outputs['predictions']
                                               ['obj_gt_cls_name'].numpy())))
                net_outputs['predictions']['obj_gt_cls_name'] = net_outputs[
                    'predictions']['obj_gt_cls_name'][len_gpu_i:]
                len_gpu_i = len(
                    blob_utils.serialize(
                        blob_utils.deserialize(net_outputs['predictions']
                                               ['prd_gt_cls_name'].numpy())))
                net_outputs['predictions']['prd_gt_cls_name'] = net_outputs[
                    'predictions']['prd_gt_cls_name'][len_gpu_i:]
                len_gpu_i = len(
                    blob_utils.serialize(
                        blob_utils.deserialize(
                            net_outputs['predictions']['obj_gt_cls'].numpy())))
                net_outputs['predictions']['obj_gt_cls'] = net_outputs[
                    'predictions']['obj_gt_cls'][len_gpu_i:]
                len_gpu_i = len(
                    blob_utils.serialize(
                        blob_utils.deserialize(
                            net_outputs['predictions']['prd_gt_cls'].numpy())))
                net_outputs['predictions']['prd_gt_cls'] = net_outputs[
                    'predictions']['prd_gt_cls'][len_gpu_i:]
                if cfg.BINARY_LOSS:
                    binary_preds[triplet_name] += [
                        binary_pred[(gpu_i) * 2:(gpu_i + 1) * 2]
                        for binary_pred in net_outputs['predictions']
                        ['binary_pred']
                    ]
                # human
                num_roi = cfg.MAX_NUM_HUMAN
                human_boxes[triplet_name] += [
                    box[(gpu_i) * num_roi:(gpu_i + 1) * num_roi, :]
                    for box in net_outputs['human_predictions']['box']
                ]
                human_scores[triplet_name] += [
                    score[(gpu_i) * num_roi:(gpu_i + 1) * num_roi]
                    for score in net_outputs['human_predictions']['score']
                ]
                human_scores_ori[triplet_name] += [
                    score_ori[(gpu_i) * num_roi:(gpu_i + 1) * num_roi]
                    for score_ori in net_outputs['human_predictions']['score_ori']
                ]
                human_file_name = blob_utils.deserialize(
                    net_outputs['human_predictions']['files'].numpy())
                human_obj_gt_cls_name = blob_utils.deserialize(
                    net_outputs['human_predictions']
                    ['obj_gt_cls_name'].numpy())
                human_prd_gt_cls_name = blob_utils.deserialize(
                    net_outputs['human_predictions']
                    ['prd_gt_cls_name'].numpy())
                obj_gt_cls = blob_utils.deserialize(
                    net_outputs['human_predictions']['obj_gt_cls'].numpy())
                prd_gt_cls = blob_utils.deserialize(
                    net_outputs['human_predictions']['prd_gt_cls'].numpy())
                human_file_names[triplet_name] += human_file_name
                # Same consume-and-advance bookkeeping for the human buffers.
                len_gpu_i = len(
                    blob_utils.serialize(
                        blob_utils.deserialize(net_outputs['human_predictions']
                                               ['files'].numpy())))
                net_outputs['human_predictions']['files'] = net_outputs[
                    'human_predictions']['files'][len_gpu_i:]
                len_gpu_i = len(
                    blob_utils.serialize(
                        blob_utils.deserialize(net_outputs['human_predictions']
                                               ['obj_gt_cls_name'].numpy())))
                net_outputs['human_predictions'][
                    'obj_gt_cls_name'] = net_outputs['human_predictions'][
                        'obj_gt_cls_name'][len_gpu_i:]
                len_gpu_i = len(
                    blob_utils.serialize(
                        blob_utils.deserialize(net_outputs['human_predictions']
                                               ['prd_gt_cls_name'].numpy())))
                net_outputs['human_predictions'][
                    'prd_gt_cls_name'] = net_outputs['human_predictions'][
                        'prd_gt_cls_name'][len_gpu_i:]
                len_gpu_i = len(
                    blob_utils.serialize(
                        blob_utils.deserialize(net_outputs['human_predictions']
                                               ['obj_gt_cls'].numpy())))
                net_outputs['human_predictions']['obj_gt_cls'] = net_outputs[
                    'human_predictions']['obj_gt_cls'][len_gpu_i:]
                len_gpu_i = len(
                    blob_utils.serialize(
                        blob_utils.deserialize(net_outputs['human_predictions']
                                               ['prd_gt_cls'].numpy())))
                net_outputs['human_predictions']['prd_gt_cls'] = net_outputs[
                    'human_predictions']['prd_gt_cls'][len_gpu_i:]
                # Object and human streams must describe the same frames.
                assert file_name == human_file_name
                assert obj_gt_cls_name == human_obj_gt_cls_name
                assert prd_gt_cls_name == human_prd_gt_cls_name
                assert len(scores[triplet_name]) == len(
                    scores_ori[triplet_name]) == len(boxes[triplet_name]) == len(
                        file_names[triplet_name])
                assert len(human_scores[triplet_name]) == len(
                    human_boxes[triplet_name]) == len(
                        human_file_names[triplet_name])
                assert len(file_names[triplet_name]) == len(
                    obj_gt_cls_names[triplet_name]) == len(
                        prd_gt_cls_names[triplet_name])
    # Regroup the flat accumulators into per-file-name dicts.
    predictions_all_triplet = {}
    human_predictions_all_triplet = {}
    for triplet_name in net_outputs_dict.keys():
        predictions = {}
        for i, file_name in enumerate(file_names[triplet_name]):
            predictions[file_name] = {}
            predictions[file_name]['boxes'] = boxes[triplet_name][i]
            predictions[file_name]['scores'] = scores[triplet_name][i]
            predictions[file_name]['scores_ori'] = scores_ori[triplet_name][i]
            predictions[file_name]['obj_gt_cls_names'] = obj_gt_cls_names[
                triplet_name][i]
            predictions[file_name]['prd_gt_cls_names'] = prd_gt_cls_names[
                triplet_name][i]
            predictions[file_name]['obj_gt_cls_ids'] = obj_gt_cls_ids[
                triplet_name][i]
            predictions[file_name]['prd_gt_cls_ids'] = prd_gt_cls_ids[
                triplet_name][i]
            if cfg.BINARY_LOSS:
                predictions[file_name]['binary_preds'] = binary_preds[
                    triplet_name][i]
        predictions_all_triplet[triplet_name] = predictions
        # human
        human_predictions = {}
        for i, file_name in enumerate(human_file_names[triplet_name]):
            human_predictions[file_name] = {}
            human_predictions[file_name]['boxes'] = human_boxes[triplet_name][
                i]
            human_predictions[file_name]['scores'] = human_scores[
                triplet_name][i]
            human_predictions[file_name]['scores_ori'] = human_scores_ori[
                triplet_name][i]
        human_predictions_all_triplet[triplet_name] = human_predictions
    eval_input = {}
    eval_input['predictions_object_bbox'] = predictions_all_triplet
    eval_input['predictions_human_bbox'] = human_predictions_all_triplet
    eval_input[
        'video_name_triplet_dict'] = maskRCNN.module.video_name_triplet_dict
    # ------------------------------------------------------------------------------------------------------------
    # Compute Recall and mAP
    # ------------------------------------------------------------------------------------------------------------
    if 'vhico' in args.dataset:
        if not cfg.EVAL_MAP:
            frame_recall_phrase_ko = vhico_eval(cfg,
                                                eval_subset=eval_subset,
                                                eval_input=eval_input,
                                                GT_PATH_TEST=GT_PATH_TEST,
                                                GT_PATH_UNSEEN=GT_PATH_UNSEEN)
            test_stats.tb_log_stats(
                {'frame_recall_phrase_ko': frame_recall_phrase_ko}, step)
            if frame_recall_phrase_ko > best_eval_result:
                is_best = True
                best_eval_result = frame_recall_phrase_ko
                print('best test frame_recall_phrase_ko is %.4f at step %d' %
                      (frame_recall_phrase_ko, step))
        else:
            mAP_result = vhico_eval(cfg,
                                    eval_subset=eval_subset,
                                    eval_input=eval_input,
                                    GT_PATH_TEST=GT_PATH_TEST,
                                    GT_PATH_UNSEEN=GT_PATH_UNSEEN)
    ## set the model to training mode
    maskRCNN.train()
    return is_best, best_eval_result
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models.

    Mutates `blobs` in place: appends per-image anchor/RPN-target entries,
    concatenates the accumulated lists into arrays, and finally attaches
    either per-image gt boxes/classes (CPP_RPN 'all' mode) or a serialized
    minimal roidb. Always returns True (RPN minibatches are valid by design).

    Args:
        blobs: dict of blob-name -> list/array, pre-initialized by the caller
            (e.g. `blobs['im_info']` must already be an appendable list).
        im_scales: per-image scale factors, indexed in step with `roidb`.
        roidb: list of roidb entry dicts ('boxes', 'gt_classes', 'is_crowd',
            'height', 'width', ...).
    """
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper.
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            # Anchor stride/size double at each pyramid level.
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(field_stride, anchor_sizes,
                                                  anchor_aspect_ratios)
            # Export the per-level cell anchors alongside the targets.
            blobs['anchors_%d' % lvl] = foa.cell_anchors
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        # Single-level (classical) RPN.
        foa = data_utils.get_field_of_anchors(cfg.RPN.STRIDE, cfg.RPN.SIZES,
                                              cfg.RPN.ASPECT_RATIOS)
        all_anchors = foa.field_of_anchors
        blobs['anchors'] = foa.cell_anchors

    # Per-image pass: record im_info and accumulate RPN target blobs.
    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        # Foreground, non-crowd ground-truth boxes only.
        gt_inds = np.where((entry['gt_classes'] > 0)
                           & (entry['is_crowd'] == 0))[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)
        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # One dict of targets per FPN level; keys get a '_fpn<lvl>' suffix.
            rpn_blobs = _get_rpn_blobs(im_height, im_width, foas, all_anchors,
                                       gt_rois)
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level.
            rpn_blobs = _get_rpn_blobs(im_height, im_width, [foa], all_anchors,
                                       gt_rois)
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    # Collapse the per-image lists accumulated above into single arrays.
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    if cfg.TRAIN.CPP_RPN == 'all':
        # Export per-image gt boxes (scaled, with area appended as a 5th
        # column) and gt classes under zero-padded per-image keys.
        # NOTE(review): unlike the loop above, crowd boxes are NOT filtered
        # here (only gt_classes > 0) — presumably intentional; confirm.
        for im_i, entry in enumerate(roidb):
            scale = im_scales[im_i]
            gt_inds = np.where(entry['gt_classes'] > 0)[0]
            gt_boxes = entry['boxes'][gt_inds, :] * scale
            # Box areas use the +1 pixel convention.
            gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1.) * \
                (gt_boxes[:, 3] - gt_boxes[:, 1] + 1.)
            blobs['gt_boxes_%02d' % im_i] = np.hstack(
                (gt_boxes, gt_areas[:, np.newaxis]))
            blobs['gt_classes_%02d' % im_i] = entry['gt_classes'][gt_inds]
    else:
        # Ship a stripped-down, serialized roidb containing only the keys
        # downstream consumers need.
        valid_keys = [
            'has_visible_keypoints', 'boxes', 'segms', 'seg_areas',
            'gt_classes', 'gt_attributes', 'gt_overlaps', 'is_crowd',
            'box_to_gt_ind_map', 'gt_keypoints'
        ]
        minimal_roidb = [{} for _ in range(len(roidb))]
        for i, e in enumerate(roidb):
            for k in valid_keys:
                if k in e:
                    minimal_roidb[i][k] = e[k]
        blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models.

    Mutates `blobs` in place: appends per-image `im_info` and RPN target
    entries, concatenates the accumulated lists into arrays, and attaches a
    serialized minimal roidb. Always returns True (RPN minibatches are valid
    by design).

    Args:
        blobs: dict of blob-name -> list/array, pre-initialized by the caller
            (e.g. `blobs['im_info']` must already be an appendable list).
        im_scales: per-image scale factors, indexed in step with `roidb`.
        roidb: list of roidb entry dicts ('boxes', 'gt_classes', 'is_crowd',
            'height', 'width', ...).
    """
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper.
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            # Anchor stride/size double at each pyramid level.
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        # Single-level (classical) RPN.
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        # Foreground, non-crowd ground-truth boxes only.
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        )[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        # Removed dead code: a (len(gt_inds), 6) 'gt_boxes' buffer
        # [batch_ind, x1, y1, x2, y2, class] was filled here but never read
        # anywhere in this function (the inherited TODO already flagged it).
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # One dict of targets per FPN level; keys get a '_fpn<lvl>' suffix.
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois
            )
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level.
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois
            )
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    # Collapse the per-image lists accumulated above into single arrays.
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    # Ship a stripped-down, serialized roidb containing only the keys
    # downstream consumers need.
    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas',
        'gt_classes', 'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map',
        'gt_keypoints'
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
def __getitem__(self, index_tuple):
    """Build one few-shot training sample: the query minibatch plus cropped
    support images/boxes for the query's class and (support_way - 1) other
    classes, all stashed into the returned `blobs` dict.

    Args:
        index_tuple: (index, ratio) pair — roidb index of the query image and
            the aspect-ratio bucket used for cropping.

    Returns:
        blobs dict from get_minibatch, augmented with 'support_data' and with
        support metadata written into (then serialized) blobs['roidb'][0].
    """
    index, ratio = index_tuple  # this index is just the index of roidb, not the roidb_index
    # Get support roidb; support cls is same as query cls, and support image
    # is different from query image. `index_pd` maps index <-> image <-> class
    # (presumably one row per (image, class) pair — confirm against builder).
    query_cls = self.index_pd.loc[self.index_pd['index'] == index,
                                  'cls_ls'].tolist()[0]
    query_img = self.index_pd.loc[self.index_pd['index'] == index,
                                  'img_ls'].tolist()[0]
    # All classes present in the query image; these are excluded from the
    # "other class" (negative way) sampling below.
    all_cls = self.index_pd.loc[self.index_pd['img_ls'] == query_img,
                                'cls_ls'].tolist()
    single_db = [self._roidb[index]]
    blobs, valid = get_minibatch(single_db)
    #TODO: Check if minibatch is valid ? If not, abandon it.
    # Need to change _worker_loop in torch.utils.data.dataloader.py.
    # Squeeze batch dim
    for key in blobs:
        if key != 'roidb':
            blobs[key] = blobs[key].squeeze(axis=0)

    if self._roidb[index]['need_crop']:
        self.crop_data(blobs, ratio)

    # Check bounding box: drop degenerate (zero-width or zero-height) boxes
    # from every per-box annotation field.
    entry = blobs['roidb'][0]
    boxes = entry['boxes']
    invalid = (boxes[:, 0] == boxes[:, 2]) | (boxes[:, 1] == boxes[:, 3])
    valid_inds = np.nonzero(~invalid)[0]
    if len(valid_inds) < len(boxes):
        for key in [
                'boxes', 'gt_classes', 'seg_areas', 'gt_overlaps', 'is_crowd',
                'box_to_gt_ind_map', 'gt_keypoints'
        ]:
            if key in entry:
                entry[key] = entry[key][valid_inds]
        entry['segms'] = [entry['segms'][ind] for ind in valid_inds]

    # Crop support data and get new support box in the support data.
    # support_way classes x support_shot images each; supports are fixed-size
    # 3x320x320 crops (shape fixed by the buffers below).
    support_way = 2  # number of classes (1 positive + negatives)
    support_shot = 5  # images sampled per class
    support_data_all = np.zeros((support_way * support_shot, 3, 320, 320),
                                dtype=np.float32)
    support_box_all = np.zeros((support_way * support_shot, 4),
                               dtype=np.float32)
    # Track used images/indices/classes so no support repeats the query or a
    # previously drawn support.
    used_img_ls = [query_img]
    used_index_ls = [index]
    used_cls_ls = list(set(all_cls))
    support_cls_ls = []
    mixup_i = 0
    for shot in range(support_shot):
        # Support image and box: same class as the query, different image.
        # random_state=index makes the draw deterministic per query.
        support_index = self.index_pd.loc[
            (self.index_pd['cls_ls'] == query_cls)
            & (~self.index_pd['img_ls'].isin(used_img_ls))
            & (~self.index_pd['index'].isin(used_index_ls)),
            'index'].sample(random_state=index).tolist()[0]
        support_cls = self.index_pd.loc[self.index_pd['index'] ==
                                        support_index, 'cls_ls'].tolist()[0]
        support_img = self.index_pd.loc[self.index_pd['index'] ==
                                        support_index, 'img_ls'].tolist()[0]
        used_index_ls.append(support_index)
        used_img_ls.append(support_img)
        support_db = [self._roidb[support_index]]
        support_data, support_box = self.crop_support(support_db)
        support_data_all[mixup_i] = support_data
        support_box_all[mixup_i] = support_box[0]
        support_cls_ls.append(support_cls)
        mixup_i += 1
    if support_way == 1:
        pass
    else:
        # Negative ways: classes absent from the query image.
        for way in range(support_way - 1):
            other_cls = self.index_pd.loc[
                (~self.index_pd['cls_ls'].isin(used_cls_ls)),
                'cls_ls'].drop_duplicates().sample(
                    random_state=index).tolist()[0]
            used_cls_ls.append(other_cls)
            for shot in range(support_shot):
                # Support image and box for the negative class.
                support_index = self.index_pd.loc[
                    (self.index_pd['cls_ls'] == other_cls)
                    & (~self.index_pd['img_ls'].isin(used_img_ls))
                    & (~self.index_pd['index'].isin(used_index_ls)),
                    'index'].sample(random_state=index).tolist()[0]
                support_cls = self.index_pd.loc[
                    self.index_pd['index'] == support_index,
                    'cls_ls'].tolist()[0]
                support_img = self.index_pd.loc[
                    self.index_pd['index'] == support_index,
                    'img_ls'].tolist()[0]
                used_index_ls.append(support_index)
                used_img_ls.append(support_img)
                support_db = [self._roidb[support_index]]
                support_data, support_box = self.crop_support(support_db)
                support_data_all[mixup_i] = support_data
                support_box_all[mixup_i] = support_box[0]
                support_cls_ls.append(support_cls)
                mixup_i += 1
    blobs['support_data'] = support_data_all
    # One box per support crop.
    blobs['roidb'][0]['support_boxes'] = support_box_all
    # NOTE(review): support_db here is whichever support was drawn LAST, so
    # 'support_id' records only the final support's id — confirm intended.
    blobs['roidb'][0]['support_id'] = support_db[0]['id']
    blobs['roidb'][0]['support_cls'] = support_cls_ls
    blobs['roidb'][0]['query_id'] = single_db[0]['id']
    blobs['roidb'][0]['target_cls'] = single_db[0]['target_cls']
    blobs['roidb'] = blob_utils.serialize(
        blobs['roidb'])  # CHECK: maybe we can serialize in collate_fn
    return blobs
def collect_output(cfg, dataset_name, im_info, roidb, obj_rois, gt_obj_label,
                   gt_prd_label, output, device_id):
    """Repackage one forward pass's outputs into a gatherable return dict.

    Always returns the losses (each unsqueezed to 1-dim for multi-GPU
    gathering). When `dataset_name` is an eval dataset (TEST or UNSEEN), also
    builds per-frame object and human box predictions, with list-valued
    fields serialized via blob_utils so they survive tensor gathering.

    Args:
        cfg: config node (VIDEO_FRAME, TRAIN.BATCH_SIZE_PER_IM, MAX_NUM_HUMAN,
            BINARY_LOSS, TEST/UNSEEN dataset names).
        dataset_name: name of the dataset this batch came from.
        im_info: tensor reshapable to (-1, VIDEO_FRAME, 3); [:, :, 2] is the
            image scale used to map rois back to original coordinates.
        roidb: per-frame roidb entries (file_name, *_gt_cls[_name] fields).
        obj_rois: object rois, reshapable to
            (-1, VIDEO_FRAME, BATCH_SIZE_PER_IM, 5).
        gt_obj_label, gt_prd_label: ground-truth labels per frame.
        output: 12-tuple of losses, roi weights, densepose rois and
            cls_prediction produced by the model.
        device_id: unused here; kept for caller compatibility.

    Returns:
        dict with 'losses' and, on eval datasets, 'predictions' and
        'human_predictions'.
    """
    (obj_loss, prd_loss, weight_loss, weight_human_loss, video_loss,
     video_binary_loss, roi_weights_unpacked, roi_weights_human_unpacked,
     densepose_roi, roi_weights_ori, roi_weights_human_ori,
     cls_prediction) = output
    return_dict = {}

    # Predictions are only materialized for evaluation datasets.
    if (dataset_name == cfg.TEST.DATASETS[0]) or (dataset_name
                                                  == cfg.UNSEEN.DATASETS[0]):
        cls_prediction['obj_label'] = gt_obj_label.view(
            -1, cfg.VIDEO_FRAME, 1).detach().cpu().numpy()
        cls_prediction['prd_label'] = gt_prd_label.view(
            -1, cfg.VIDEO_FRAME, 1).detach().cpu().numpy()
        # Reshape everything to (clip, frame, ...) and move to numpy.
        im_info_unpacked = im_info.view(-1, cfg.VIDEO_FRAME,
                                        3).detach().cpu().numpy()
        obj_rois_unpacked = torch.tensor(obj_rois).view(
            -1, cfg.VIDEO_FRAME, cfg.TRAIN.BATCH_SIZE_PER_IM, 5)
        obj_rois_unpacked = obj_rois_unpacked.detach().cpu().numpy()
        roi_weights_unpacked = roi_weights_unpacked.view(
            -1, cfg.VIDEO_FRAME, cfg.TRAIN.BATCH_SIZE_PER_IM)
        roi_weights_unpacked = roi_weights_unpacked.detach().cpu().numpy()
        roi_weights_human_unpacked = roi_weights_human_unpacked.view(
            -1, cfg.VIDEO_FRAME, cfg.MAX_NUM_HUMAN)
        roi_weights_human_unpacked = roi_weights_human_unpacked.detach().cpu(
        ).numpy()
        roi_weights_ori = roi_weights_ori.view(-1, cfg.VIDEO_FRAME,
                                               cfg.TRAIN.BATCH_SIZE_PER_IM)
        roi_weights_ori = roi_weights_ori.detach().cpu().numpy()
        roi_weights_human_ori = roi_weights_human_ori.view(
            -1, cfg.VIDEO_FRAME, cfg.MAX_NUM_HUMAN)
        roi_weights_human_ori = roi_weights_human_ori.detach().cpu().numpy()
        densepose_obj_rois_unpacked = torch.tensor(densepose_roi).view(
            -1, cfg.VIDEO_FRAME, cfg.MAX_NUM_HUMAN, 5)
        densepose_obj_rois_unpacked = densepose_obj_rois_unpacked.detach().cpu(
        ).numpy()

        predictions = {}
        predictions['files'] = []
        predictions['box'] = []
        predictions['score'] = []
        predictions['score_ori'] = []
        predictions['obj_gt_cls_name'] = []
        predictions['prd_gt_cls_name'] = []
        predictions['obj_gt_cls'] = []
        predictions['prd_gt_cls'] = []
        predictions['binary_pred'] = []

        human_predictions = {}
        human_predictions['files'] = []
        human_predictions['box'] = []
        human_predictions['score'] = []
        human_predictions['score_ori'] = []
        human_predictions['obj_gt_cls_name'] = []
        human_predictions['prd_gt_cls_name'] = []
        human_predictions['obj_gt_cls'] = []
        human_predictions['prd_gt_cls'] = []

        for test_i, obj_rois_unpacked_i in enumerate(obj_rois_unpacked):
            for test_j, obj_rois_unpacked_i_j in enumerate(
                    obj_rois_unpacked_i):
                # Last three path components identify the frame.
                file_name = '/'.join(
                    roidb[test_j]['file_name'].split('/')[-3:])
                # Undo the image scaling so boxes are in original coords.
                this_obj_rois = obj_rois_unpacked_i_j / im_info_unpacked[
                    test_i][test_j][2]
                prd_gt_cls_name = roidb[test_j]['prd_gt_cls_name']
                obj_gt_cls_name = roidb[test_j]['obj_gt_cls_name']
                prd_gt_cls = roidb[test_j]['prd_gt_cls']
                obj_gt_cls = roidb[test_j]['obj_gt_cls']

                # obj
                predictions['files'].append(file_name)
                predictions['box'].append(this_obj_rois)
                predictions['score'].append(
                    roi_weights_unpacked[test_i][test_j])
                predictions['score_ori'].append(
                    roi_weights_ori[test_i][test_j])
                predictions['obj_gt_cls_name'].append(obj_gt_cls_name)
                predictions['prd_gt_cls_name'].append(prd_gt_cls_name)
                predictions['obj_gt_cls'].append(obj_gt_cls)
                predictions['prd_gt_cls'].append(prd_gt_cls)
                if cfg.BINARY_LOSS:
                    predictions['binary_pred'].append(
                        cls_prediction['binary_pred'][test_i])

                # human
                this_obj_rois = densepose_obj_rois_unpacked[test_i][
                    test_j] / im_info_unpacked[test_i][test_j][2]
                human_predictions['files'].append(file_name)
                human_predictions['box'].append(this_obj_rois)
                human_predictions['score'].append(
                    roi_weights_human_unpacked[test_i][test_j])
                human_predictions['score_ori'].append(
                    roi_weights_human_ori[test_i][test_j])
                human_predictions['obj_gt_cls_name'].append(obj_gt_cls_name)
                human_predictions['prd_gt_cls_name'].append(prd_gt_cls_name)
                human_predictions['obj_gt_cls'].append(obj_gt_cls)
                human_predictions['prd_gt_cls'].append(prd_gt_cls)

        # obj: serialize list-valued fields so they can be gathered.
        predictions['files'] = blob_utils.serialize(predictions['files'])
        predictions['obj_gt_cls_name'] = blob_utils.serialize(
            predictions['obj_gt_cls_name'])
        predictions['prd_gt_cls_name'] = blob_utils.serialize(
            predictions['prd_gt_cls_name'])
        predictions['obj_gt_cls'] = blob_utils.serialize(
            predictions['obj_gt_cls'])
        predictions['prd_gt_cls'] = blob_utils.serialize(
            predictions['prd_gt_cls'])
        return_dict['predictions'] = predictions

        # human: serialize the HUMAN dict's own lists.
        # BUG FIX: the previous code serialized predictions['obj_gt_cls'] /
        # predictions['prd_gt_cls'] here, which at this point were already
        # serialized above — double serialization of the wrong dict's data.
        human_predictions['files'] = blob_utils.serialize(
            human_predictions['files'])
        human_predictions['obj_gt_cls_name'] = blob_utils.serialize(
            human_predictions['obj_gt_cls_name'])
        human_predictions['prd_gt_cls_name'] = blob_utils.serialize(
            human_predictions['prd_gt_cls_name'])
        human_predictions['obj_gt_cls'] = blob_utils.serialize(
            human_predictions['obj_gt_cls'])
        human_predictions['prd_gt_cls'] = blob_utils.serialize(
            human_predictions['prd_gt_cls'])
        return_dict['human_predictions'] = human_predictions

    return_dict['losses'] = {}
    return_dict['losses']['obj_loss'] = obj_loss
    return_dict['losses']['prd_loss'] = prd_loss
    return_dict['losses']['weight_loss'] = weight_loss
    return_dict['losses']['weight_human_loss'] = weight_human_loss
    return_dict['losses']['video_loss'] = video_loss
    if cfg.BINARY_LOSS:
        return_dict['losses']['video_binary_loss'] = video_binary_loss

    # pytorch0.4 bug on gathering scalar(0-dim) tensors
    for k, v in return_dict['losses'].items():
        return_dict['losses'][k] = v.unsqueeze(0)

    return return_dict