def get_rel_inds(self, rel_labels, im_inds, box_priors):
    """
    Get the relationship candidates

    :param rel_labels: array of relation labels; only read in training mode,
                       where its first three columns are returned as a copy
    :param im_inds: image indices, one per box
    :param box_priors: RoI bounding boxes (only read when
                       ``self.require_overlap`` is set)
    :return rel_inds: one row per candidate. Outside training each row is
                      (image index of the first box, first box index,
                      second box index).
    """
    if self.training:
        return rel_labels[:, :3].data.clone()

    # A pair of boxes is a candidate when both boxes share an image index.
    rel_cands = im_inds.data[:, None] == im_inds.data[None]
    # Clear the entries selected by diagonal_inds (presumably the self-pairs
    # on the main diagonal -- confirm against that helper).
    rel_cands.view(-1)[diagonal_inds(rel_cands)] = 0

    # Require overlap for detection
    if self.require_overlap:
        rel_cands = rel_cands & (bbox_overlaps(box_priors.data, box_priors.data) > 0)

    rel_cands = rel_cands.nonzero()
    # Fall back to one dummy (0, 0) pair when no candidate survives.
    # numel() is used instead of dim(): an empty nonzero() result is a
    # (0, 2) tensor on current PyTorch, so a `dim() == 0` test never fires.
    if rel_cands.numel() == 0:
        rel_cands = im_inds.data.new(1, 2).fill_(0)

    return torch.cat((im_inds.data[rel_cands[:, 0]][:, None], rel_cands), 1)
def get_rel_inds(self, rel_labels, im_inds, box_priors):
    """Get relation index

    Args:
        rel_labels: Variable; only read in training mode, where a contiguous
            copy of its first three columns is returned.
        im_inds: Variable holding one image index per box.
        box_priors: Variable of boxes; only read when
            ``self.require_overlap`` is set.

    Returns:
        Tensor with one row per candidate relation. Outside training each
        row is (image index of the first box, first box index, second box
        index).
    """
    # Get the relationship candidates
    if self.training:
        return rel_labels[:, :3].data.contiguous().clone()

    # Candidate pairs are boxes that carry the same image index.
    rel_cands = im_inds.data[:, None] == im_inds.data[None]
    # Zero the entries picked by diagonal_inds (presumably the self-pairs;
    # confirm against that helper).
    rel_cands.view(-1)[diagonal_inds(rel_cands)] = 0

    # Require overlap for detection
    if self.require_overlap:
        rel_cands = rel_cands & (bbox_overlaps(box_priors.data, box_priors.data) > 0)

    rel_cands = rel_cands.nonzero()
    # Use a single dummy (0, 0) pair when nothing is left. The emptiness
    # test is numel() rather than dim(): nonzero() on current PyTorch
    # returns a (0, 2) tensor, for which `dim() == 0` is never true.
    if rel_cands.numel() == 0:
        rel_cands = im_inds.data.new(1, 2).fill_(0)

    image_column = im_inds.data[rel_cands[:, 0]][:, None].contiguous()
    return torch.cat((image_column, rel_cands), 1)
def get_msg_rel_inds(self, im_inds, box_priors, box_score):
    """
    Build the candidate box pairs, regardless of training mode.

    :param im_inds: image indices, one per box
    :param box_priors: boxes; only read when ``self.require_overlap`` is set
    :param box_score: accepted but not used by this method
    :return rel_inds: one row per candidate:
                      (image index of the first box, first box index,
                      second box index)
    """
    # Pairs of boxes that share an image index.
    rel_cands = im_inds.data[:, None] == im_inds.data[None]
    # Clear the entries chosen by diagonal_inds (presumably the self-pairs;
    # confirm against that helper).
    rel_cands.view(-1)[diagonal_inds(rel_cands)] = 0

    if self.require_overlap:
        # Keep only pairs whose overlap exceeds the configured threshold.
        overlaps = bbox_overlaps(box_priors.data, box_priors.data)
        rel_cands = rel_cands & (overlaps > conf.overlap_thresh)

    rel_cands = rel_cands.nonzero()
    # Substitute one dummy (0, 0) pair when no candidate is left. numel()
    # is used because an empty nonzero() result is a (0, 2) tensor on
    # current PyTorch, so the former `dim() == 0` test could not trigger.
    if rel_cands.numel() == 0:
        rel_cands = im_inds.data.new(1, 2).fill_(0)

    rel_inds = torch.cat((im_inds.data[rel_cands[:, 0]][:, None], rel_cands), 1)
    return rel_inds
def get_rel_inds(self, rel_labels, im_inds, box_priors, box_score):
    """
    Get the relationship candidates.

    :param rel_labels: relation labels; only read in training mode, where a
                       copy of its first three columns is returned
    :param im_inds: image indices, one per box
    :param box_priors: boxes; only read when ``self.require_overlap`` is set
    :param box_score: accepted but not used by this method
    :return rel_inds: one row per candidate. Outside training each row is
                      (image index of the first box, first box index,
                      second box index).
    """
    if self.training:
        return rel_labels[:, :3].data.clone()

    # Boxes with the same image index are paired up.
    rel_cands = im_inds.data[:, None] == im_inds.data[None]
    # Clear the entries chosen by diagonal_inds (presumably the self-pairs;
    # confirm against that helper).
    rel_cands.view(-1)[diagonal_inds(rel_cands)] = 0

    # Require overlap in the test stage
    if self.require_overlap:
        rel_cands = rel_cands & (bbox_overlaps(box_priors.data, box_priors.data) > 0)

    rel_cands = rel_cands.nonzero()
    # Provide one dummy (0, 0) pair when there is no candidate. The check
    # uses numel(): on current PyTorch an empty nonzero() result has shape
    # (0, 2), so testing `dim() == 0` would never take this branch.
    if rel_cands.numel() == 0:
        rel_cands = im_inds.data.new(1, 2).fill_(0)

    return torch.cat((im_inds.data[rel_cands[:, 0]][:, None], rel_cands), 1)
def get_overlap_info(im_inds, box_priors):
    """
    Describe, for every box, how it overlaps the other boxes.

    input:
        im_inds: [num_object]
        box_priors: [number_object, 4]
    output: tuple (boxes_info, intersection)
        boxes_info: [number_object, 6], columns in order:
            number of overlapped obj (self not included)
            sum of all intersection area (self not included), divided by IM_SCALE ** 2
            sum of IoU (Intersection over Union)
            average of all intersection area (self not included)
            average of IoU (Intersection over Union)
            roi area, divided by IM_SCALE ** 2
        intersection: [number_object, number_object] pairwise intersections
            with the diagonal zeroed
    """
    n_boxes = box_priors.shape[0]

    # Add 1000 * image index to every coordinate of a box, presumably so that
    # boxes belonging to different images can never intersect.
    shift = (im_inds * 1000).view(-1, 1).expand(box_priors.shape)
    shifted_boxes = box_priors + shift.float()

    # Mask that is 0 on the diagonal and 1 elsewhere: removes self-overlap.
    off_diagonal = Variable(1.0 - torch.eye(n_boxes).float().cuda())
    intersection = bbox_intersections(shifted_boxes, shifted_boxes) * off_diagonal
    overlap = bbox_overlaps(shifted_boxes, shifted_boxes) * off_diagonal
    box_area = bbox_area(shifted_boxes)

    image_area = float(IM_SCALE * IM_SCALE)
    boxes_info = Variable(torch.FloatTensor(n_boxes, 6).zero_().cuda())

    for row in range(n_boxes):
        inter_row = intersection[row]
        # Count of boxes with a non-zero intersection with this one.
        boxes_info[row, 0] = torch.nonzero(inter_row).numel()
        boxes_info[row, 1] = inter_row.view(-1).sum() / image_area
        boxes_info[row, 2] = overlap[row].view(-1).sum()
        # Averages; the epsilon keeps the division defined when the count is 0.
        boxes_info[row, 3] = boxes_info[row, 1] / (boxes_info[row, 0] + 1e-9)
        boxes_info[row, 4] = boxes_info[row, 2] / (boxes_info[row, 0] + 1e-9)
        boxes_info[row, 5] = box_area[row] / image_area

    return boxes_info, intersection
def forward(self, x, im_sizes, image_offset, gt_boxes=None, gt_classes=None, gt_rels=None,
            proposals=None, train_anchor_inds=None, return_fmap=False, depth_imgs=None):
    """
    Forward pass for detection

    :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
    :param im_sizes: A numpy array of (h, w, scale) for each image.
    :param image_offset: Offset onto what image we're on for MGPU training
                         (if single GPU this is 0)

    Training parameters:
    :param gt_boxes: [num_gt, 4] GT boxes over the batch.
    :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
    :param gt_rels: forwarded unchanged to ``self.get_boxes``
    :param proposals: forwarded unchanged to ``self.get_boxes``
    :param train_anchor_inds: a [num_train, 2] array of indices for the anchors
                              that will be used to compute the training loss.
                              Each (img_ind, fpn_idx)
    :param return_fmap: when true the feature map is included in the result,
                        otherwise that field is None
    :param depth_imgs: accepted but not used by this method
    :return: a ``Result`` carrying both the raw detector outputs (``od_*``)
             and the outputs after the optional NMS step (``rm_*``)
    """
    fmap = self.feature_map(x)

    # Get boxes from RPN
    (rois, obj_labels, bbox_targets,
     rpn_scores, rpn_box_deltas, rel_labels) = self.get_boxes(
        fmap, im_sizes, image_offset, gt_boxes, gt_classes, gt_rels,
        train_anchor_inds, proposals=proposals)

    # Now classify them
    obj_fmap = self.obj_feature_map(fmap, rois)
    od_obj_dists = self.score_fc(obj_fmap)

    # Per-class box deltas, used for refining the bounding boxes; they are
    # not computed in 'gtbox' mode.
    if self.mode != 'gtbox':
        od_box_deltas = self.bbox_fc(obj_fmap).view(-1, len(self.classes), 4)
    else:
        od_box_deltas = None

    # Box coordinates (rois[:, 0] holds the image indices).
    od_box_priors = rois[:, 1:]

    run_nms = (not self.training and self.mode != 'gtbox') or \
        self.mode in ('proposals', 'refinerels')

    if not run_nms:
        # No NMS: the "rm" outputs are simply the raw detector outputs.
        im_inds = rois[:, 0].long().contiguous() + image_offset
        nms_scores = nms_preds = nms_boxes_assign = nms_boxes = None
        box_priors = rois[:, 1:]
        rm_obj_labels = obj_labels
        box_deltas = od_box_deltas
        obj_dists = od_obj_dists
    else:
        nms_inds, nms_scores, nms_preds, nms_boxes_assign, nms_boxes, nms_imgs = \
            self.nms_boxes(od_obj_dists, rois, od_box_deltas, im_sizes)

        im_inds = nms_imgs + image_offset
        obj_dists = od_obj_dists[nms_inds]
        obj_fmap = obj_fmap[nms_inds]
        box_deltas = od_box_deltas[nms_inds]
        box_priors = nms_boxes[:, 0]

        rm_obj_labels = None
        if self.training and self.mode != 'gtbox':
            # NOTE: If we're doing this during training, we need to assign
            # labels here.
            iou_to_gt = bbox_overlaps(box_priors, gt_boxes).data
            # A box can only be matched to GT boxes of its own image.
            iou_to_gt[im_inds.data[:, None] != gt_classes.data[None, :, 0]] = 0.0
            best_iou, best_gt = iou_to_gt.max(1)
            rm_obj_labels = gt_classes[:, 1][best_gt]
            # Boxes whose best overlap is below 0.5 get label 0.
            rm_obj_labels[best_iou < 0.5] = 0

    return Result(
        od_obj_dists=od_obj_dists,
        rm_obj_dists=obj_dists,
        obj_scores=nms_scores,
        obj_preds=nms_preds,
        obj_fmap=obj_fmap,
        od_box_deltas=od_box_deltas,
        rm_box_deltas=box_deltas,
        od_box_targets=bbox_targets,
        rm_box_targets=bbox_targets,
        od_box_priors=od_box_priors,
        rm_box_priors=box_priors,
        boxes_assigned=nms_boxes_assign,
        boxes_all=nms_boxes,
        od_obj_labels=obj_labels,
        rm_obj_labels=rm_obj_labels,
        rpn_scores=rpn_scores,
        rpn_box_deltas=rpn_box_deltas,
        rel_labels=rel_labels,
        im_inds=im_inds,
        fmap=fmap if return_fmap else None
    )
def forward(self, x, im_sizes, image_offset, gt_boxes=None, gt_classes=None, gt_rels=None,
            proposals=None, train_anchor_inds=None, return_fmap=False):
    """
    Forward pass for detection

    :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
    :param im_sizes: A numpy array of (h, w, scale) for each image.
    :param image_offset: Offset onto what image we're on for MGPU training
                         (if single GPU this is 0)

    Training parameters:
    :param gt_boxes: [num_gt, 4] GT boxes over the batch.
    :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
    :param gt_rels: forwarded unchanged to ``self.get_boxes``
    :param proposals: forwarded unchanged to ``self.get_boxes``
    :param train_anchor_inds: a [num_train, 2] array of indices for the anchors
                              that will be used to compute the training loss.
                              Each (img_ind, fpn_idx)
    :param return_fmap: include the feature map in the result when true;
                        the field is None otherwise
    :return: a ``Result`` holding the raw detector outputs (``od_*``) next to
             the outputs after the optional NMS step (``rm_*``)
    """
    # Shape notes below are the original author's examples -- TODO confirm.
    # fmap: (batch_size, 512, 37, 37)
    fmap = self.feature_map(x)

    # Get boxes from RPN
    #   rois:       (NumOfRoIs, 5), first column is the image index
    #   obj_labels: (NumOfRoIs,), object class index
    #   rel_labels: (NumOfRels, 4) [img ind, box0 ind, box1 ind, rel type]
    boxes_out = self.get_boxes(fmap, im_sizes, image_offset, gt_boxes, gt_classes,
                               gt_rels, train_anchor_inds, proposals=proposals)
    rois, obj_labels, bbox_targets, rpn_scores, rpn_box_deltas, rel_labels = boxes_out

    # Now classify them
    obj_fmap = self.obj_feature_map(fmap, rois)   # (NumOfRoI, 4096)
    od_obj_dists = self.score_fc(obj_fmap)        # (NumOfRoI, NumOfClasses)

    od_box_deltas = None
    if self.mode != 'gtbox':
        od_box_deltas = self.bbox_fc(obj_fmap).view(-1, len(self.classes), 4)

    od_box_priors = rois[:, 1:]

    evaluating_detections = not self.training and self.mode != 'gtbox'
    if evaluating_detections or self.mode in ('proposals', 'refinerels'):
        (nms_inds, nms_scores, nms_preds,
         nms_boxes_assign, nms_boxes, nms_imgs) = self.nms_boxes(
            od_obj_dists, rois, od_box_deltas, im_sizes)

        # Keep only what survived NMS.
        im_inds = nms_imgs + image_offset
        obj_dists = od_obj_dists[nms_inds]
        obj_fmap = obj_fmap[nms_inds]
        box_deltas = od_box_deltas[nms_inds]
        box_priors = nms_boxes[:, 0]

        if not self.training or self.mode == 'gtbox':
            rm_obj_labels = None
        else:
            # NOTE: If we're doing this during training, we need to assign
            # labels here.
            overlaps = bbox_overlaps(box_priors, gt_boxes).data
            # Ignore GT boxes that belong to a different image.
            different_image = im_inds.data[:, None] != gt_classes.data[None, :, 0]
            overlaps[different_image] = 0.0
            top_overlap, top_gt = overlaps.max(1)
            rm_obj_labels = gt_classes[:, 1][top_gt]
            # Label 0 for boxes that overlap no GT box by at least 0.5.
            rm_obj_labels[top_overlap < 0.5] = 0
    else:
        # NMS skipped: pass the raw detector outputs straight through.
        im_inds = rois[:, 0].long().contiguous() + image_offset
        nms_scores, nms_preds = None, None
        nms_boxes_assign, nms_boxes = None, None
        box_priors = rois[:, 1:]
        rm_obj_labels = obj_labels
        box_deltas = od_box_deltas
        obj_dists = od_obj_dists

    return Result(
        od_obj_dists=od_obj_dists,
        rm_obj_dists=obj_dists,
        obj_scores=nms_scores,
        obj_preds=nms_preds,
        obj_fmap=obj_fmap,
        od_box_deltas=od_box_deltas,
        rm_box_deltas=box_deltas,
        od_box_targets=bbox_targets,
        rm_box_targets=bbox_targets,
        od_box_priors=od_box_priors,
        rm_box_priors=box_priors,
        boxes_assigned=nms_boxes_assign,
        boxes_all=nms_boxes,
        od_obj_labels=obj_labels,
        rm_obj_labels=rm_obj_labels,
        rpn_scores=rpn_scores,
        rpn_box_deltas=rpn_box_deltas,
        rel_labels=rel_labels,
        im_inds=im_inds,
        fmap=fmap if return_fmap else None,
    )
def proposal_assignments_rel(rpn_rois, gt_boxes, gt_classes, gt_rels, image_offset,
                             fg_thresh=0.5):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels, bounding-box regression targets and relation labels.

    :param rpn_rois: [img_ind, x1, y1, x2, y2]
    :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1]
    :param gt_classes: [num_boxes, 2] array of [img_ind, class]
    :param gt_rels: [num_boxes, 4] array of [img_ind, box_0, box_1, rel type]
    :param image_offset: subtracted from the image index column of
                         gt_classes and gt_rels
    :param fg_thresh: Overlap threshold for a ROI to be considered foreground
                      (if >= FG_THRESH)
    :return:
        rois: [num_rois, 5]
        labels: [num_rois] array of labels
        bbox_targets: [num_rois, 4] array of targets for the labels.
        rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type)
    """
    max_fg_rois = int(np.round(ROIS_PER_IMG_REL * FG_FRACTION_REL))
    max_fg_rels = int(np.round(REL_FG_FRACTION * RELS_PER_IMG))

    # All of the matching is done on the CPU in numpy.
    roi_img = rpn_rois[:, 0].cpu().numpy().astype(np.int64)
    roi_boxes = rpn_rois[:, 1:].cpu().numpy()
    gt_boxes_np = gt_boxes.cpu().numpy()
    gt_classes_np = gt_classes.cpu().numpy()
    gt_rels_np = gt_rels.cpu().numpy()

    # Make the image indices relative to this batch (in place).
    gt_classes_np[:, 0] -= image_offset
    gt_rels_np[:, 0] -= image_offset

    n_images = gt_classes_np[:, 0].max() + 1

    roi_chunks, label_chunks, rel_chunks, target_chunks = [], [], [], []
    boxes_so_far = 0
    for img in range(n_images):
        rows_pred = np.where(roi_img == img)[0]
        rows_gt = np.where(gt_classes_np[:, 0] == img)[0]

        img_gt_boxes = gt_boxes_np[rows_gt]
        img_gt_classes = gt_classes_np[rows_gt, 1]
        img_gt_rels = gt_rels_np[gt_rels_np[:, 0] == img, 1:]

        # The GT boxes are appended to the proposals of the image.
        candidates = np.concatenate((roi_boxes[rows_pred], img_gt_boxes), 0)
        ious = bbox_overlaps(candidates, img_gt_boxes)

        keep, keep_labels, keep_gt = _sel_inds(
            ious, img_gt_classes, fg_thresh, max_fg_rois, ROIS_PER_IMG_REL)
        kept_boxes = candidates[keep]

        img_rels = _sel_rels(ious[keep], kept_boxes, keep_labels,
                             img_gt_classes, img_gt_rels,
                             fg_thresh=fg_thresh, fg_rels_per_image=max_fg_rels)
        # Turn per-image box indices into indices over the whole batch.
        img_rels[:, 0:2] += boxes_so_far

        roi_chunks.append(np.column_stack((
            img * np.ones(keep.shape[0], dtype=np.float32),
            kept_boxes,
        )))
        label_chunks.append(keep_labels)
        rel_chunks.append(np.column_stack((
            img * np.ones(img_rels.shape[0], dtype=np.int64),
            img_rels,
        )))
        target_chunks.append(img_gt_boxes[keep_gt])

        boxes_so_far += keep.size

    def _to_device(tensor_cls, chunks):
        # Concatenate the per-image arrays and copy them to rpn_rois' GPU.
        stacked = tensor_cls(np.concatenate(chunks, 0))
        return stacked.cuda(rpn_rois.get_device(), non_blocking=True)

    rois = _to_device(torch.FloatTensor, roi_chunks)
    labels = _to_device(torch.LongTensor, label_chunks)
    bbox_targets = _to_device(torch.FloatTensor, target_chunks)
    rel_labels = _to_device(torch.LongTensor, rel_chunks)
    return rois, labels, bbox_targets, rel_labels
def val_batch(batch_num, b, evaluator, thrs=(20, 50, 100)):
    """
    Write qualitative scene-graph results for one validation image.

    The top 50 predicted relations are compared with the ground truth and
    split into matched ("good"), unmatched ground-truth ("missed") and
    unmatched predicted ("bad") edges. Three annotated images and two text
    files are saved under a per-mode output directory.

    :param batch_num: index of the image in the validation set ``val``
    :param b: key used to fetch the detections via ``detector[b]``
    :param evaluator: mapping from mode name to an evaluator exposing
                      ``evaluate_scene_graph_entry``
    :param thrs: accepted but not used by this function
    :return: None
    """
    det_res = detector[b]
    assert conf.num_gpus == 1
    boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i = det_res

    # Trailing shapes are the original author's examples.
    gt_entry = {
        'gt_classes': val.gt_classes[batch_num].copy(),  # e.g. (23,)
        'gt_relations': val.relationships[batch_num].copy(),  # e.g. (29, 3)
        'gt_boxes': val.gt_boxes[batch_num].copy(),  # e.g. (23, 4)
    }
    assert np.all(objs_i[rels_i[:, 0]] > 0) and np.all(objs_i[rels_i[:, 1]] > 0)

    pred_entry = {
        'pred_boxes': boxes_i * BOX_SCALE / IM_SCALE,  # e.g. (64, 4)
        'pred_classes': objs_i,  # e.g. (64,)
        'pred_rel_inds': rels_i,  # e.g. (1202, 2)
        'obj_scores': obj_scores_i,  # e.g. (64,)
        'rel_scores': pred_scores_i,  # e.g. (1202, 51)
    }

    # pred_5ples: (num_rel, 5), (id0, id1, cls0, cls1, rel)
    pred_to_gt, pred_5ples, _ = evaluator[conf.mode].evaluate_scene_graph_entry(
        gt_entry,
        pred_entry,
    )

    # SET RECALL THRESHOLD HERE
    pred_to_gt = pred_to_gt[:50]
    pred_5ples = pred_5ples[:50]

    # A predicted box matches a GT box when IoU >= 0.5 and the classes agree.
    objs_match = (bbox_overlaps(pred_entry['pred_boxes'], gt_entry['gt_boxes']) >= 0.5) & (
        objs_i[:, None] == gt_entry['gt_classes'][None]
    )

    has_seen = defaultdict(int)
    has_seen_gt = defaultdict(int)
    pred_ind2name = {}
    gt_ind2name = {}
    edges = {}
    missededges = {}
    badedges = {}

    # query_pred / query_gt give a distinct name to every instance of a class:
    # "man-1", "man-2", ... for predictions and "man-GT1", ... for GT boxes.
    def query_pred(pred_ind):
        if pred_ind not in pred_ind2name:
            # pred_ind is a row index of objs_i; objs_i[pred_ind] is its class.
            has_seen[objs_i[pred_ind]] += 1
            pred_ind2name[pred_ind] = '{}-{}'.format(train.ind_to_classes[objs_i[pred_ind]],
                                                     has_seen[objs_i[pred_ind]])
        return pred_ind2name[pred_ind]

    def query_gt(gt_ind):
        gt_cls = gt_entry['gt_classes'][gt_ind]
        if gt_ind not in gt_ind2name:
            has_seen_gt[gt_cls] += 1
            gt_ind2name[gt_ind] = '{}-GT{}'.format(train.ind_to_classes[gt_cls],
                                                   has_seen_gt[gt_cls])
        return gt_ind2name[gt_ind]

    # 5ples are (id0, id1, cls0, cls1, rel); id0/id1 index the class arrays.

    # 1. edges: predictions that matched at least one GT relation.
    # dtype=bool keeps the masks valid even when pred_to_gt is empty.
    is_matched = np.array([len(x) > 0 for x in pred_to_gt], dtype=bool)
    for fiveple in pred_5ples[is_matched]:
        head_name = query_pred(fiveple[0])
        tail_name = query_pred(fiveple[1])
        edges[(head_name, tail_name)] = train.ind_to_predicates[fiveple[4]]

    # 2. missededges: GT relations that no kept prediction matched.
    gt_5ples = np.column_stack((gt_entry['gt_relations'][:, :2],
                                gt_entry['gt_classes'][gt_entry['gt_relations'][:, 0]],
                                gt_entry['gt_classes'][gt_entry['gt_relations'][:, 1]],
                                gt_entry['gt_relations'][:, 2],
                                ))
    # Row indices of gt_5ples that were matched. The initializer makes the
    # reduction well defined for an empty or single-element pred_to_gt.
    has_match = reduce(np.union1d, pred_to_gt, np.array([], dtype=np.int64))
    for gt in gt_5ples[np.setdiff1d(np.arange(gt_5ples.shape[0]), has_match)]:
        namez = []
        for i in range(2):  # i = 0, 1 is the subject / object of the relation
            matching_obj = np.where(objs_match[:, gt[i]])[0]
            # Prefer the name of a matching predicted box, else name the GT box.
            if matching_obj.size > 0:
                name = query_pred(matching_obj[0])
            else:
                name = query_gt(gt[i])
            namez.append(name)
        missededges[tuple(namez)] = train.ind_to_predicates[gt[4]]

    # 3. badedges: predictions that matched no GT relation.
    for fiveple in pred_5ples[~is_matched]:
        head_name_bad = query_pred(fiveple[0])
        tail_name_bad = query_pred(fiveple[1])
        badedges[(head_name_bad, tail_name_bad)] = train.ind_to_predicates[fiveple[4]]

    theimg = load_unscaled(val.filenames[batch_num])
    draw1 = ImageDraw.Draw(theimg)
    theimg2 = theimg.copy()
    draw2 = ImageDraw.Draw(theimg2)
    theimg3 = theimg.copy()
    draw3 = ImageDraw.Draw(theimg3)

    # Image 1: every predicted box that received a name above.
    for pred_ind in pred_ind2name.keys():
        draw1 = draw_box(draw1, pred_entry['pred_boxes'][pred_ind],
                         cls_ind=objs_i[pred_ind],
                         text_str=pred_ind2name[pred_ind])
    # Image 2: every GT box that received a name above.
    for gt_ind in gt_ind2name.keys():
        draw2 = draw_box(draw2, gt_entry['gt_boxes'][gt_ind],
                         cls_ind=gt_entry['gt_classes'][gt_ind],
                         text_str=gt_ind2name[gt_ind])
    # Image 3: the remaining predicted boxes, labelled "class--score".
    for pred_64 in range(pred_entry['pred_boxes'].shape[0]):
        if pred_64 not in pred_ind2name:
            class_score_text = train.ind_to_classes[pred_entry['pred_classes'][pred_64]] + \
                '--' + str(pred_entry['obj_scores'][pred_64])
            draw3 = draw_box(draw3, pred_entry['pred_boxes'][pred_64, :],
                             cls_ind=pred_entry['pred_classes'][pred_64],
                             text_str=class_score_text)

    # The suffix of the output name is the recall in percent ("-60" = 60).
    num_gt_rels = gt_entry['gt_relations'].shape[0]
    recall = int(100 * len(has_match) / num_gt_rels) if num_gt_rels else 0

    out_id = '{}-{}'.format(val.filenames[batch_num].split('/')[-1][:-4], recall)
    dirname = '/home/yiwuzhong/motifs/qualitative/' + conf.mode + '/'
    pathname = os.path.join(dirname)
    if not os.path.exists(pathname):
        # makedirs also creates missing parent directories.
        os.makedirs(pathname)

    theimg.save(os.path.join(pathname, out_id + '-deteceted.jpg'), quality=100, subsampling=0)
    theimg2.save(os.path.join(pathname, out_id + '-missed.jpg'), quality=100, subsampling=0)
    theimg3.save(os.path.join(pathname, out_id + '-rcnnbox.jpg'), quality=100, subsampling=0)

    with open(os.path.join(pathname, out_id + '.txt'), 'w') as f:
        f.write('Good: gt and detected \n')
        for (o1, o2), p in edges.items():
            f.write('{} - {} - {}\n'.format(o1, p, o2))
        f.write('\nMissed: gt but missed \n')
        for (o1, o2), p in missededges.items():
            f.write('{} - {} - {}\n'.format(o1, p, o2))
        f.write('\nBad: not gt but detected \n')
        for (o1, o2), p in badedges.items():
            f.write('{} - {} - {}\n'.format(o1, p, o2))

    with open(os.path.join(pathname, out_id + '-box.txt'), 'w') as bb:
        # The header gets its own line instead of running into the first entry.
        bb.write('Detected Boxes from Faster RCNN\n')
        for bbi in range(pred_entry['pred_classes'].shape[0]):
            bb.write('{}: {}\n'.format(train.ind_to_classes[pred_entry['pred_classes'][bbi]],
                                       pred_entry['obj_scores'][bbi]))
def proposal_assignments_det(rpn_rois, gt_boxes, gt_classes, image_offset, fg_thresh=0.5):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.

    :param rpn_rois: [img_ind, x1, y1, x2, y2]
    :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1
    :param gt_classes: [num_boxes, 2] array of [img_ind, class]
    :param image_offset: subtracted from the image index column of gt_classes
    :param fg_thresh: Overlap threshold for a ROI to be considered foreground
                      (if >= FG_THRESH)
    :return:
        rois: [num_rois, 5]
        labels: [num_rois] array of labels
        bbox_targets [num_rois, 4] array of targets for the labels.
    """
    fg_rois_per_image = int(np.round(ROIS_PER_IMG * FG_FRACTION))

    gt_img_inds = gt_classes[:, 0] - image_offset

    # The GT boxes are added to the pool of proposals, then everything is
    # sorted by image index so each image occupies one contiguous slice.
    all_boxes = torch.cat([rpn_rois[:, 1:], gt_boxes], 0)
    ims_per_box = torch.cat([rpn_rois[:, 0].long(), gt_img_inds], 0)
    im_sorted, idx = torch.sort(ims_per_box, 0)
    all_boxes = all_boxes[idx]

    # Assume that the GT boxes are already sorted in terms of image id
    num_images = int(im_sorted[-1]) + 1

    labels = []
    rois = []
    bbox_targets = []
    for im_ind in range(num_images):
        g_inds = (gt_img_inds == im_ind).nonzero()

        # Skip images without any GT box. numel() is used because an empty
        # nonzero() result is a (0, 1) tensor on current PyTorch: a
        # `dim() == 0` test never fires and g_inds[0] below would raise.
        if g_inds.numel() == 0:
            continue
        g_inds = g_inds.squeeze(1)
        g_start = g_inds[0]
        g_end = g_inds[-1] + 1

        t_inds = (im_sorted == im_ind).nonzero().squeeze(1)
        t_start = t_inds[0]
        t_end = t_inds[-1] + 1

        # Max overlaps: for each predicted box, get the max ROI
        # Get the indices into the GT boxes too (must offset by the box start)
        ious = bbox_overlaps(all_boxes[t_start:t_end], gt_boxes[g_start:g_end])
        max_overlaps, gt_assignment = ious.max(1)
        max_overlaps = max_overlaps.cpu().numpy()
        gt_assignment += g_start

        keep_inds_np, num_fg = _sel_inds(max_overlaps, fg_thresh, fg_rois_per_image,
                                         ROIS_PER_IMG)

        if keep_inds_np.size == 0:
            continue

        keep_inds = torch.LongTensor(keep_inds_np).cuda(rpn_rois.get_device())

        labels_ = gt_classes[:, 1][gt_assignment[keep_inds]]
        bbox_target_ = gt_boxes[gt_assignment[keep_inds]]

        # Clamp labels_ for the background RoIs to 0
        if num_fg < labels_.size(0):
            labels_[num_fg:] = 0

        rois_ = torch.cat((
            im_sorted[t_start:t_end, None][keep_inds].float(),
            all_boxes[t_start:t_end][keep_inds],
        ), 1)

        labels.append(labels_)
        rois.append(rois_)
        bbox_targets.append(bbox_target_)

    rois = torch.cat(rois, 0)
    labels = torch.cat(labels, 0)
    bbox_targets = torch.cat(bbox_targets, 0)
    return rois, labels, bbox_targets
def forward(self, obj_fmaps, obj_logits, im_inds, obj_labels=None, box_priors=None,
            boxes_per_cls=None, batch_size=None, rois=None, od_box_deltas=None,
            im_sizes=None, image_offset=None, gt_classes=None, gt_boxes=None,
            ):
    """
    Forward pass through the object context: produce refined object
    distributions and the object predictions derived from them.

    :param obj_fmaps: per-box feature vectors
    :param obj_logits: per-box class logits from the detector
    :param im_inds: image index of every box
    :param obj_labels: object labels; required in 'predcls' mode, otherwise
                       used as predictions when given
    :param box_priors: boxes, fed to the position embedding
    :param boxes_per_cls: accepted but not used by this method
    :param batch_size: batch dimension handed to ``decoder_lin2`` in the
                       sgdet path
    :param rois: forwarded to ``self.nms_boxes`` and filtered by its result
    :param od_box_deltas: forwarded (detached) to ``self.nms_boxes``
    :param im_sizes: forwarded to ``self.nms_boxes``
    :param image_offset: added to the image indices returned by NMS
    :param gt_classes: [img_ind, class] rows, used to label boxes in training
    :param gt_boxes: GT boxes, used to label boxes in training
    :return: in 'sgdet' mode the tuple (obj_dists2, obj_preds, im_inds,
             box_priors, rm_obj_labels, rois, nms_boxes); in every other
             mode (obj_dists2, obj_preds)
    """
    obj_embed = F.softmax(obj_logits, dim=1) @ self.obj_embed.weight
    pos_embed = self.pos_embed(Variable(center_size(box_priors)))
    obj_pre_rep = torch.cat((obj_fmaps, obj_embed, pos_embed), 1)

    if self.mode == 'predcls':
        obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_classes))
    else:
        if self.mode == 'sgcls':
            obj_dists2 = self.decoder_lin1(obj_pre_rep)
            obj_dists2 = self.decoder_lin2(obj_dists2.view(-1, 1, 1024), 1)
            obj_dists2 = obj_dists2[1]
            obj_dists2 = self.decoder_lin3(obj_dists2.view(-1, 1024))
        else:
            # this is for sgdet
            obj_dists2 = self.decoder_lin1(obj_pre_rep)
            # Reorder the boxes with sort_rois, pad them into a
            # (time, batch) layout for decoder_lin2, then undo both steps.
            perm, inv_perm, ls_transposed = self.sort_rois(im_inds.data, None, box_priors)
            obj_dists2 = obj_dists2[perm].contiguous()
            obj_dists2 = PackedSequence(obj_dists2, torch.tensor(ls_transposed))
            obj_dists2, lengths1 = pad_packed_sequence(obj_dists2, batch_first=False)
            obj_dists2 = self.decoder_lin2(obj_dists2.view(-1, batch_size, 1024), batch_size)[1]
            obj_dists2, _ = pack_padded_sequence(obj_dists2, lengths1, batch_first=False)
            obj_dists2 = self.decoder_lin3(obj_dists2.view(-1, 1024))
            obj_dists2 = obj_dists2[inv_perm]

    # NOTE(review): outside training this branch is also taken in 'predcls'
    # and 'sgcls' mode, where it dereferences od_box_deltas -- confirm that
    # callers always pass rois / od_box_deltas / im_sizes when evaluating.
    if (not self.training and not self.mode == 'gtbox') or self.mode in ('sgdet', 'refinerels'):
        # try: dont apply nms here, but after own obj_classifier
        nms_inds, nms_scores, nms_preds, nms_boxes_assign, nms_boxes, nms_imgs = self.nms_boxes(
            obj_dists2.clone().detach(),
            rois,
            od_box_deltas.clone().detach(),
            im_sizes,
        )
        im_inds = nms_imgs + image_offset
        obj_dists2 = obj_dists2[nms_inds]
        box_priors = nms_boxes[:, 0]
        rois = rois[nms_inds]

        if self.training and not self.mode == 'gtbox':
            # NOTE: If we're doing this during training, we need to assign labels here.
            pred_to_gtbox = bbox_overlaps(box_priors, gt_boxes).data
            # Only GT boxes of the same image may be matched.
            pred_to_gtbox[im_inds.data[:, None] != gt_classes.data[None, :, 0]] = 0.0
            max_overlaps, argmax_overlaps = pred_to_gtbox.max(1)
            rm_obj_labels = gt_classes[:, 1][argmax_overlaps]
            # Best overlap below 0.5 -> label 0.
            rm_obj_labels[max_overlaps < 0.5] = 0
        else:
            rm_obj_labels = None

    if self.mode == 'sgdet' and not self.training:  # have tried in training
        # NMS here for baseline: per class, keep only the boxes that survive
        # apply_nms, then take the best remaining non-zero class per box.
        probs = F.softmax(obj_dists2, 1)
        nms_mask = obj_dists2.data.clone()
        nms_mask.zero_()
        for c_i in range(1, obj_dists2.size(1)):
            scores_ci = probs.data[:, c_i]
            boxes_ci = nms_boxes.data[:, c_i]
            keep = apply_nms(scores_ci, boxes_ci,
                             pre_nms_topn=scores_ci.size(0),
                             post_nms_topn=scores_ci.size(0),
                             nms_thresh=0.5)  # nms_thresh= 0.3 default
            nms_mask[:, c_i][keep] = 1
        # Column 0 is skipped, hence the "+ 1" to get back a class index.
        obj_preds = Variable(nms_mask * probs.data, volatile=True)[:, 1:].max(1)[1] + 1
    else:
        if self.mode == 'sgdet':
            # use gt labels when they were assigned, else the predicted class
            obj_preds = rm_obj_labels if rm_obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1
        else:
            obj_preds = obj_labels if obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1

    if self.mode == 'sgdet':
        return obj_dists2, obj_preds, im_inds, box_priors, rm_obj_labels, rois, nms_boxes
    else:
        return obj_dists2, obj_preds
def proposal_assignments_det(rpn_rois, gt_boxes, gt_classes, image_offset, fg_thresh=0.5):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.

    :param rpn_rois: [img_ind, x1, y1, x2, y2]
    :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1
    :param gt_classes: [num_boxes, 2] array of [img_ind, class]
    :param image_offset: subtracted from the image index column of gt_classes
    :param fg_thresh: Overlap threshold for a ROI to be considered foreground
                      (if >= FG_THRESH)
    :return:
        rois: [num_rois, 5]
        labels: [num_rois] array of labels
        bbox_targets [num_rois, 4] array of targets for the labels.
        All three are Tensors.
    """
    fg_rois_per_image = int(np.round(ROIS_PER_IMG * FG_FRACTION))

    # Image index of every GT box, relative to this batch.
    gt_img_inds = gt_classes[:, 0] - image_offset

    # Proposals followed by GT boxes, e.g. 12000 + 88 rows (author's example).
    all_boxes = torch.cat([rpn_rois[:, 1:], gt_boxes], 0)
    ims_per_box = torch.cat([rpn_rois[:, 0].long(), gt_img_inds], 0)

    # Sort by image index so that every image owns one contiguous slice.
    im_sorted, idx = torch.sort(ims_per_box, 0)
    all_boxes = all_boxes[idx]

    # Assume that the GT boxes are already sorted in terms of image id
    num_images = int(im_sorted[-1]) + 1

    labels = []
    rois = []
    bbox_targets = []
    for im_ind in range(num_images):  # handle one image at a time
        g_inds = (gt_img_inds == im_ind).nonzero()

        # Skip images that have no GT box. The test is numel(): on current
        # PyTorch an empty nonzero() result has shape (0, 1), so
        # `dim() == 0` is never true and g_inds[0] below would raise.
        if g_inds.numel() == 0:
            continue
        g_inds = g_inds.squeeze(1)
        g_start = g_inds[0]  # first row of this image in gt_img_inds
        g_end = g_inds[-1] + 1  # one past its last row

        t_inds = (im_sorted == im_ind).nonzero().squeeze(1)
        t_start = t_inds[0]  # first row of this image in im_sorted
        t_end = t_inds[-1] + 1  # one past its last row

        # Max overlaps: for each predicted box, get the max ROI
        # Get the indices into the GT boxes too (must offset by the box start)
        # ious has one row per box of the image and one column per GT box.
        ious = bbox_overlaps(all_boxes[t_start:t_end], gt_boxes[g_start:g_end])
        max_overlaps, gt_assignment = ious.max(1)  # index relative to g_start
        max_overlaps = max_overlaps.cpu().numpy()
        gt_assignment += g_start  # now an absolute row of gt_classes / gt_boxes

        # keep_inds_np: the selected boxes; the background clamp below relies
        # on the first num_fg entries being the foreground ones.
        keep_inds_np, num_fg = _sel_inds(max_overlaps, fg_thresh, fg_rois_per_image,
                                         ROIS_PER_IMG)

        if keep_inds_np.size == 0:
            continue

        # Convert the numpy indices to a LongTensor on rpn_rois' GPU.
        keep_inds = torch.LongTensor(keep_inds_np).cuda(rpn_rois.get_device())

        labels_ = gt_classes[:, 1][gt_assignment[keep_inds]]
        bbox_target_ = gt_boxes[gt_assignment[keep_inds]]

        # Clamp labels_ for the background RoIs to 0
        if num_fg < labels_.size(0):
            labels_[num_fg:] = 0

        # One row per kept box: [img_ind, box coordinates].
        rois_ = torch.cat((
            im_sorted[t_start:t_end, None][keep_inds].float(),
            all_boxes[t_start:t_end][keep_inds],
        ), 1)

        labels.append(labels_)
        rois.append(rois_)
        bbox_targets.append(bbox_target_)

    rois = torch.cat(rois, 0)
    labels = torch.cat(labels, 0)
    bbox_targets = torch.cat(bbox_targets, 0)
    return rois, labels, bbox_targets
def forward(self, x, im_sizes, image_offset, gt_boxes=None, gt_classes=None, gt_rels=None,
            proposals=None, train_anchor_inds=None, gt_boxes_human=None,
            gt_human_classes=None, train_anchor_inds_human=None, return_fmap=False):
    """
    Forward pass for detection with a second, "human" RPN branch.

    :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
    :param im_sizes: A numpy array of (h, w, scale) for each image.
    :param image_offset: Offset onto what image we're on for MGPU training
                         (if single GPU this is 0)
    :param gt_boxes: [num_gt, 4] GT boxes over the batch.
    :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
    :param gt_rels: forwarded unchanged to both ``get_boxes`` calls
    :param proposals: forwarded unchanged to both ``get_boxes`` calls
    :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that
                              will be used to compute the training loss.
                              Each (img_ind, fpn_idx)
    :param gt_boxes_human: GT boxes handed to the human branch
    :param gt_human_classes: GT classes handed to the human branch; column 1 is
                             fed to ``self.verb_embed`` while training
    :param train_anchor_inds_human: anchor indices handed to the human branch
    :param return_fmap: when true the backbone feature map is attached to the result
    :return: a ``Result`` holding the object-branch and human-branch outputs
    """
    fmap = self.feature_map(x)

    if self.training:
        obj_emb = self.obj_embed(gt_classes[:, 1])
        # NOTE(review): the verb embedding is evaluated but never consumed below;
        # kept so the call (and any error it raises) still happens.
        verb_emb = self.verb_embed(gt_human_classes[:, 1])
    else:
        obj_emb = self.obj_embed.weight[1:]
        verb_emb = self.verb_embed.weight[1:]

    # Object proposals from the object RPN head.
    (rois, obj_labels, bbox_targets,
     rpn_scores, rpn_box_deltas, rel_labels) = self.get_boxes(
        fmap, im_sizes, image_offset, self.rpn_head, obj_emb,
        gt_boxes, gt_classes, gt_rels, train_anchor_inds,
        proposals=proposals, mode="obj")

    # Human proposals from the human RPN head. rel_labels is overwritten here,
    # so the value returned in the Result comes from this second call.
    (rois_human, verb_labels, bbox_targets_human,
     rpn_scores_human, rpn_box_deltas_human, rel_labels) = self.get_boxes(
        fmap, im_sizes, image_offset, self.rpn_head_human, None,
        gt_boxes_human, gt_human_classes, gt_rels, train_anchor_inds_human,
        proposals=proposals, mode="human")

    # Object classification / box regression on the object RoIs.
    obj_fmap = self.obj_feature_map(fmap, rois)
    od_obj_dists = self.score_fc(obj_fmap)
    if self.mode != 'gtbox':
        od_box_deltas = self.bbox_fc(obj_fmap).view(-1, len(self.classes), 4)
    else:
        od_box_deltas = None
    od_box_priors = rois[:, 1:]

    # Verb classification / box regression on the human RoIs.
    human_fmap = self.obj_feature_map(fmap, rois_human)
    od_human_dists = self.score_fc_human(human_fmap)
    if self.mode != 'gtbox':
        od_human_box_deltas = self.bbox_fc_human(human_fmap).view(-1, 2, 4)
    else:
        od_human_box_deltas = None
    od_human_box_priors = rois_human[:, 1:]

    eval_with_detections = (not self.training) and self.mode != 'gtbox'
    if eval_with_detections or self.mode in ('proposals', 'refinerels'):
        (nms_inds, nms_scores, nms_preds,
         nms_boxes_assign, nms_boxes, nms_imgs) = self.nms_boxes(
            od_obj_dists, rois, od_box_deltas, im_sizes)

        im_inds = nms_imgs + image_offset
        obj_dists = od_obj_dists[nms_inds]
        obj_fmap = obj_fmap[nms_inds]
        box_deltas = od_box_deltas[nms_inds]
        box_priors = nms_boxes[:, 0]

        if (not self.training) and self.mode != 'gtbox':
            # NOTE(review): the historical comment here talked about assigning
            # labels "during training", yet this guard only fires outside of
            # training -- confirm which one is intended.
            pred_to_gtbox = bbox_overlaps(box_priors, gt_boxes).data
            # Overlaps with GT boxes that belong to a different image do not count.
            other_image = im_inds.data[:, None] != gt_classes.data[None, :, 0]
            pred_to_gtbox[other_image] = 0.0

            max_overlaps, argmax_overlaps = pred_to_gtbox.max(1)
            rm_obj_labels = gt_classes[:, 1][argmax_overlaps]
            rm_obj_labels[max_overlaps < 0.5] = 0
        else:
            rm_obj_labels = None
    else:
        # No NMS: the "rm_*" outputs are simply the raw detector outputs.
        im_inds = rois[:, 0].long().contiguous() + image_offset
        nms_scores = nms_preds = nms_boxes_assign = nms_boxes = None
        box_priors = rois[:, 1:]
        rm_obj_labels = obj_labels
        box_deltas = od_box_deltas
        obj_dists = od_obj_dists

    outputs = dict(
        od_obj_dists=od_obj_dists,
        rm_obj_dists=obj_dists,
        obj_scores=nms_scores,
        obj_preds=nms_preds,
        obj_fmap=obj_fmap,
        od_box_deltas=od_box_deltas,
        rm_box_deltas=box_deltas,
        od_box_targets=bbox_targets,
        rm_box_targets=bbox_targets,
        od_box_priors=od_box_priors,
        rm_box_priors=box_priors,
        boxes_assigned=nms_boxes_assign,
        boxes_all=nms_boxes,
        od_obj_labels=obj_labels,
        rm_obj_labels=rm_obj_labels,
        rpn_scores=rpn_scores,
        rpn_box_deltas=rpn_box_deltas,
        rel_labels=rel_labels,
        im_inds=im_inds,
        rpn_scores_human=rpn_scores_human,
        rpn_box_deltas_human=rpn_box_deltas_human,
        od_human_dists=od_human_dists,
        od_human_box_deltas=od_human_box_deltas,
        od_human_bbox_targets_human=bbox_targets_human,
        od_human_box_priors=od_human_box_priors,
        od_verb_labels=verb_labels,
        fmap=fmap if return_fmap else None,
    )
    return Result(**outputs)
def rpn_boxes(self, fmap, im_sizes, image_offset, gt_boxes=None, gt_classes=None, gt_rels=None,
              train_anchor_inds=None, proposals=None):
    """
    Gets boxes from the RPN.

    :param fmap: feature map fed to ``self.rpn_head``
    :param im_sizes: image sizes forwarded to ``rpn_head.roi_proposals``
    :param image_offset: offset of this batch's image indices (used when matching
                         RoIs to GT boxes and when selecting training anchors)
    :param gt_boxes: GT boxes; required in training mode
    :param gt_classes: GT classes, column 0 = image index, column 1 = class;
                       required in training mode
    :param gt_rels: GT relations; must be None when ``self.mode == 'rpntrain'``
    :param train_anchor_inds: anchors used for the RPN loss; required in training mode
    :param proposals: accepted for interface compatibility; not used here
    :return: (all_rois, labels, bbox_targets, rpn_scores, rpn_box_deltas, rel_labels).
             Outside of training everything but ``all_rois`` is None; ``rel_labels``
             is always None.
    :raises ValueError: in training mode when GT boxes/classes/anchors are missing,
                        or when relations are supplied together with 'rpntrain'.
    """
    rpn_feats = self.rpn_head(fmap)

    # More proposals are kept before/after NMS only while training the RPN itself.
    is_rpn_train = self.training and self.mode == 'rpntrain'
    rois = self.rpn_head.roi_proposals(
        rpn_feats, im_sizes, nms_thresh=0.7,
        pre_nms_topn=12000 if is_rpn_train else 6000,
        post_nms_topn=2000 if is_rpn_train else 1000,
    )

    if not self.training:
        return Variable(rois, volatile=True), None, None, None, None, None

    if gt_boxes is None or gt_classes is None or train_anchor_inds is None:
        raise ValueError(
            "Must supply GT boxes, GT classes, trainanchors when in train mode")
    rpn_scores, rpn_box_deltas = self.rpn_head.anchor_preds(
        rpn_feats, train_anchor_inds, image_offset)

    if gt_rels is not None and self.mode == 'rpntrain':
        # The two literals used to be joined without a separating space
        # ("...with detectionat the same time...").
        raise ValueError(
            "Training the object detector and the relationship model with detection "
            "at the same time isn't supported")

    if self.mode == 'refinerels':
        # Label every proposal with the class of its best-overlapping GT box of
        # the same image.
        pred_to_gtbox = bbox_overlaps(rois[:, 1:], gt_boxes.data)
        im_inds = (rois[:, 0] + image_offset).long()
        pred_to_gtbox[im_inds[:, None] != gt_classes.data[None, :, 0]] = 0.0

        max_overlaps, argmax_overlaps = pred_to_gtbox.max(1)

        # GT label assignment; proposals whose best overlap is below 0.5 become 0.
        labels = gt_classes[:, 1][argmax_overlaps]
        labels[max_overlaps < 0.5] = 0

        # GT box assignment; targets of poorly-overlapping proposals are filled
        # with an arbitrary value (1).
        bbox_targets = gt_boxes[argmax_overlaps, :]
        tem = max_overlaps.view(max_overlaps.size()[0], 1)
        bbox_targets.data[torch.cat((tem, tem, tem, tem), 1) < 0.5] = 1

        all_rois = Variable(rois)
    else:
        # 'rpntrain' / 'gtbox': sample proposals against the GT boxes.
        all_rois, labels, bbox_targets = proposal_assignments_det(
            rois, gt_boxes.data, gt_classes.data, image_offset, fg_thresh=0.5)

    rel_labels = None
    return all_rois, labels, bbox_targets, rpn_scores, rpn_box_deltas, rel_labels
def proposal_assignments_postnms(
        rois, gt_boxes, gt_classes, gt_rels, nms_inds, image_offset, fg_thresh=0.5,
        max_objs=100, max_rels=100, rand_val=0.01):
    """
    Assign object detection proposals to ground-truth targets after NMS, and build
    the relation labels between the kept boxes.

    Per image, the boxes that survived NMS are kept and are topped up with
    suppressed proposals / GT boxes that are the first box to hit a GT box.
    The kept boxes are placed at random (sorted) slots of the output.

    :param rois: [num_rois, 5] tensor of [img_ind, x1, y1, x2, y2]
    :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1
    :param gt_classes: [num_boxes, 2] array of [img_ind, class]
    :param gt_rels: [num_rels, 4] array of [img_ind, box_0, box_1, rel type]
    :param nms_inds: indices into ``rois`` of the boxes that survived NMS
    :param image_offset: subtracted from the GT image indices
    :param fg_thresh: IoU above which a box counts as hitting a GT box
    :param max_objs: upper bound on the number of boxes emitted per image
    :param max_rels: accepted for interface compatibility; not used here
    :param rand_val: amplitude of the uniform noise added to the IoUs
    :return:
        rois: [num_rois, 5]
        labels: [num_rois] array of labels
        rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type)
    """
    pred_inds_np = rois[:, 0].cpu().numpy().astype(np.int64)
    pred_boxes_np = rois[:, 1:].cpu().numpy()
    nms_inds_np = nms_inds.cpu().numpy()
    sup_inds_np = np.setdiff1d(np.arange(pred_boxes_np.shape[0]), nms_inds_np)

    # split into chosen and suppressed
    chosen_inds_np = pred_inds_np[nms_inds_np]
    chosen_boxes_np = pred_boxes_np[nms_inds_np]

    suppre_inds_np = pred_inds_np[sup_inds_np]
    suppre_boxes_np = pred_boxes_np[sup_inds_np]

    gt_boxes_np = gt_boxes.cpu().numpy()
    gt_classes_np = gt_classes.cpu().numpy()
    gt_rels_np = gt_rels.cpu().numpy()

    gt_classes_np[:, 0] -= image_offset
    gt_rels_np[:, 0] -= image_offset

    num_im = int(gt_classes_np[:, 0].max()) + 1

    rois = []
    obj_labels = []
    rel_labels = []
    num_box_seen = 0

    for im_ind in range(num_im):
        chosen_ind = np.where(chosen_inds_np == im_ind)[0]
        suppre_ind = np.where(suppre_inds_np == im_ind)[0]
        gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0]

        gt_boxes_i = gt_boxes_np[gt_ind]
        gt_classes_i = gt_classes_np[gt_ind, 1]
        gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:]

        # Get IOUs between chosen and GT boxes and if needed we'll add more in
        chosen_boxes_i = chosen_boxes_np[chosen_ind]
        suppre_boxes_i = suppre_boxes_np[suppre_ind]
        n_chosen = chosen_boxes_i.shape[0]

        # add a teensy bit of random noise because some GT boxes might be duplicated, etc.
        pred_boxes_i = np.concatenate((chosen_boxes_i, suppre_boxes_i, gt_boxes_i), 0)
        ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) + rand_val * (
            np.random.rand(pred_boxes_i.shape[0], gt_boxes_i.shape[0]) - 0.5)

        # Let's say that a box can only be assigned ONCE for now because we've already done
        # the NMS and stuff.
        is_hit = ious > fg_thresh

        obj_assignments_i = is_hit.argmax(1)
        obj_assignments_i[~is_hit.any(1)] = -1

        # Only the first box assigned to a given GT keeps its assignment.
        _, first_occurance_ind = np.unique(obj_assignments_i, return_index=True)
        obj_assignments_i[np.setdiff1d(
            np.arange(obj_assignments_i.shape[0]), first_occurance_ind)] = -1

        extra_to_add = np.where(obj_assignments_i[n_chosen:] != -1)[0] + n_chosen

        # Add them in somewhere at random
        # NOTE(review): np.random.choice(..., replace=False) below needs
        # n_chosen <= num_inds_to_have, i.e. at most max_objs NMS boxes per image.
        num_inds_to_have = min(max_objs, n_chosen + extra_to_add.shape[0])
        boxes_i = np.zeros((num_inds_to_have, 4), dtype=np.float32)
        labels_i = np.zeros(num_inds_to_have, dtype=np.int64)

        inds_from_nms = np.sort(
            np.random.choice(num_inds_to_have, size=n_chosen, replace=False))
        inds_from_elsewhere = np.setdiff1d(np.arange(num_inds_to_have), inds_from_nms)

        # Unassigned NMS boxes carry -1; indexing gt_classes_i with -1 would
        # silently hand them the class of the LAST GT box, so they are left at
        # the background label 0 instead.
        chosen_assign = obj_assignments_i[:n_chosen]
        chosen_labels = np.zeros(n_chosen, dtype=np.int64)
        is_assigned = chosen_assign != -1
        chosen_labels[is_assigned] = gt_classes_i[chosen_assign[is_assigned]]

        boxes_i[inds_from_nms] = chosen_boxes_i
        labels_i[inds_from_nms] = chosen_labels

        boxes_i[inds_from_elsewhere] = pred_boxes_i[extra_to_add]
        labels_i[inds_from_elsewhere] = gt_classes_i[obj_assignments_i[extra_to_add]]

        # Now, we do the relationships. same as for rle
        all_rels_i = _sel_rels(bbox_overlaps(boxes_i, gt_boxes_i),
                               boxes_i,
                               labels_i,
                               gt_classes_i,
                               gt_rels_i,
                               fg_thresh=fg_thresh,
                               fg_rels_per_image=100)
        # Shift the per-image box indices to indices into the concatenated rois.
        all_rels_i[:, 0:2] += num_box_seen

        rois.append(np.column_stack((
            im_ind * np.ones(boxes_i.shape[0], dtype=np.float32),
            boxes_i,
        )))
        obj_labels.append(labels_i)
        rel_labels.append(np.column_stack((
            im_ind * np.ones(all_rels_i.shape[0], dtype=np.int64),
            all_rels_i,
        )))
        # Count boxes (rows). boxes_i.size would count 4 per box and push the
        # relation indices of every later image out of range.
        num_box_seen += boxes_i.shape[0]

    # The former `async=True` keyword is a syntax error on Python >= 3.7
    # (`async` is reserved); a plain copy is used instead.
    device = gt_boxes.get_device()
    rois = torch.FloatTensor(np.concatenate(rois, 0)).cuda(device)
    labels = torch.LongTensor(np.concatenate(obj_labels, 0)).cuda(device)
    rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(device)
    return rois, labels, rel_labels
def forward(self, x, im_sizes, image_offset, gt_boxes=None, gt_classes=None, gt_rels=None,
            proposals=None, train_anchor_inds=None, return_fmap=False):
    """
    Run the detector and measure how many refined detections are correct.

    A detection counts as correct when its refined box overlaps a GT box of the
    same image by at least 0.5 and its predicted class equals
    ``result.rm_obj_labels``.

    :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
    :param im_sizes: A numpy array of (h, w, scale) for each image.
    :param image_offset: Offset onto what image we're on for MGPU training
                         (if single GPU this is 0)
    :param gt_boxes: [num_gt, 4] GT boxes over the batch.
    :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
    :param gt_rels: forwarded to the detector
    :param proposals: forwarded to the detector
    :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that
                              will be used to compute the training loss.
                              Each (img_ind, fpn_idx)
    :param return_fmap: accepted for interface compatibility; the detector is
                        always called with ``return_fmap=True``
    :return: fraction of detections counted as correct (also stored on
             ``result.ratio``), or a ``ValueError`` instance when the detector
             result is empty
    """
    # Detector
    result = self.detector(x, im_sizes, image_offset, gt_boxes, gt_classes, gt_rels,
                           proposals, train_anchor_inds, return_fmap=True)
    if result.is_none():
        # NOTE(review): the exception object is returned, not raised; kept as-is
        # because callers may rely on it.
        return ValueError("heck")

    # Most likely non-background class per box (+1 re-adds the skipped column 0).
    obj_scores = F.softmax(result.rm_obj_dists, dim=1)
    result.rm_obj_preds = obj_scores.data[:, 1:].max(1)[1] + 1

    # Pick, for every box, the regressed box that belongs to its predicted class.
    twod_inds = arange(result.rm_obj_preds) * self.num_classes + result.rm_obj_preds
    bboxes = result.boxes_all.view(-1, 4)[twod_inds].view(result.boxes_all.size(0), 4)

    # Overlap with the GT boxes, ignoring GT boxes of other images.
    pred_to_gtbox = bbox_overlaps(bboxes.data, gt_boxes.data)
    im_inds = result.im_inds
    pred_to_gtbox[im_inds.data[:, None] != gt_classes.data[None, :, 0]] = 0.0

    max_overlaps, argmax_overlaps = pred_to_gtbox.max(1)
    labels = gt_classes[:, 1][argmax_overlaps]
    labels[max_overlaps < 0.5] = 0
    labels[result.rm_obj_preds != result.rm_obj_labels.data] = 0

    result.ratio = torch.nonzero(labels).size(0) / labels.size(0)
    return result.ratio
def rel_assignments_det(im_inds, rpn_rois, roi_gtlabels, gt_boxes, gt_classes, gt_rels,
                        image_offset, fg_thresh=0.5, num_sample_per_gt=4,
                        filter_non_overlap=True):
    """
    Assign relation labels to pairs of detected boxes.

    Candidate pairs are the boxes of one image that overlap each other without
    being identical (0 < IoU < 1). A pair is foreground when, for some GT
    relation, both of its boxes carry the GT class of the respective GT box and
    the smaller of the two box-to-GT IoUs reaches ``fg_thresh``; it then takes
    the type of the best such GT relation. All other pairs are background (0).

    :param im_inds: [num_rois] image index of every box
    :param rpn_rois: [num_rois, 4] boxes
    :param roi_gtlabels: [num_rois] class label assigned to every box
    :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1
    :param gt_classes: [num_boxes, 2] array of [img_ind, class]
    :param gt_rels: [num_rels, 4] array of [img_ind, box_0, box_1, rel type]
    :param image_offset: subtracted from the GT image indices
    :param fg_thresh: minimum IoU for a pair to match a GT relation
    :param num_sample_per_gt: accepted for interface compatibility; not used here
    :param filter_non_overlap: accepted for interface compatibility; not used here
    :return:
        rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type)
    """
    pred_inds_np = im_inds.cpu().numpy()
    pred_boxes_np = rpn_rois.cpu().numpy()
    pred_boxlabels_np = roi_gtlabels.cpu().numpy()
    gt_boxes_np = gt_boxes.cpu().numpy()
    gt_classes_np = gt_classes.cpu().numpy()
    gt_rels_np = gt_rels.cpu().numpy()

    gt_classes_np[:, 0] -= image_offset
    gt_rels_np[:, 0] -= image_offset

    # Sampling budget: 2048 relations per image, a REL_FG_FRACTION share of
    # which may be foreground.
    num_im = int(gt_classes_np[:, 0].max() + 1)
    num_fg = int(REL_FG_FRACTION * 2048 * num_im)
    num_rels = int(2048 * num_im)

    fg_rels = []
    bg_rels = []
    num_box_seen = 0
    for im_ind in range(num_im):
        pred_ind = np.where(pred_inds_np == im_ind)[0]
        gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0]
        gt_boxes_i = gt_boxes_np[gt_ind]
        gt_classes_i = gt_classes_np[gt_ind, 1]
        gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:]

        pred_boxes_i = pred_boxes_np[pred_ind]
        pred_boxlabels_i = pred_boxlabels_np[pred_ind]

        # Candidate pairs: overlapping, but not the exact same box.
        pbi_iou = bbox_overlaps(pred_boxes_i, pred_boxes_i)
        rel_inds_i = np.stack(np.where((pbi_iou < 1) & (pbi_iou > 0)), -1)

        # Rows: (img ind, box0, box1, rel type); rel type starts as 0 (background).
        all_rels_i = np.column_stack(
            (im_ind * np.ones(rel_inds_i.shape[0], dtype=np.int64),
             np.pad(rel_inds_i, ((0, 0), (0, 1)), 'constant')))
        all_rels_i[:, 1:3] += num_box_seen

        if gt_rels_i.shape[0] > 0:
            ious_i = bbox_overlaps(pred_boxes_i, gt_boxes_i)
            mask = (pred_boxlabels_i[:, None] == gt_classes_i[None, :]).astype(
                np.float32)
            # [num_pairs, num_gt_rels]: weaker of the subject/object overlaps,
            # zeroed wherever either box's label differs from the GT class.
            min_ious_i = np.minimum(
                ious_i[rel_inds_i[:, 0], :][:, gt_rels_i[:, 0]],
                ious_i[rel_inds_i[:, 1], :][:, gt_rels_i[:, 1]]) * \
                mask[rel_inds_i[:, 0], :][:, gt_rels_i[:, 0]] * \
                mask[rel_inds_i[:, 1], :][:, gt_rels_i[:, 1]]

            fg_inds_i = np.where(min_ious_i.max(-1) >= fg_thresh)[0]
            all_rels_i[fg_inds_i, -1] = \
                gt_rels_i[:, -1][min_ious_i.argmax(-1)[fg_inds_i]]
        else:
            # No GT relation in this image: reducing over the empty relation
            # axis would raise, and every candidate pair is background anyway.
            fg_inds_i = np.zeros(0, dtype=np.int64)

        fg_rels.append(all_rels_i[fg_inds_i])
        bg_rels.append(all_rels_i[np.where(all_rels_i[:, -1] == 0)[0]])

        num_box_seen += pred_boxes_i.shape[0]

    fg_rels = np.concatenate(fg_rels, 0)
    bg_rels = np.concatenate(bg_rels, 0)

    # Sub-sample the foreground down to its budget ...
    if fg_rels.shape[0] > num_fg:
        choice_ind = npr.choice(fg_rels.shape[0], num_fg, replace=False)
        fg_rels = fg_rels[choice_ind]

    # ... and fill whatever is left of the total budget with background.
    num_bg = num_rels - fg_rels.shape[0]
    if num_bg > 0:
        if bg_rels.shape[0] > num_bg:
            choice_ind = torch.randperm(
                bg_rels.shape[0])[:num_bg].numpy().astype(np.int64)
            bg_rels = bg_rels[choice_ind]
        rel_labels = np.concatenate((fg_rels, bg_rels), 0)
    else:
        rel_labels = fg_rels

    # The former `async=True` keyword is a syntax error on Python >= 3.7
    # (`async` is reserved); a plain copy is used instead.
    rel_labels = torch.LongTensor(rel_labels).cuda(rpn_rois.get_device())
    return rel_labels
def val_batch(batch_num, b, evaluator, thrs=(20, 50, 100)):
    """
    Run the detector on one validation batch and dump a qualitative report.

    The top-20 predicted relations are compared against the GT relations. The
    image with the involved boxes drawn on it and a text file listing matched,
    missed and wrong edges are written to
    ``<conf.save_dir>/<conf.mode>-qualitative-top20/<image name>-<recall>/``.

    :param batch_num: index of the validation sample (into ``val``)
    :param b: batch handed to ``detector``
    :param evaluator: mapping from mode name to an evaluator exposing
                      ``evaluate_scene_graph_entry``
    :param thrs: accepted for interface compatibility; not used here
    """
    top_k = 20

    det_res = detector[b]
    assert conf.num_gpus == 1

    boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i = det_res
    gt_entry = {
        'gt_classes': val.gt_classes[batch_num].copy(),
        'gt_relations': val.relationships[batch_num].copy(),
        'gt_boxes': val.gt_boxes[batch_num].copy(),
    }
    assert np.all(objs_i[rels_i[:, 0]] > 0) and np.all(
        objs_i[rels_i[:, 1]] > 0)

    pred_entry = {
        'pred_boxes': boxes_i * BOX_SCALE / IM_SCALE,
        'pred_classes': objs_i,
        'pred_rel_inds': rels_i,
        'obj_scores': obj_scores_i,
        'rel_scores': pred_scores_i,
    }
    pred_to_gt, pred_5ples, _ = evaluator[conf.mode].evaluate_scene_graph_entry(
        gt_entry,
        pred_entry,
        iou_thresh=0.5 if 'det' in conf.mode else 0.9)

    # SET RECALL THRESHOLD HERE
    pred_to_gt = pred_to_gt[:top_k]
    pred_5ples = pred_5ples[:top_k]

    # Predicted boxes (rows) that match a GT box (columns): IoU >= 0.5 and same class.
    objs_match = (bbox_overlaps(pred_entry['pred_boxes'], gt_entry['gt_boxes'])
                  >= 0.5) & (objs_i[:, None] == gt_entry['gt_classes'][None])

    has_seen = defaultdict(int)
    has_seen_gt = defaultdict(int)
    pred_ind2name = {}
    gt_ind2name = {}
    edges = {}
    missededges = {}
    badedges = {}

    # NOTE(review): debugging breakpoint for one specific image; left in place
    # because removing it would change the interactive behaviour of the script.
    if val.filenames[batch_num].startswith('2343676'):
        import ipdb
        ipdb.set_trace()

    def query_pred(pred_ind):
        # Name a predicted box "<class>-<k>", k counting boxes of that class.
        if pred_ind not in pred_ind2name:
            has_seen[objs_i[pred_ind]] += 1
            pred_ind2name[pred_ind] = '{}-{}'.format(
                train.ind_to_classes[objs_i[pred_ind]],
                has_seen[objs_i[pred_ind]])
        return pred_ind2name[pred_ind]

    def query_gt(gt_ind):
        # Name a GT box "<class>-GT<k>", k counting GT boxes of that class.
        gt_cls = gt_entry['gt_classes'][gt_ind]
        if gt_ind not in gt_ind2name:
            has_seen_gt[gt_cls] += 1
            gt_ind2name[gt_ind] = '{}-GT{}'.format(
                train.ind_to_classes[gt_cls], has_seen_gt[gt_cls])
        return gt_ind2name[gt_ind]

    # dtype=bool keeps the masks valid index arrays even when there are no
    # predictions (an empty list would otherwise become a float array).
    is_matched = np.array([len(x) > 0 for x in pred_to_gt], dtype=bool)

    # Correct edges: predictions that matched at least one GT relation.
    for fiveple in pred_5ples[is_matched]:
        head_name = query_pred(fiveple[0])
        tail_name = query_pred(fiveple[1])
        edges[(head_name, tail_name)] = train.ind_to_predicates[fiveple[4]]

    gt_5ples = np.column_stack((
        gt_entry['gt_relations'][:, :2],
        gt_entry['gt_classes'][gt_entry['gt_relations'][:, 0]],
        gt_entry['gt_classes'][gt_entry['gt_relations'][:, 1]],
        gt_entry['gt_relations'][:, 2],
    ))

    # GT relations hit by any of the top-k predictions. The explicit initial
    # value keeps reduce() from raising when there are no predictions at all.
    has_match = reduce(np.union1d, pred_to_gt, np.array([], dtype=np.int64))

    # Missed edges: GT relations no prediction matched (miss, misclassification
    # or outside the top-k). Use the predicted box name when one matches the GT box.
    for gt in gt_5ples[np.setdiff1d(np.arange(gt_5ples.shape[0]), has_match)]:
        namez = []
        for i in range(2):
            matching_obj = np.where(objs_match[:, gt[i]])[0]
            if matching_obj.size > 0:
                name = query_pred(matching_obj[0])
            else:
                name = query_gt(gt[i])
            namez.append(name)
        missededges[tuple(namez)] = train.ind_to_predicates[gt[4]]

    # Bad edges: unmatched predictions whose two boxes were both named above.
    for fiveple in pred_5ples[~is_matched]:
        if fiveple[0] in pred_ind2name and fiveple[1] in pred_ind2name:
            badedges[(pred_ind2name[fiveple[0]],
                      pred_ind2name[fiveple[1]])] = train.ind_to_predicates[fiveple[4]]

    theimg = load_unscaled(val.filenames[batch_num])
    theimg2 = theimg.copy()
    draw2 = ImageDraw.Draw(theimg2)

    # Draw every box that received a name.
    for pred_ind in pred_ind2name:
        draw2 = draw_box(draw2,
                         pred_entry['pred_boxes'][pred_ind],
                         cls_ind=objs_i[pred_ind],
                         text_str=pred_ind2name[pred_ind])
    for gt_ind in gt_ind2name:
        draw2 = draw_box(draw2,
                         gt_entry['gt_boxes'][gt_ind],
                         cls_ind=gt_entry['gt_classes'][gt_ind],
                         text_str=gt_ind2name[gt_ind])

    recall = int(100 * len(has_match) / gt_entry['gt_relations'].shape[0])

    out_name = '{}-{}'.format(val.filenames[batch_num].split('/')[-1][:-4], recall)
    containname = os.path.join(conf.save_dir,
                               conf.mode + '-qualitative-top' + str(top_k))
    pathname = os.path.join(containname, out_name)
    # makedirs also creates missing parents (conf.save_dir, containname).
    os.makedirs(pathname, exist_ok=True)

    theimg2.save(os.path.join(pathname, 'imgbox.jpg'), quality=100, subsampling=0)

    with open(os.path.join(pathname, 'shit.txt'), 'w') as f:
        pred_objs = ' ,'.join(
            [train.ind_to_classes[i] for i in pred_entry['pred_classes']])
        gt_objs = ' ,'.join(
            [train.ind_to_classes[i] for i in gt_entry['gt_classes']])
        f.write('pred objs:\n' + pred_objs + '\n')
        f.write('gt objs:\n' + gt_objs + '\n')
        f.write('good:\n')
        for (o1, o2), p in edges.items():
            f.write('{} - {} - {}\n'.format(o1, p, o2))
        f.write('fn:\n')
        for (o1, o2), p in missededges.items():
            f.write('{} - {} - {}\n'.format(o1, p, o2))
        f.write('shit:\n')
        for (o1, o2), p in badedges.items():
            f.write('{} - {} - {}\n'.format(o1, p, o2))
def forward(self, x, im_sizes, image_offset, gt_boxes=None, gt_classes=None, gt_rels=None, proposals=None, train_anchor_inds=None, return_fmap=False):
    """
    Forward pass for relation detection with a positive/negative triplet ranking output.

    The detector is run first, then object context, relation candidates and
    relation scores are computed. In training mode the method returns ``result``
    with ``pos`` / ``neg`` / ``anchor`` / ``ratio`` filled in (scores of
    ground-truth triplets vs. scores of other candidate triplets); otherwise it
    returns the output of ``filter_dets``.

    :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
    :param im_sizes: A numpy array of (h, w, scale) for each image.
    :param image_offset: Offset onto what image we're on for MGPU training (if single GPU this is 0)
    :param gt_boxes: [num_gt, 4] GT boxes over the batch.
    :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
    :param gt_rels: GT relations, forwarded to the detector and to ``rel_assignments``
    :param proposals: forwarded to the detector
    :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that will
                              be used to compute the training loss. Each (img_ind, fpn_idx)
    :param return_fmap: not read here; the detector is always called with return_fmap=True
    :return: If train: ``result`` (with pos, neg, anchor, ratio set)
             if test: whatever ``filter_dets`` returns
             NOTE(review): a ``ValueError`` instance is *returned* (not raised)
             when the detector result is empty.
    """
    # Detector
    result = self.detector(x, im_sizes, image_offset, gt_boxes, gt_classes, gt_rels, proposals,
                           train_anchor_inds, return_fmap=True)
    if result.is_none():
        return ValueError("heck")

    im_inds = result.im_inds - image_offset
    # boxes: [#boxes, 4], without box deltas; where narrow error comes from, should .detach()
    boxes = result.rm_box_priors  # .detach()

    if self.training and result.rel_labels is None:
        assert self.mode == 'sgdet'  # sgcls's result.rel_labels is gt and not None
        # rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type)
        result.rel_labels = rel_assignments(im_inds.data, boxes.data, result.rm_obj_labels.data,
                                            gt_boxes.data, gt_classes.data, gt_rels.data,
                                            image_offset, filter_non_overlap=True,
                                            num_sample_per_gt=1)

    # Relation candidates as rows of (im_ind, box1_ind, box2_ind); e.g. [275, 3]
    rel_inds = self.get_rel_inds(result.rel_labels, im_inds, boxes)

    # rois: [#boxes, 5]
    rois = torch.cat((im_inds[:, None].float(), boxes), 1)
    # result.rm_obj_fmap: e.g. [384, 4096]
    # detach: keeps gradients from flowing back into the feature map
    result.rm_obj_fmap = self.obj_feature_map(result.fmap.detach(), rois)

    # Object / edge context (BiLSTM)
    result.rm_obj_dists, result.rm_obj_preds, edge_ctx = self.context(
        result.rm_obj_fmap,  # has been detached above
        # rm_obj_dists: [#boxes, 151]; Prevent gradients from flowing back into score_fc from elsewhere
        result.rm_obj_dists.detach(),  # .detach: returns a new Variable, detached from the current graph
        im_inds, result.rm_obj_labels if self.training or self.mode == 'predcls' else None,
        # NOTE(review): both arms of the conditional below are identical
        boxes.data, result.boxes_all if self.mode == 'sgdet' else result.boxes_all)

    # Post Processing
    # no edge context returned (nl_edge <= 0)
    if edge_ctx is None:
        edge_rep = self.post_emb(result.rm_obj_preds)
    # edge context available (nl_edge > 0)
    else:
        edge_rep = self.post_lstm(edge_ctx)  # e.g. [384, 4096*2]

    # Split into subject and object representations
    edge_rep = edge_rep.view(edge_rep.size(0), 2, self.pooling_dim)  # e.g. [384,2,4096]

    subj_rep = edge_rep[:, 0]  # e.g. [384,4096]
    obj_rep = edge_rep[:, 1]  # e.g. [384,4096]

    # One row per relation candidate: subject rep * object rep; e.g. [275,4096]
    prod_rep = subj_rep[rel_inds[:, 1]] * obj_rep[rel_inds[:, 2]]

    if self.use_vision:  # True when sgdet
        # visual representation of each candidate pair; e.g. [275,4096]
        vr = self.visual_rep(result.fmap.detach(), rois, rel_inds[:, 1:])

        if self.limit_vision:  # False when sgdet
            # exact value TBD
            prod_rep = torch.cat((prod_rep[:,:2048] * vr[:,:2048], prod_rep[:,2048:]), 1)
        else:
            prod_rep = prod_rep * vr  # e.g. [275,4096]

    if self.use_tanh:  # False when sgdet
        prod_rep = F.tanh(prod_rep)

    result.rel_dists = self.rel_compress(prod_rep)  # e.g. [275,51]

    if self.use_bias:  # True when sgdet
        result.rel_dists = result.rel_dists + self.freq_bias.index_with_labels(torch.stack((
            result.rm_obj_preds[rel_inds[:, 1]],
            result.rm_obj_preds[rel_inds[:, 2]],
        ), 1))

    # Attention: positives use rm_obj_labels / rel_labels for the obj / rel scores;
    # negatives use rm_obj_preds / the max relation score.
    if self.training:
        judge = result.rel_labels.data[:,3] != 0
        if judge.sum() != 0:  # at least one GT relation exists among rel_inds
            # ---- positive overall score ----
            # rows of rel_inds whose relation label is non-zero
            select_rel_inds = torch.arange(rel_inds.size(0)).view(-1,1).long().cuda()[result.rel_labels.data[:,3] != 0]
            com_rel_inds = rel_inds[select_rel_inds]
            # flat index of (box, GT label) into the flattened score matrix
            twod_inds = arange(result.rm_obj_labels.data) * self.num_classes + result.rm_obj_labels.data
            # softmax score of every box for its GT label
            result.obj_scores = F.softmax(result.rm_obj_dists, dim=1).view(-1)[twod_inds]
            obj_scores0 = result.obj_scores[com_rel_inds[:,1]]
            obj_scores1 = result.obj_scores[com_rel_inds[:,2]]
            rel_rep = F.softmax(result.rel_dists[select_rel_inds], dim=1)  # result.rel_dists has grad
            # softmax score of the GT relation type; view(-1) is used instead of squeeze()
            rel_score = rel_rep.gather(1, result.rel_labels[select_rel_inds][:,3].contiguous().view(-1,1)).view(-1)
            prob_score = rel_score * obj_scores0 * obj_scores1

            # ---- negative overall score ----
            # all ordered pairs of distinct boxes that belong to the same image
            rel_cands = im_inds.data[:, None] == im_inds.data[None]
            rel_cands.view(-1)[diagonal_inds(rel_cands)] = 0  # self relation = 0
            if self.require_overlap:
                rel_cands = rel_cands & (bbox_overlaps(boxes.data, boxes.data) > 0)  # Require overlap for detection
            rel_cands = rel_cands.nonzero()  # [#, 2]
            # NOTE(review): dim() == 0 detects "no candidates" only where an empty
            # nonzero() result is 0-dimensional -- confirm for the PyTorch version in use.
            if rel_cands.dim() == 0:
                print("rel_cands.dim() == 0!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
                rel_cands = im_inds.data.new(1, 2).fill_(0)  # shaped: [1,2], [0, 0]
            # prepend the image index -> rows of (im_ind, box1_ind, box2_ind)
            rel_cands = torch.cat((im_inds.data[rel_cands[:, 0]][:, None], rel_cands), 1)
            rel_inds_neg = rel_cands

            vr_neg = self.visual_rep(result.fmap.detach(), rois, rel_inds_neg[:, 1:])
            subj_obj = subj_rep[rel_inds_neg[:, 1]] * obj_rep[rel_inds_neg[:, 2]]
            prod_rep_neg = subj_obj * vr_neg
            rel_dists_neg = self.rel_compress(prod_rep_neg)
            all_rel_rep_neg = F.softmax(rel_dists_neg, dim=1)
            # best non-zero relation type of every candidate (+1 re-adds the skipped column 0)
            _, pred_classes_argmax_neg = all_rel_rep_neg.data[:,1:].max(1)
            pred_classes_argmax_neg = pred_classes_argmax_neg + 1
            all_rel_pred_neg = torch.cat((rel_inds_neg, pred_classes_argmax_neg.view(-1,1)), 1)
            ind_old = torch.ones(all_rel_pred_neg.size(0)).byte().cuda()
            # drop candidates that coincide with a positive triplet: same image, same
            # two boxes, predicted object classes equal to the GT labels, and the same
            # relation type
            for i in range(com_rel_inds.size(0)):
                ind_i = (all_rel_pred_neg[:,0] == com_rel_inds[i, 0]) & (all_rel_pred_neg[:,1] == com_rel_inds[i, 1]) & (result.rm_obj_preds.data[all_rel_pred_neg[:,1]] == result.rm_obj_labels.data[com_rel_inds[i, 1]]) & (all_rel_pred_neg[:,2] == com_rel_inds[i, 2]) & (result.rm_obj_preds.data[all_rel_pred_neg[:,2]] == result.rm_obj_labels.data[com_rel_inds[i, 2]]) & (all_rel_pred_neg[:,3] == result.rel_labels.data[select_rel_inds][i,3])
                ind_i = (1 - ind_i).byte()
                ind_old = ind_i & ind_old

            # keep only the surviving candidates
            # NOTE(review): 51 is hard-coded as the width of the relation score rows
            rel_inds_neg = rel_inds_neg.masked_select(ind_old.view(-1,1).expand(-1,3) == 1).view(-1,3)
            rel_rep_neg = all_rel_rep_neg.masked_select(Variable(ind_old.view(-1,1).expand(-1,51)) == 1).view(-1,51)
            pred_classes_argmax_neg = pred_classes_argmax_neg.view(-1,1)[ind_old.view(-1,1) == 1]
            rel_labels_pred_neg = all_rel_pred_neg.masked_select(ind_old.view(-1,1).expand(-1,4) == 1).view(-1,4)
            # view(-1) is used instead of squeeze()
            max_rel_score_neg = rel_rep_neg.gather(1, Variable(pred_classes_argmax_neg.view(-1,1))).view(-1)

            # softmax score of every box for its *predicted* class
            twod_inds_neg = arange(result.rm_obj_preds.data) * self.num_classes + result.rm_obj_preds.data
            obj_scores_neg = F.softmax(result.rm_obj_dists, dim=1).view(-1)[twod_inds_neg]
            # wrapped from .data: the object scores of negatives carry no gradient
            obj_scores0_neg = Variable(obj_scores_neg.data[rel_inds_neg[:,1]])
            obj_scores1_neg = Variable(obj_scores_neg.data[rel_inds_neg[:,2]])
            all_score_neg = max_rel_score_neg * obj_scores0_neg * obj_scores1_neg
            # keep only negatives that score above the weakest positive; if there is
            # none, fall back to all negatives
            prob_score_neg = all_score_neg[all_score_neg.data > prob_score.data.min()] if (all_score_neg.data > prob_score.data.min()).sum() != 0 else all_score_neg

            # Rank positives and negatives together; flag is 1 for positives, 0 for negatives.
            flag = torch.cat((torch.ones(prob_score.size(0),1).cuda(),torch.zeros(prob_score_neg.size(0),1).cuda()),0)
            all_prob = torch.cat((prob_score,prob_score_neg), 0)  # Variable

            _, sort_prob_inds = torch.sort(all_prob.data, dim=0, descending=True)

            sorted_flag = flag[sort_prob_inds].view(-1)  # can be used to check distribution of pos and neg
            sorted_all_prob = all_prob[sort_prob_inds]  # Variable

            # positive triplet score
            pos_exp = sorted_all_prob[sorted_flag == 1]  # Variable
            # negative triplet score
            neg_exp = sorted_all_prob[sorted_flag == 0]  # Variable

            # Build (positive, negative) index pairs: positive i is paired with
            # negative indices 0..k-1, k being the number of negatives scoring above
            # it, or with negative 0 when there is none.
            # NOTE(review): both index lists start with a zeros(1, 1) seed row that is
            # never removed, so the pair (0, 0) is always included once more.
            pos_repeat = torch.zeros(1, 1)
            neg_repeat = torch.zeros(1, 1)
            for i in range(pos_exp.size(0)):
                if ( neg_exp.data > pos_exp.data[i] ).sum() != 0:
                    int_part = (neg_exp.data > pos_exp.data[i]).sum()
                    temp_pos_inds = torch.ones(int_part) * i
                    pos_repeat = torch.cat((pos_repeat, temp_pos_inds.view(-1,1)), 0)
                    temp_neg_inds = torch.arange(int_part)
                    neg_repeat = torch.cat((neg_repeat, temp_neg_inds.view(-1,1)), 0)
                else:
                    temp_pos_inds = torch.ones(1)* i
                    pos_repeat = torch.cat((pos_repeat, temp_pos_inds.view(-1,1)), 0)
                    temp_neg_inds = torch.arange(1)
                    neg_repeat = torch.cat((neg_repeat, temp_neg_inds.view(-1,1)), 0)

            # The string below is an earlier pairing scheme kept as dead code.
            """
            int_part = neg_exp.size(0) // pos_exp.size(0)
            decimal_part = neg_exp.size(0) % pos_exp.size(0)
            int_inds = torch.arange(pos_exp.size(0))[:,None].expand_as(torch.Tensor(pos_exp.size(0), int_part)).contiguous().view(-1)
            int_part_inds = (int(pos_exp.size(0) -1) - int_inds).long().cuda() # use minimum pos to correspond maximum negative
            if decimal_part == 0:
                expand_inds = int_part_inds
            else:
                expand_inds = torch.cat((torch.arange(pos_exp.size(0))[(pos_exp.size(0) - decimal_part):].long().cuda(), int_part_inds), 0)
            result.pos = pos_exp[expand_inds]
            result.neg = neg_exp
            result.anchor = Variable(torch.zeros(result.pos.size(0)).cuda())
            """
            result.pos = pos_exp[pos_repeat.cuda().long().view(-1)]
            result.neg = neg_exp[neg_repeat.cuda().long().view(-1)]
            result.anchor = Variable(torch.zeros(result.pos.size(0)).cuda())

            # Diagnostics: rank of the best / worst positive divided by the number of
            # scored triplets, and that number itself.
            result.ratio = torch.ones(3).cuda()
            result.ratio[0] = result.ratio[0] * (sorted_flag.nonzero().min() / (prob_score.size(0) + all_score_neg.size(0)))
            result.ratio[1] = result.ratio[1] * (sorted_flag.nonzero().max() / (prob_score.size(0) + all_score_neg.size(0)))
            result.ratio[2] = result.ratio[2] * (prob_score.size(0) + all_score_neg.size(0))

            return result

        else:  # no GT relation among rel_inds
            print("no gt_rel in rel_inds!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            # NOTE(review): interactive breakpoint left in the training path
            ipdb.set_trace()
            # Same candidate construction as above: ordered pairs of distinct
            # boxes of one image.
            rel_cands = im_inds.data[:, None] == im_inds.data[None]
            # self relation = 0
            rel_cands.view(-1)[diagonal_inds(rel_cands)] = 0
            # Require overlap for detection
            if self.require_overlap:
                rel_cands = rel_cands & (bbox_overlaps(boxes.data, boxes.data) > 0)
            rel_cands = rel_cands.nonzero()
            if rel_cands.dim() == 0:
                print("rel_cands.dim() == 0!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
                rel_cands = im_inds.data.new(1, 2).fill_(0)
            rel_cands = torch.cat((im_inds.data[rel_cands[:, 0]][:, None], rel_cands), 1)
            rel_labels_neg = rel_cands
            rel_inds_neg = rel_cands

            twod_inds_neg = arange(result.rm_obj_preds.data) * self.num_classes + result.rm_obj_preds.data
            obj_scores_neg = F.softmax(result.rm_obj_dists, dim=1).view(-1)[twod_inds_neg]
            vr_neg = self.visual_rep(result.fmap.detach(), rois, rel_inds_neg[:, 1:])
            subj_obj = subj_rep[rel_inds_neg[:, 1]] * obj_rep[rel_inds_neg[:, 2]]
            prod_rep_neg = subj_obj * vr_neg
            rel_dists_neg = self.rel_compress(prod_rep_neg)
            # negative overall score
            obj_scores0_neg = Variable(obj_scores_neg.data[rel_inds_neg[:,1]])
            obj_scores1_neg = Variable(obj_scores_neg.data[rel_inds_neg[:,2]])
            rel_rep_neg = F.softmax(rel_dists_neg, dim=1)
            _, pred_classes_argmax_neg = rel_rep_neg.data[:,1:].max(1)
            pred_classes_argmax_neg = pred_classes_argmax_neg + 1
            # view(-1) is used instead of squeeze()
            max_rel_score_neg = rel_rep_neg.gather(1, Variable(pred_classes_argmax_neg.view(-1,1))).view(-1)
            prob_score_neg = max_rel_score_neg * obj_scores0_neg * obj_scores1_neg

            # No positives: pos and anchor are all zeros, every candidate is a negative.
            result.pos = Variable(torch.zeros(prob_score_neg.size(0)).cuda())
            result.neg = prob_score_neg
            result.anchor = Variable(torch.zeros(prob_score_neg.size(0)).cuda())
            result.ratio = torch.ones(3,1).cuda()

            return result

    ###################### Testing ###########################

    # extract the score of every box for its predicted class
    twod_inds = arange(result.rm_obj_preds.data) * self.num_classes + result.rm_obj_preds.data
    result.obj_scores = F.softmax(result.rm_obj_dists, dim=1).view(-1)[twod_inds]  # e.g. [384]

    # Bbox regression
    if self.mode == 'sgdet':
        bboxes = result.boxes_all.view(-1, 4)[twod_inds].view(result.boxes_all.size(0), 4)
    else:
        # Boxes will get fixed by filter_dets function.
        bboxes = result.rm_box_priors

    rel_rep = F.softmax(result.rel_dists, dim=1)  # e.g. [275, 51]
    # sort product of obj1 * obj2 * rel
    return filter_dets(bboxes, result.obj_scores, result.rm_obj_preds, rel_inds[:, 1:], rel_rep)
def rel_assignments_sgdet(im_inds, rpn_rois, roi_gtlabels, gt_boxes, gt_classes, gt_rels,
                          image_offset, fg_thresh=0.5, num_sample_per_gt=4,
                          filter_non_overlap=True):
    """
    Sample foreground / background relation labels between predicted boxes.

    For every image, each ground-truth relation is projected onto the pairs of
    predicted boxes that match its two GT boxes (same label and IoU >= fg_thresh).
    Up to ``num_sample_per_gt`` such pairs are drawn per GT relation, weighted by
    the product of the two IoUs. Background pairs are then drawn uniformly from
    the remaining candidate pairs until 64 relations per image are reached.

    :param im_inds: image index of every predicted box (already relative to this
        batch; compared directly against ``range(num_im)``)
    :param rpn_rois: predicted boxes, fed to ``bbox_overlaps`` against ``gt_boxes``.
        NOTE(review): the previous docstring described this as
        [img_ind, x1, y1, x2, y2]; confirm the layout expected by bbox_overlaps.
    :param roi_gtlabels: label assigned to every predicted box; boxes labelled 0
        never take part in background relations
    :param gt_boxes: [num_gt, 4] array of x0, y0, x1, y1
    :param gt_classes: [num_gt, 2] array of [img_ind, class]
    :param gt_rels: [num_gt_rel, 4] array of [img_ind, box_0, box_1, rel type]
    :param image_offset: subtracted from the image-index column of ``gt_classes``
        and ``gt_rels`` so that they are relative to this batch
    :param fg_thresh: minimum IoU for a predicted box to match a GT box
    :param num_sample_per_gt: maximum number of predicted pairs drawn per GT relation
    :param filter_non_overlap: if True, background candidates are limited to box
        pairs with 0 < IoU < 1; otherwise every off-diagonal pair is a candidate
    :return: LongTensor [num_rels, 4] of (img ind, box0 ind, box1 ind, rel type),
        placed on the CUDA device of ``rpn_rois``. Box indices are offsets into
        the full (all-image) list of predicted boxes.
    """
    rels_per_image = 64
    fg_rels_per_image = int(np.round(REL_FG_FRACTION * rels_per_image))

    pred_inds_np = im_inds.cpu().numpy()
    pred_boxes_np = rpn_rois.cpu().numpy()
    pred_boxlabels_np = roi_gtlabels.cpu().numpy()
    gt_boxes_np = gt_boxes.cpu().numpy()
    gt_classes_np = gt_classes.cpu().numpy()
    gt_rels_np = gt_rels.cpu().numpy()

    # Make the GT image indices relative to this batch.
    # NOTE(review): these are in-place edits of the numpy views; if the inputs
    # were ever CPU tensors the caller's data would presumably be modified too.
    gt_classes_np[:, 0] -= image_offset
    gt_rels_np[:, 0] -= image_offset

    num_im = gt_classes_np[:, 0].max() + 1

    rel_labels = []
    num_box_seen = 0
    for im_ind in range(num_im):
        pred_ind = np.where(pred_inds_np == im_ind)[0]

        gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0]
        gt_boxes_i = gt_boxes_np[gt_ind]
        gt_classes_i = gt_classes_np[gt_ind, 1]
        gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:]

        pred_boxes_i = pred_boxes_np[pred_ind]
        pred_boxlabels_i = pred_boxlabels_np[pred_ind]

        # [num_pred, num_gt]
        ious = bbox_overlaps(pred_boxes_i, gt_boxes_i)
        is_match = (pred_boxlabels_i[:, None] == gt_classes_i[None]) & (ious >= fg_thresh)

        # FOR BG. Limit ourselves to only IOUs that overlap, but are not the exact same box
        if filter_non_overlap:
            pbi_iou = bbox_overlaps(pred_boxes_i, pred_boxes_i)
            rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0)
        else:
            num_pred = pred_boxes_i.shape[0]
            rel_possibilities = np.ones((num_pred, num_pred), dtype=np.int64) - np.eye(
                num_pred, dtype=np.int64)

        # ONLY select relations between ground truth because otherwise we get useless data
        rel_possibilities[pred_boxlabels_i == 0] = 0
        rel_possibilities[:, pred_boxlabels_i == 0] = 0

        # Sample the GT relationships.
        fg_rels = []
        for from_gtind, to_gtind, rel_id in gt_rels_i:
            fg_rels_i = []
            fg_scores_i = []

            for from_ind in np.where(is_match[:, from_gtind])[0]:
                for to_ind in np.where(is_match[:, to_gtind])[0]:
                    if from_ind != to_ind:
                        fg_rels_i.append((from_ind, to_ind, rel_id))
                        fg_scores_i.append(ious[from_ind, from_gtind] * ious[to_ind, to_gtind])
                        # A pair used as foreground can no longer be background.
                        rel_possibilities[from_ind, to_ind] = 0
            if len(fg_rels_i) == 0:
                continue
            p = np.array(fg_scores_i)
            p = p / p.sum()
            num_to_add = min(p.shape[0], num_sample_per_gt)
            for rel_to_add in npr.choice(p.shape[0], p=p, size=num_to_add, replace=False):
                fg_rels.append(fg_rels_i[rel_to_add])

        fg_rels = np.array(fg_rels, dtype=np.int64)
        if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image:
            fg_rels = fg_rels[npr.choice(fg_rels.shape[0], size=fg_rels_per_image, replace=False)]
        elif fg_rels.size == 0:
            fg_rels = np.zeros((0, 3), dtype=np.int64)

        bg_rels = np.column_stack(np.where(rel_possibilities))
        bg_rels = np.column_stack((bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64)))

        num_bg_rel = min(rels_per_image - fg_rels.shape[0], bg_rels.shape[0])
        if bg_rels.size > 0:
            # Background pairs are drawn uniformly (the intersect-weighted
            # sampling used by _sel_rels is intentionally not applied here).
            bg_rels = bg_rels[
                np.random.choice(bg_rels.shape[0], size=num_bg_rel, replace=False)]
        else:
            bg_rels = np.zeros((0, 3), dtype=np.int64)

        if fg_rels.size == 0 and bg_rels.size == 0:
            # Just put something here so that every image contributes a row
            bg_rels = np.array([[0, 0, 0]], dtype=np.int64)

        all_rels_i = np.concatenate((fg_rels, bg_rels), 0)
        # Shift per-image box indices to indices into the full box list.
        all_rels_i[:, 0:2] += num_box_seen

        # Sort by first box index, ties broken by second box index.
        all_rels_i = all_rels_i[np.lexsort((all_rels_i[:, 1], all_rels_i[:, 0]))]

        rel_labels.append(np.column_stack((
            im_ind * np.ones(all_rels_i.shape[0], dtype=np.int64),
            all_rels_i,
        )))

        num_box_seen += pred_boxes_i.shape[0]

    # `async` is a reserved keyword since Python 3.7; PyTorch >= 0.4 names the
    # same option `non_blocking`.
    rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(
        rpn_rois.get_device(), non_blocking=True)
    return rel_labels
def _sel_rels(ious, pred_boxes, pred_labels, gt_classes, gt_rels, fg_thresh=0.5,
              fg_rels_per_image=128, num_sample_per_gt=1, filter_non_overlap=True):
    """
    Pick the foreground and background relations for one image.

    :param ious: [num_pred', num_gt] IoU between predicted and GT boxes
    :param pred_boxes: predicted boxes, compared against themselves with bbox_overlaps
    :param pred_labels: [num_pred'] label of each predicted box (0 is excluded
        from background pairs)
    :param gt_classes: [num_gt]
    :param gt_rels: [num_gtrel, 3] rows of (from_gt_ind, to_gt_ind, predicate)
    :param fg_thresh: minimum IoU for a predicted box to match a GT box
    :param fg_rels_per_image: cap on the number of foreground relations kept
    :param num_sample_per_gt: maximum number of predicted pairs drawn per GT relation
    :param filter_non_overlap: restrict background candidates to pairs with 0 < IoU < 1
    :return: new rels, [num_predrel, 3] where each is (pred_ind1, pred_ind2, predicate),
        sorted by pred_ind1 and then pred_ind2
    """
    matches = (ious >= fg_thresh) & (pred_labels[:, None] == gt_classes[None, :])

    pair_iou = bbox_overlaps(pred_boxes, pred_boxes)
    # Overlapping pairs that are not the exact same box (boxes were duplicated earlier).
    overlapping = (pair_iou < 1) & (pair_iou > 0)

    if filter_non_overlap:
        candidates = overlapping
    else:
        n_pred = pred_labels.shape[0]
        candidates = np.ones((n_pred, n_pred), dtype=np.int64) - np.eye(n_pred, dtype=np.int64)
        # Fresh mask so that edits to `candidates` do not leak into it.
        overlapping = (pair_iou < 1) & (pair_iou > 0)

    # Background relations are only taken between boxes with a non-zero label.
    unlabeled = pred_labels == 0
    candidates[unlabeled] = 0
    candidates[:, unlabeled] = 0

    # Project every GT relation onto matching predicted pairs and sample from them.
    sampled_fg = []
    for src_gt, dst_gt, predicate in gt_rels:
        src_preds = np.where(matches[:, src_gt])[0]
        dst_preds = np.where(matches[:, dst_gt])[0]

        options = []
        weights = []
        for s in src_preds:
            for d in dst_preds:
                if s == d:
                    continue
                options.append((s, d, predicate))
                weights.append(ious[s, src_gt] * ious[d, dst_gt])
                # Foreground pairs must not reappear as background.
                candidates[s, d] = 0

        if not options:
            continue

        probs = np.array(weights)
        probs = probs / probs.sum()
        n_draw = min(probs.shape[0], num_sample_per_gt)
        picks = npr.choice(probs.shape[0], p=probs, size=n_draw, replace=False)
        sampled_fg.extend(options[k] for k in picks)

    bg_pairs = np.column_stack(np.where(candidates))
    bg_rels = np.column_stack((bg_pairs, np.zeros(bg_pairs.shape[0], dtype=np.int64)))

    fg_rels = np.array(sampled_fg, dtype=np.int64)
    if fg_rels.size == 0:
        fg_rels = np.zeros((0, 3), dtype=np.int64)
    elif fg_rels.shape[0] > fg_rels_per_image:
        keep = npr.choice(fg_rels.shape[0], size=fg_rels_per_image, replace=False)
        fg_rels = fg_rels[keep]

    n_bg = min(RELS_PER_IMG - fg_rels.shape[0], bg_rels.shape[0])
    if bg_rels.size > 0:
        # Sample 4x as many intersecting relationships as non-intersecting.
        intersects = overlapping[bg_rels[:, 0], bg_rels[:, 1]]
        bg_probs = np.where(intersects, 0.8, 0.2).astype(np.float32)
        bg_probs /= bg_probs.sum()
        chosen = np.random.choice(bg_rels.shape[0], p=bg_probs, size=n_bg, replace=False)
        bg_rels = bg_rels[chosen]
    else:
        bg_rels = np.zeros((0, 3), dtype=np.int64)

    merged = np.concatenate((fg_rels, bg_rels), 0)
    # Primary key: first index; secondary key: second index.
    order = np.lexsort((merged[:, 1], merged[:, 0]))
    return merged[order]
def forward(self, x, im_sizes, image_offset, gt_boxes=None, gt_classes=None, gt_rels=None,
            proposals=None, train_anchor_inds=None, return_fmap=False):
    """
    Run the detector and bundle all of its outputs in a Result.

    :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
    :param im_sizes: A numpy array of (h, w, scale) for each image.
    :param image_offset: Offset onto what image we're on for MGPU training
                         (if single GPU this is 0)

    Training parameters:
    :param gt_boxes: [num_gt, 4] GT boxes over the batch.
    :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
    :param gt_rels: GT relations, forwarded unchanged to get_boxes
    :param proposals: precomputed proposals, forwarded unchanged to get_boxes
    :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that
                              will be used to compute the training loss.
                              Each (img_ind, fpn_idx)
    :param return_fmap: if True the feature map is included in the Result,
                        otherwise that field is None
    :return: a Result. The od_* fields hold the outputs for every RoI, the rm_*
             fields hold the (possibly NMS-filtered) outputs.
    """
    fmap = self.feature_map(x)

    # Get boxes from RPN
    boxes_out = self.get_boxes(fmap, im_sizes, image_offset, gt_boxes, gt_classes,
                               gt_rels, train_anchor_inds, proposals=proposals)
    rois, obj_labels, bbox_targets, rpn_scores, rpn_box_deltas, rel_labels = boxes_out

    # Now classify them
    obj_fmap = self.obj_feature_map(fmap, rois)
    od_obj_dists = self.score_fc(obj_fmap)
    if self.mode != 'gtbox':
        od_box_deltas = self.bbox_fc(obj_fmap).view(-1, len(self.classes), 4)
    else:
        od_box_deltas = None

    od_box_priors = rois[:, 1:]

    not_gtbox = not self.mode == 'gtbox'
    apply_nms = (not self.training and not_gtbox) or self.mode in ('proposals', 'refinerels')

    if not apply_nms:
        # No filtering: the rm_* outputs are the od_* outputs.
        im_inds = rois[:, 0].long().contiguous() + image_offset
        nms_scores = nms_preds = nms_boxes_assign = nms_boxes = None
        box_priors = rois[:, 1:]
        rm_obj_labels = obj_labels
        box_deltas = od_box_deltas
        obj_dists = od_obj_dists
    else:
        nms_out = self.nms_boxes(od_obj_dists, rois, od_box_deltas, im_sizes)
        nms_inds, nms_scores, nms_preds, nms_boxes_assign, nms_boxes, nms_imgs = nms_out

        im_inds = nms_imgs + image_offset
        obj_dists = od_obj_dists[nms_inds]
        obj_fmap = obj_fmap[nms_inds]
        box_deltas = od_box_deltas[nms_inds]
        box_priors = nms_boxes[:, 0]

        rm_obj_labels = None
        if self.training and not_gtbox:
            # NOTE: If we're doing this during training, we need to assign labels here.
            overlaps = bbox_overlaps(box_priors, gt_boxes).data
            # Ignore overlaps with GT boxes that belong to a different image.
            other_image = im_inds.data[:, None] != gt_classes.data[None, :, 0]
            overlaps[other_image] = 0.0

            best_iou, best_gt = overlaps.max(1)
            rm_obj_labels = gt_classes[:, 1][best_gt]
            # Boxes without a GT overlap of at least 0.5 get label 0.
            rm_obj_labels[best_iou < 0.5] = 0

    returned_fmap = fmap if return_fmap else None
    return Result(
        od_obj_dists=od_obj_dists,
        rm_obj_dists=obj_dists,
        obj_scores=nms_scores,
        obj_preds=nms_preds,
        obj_fmap=obj_fmap,
        od_box_deltas=od_box_deltas,
        rm_box_deltas=box_deltas,
        od_box_targets=bbox_targets,
        rm_box_targets=bbox_targets,
        od_box_priors=od_box_priors,
        rm_box_priors=box_priors,
        boxes_assigned=nms_boxes_assign,
        boxes_all=nms_boxes,
        od_obj_labels=obj_labels,
        rm_obj_labels=rm_obj_labels,
        rpn_scores=rpn_scores,
        rpn_box_deltas=rpn_box_deltas,
        rel_labels=rel_labels,
        im_inds=im_inds,
        fmap=returned_fmap,
    )
def rel_assignments(im_inds, rpn_rois, roi_gtlabels, roi_predscore, gt_boxes, gt_classes,
                    gt_rels, image_offset, fg_thresh=0.5, num_sample_per_gt=4,
                    filter_non_overlap=True):
    """
    Sample foreground / background relation labels between predicted boxes.

    Foreground relations are obtained by projecting each GT relation onto pairs of
    predicted boxes that match its two GT boxes (same label and IoU >= fg_thresh);
    up to ``num_sample_per_gt`` pairs are drawn per GT relation, weighted by the
    product of the two IoUs. Background relations are the candidate pairs whose
    two boxes have the highest product of predicted class scores.

    :param im_inds: image index of every predicted box (relative to this batch)
    :param rpn_rois: predicted boxes, fed to ``bbox_overlaps`` against ``gt_boxes``.
        NOTE(review): the previous docstring described this as
        [img_ind, x1, y1, x2, y2]; confirm the layout expected by bbox_overlaps.
    :param roi_gtlabels: label assigned to every predicted box
    :param roi_predscore: [num_pred, num_classes] raw class scores per predicted box;
        they are exponentiated and normalised per row below
    :param gt_boxes: [num_gt, 4] array of x0, y0, x1, y1
    :param gt_classes: [num_gt, 2] array of [img_ind, class]
    :param gt_rels: rows of [img_ind, box_0, box_1, <relation vector>].
        NOTE(review): the rows built here are 53 wide, so the relation vector is
        presumably 51 entries long (54 columns in total) - confirm against the
        data loader.
    :param image_offset: subtracted from the image-index column of ``gt_classes``
        and ``gt_rels`` so that they are relative to this batch
    :param fg_thresh: minimum IoU for a predicted box to match a GT box
    :param num_sample_per_gt: maximum number of predicted pairs drawn per GT relation
    :param filter_non_overlap: if True, background candidates are limited to box
        pairs with 0 < IoU < 1; otherwise every off-diagonal pair is a candidate
    :return: LongTensor [num_rels, 54] of (img ind, box0 ind, box1 ind, relation
        vector), placed on the CUDA device of ``rpn_rois``. Box indices are
        offsets into the full (all-image) list of predicted boxes.
    :raises ValueError: if neither ``conf.sl_train`` nor ``conf.rl_train`` is set
    """
    if conf.sl_train:
        rels_per_image = RELS_PER_IMG_SGDET_SL
    elif conf.rl_train:
        rels_per_image = RELS_PER_IMG_SGDET_RL
    else:
        raise ValueError("rel_assignments requires conf.sl_train or conf.rl_train to be set")

    fg_rels_per_image = int(np.round(REL_FG_FRACTION * rels_per_image))

    # Width of one relation row: [box0, box1] followed by the relation vector.
    rel_row_width = 53

    pred_inds_np = im_inds.cpu().numpy()
    pred_boxes_np = rpn_rois.cpu().numpy()
    pred_boxlabels_np = roi_gtlabels.cpu().numpy()
    gt_boxes_np = gt_boxes.cpu().numpy()
    gt_classes_np = gt_classes.cpu().numpy()
    gt_rels_np = gt_rels.cpu().numpy()

    # Per-box confidence: normalised score of the best class, excluding column 0.
    # NOTE(review): the 1e-8 sits inside exp(), so it does not guard the
    # denominator; the expression is kept as-is to leave the ranking unchanged.
    roi_predscore_np = roi_predscore.cpu().numpy()
    norm_roi_predscore_np = np.exp(roi_predscore_np) / (
        np.exp(roi_predscore_np + 1e-8).sum(1)[:, None])
    each_roi_predlabel = np.argmax(norm_roi_predscore_np[:, 1:], 1) + 1
    each_roi_predscore = norm_roi_predscore_np[
        np.arange(norm_roi_predscore_np.shape[0]), each_roi_predlabel]

    # Make the GT image indices relative to this batch (in-place on the numpy views).
    gt_classes_np[:, 0] -= image_offset
    gt_rels_np[:, 0] -= image_offset

    num_im = gt_classes_np[:, 0].max() + 1

    rel_labels = []
    num_box_seen = 0
    for im_ind in range(num_im):
        pred_ind = np.where(pred_inds_np == im_ind)[0]

        gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0]
        gt_boxes_i = gt_boxes_np[gt_ind]
        gt_classes_i = gt_classes_np[gt_ind, 1]
        gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:]

        pred_boxes_i = pred_boxes_np[pred_ind]
        pred_boxlabels_i = pred_boxlabels_np[pred_ind]
        pred_score_i = each_roi_predscore[pred_ind]

        # [num_pred, num_gt]
        ious = bbox_overlaps(pred_boxes_i, gt_boxes_i)
        is_match = (pred_boxlabels_i[:, None] == gt_classes_i[None]) & (ious >= fg_thresh)

        # FOR BG. Limit ourselves to only IOUs that overlap, but are not the exact same box
        if filter_non_overlap:
            pbi_iou = bbox_overlaps(pred_boxes_i, pred_boxes_i)
            rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0)
        else:
            num_pred = pred_boxes_i.shape[0]
            rel_possibilities = np.ones((num_pred, num_pred), dtype=np.int64) - np.eye(
                num_pred, dtype=np.int64)

        # Unlike rel_assignments_sgdet, boxes labelled 0 are deliberately NOT
        # removed from the background candidates here.

        # Sample the GT relationships.
        fg_rels = []
        for each_gt_rels_i in gt_rels_i:
            from_gtind, to_gtind = each_gt_rels_i[0], each_gt_rels_i[1]
            rel_id = each_gt_rels_i[2:]
            fg_rels_i = []
            fg_scores_i = []

            for from_ind in np.where(is_match[:, from_gtind])[0]:
                for to_ind in np.where(is_match[:, to_gtind])[0]:
                    if from_ind != to_ind:
                        fg_rels_i.append(
                            np.concatenate((np.array([from_ind, to_ind]), rel_id), 0))
                        fg_scores_i.append(ious[from_ind, from_gtind] * ious[to_ind, to_gtind])
                        # A pair used as foreground can no longer be background.
                        rel_possibilities[from_ind, to_ind] = 0
            if len(fg_rels_i) == 0:
                continue
            p = np.array(fg_scores_i)
            p = p / p.sum()
            num_to_add = min(p.shape[0], num_sample_per_gt)
            for rel_to_add in npr.choice(p.shape[0], p=p, size=num_to_add, replace=False):
                fg_rels.append(fg_rels_i[rel_to_add])

        if len(fg_rels) > 0:
            fg_rels = np.vstack(fg_rels)
            if fg_rels.shape[0] > fg_rels_per_image:
                fg_rels = fg_rels[npr.choice(fg_rels.shape[0], size=fg_rels_per_image,
                                             replace=False)]
        else:
            fg_rels = np.zeros((0, rel_row_width), dtype=np.int64)

        # Background rows: [box0, box1, 1, 0, ..., 0], i.e. only the first slot
        # of the relation vector is set.
        bg_rels = np.column_stack(np.where(rel_possibilities))
        bg_rels = np.column_stack(
            (bg_rels, np.ones(bg_rels.shape[0], dtype=np.int64),
             np.zeros((bg_rels.shape[0], rel_row_width - 3), dtype=np.int64)))

        num_bg_rel = min(rels_per_image - fg_rels.shape[0], bg_rels.shape[0])
        if bg_rels.size > 0:
            # Keep the num_bg_rel pairs with the largest subject*object score
            # (argsort is ascending, so the largest are in the tail).
            sub_pred_score_i = pred_score_i[bg_rels[:, 0]]
            obj_pred_score_i = pred_score_i[bg_rels[:, 1]]
            ranked = np.argsort(sub_pred_score_i * obj_pred_score_i)
            # Slice from an explicit start index: `ranked[-0:]` would select
            # every row instead of none when num_bg_rel is 0.
            start = ranked.shape[0] - max(num_bg_rel, 0)
            bg_rels = bg_rels[ranked[start:]]
        else:
            bg_rels = np.zeros((0, rel_row_width), dtype=np.int64)

        # When both sets are empty this image simply contributes no rows.
        all_rels_i = np.concatenate((fg_rels, bg_rels), 0)
        # Shift per-image box indices to indices into the full box list.
        all_rels_i[:, 0:2] += num_box_seen

        # Sort by first box index, ties broken by second box index.
        all_rels_i = all_rels_i[np.lexsort((all_rels_i[:, 1], all_rels_i[:, 0]))]

        rel_labels.append(
            np.column_stack((
                im_ind * np.ones(all_rels_i.shape[0], dtype=np.int64),
                all_rels_i,
            )))

        num_box_seen += pred_boxes_i.shape[0]

    rel_labels_np = np.concatenate(rel_labels, 0)

    # `async` is a reserved keyword since Python 3.7; PyTorch >= 0.4 names the
    # same option `non_blocking`.
    rel_labels = torch.LongTensor(rel_labels_np).cuda(rpn_rois.get_device(),
                                                      non_blocking=True)
    return rel_labels