def _compute_pred_matches(gt_triplets, pred_triplets, gt_boxes, pred_boxes,
                          iou_thresh, phrdet=False, rel_cats=None):
    """
    Given a set of predicted triplets, return the matching GT's for each
    prediction, bucketed by relation category.

    A prediction matches a GT relation when ``intersect_2d`` marks their
    triplet rows as equal AND their boxes overlap by at least ``iou_thresh``.

    :param gt_triplets: GT triplets, one row per relation. Column 1 is used as
        the key into ``rel_cats`` (presumably the predicate id).
    :param pred_triplets: predicted triplets, one row per prediction.
    :param gt_boxes: GT boxes, one row of 8 values per relation: subject box in
        ``[:4]``, object box in ``[4:]``.
    :param pred_boxes: predicted boxes, same layout as ``gt_boxes``, one row
        per prediction.
    :param iou_thresh: minimum value returned by ``bbox_overlaps`` for two
        boxes to count as matching.
    :param phrdet: if True, compare the subject/object *union* boxes; otherwise
        the subject boxes and the object boxes must each pass the threshold.
    :param rel_cats: optional dict mapping the value in column 1 of
        ``gt_triplets`` to a category name. When None, only the aggregated
        ``'all_rel_cates'`` bucket is produced.
    :return: dict mapping category name -> list of length
        ``pred_boxes.shape[0]``; entry ``i`` is the list of GT row indices
        matched by prediction ``i``. The ``'all_rel_cates'`` key is always
        present and collects matches of every category.
    """
    # The rows correspond to GT triplets, columns to pred triplets.
    keeps = intersect_2d(gt_triplets, pred_triplets)
    gt_has_match = keeps.any(1)

    num_preds = pred_boxes.shape[0]
    per_category = rel_cats is not None
    category_names = rel_cats.values() if per_category else ()
    pred_to_gt = {
        name: [[] for _ in range(num_preds)] for name in category_names
    }
    # The aggregated bucket is written unconditionally below, so it must exist
    # even when rel_cats is None or does not list 'all_rel_cates' itself.
    pred_to_gt.setdefault('all_rel_cates', [[] for _ in range(num_preds)])

    for gt_ind, gt_box, keep_inds in zip(
            np.where(gt_has_match)[0],
            gt_boxes[gt_has_match],
            keeps[gt_has_match],
    ):
        # Only predictions whose triplet equals this GT triplet are candidates.
        boxes = pred_boxes[keep_inds]
        if phrdet:
            # Compare the box enclosing subject+object: min over the first two
            # coordinates, max over the last two.
            gt_box_union = gt_box.reshape((2, 4))
            gt_box_union = np.concatenate(
                (gt_box_union.min(0)[:2], gt_box_union.max(0)[2:]), 0)

            box_union = boxes.reshape((-1, 2, 4))
            box_union = np.concatenate(
                (box_union.min(1)[:, :2], box_union.max(1)[:, 2:]), 1)

            inds = bbox_overlaps(gt_box_union[None], box_union)[0] >= iou_thresh
        else:
            sub_iou = bbox_overlaps(gt_box[None, :4], boxes[:, :4])[0]
            obj_iou = bbox_overlaps(gt_box[None, 4:], boxes[:, 4:])[0]
            inds = (sub_iou >= iou_thresh) & (obj_iou >= iou_thresh)

        gt_ind = int(gt_ind)
        # np.where(keep_inds)[0] maps candidate positions back to prediction rows.
        for i in np.where(keep_inds)[0][inds]:
            pred_to_gt['all_rel_cates'][i].append(gt_ind)
            if per_category:
                pred_to_gt[rel_cats[gt_triplets[gt_ind, 1]]][i].append(gt_ind)
    return pred_to_gt
def _compute_pred_matches(gt_triplets, pred_triplets, gt_boxes, pred_boxes,
                          iou_thresh, phrdet=False):
    """
    For every predicted relation, collect the ground-truth relations it matches.

    A match requires that ``intersect_2d`` marks the GT and predicted triplet
    rows as equal and that the boxes overlap by at least ``iou_thresh``.

    :param gt_triplets: GT triplets, one row per relation
    :param pred_triplets: predicted triplets, one row per prediction
    :param gt_boxes: GT boxes, 8 values per row (subject box, then object box)
    :param pred_boxes: predicted boxes, same layout as gt_boxes
    :param iou_thresh: minimum ``bbox_overlaps`` value for a box match
    :param phrdet: if True, match on the subject/object union box instead of
        requiring both boxes to match independently
    :return: pred_to_gt: list with one entry per row of pred_boxes; entry i is
        the list of GT row indices matched by prediction i,
        e.g. pred_to_gt[0] == [1, 2] means prediction 0 matches GT relations
        1 and 2
    """
    # triplet_eq[g, p] is True when GT row g and prediction row p agree.
    triplet_eq = intersect_2d(gt_triplets, pred_triplets)
    matched_gt = triplet_eq.any(1)

    num_preds = pred_boxes.shape[0]
    pred_to_gt = [[] for _ in range(num_preds)]

    candidates = zip(
        np.where(matched_gt)[0],
        gt_boxes[matched_gt],
        triplet_eq[matched_gt],
    )
    for gt_row, gt_pair, same_triplet in candidates:
        cand_boxes = pred_boxes[same_triplet]

        if not phrdet:
            # Subject and object boxes must each clear the threshold.
            subj_overlap = bbox_overlaps(gt_pair[None, :4], cand_boxes[:, :4])[0]
            obj_overlap = bbox_overlaps(gt_pair[None, 4:], cand_boxes[:, 4:])[0]
            box_ok = (subj_overlap >= iou_thresh) & (obj_overlap >= iou_thresh)
        else:
            # Compare the boxes enclosing subject and object together.
            gt_corners = gt_pair.reshape((2, 4))
            gt_union = np.concatenate(
                (gt_corners.min(0)[:2], gt_corners.max(0)[2:]), 0)

            cand_corners = cand_boxes.reshape((-1, 2, 4))
            cand_union = np.concatenate(
                (cand_corners.min(1)[:, :2], cand_corners.max(1)[:, 2:]), 1)

            union_overlap = bbox_overlaps(gt_union[None], cand_union)[0]
            box_ok = union_overlap >= iou_thresh

        # Translate candidate positions back into prediction row indices.
        for pred_row in np.where(same_triplet)[0][box_ok]:
            pred_to_gt[pred_row].append(int(gt_row))
    return pred_to_gt
def findMatched(objects, obj, all_labels, label, max_iou):
    """
    Look up ``obj`` among the already registered ``objects``; register it as a
    new object when no suitable match exists.

    An existing object matches when its overlap with ``obj`` (as returned by
    ``bbox_overlaps``) is at least ``max_iou`` and its label equals ``label``.
    Candidates are tried in order of decreasing overlap.

    :param objects: array of registered boxes, one per row
    :param obj: the box to look up; stacked under ``objects`` when it is new
    :param all_labels: list of labels parallel to ``objects``; appended to in
        place when a new object is registered
    :param label: label of ``obj``
    :param max_iou: overlap threshold (despite the name, it is used as the
        minimum overlap required for a match)
    :return: (idx, objects, all_labels) where idx is the row of the matched or
        newly added object
    """
    matched_idx = None

    if objects.shape[0] != 0:
        ov = bbox_overlaps(objects, obj)
        if not (np.max(ov) < max_iou):
            cand_ids = np.where(ov >= max_iou)[0]
            cand_ovs = ov[cand_ids].reshape(-1)
            # Highest-overlap candidates first.
            cand_ids = cand_ids[np.argsort(cand_ovs * -1)]
            matched_idx = next(
                (cand for cand in cand_ids if all_labels[cand] == label),
                None,
            )

    if matched_idx is None:
        # Nothing suitable: append obj as a brand-new entry.
        matched_idx = objects.shape[0]
        objects = np.vstack((objects, obj))
        all_labels.append(label)

    return matched_idx, objects, all_labels
def load_graphs(graphs_file, filter_non_overlap=False):
    """
    Load the JSON file containing the GT boxes and relations.

    The file must hold a list of per-image entries, each with the keys
    ``'bboxes'``, ``'gt_classes'``, ``'gt_rels'`` and ``'imPath'``. Entries
    with no relations are always skipped.

    :param graphs_file: path to the JSON annotation file
    :param filter_non_overlap: if True, keep only the relations whose two boxes
        have a ``bbox_overlaps`` value > 0, and drop images left with none.
    :return: (boxes, gt_classes, relationships, filenames), four parallel
        lists with one element per kept image:
        boxes: array built from the entry's ``'bboxes'``
        gt_classes: array built from the entry's ``'gt_classes'``
        relationships: array built from the entry's ``'gt_rels'``; columns 0
            and 1 index into that image's boxes
        filenames: the entry's ``'imPath'`` value
    """
    # Context manager so the handle is closed even if parsing fails.
    with open(graphs_file) as f:
        graph_annos = json.load(f)

    # Get everything by image.
    boxes = []
    gt_classes = []
    relationships = []
    filenames = []
    for entry in graph_annos:
        boxes_i = np.array(entry['bboxes'])
        gt_classes_i = np.array(entry['gt_classes'])
        rels = np.array(entry['gt_rels'])
        if len(rels) == 0:
            continue
        filename = entry['imPath']

        if filter_non_overlap:
            inters = bbox_overlaps(boxes_i, boxes_i)
            rel_overs = inters[rels[:, 0], rels[:, 1]]
            inc = np.where(rel_overs > 0.0)[0]
            if inc.size == 0:
                # No relation survives the filter: drop the whole image.
                continue
            rels = rels[inc]

        boxes.append(boxes_i)
        gt_classes.append(gt_classes_i)
        relationships.append(rels)
        filenames.append(filename)
    return boxes, gt_classes, relationships, filenames
def box_filter(boxes, must_overlap=False):
    """
    Enumerate the ordered box pairs that are candidate relations.

    :param boxes: array with one box per row
    :param must_overlap: if True, only include pairs of distinct boxes whose
        ``bbox_overlaps`` value is > 0; if no pair overlaps, fall back to all
        pairs. If False, all pairs of distinct boxes are returned.
    :return: integer array with two columns, each row an (i, j) index pair
        with i != j
    """
    n_cands = boxes.shape[0]

    # Every ordered pair except a box with itself.
    all_possib = np.ones((n_cands, n_cands), dtype=bool)
    np.fill_diagonal(all_possib, False)

    if must_overlap:
        # Builtin float/bool: the np.float / np.bool aliases no longer exist
        # in current NumPy releases.
        boxes_f = boxes.astype(float)
        overlaps = bbox_overlaps(boxes_f, boxes_f) > 0
        np.fill_diagonal(overlaps, False)

        possible_boxes = np.column_stack(np.where(overlaps))
        if possible_boxes.size > 0:
            return possible_boxes
        # No overlapping pair at all: fall through and use every pair.

    return np.column_stack(np.where(all_possib))
def _compute_pred_matches(gt_triplets, pred_triplets, gt_boxes, pred_boxes,
                          iou_thresh, phrdet=False):
    """
    Given a set of predicted triplets, return the list of matching GT's for
    each of the given predictions.

    Matching is done in two stages: first on the triplets (via
    ``intersect_2d``), then on the boxes (via ``bbox_overlaps``).

    :param gt_triplets: GT triplets, one row per relation
    :param pred_triplets: predicted triplets, one row per prediction
    :param gt_boxes: GT boxes, 8 values per row (subject box then object box)
    :param pred_boxes: predicted boxes, same layout as gt_boxes
    :param iou_thresh: minimum overlap for two boxes to match
    :param phrdet: if True, compare union boxes; otherwise the subject and the
        object box must each overlap the corresponding GT box
    :return: list of length pred_boxes.shape[0]; element i holds the row
        indices of the GT triplets matched by prediction i. One prediction can
        match several GT rows, e.g. if (3, 10) and (5, 10) both pass, element
        10 is [3, 5].
    """
    # Stage 1 - triplet matching. keeps[g, p] is True when GT row g and
    # prediction row p match.
    keeps = intersect_2d(gt_triplets, pred_triplets)
    # One flag per GT row: does any prediction share its triplet?
    gt_has_match = keeps.any(1)

    pred_to_gt = [[] for _ in range(pred_boxes.shape[0])]

    # Stage 2 - box matching, only for GT rows with at least one triplet match.
    gt_rows = np.where(gt_has_match)[0]
    for gt_ind, gt_box, keep_inds in zip(gt_rows,
                                         gt_boxes[gt_has_match],
                                         keeps[gt_has_match]):
        # keep_inds is one row of keeps: the predictions sharing this triplet.
        boxes = pred_boxes[keep_inds]
        pred_rows = np.where(keep_inds)[0]

        if phrdet:
            # Phrase detection: compare the boxes enclosing subject + object.
            gt_two = gt_box.reshape((2, 4))
            gt_box_union = np.concatenate(
                (gt_two.min(0)[:2], gt_two.max(0)[2:]), 0)

            pred_two = boxes.reshape((-1, 2, 4))
            box_union = np.concatenate(
                (pred_two.min(1)[:, :2], pred_two.max(1)[:, 2:]), 1)

            inds = bbox_overlaps(gt_box_union[None], box_union)[0] >= iou_thresh
        else:
            # Scene graph detection: each object box must independently
            # overlap with the corresponding ground truth box.
            sub_iou = bbox_overlaps(gt_box[None, :4], boxes[:, :4])[0]
            obj_iou = bbox_overlaps(gt_box[None, 4:], boxes[:, 4:])[0]
            inds = (sub_iou >= iou_thresh) & (obj_iou >= iou_thresh)

        # Record this GT row on every prediction that passed both stages.
        for i in pred_rows[inds]:
            pred_to_gt[i].append(int(gt_ind))
    return pred_to_gt
def load_graphs(graphs_file, mode='train', num_im=-1, num_val_im=0,
                filter_empty_rels=True, filter_non_overlap=False):
    """
    Load the file containing the GT boxes and relations, as well as the
    dataset split.

    :param graphs_file: path to the HDF5 file
    :param mode: (train, val, or test). 'test' selects split value 2; 'train'
        and 'val' both select split value 0 and are separated by num_val_im.
    :param num_im: Number of images we want (-1 keeps all)
    :param num_val_im: Number of validation images: 'val' takes the first
        num_val_im selected images, 'train' takes the rest
    :param filter_empty_rels: if True, drop images without relations
    :param filter_non_overlap: If training, keep only relations whose boxes
        overlap and drop images left with none. Asserts mode == 'train'.
    :return: split_mask: boolean mask over all images in the file, True for
                        the images that were kept
             boxes: List where each element is a [num_gt, 4] array of ground
                    truth boxes (x1, y1, x2, y2)
             gt_classes: List where each element is a [num_gt] array of classes
             relationships: List where each element is a [num_r, 3] array of
                    (box_ind_1, box_ind_2, predicate) relationships
    :raises ValueError: if mode is not one of 'train', 'val', 'test'
    """
    if mode not in ('train', 'val', 'test'):
        raise ValueError('{} invalid'.format(mode))

    # Everything needed is copied into memory inside the context manager so
    # the HDF5 handle is released on return and on any exception.
    with h5py.File(graphs_file, 'r') as roi_h5:
        data_split = roi_h5['split'][:]
        split = 2 if mode == 'test' else 0
        split_mask = data_split == split

        # Filter out images without bounding boxes
        split_mask &= roi_h5['img_to_first_box'][:] >= 0
        if filter_empty_rels:
            split_mask &= roi_h5['img_to_first_rel'][:] >= 0

        image_index = np.where(split_mask)[0]
        if num_im > -1:
            image_index = image_index[:num_im]
        if num_val_im > 0:
            if mode == 'val':
                image_index = image_index[:num_val_im]
            elif mode == 'train':
                image_index = image_index[num_val_im:]

        # Rebuild the mask so it reflects the num_im / num_val_im truncation.
        split_mask = np.zeros_like(data_split).astype(bool)
        split_mask[image_index] = True

        # Get box information
        all_labels = roi_h5['labels'][:, 0]
        all_boxes = roi_h5['boxes_{}'.format(BOX_SCALE)][:]  # will index later

        im_to_first_box = roi_h5['img_to_first_box'][split_mask]
        im_to_last_box = roi_h5['img_to_last_box'][split_mask]
        im_to_first_rel = roi_h5['img_to_first_rel'][split_mask]
        im_to_last_rel = roi_h5['img_to_last_rel'][split_mask]

        # load relation labels
        _relations = roi_h5['relationships'][:]
        _relation_predicates = roi_h5['predicates'][:, 0]

    assert np.all(all_boxes[:, :2] >= 0)  # sanity check
    assert np.all(all_boxes[:, 2:] > 0)  # no empty box

    # convert from xc, yc, w, h to x1, y1, x2, y2
    all_boxes[:, :2] = all_boxes[:, :2] - all_boxes[:, 2:] / 2
    all_boxes[:, 2:] = all_boxes[:, :2] + all_boxes[:, 2:]

    assert (im_to_first_rel.shape[0] == im_to_last_rel.shape[0])
    assert (_relations.shape[0] == _relation_predicates.shape[0])  # sanity check

    # Get everything by image.
    boxes = []
    gt_classes = []
    relationships = []
    for i, im_idx in enumerate(image_index):
        first_box = im_to_first_box[i]
        last_box = im_to_last_box[i]
        boxes_i = all_boxes[first_box:last_box + 1, :]
        gt_classes_i = all_labels[first_box:last_box + 1]

        if im_to_first_rel[i] >= 0:
            first_rel = im_to_first_rel[i]
            last_rel = im_to_last_rel[i]
            predicates = _relation_predicates[first_rel:last_rel + 1]
            # Make the box indices relative to this image's first box.
            obj_idx = _relations[first_rel:last_rel + 1] - first_box
            assert np.all(obj_idx >= 0)
            assert np.all(obj_idx < boxes_i.shape[0])
            rels = np.column_stack((obj_idx, predicates))
        else:
            assert not filter_empty_rels
            rels = np.zeros((0, 3), dtype=np.int32)

        if filter_non_overlap:
            assert mode == 'train'
            inters = bbox_overlaps(boxes_i, boxes_i)
            rel_overs = inters[rels[:, 0], rels[:, 1]]
            inc = np.where(rel_overs > 0.0)[0]

            if inc.size > 0:
                rels = rels[inc]
            else:
                # No overlapping relation left: drop the image from the mask.
                split_mask[im_idx] = 0
                continue

        boxes.append(boxes_i)
        gt_classes.append(gt_classes_i)
        relationships.append(rels)

    return split_mask, boxes, gt_classes, relationships
def anchor_target_layer(gt_boxes, im_size, allowed_border=0):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.

    Steps: generate the anchor grid, discard anchors that leave the image,
    measure overlap with the GT boxes, label anchors, then randomly subsample
    foreground and background down to the RPN batch size.

    :param gt_boxes: [x1, y1, x2, y2] boxes. These are assumed to be at the
        same scale as the image (IM_SCALE)
    :param im_size: Size of the image (h, w). Its larger side must equal
        IM_SCALE
    :param allowed_border: how far an anchor may extend past the image border
        and still be used
    :return: (anchors, anchor_inds, bbox_targets, labels)
        anchors: [num_used, 4] the anchors that received a label
        anchor_inds: [num_used, 3] their (h, w, A) position in the anchor grid
        bbox_targets: [num_used, 4] the GT box of highest overlap per anchor
        labels: [num_used] 1 for positive, 0 for negative
    :raises ValueError: if max(im_size) != IM_SCALE or no anchor lies inside
        the image
    """
    if max(im_size) != IM_SCALE:
        raise ValueError("im size is {}".format(im_size))
    height, width = im_size

    # Anchor grid; per the original notes its layout is (h, w, A, 4).
    anchor_grid = generate_anchors(
        base_size=ANCHOR_SIZE,
        feat_stride=16,
        anchor_scales=ANCHOR_SCALES,
        anchor_ratios=ANCHOR_RATIOS,
    )
    flat_anchors = anchor_grid.reshape((-1, 4))

    # Keep only anchors that stay within the (border-extended) image.
    inside_mask = (
        (flat_anchors[:, 0] >= -allowed_border)
        & (flat_anchors[:, 1] >= -allowed_border)
        & (flat_anchors[:, 2] < width + allowed_border)
        & (flat_anchors[:, 3] < height + allowed_border)
    )
    inds_inside = np.where(inside_mask)[0]
    inside_anchors = flat_anchors[inds_inside]
    if inside_anchors.size == 0:
        raise ValueError(
            "There were no good anchors for an image of size {} with boxes {}"
            .format(im_size, gt_boxes))

    # overlaps between the anchors and the gt boxes [num_anchors, num_gtboxes]
    overlaps = bbox_overlaps(inside_anchors, gt_boxes)
    num_inside = overlaps.shape[0]
    num_gt = overlaps.shape[1]

    # Best GT box for every anchor, and the overlap achieved with it.
    anchor_to_gtbox = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(anchor_to_gtbox.shape[0]), anchor_to_gtbox]

    # Best overlap reached for every GT box, and every anchor attaining it.
    gtbox_to_anchor = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gtbox_to_anchor, np.arange(num_gt)]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # label: 1 is positive, 0 is negative, -1 is dont care.
    # Background is assigned first so that positive labels can clobber it.
    labels = -np.ones(num_inside, dtype=np.int64)
    labels[max_overlaps < RPN_NEGATIVE_OVERLAP] = 0
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= RPN_POSITIVE_OVERLAP] = 1

    # subsample positive labels if we have too many
    num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    fg_excess = len(fg_inds) - num_fg
    if fg_excess > 0:
        labels[npr.choice(fg_inds, size=fg_excess, replace=False)] = -1

    # subsample negative labels if we have too many
    num_bg = RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        labels[npr.choice(bg_inds, size=(len(bg_inds) - num_bg),
                          replace=False)] = -1

    # Scatter the labels back onto the full grid to recover grid positions.
    labels_unmap = -np.ones(flat_anchors.shape[0], dtype=np.int64)
    labels_unmap[inds_inside] = labels
    labels_grid = labels_unmap.reshape(anchor_grid.shape[:-1])
    anchor_inds = np.column_stack(np.where(labels_grid >= 0))

    # These ought to be in the same order as anchor_inds.
    used = np.where(labels >= 0)[0]
    anchors = inside_anchors[used]
    bbox_targets = gt_boxes[anchor_to_gtbox[used]]
    labels = labels[used]

    assert np.all(labels >= 0)

    return anchors, anchor_inds, bbox_targets, labels