Ejemplo n.º 1
0
def cal_configure(gt_triplets, pred_triplets, num_predicates=50):
    """Build a predicate confusion matrix between GT and predicted triplets.

    Each GT triplet is matched to a predicted triplet by comparing the first
    two columns (the subject/object pair), then the (gt predicate, predicted
    predicate) pair is tallied.  Predicate labels are assumed 1-based, hence
    the ``- 1`` when indexing the matrix.

    :param gt_triplets: (num_gt, 3) int array; cols 0-1 identify the pair,
        col 2 is the 1-based predicate label.
    :param pred_triplets: (num_pred, 3) int array with the same layout.
    :param num_predicates: number of predicate categories, i.e. the side of
        the square confusion matrix.  Defaults to 50 (the original hard-coded
        value, matching Visual Genome).
    :return: (num_predicates, num_predicates) int64 confusion matrix.
    """
    match_pair = intersect_2d(gt_triplets[:, :2], pred_triplets[:, :2])
    # NOTE(review): argmax over an all-False row silently yields column 0, so
    # GT triplets with no pair match still count against pred_triplets[0].
    # Behavior preserved from the original — confirm before relying on it.
    match_pair = match_pair.argmax(1)
    conf_matrix = np.zeros((num_predicates, num_predicates), dtype=np.int64)
    for i, j in enumerate(match_pair):
        conf_matrix[gt_triplets[i, 2] - 1, pred_triplets[j, 2] - 1] += 1
    return conf_matrix
def _compute_pred_matches(gt_triplets,
                          pred_triplets,
                          gt_boxes,
                          pred_boxes,
                          iou_thresh,
                          phrdet=False,
                          rel_cats=None):
    """
    Given a set of predicted triplets, return the list of matching GT's for each of the
    given predictions, bucketed per relation category.

    :param gt_triplets: (num_gt, k) array; rows are compared elementwise against
        pred_triplets.  Column 1 is used below to look up the relation category,
        i.e. this variant assumes (sub, pred, obj) column order — TODO confirm
        against the caller (a sibling function indexes the predicate at col 2).
    :param pred_triplets: (num_pred, k) array with the same layout.
    :param gt_boxes: (num_gt, 8) boxes; subject box in cols 0:4, object in 4:8.
    :param pred_boxes: (num_pred, 8) boxes with the same layout.
    :param iou_thresh: IoU threshold a box (pair) must meet to count as a match.
    :param phrdet: if True, match on the union of subject+object boxes instead
        of requiring each box to match independently.
    :param rel_cats: dict mapping relation-category id -> category name.
        NOTE(review): despite the ``None`` default, this is dereferenced
        unconditionally below (``rel_cats.items()``), so passing ``None``
        raises AttributeError; its values must also include 'all_rel_cates'
        for the aggregate bucket to exist.
    :return: dict mapping category name -> list (length num_pred) of lists of
        matched GT triplet indices.
    """
    # This performs a matrix multiplication-esque thing between the two arrays
    # Instead of summing, we want the equality, so we reduce in that way
    # The rows correspond to GT triplets, columns to pred triplets
    keeps = intersect_2d(gt_triplets, pred_triplets)
    gt_has_match = keeps.any(1)
    # One bucket of per-prediction match lists for every relation category.
    pred_to_gt = {}
    for rel_cat_id, rel_cat_name in rel_cats.items():
        pred_to_gt[rel_cat_name] = [[] for x in range(pred_boxes.shape[0])]
    # Only iterate over GT triplets that have at least one triplet-level match.
    for gt_ind, gt_box, keep_inds in zip(
            np.where(gt_has_match)[0],
            gt_boxes[gt_has_match],
            keeps[gt_has_match],
    ):
        # keep_inds is a boolean row of `keeps`: which predictions match this GT.
        boxes = pred_boxes[keep_inds]
        if phrdet:
            # Evaluate where the union box > 0.5
            gt_box_union = gt_box.reshape((2, 4))
            gt_box_union = np.concatenate(
                (gt_box_union.min(0)[:2], gt_box_union.max(0)[2:]), 0)

            box_union = boxes.reshape((-1, 2, 4))
            box_union = np.concatenate(
                (box_union.min(1)[:, :2], box_union.max(1)[:, 2:]), 1)

            inds = bbox_overlaps(gt_box_union[None],
                                 box_union)[0] >= iou_thresh

        else:
            # Subject and object boxes must each clear the IoU threshold.
            sub_iou = bbox_overlaps(gt_box[None, :4], boxes[:, :4])[0]
            obj_iou = bbox_overlaps(gt_box[None, 4:], boxes[:, 4:])[0]

            inds = (sub_iou >= iou_thresh) & (obj_iou >= iou_thresh)

        # Record the match both in the aggregate bucket and in the bucket of
        # this GT triplet's relation category (looked up via column 1).
        for i in np.where(keep_inds)[0][inds]:
            pred_to_gt['all_rel_cates'][i].append(int(gt_ind))
            pred_to_gt[rel_cats[gt_triplets[int(gt_ind),
                                            1]]][i].append(int(gt_ind))
    return pred_to_gt
Ejemplo n.º 3
0
def _compute_pred_matches(gt_triplets,
                          pred_triplets,
                          gt_boxes,
                          pred_boxes,
                          iou_thresh,
                          phrdet=False):
    """
    For every predicted triplet, collect the GT triplets it matches.

    A prediction matches a GT triplet when (a) the triplet labels are equal
    row-wise and (b) the boxes pass the IoU test: either both subject and
    object boxes independently (default), or the union box (``phrdet``).

    :param gt_triplets: (num_gt, 3) triplet label array.
    :param pred_triplets: (num_pred, 3) triplet label array.
    :param gt_boxes: (num_gt, 8) boxes, subject in cols 0:4, object in 4:8.
    :param pred_boxes: (num_pred, 8) boxes with the same layout.
    :param iou_thresh: IoU threshold for a box match.
    :param phrdet: match on the subject+object union box instead.
    :return: list of length num_pred; entry i is the list of GT indices
        matched by prediction i (e.g. pred_to_gt[0] == [1, 2] means the
        0-th prediction matches GT relations 1 and 2).
    """
    # Boolean (num_gt, num_pred) table: True where the label triplets agree.
    keeps = intersect_2d(gt_triplets, pred_triplets)
    gt_has_match = keeps.any(1)
    pred_to_gt = [[] for _ in range(pred_boxes.shape[0])]

    matched_gt_inds = np.where(gt_has_match)[0]
    for gt_ind, gt_box, keep_inds in zip(matched_gt_inds,
                                         gt_boxes[gt_has_match],
                                         keeps[gt_has_match]):
        # Candidate prediction boxes whose triplet labels match this GT.
        cand_boxes = pred_boxes[keep_inds]

        if phrdet:
            # Collapse subject+object into a single enclosing union box,
            # for GT and candidates alike, then threshold its IoU.
            gt_corners = gt_box.reshape((2, 4))
            gt_union = np.concatenate(
                (gt_corners.min(0)[:2], gt_corners.max(0)[2:]), 0)

            cand_corners = cand_boxes.reshape((-1, 2, 4))
            cand_union = np.concatenate(
                (cand_corners.min(1)[:, :2], cand_corners.max(1)[:, 2:]), 1)

            passed = bbox_overlaps(gt_union[None], cand_union)[0] >= iou_thresh
        else:
            # Subject AND object boxes must each clear the threshold.
            sub_ok = bbox_overlaps(gt_box[None, :4],
                                   cand_boxes[:, :4])[0] >= iou_thresh
            obj_ok = bbox_overlaps(gt_box[None, 4:],
                                   cand_boxes[:, 4:])[0] >= iou_thresh
            passed = sub_ok & obj_ok

        # Map surviving candidates back to absolute prediction indices.
        for pred_ind in np.where(keep_inds)[0][passed]:
            pred_to_gt[pred_ind].append(int(gt_ind))
    return pred_to_gt
Ejemplo n.º 4
0
def get_counts(train_data=None, must_overlap=True):
    """
    Get counts of all of the relations. Used for modeling directly P(rel | o1, o2)

    :param train_data: dataset providing gt_classes, relationships, gt_boxes,
        num_classes and num_predicates.  Defaults to the VG training split,
        which is now constructed lazily on first call.
    :param must_overlap: forwarded to box_filter when building the background
        candidate pairs.
    :return: (fg_matrix, bg_matrix) — fg_matrix[o1, o2, rel] counts annotated
        relations per class pair; bg_matrix[o1, o2] counts background pairs.
    """
    # The original signature used ``train_data=VG(...)`` as the default.
    # Python evaluates defaults once, at definition time, so the whole dataset
    # was loaded at import and shared across calls.  Build it lazily instead;
    # callers that pass train_data explicitly are unaffected.
    if train_data is None:
        train_data = VG(mode='train', filter_duplicate_rels=False,
                        num_val_im=5000)

    fg_matrix = np.zeros((
        train_data.num_classes,
        train_data.num_classes,
        train_data.num_predicates,
    ),
                         dtype=np.int64)

    bg_matrix = np.zeros((
        train_data.num_classes,
        train_data.num_classes,
    ),
                         dtype=np.int64)

    for ex_ind in range(len(train_data)):
        gt_classes = train_data.gt_classes[ex_ind].copy()
        gt_relations = train_data.relationships[ex_ind].copy()
        gt_boxes = train_data.gt_boxes[ex_ind].copy()

        # Foreground: count every annotated (subject class, object class, rel).
        o1o2 = gt_classes[gt_relations[:, :2]]
        for (o1, o2), gtr in zip(o1o2, gt_relations[:, 2]):
            fg_matrix[o1, o2, gtr] += 1

        # Background: class pairs of (optionally overlapping) box pairs.
        o1o2_total = gt_classes[np.array(box_filter(gt_boxes,
                                                    must_overlap=must_overlap),
                                         dtype=int)]
        # NOTE(review): only candidate pairs whose class pair also appears
        # among the annotated relations are counted — preserved from the
        # original; confirm this filtering is intended.
        mask = intersect_2d(o1o2_total, o1o2).any(1)
        index = np.where(mask)[0]
        o1o2_bg = o1o2_total[index]
        for (o1, o2) in o1o2_bg:
            bg_matrix[o1, o2] += 1

    return fg_matrix, bg_matrix
Ejemplo n.º 5
0
def evaluate_from_dict(gt_entry,
                       pred_entry,
                       mode,
                       result_dict,
                       multiple_preds=False,
                       viz_dict=None,
                       **kwargs):
    """
    Shortcut to doing evaluate_recall from dict

    :param gt_entry: Dictionary containing gt_relations, gt_boxes, gt_classes
    :param pred_entry: Dictionary containing pred_rel_inds, rel_scores, and
        (depending on mode) pred_boxes, pred_classes, obj_scores
    :param mode: one of 'predcls', 'sgcls', 'sgdet', 'phrdet', 'preddet'
    :param result_dict: accumulator; result_dict[mode + '_recall'][k] is the
        list that recall@k values are appended to
    :param multiple_preds: allow several predicates per (subject, object) pair
    :param viz_dict: unused here; kept for interface compatibility
    :param kwargs: forwarded to evaluate_recall
    :return: (pred_to_gt, pred_5ples, rel_scores), or (None, None, None) for
        'preddet' mode (recalls are recorded in result_dict instead)
    """
    gt_rels = gt_entry['gt_relations']
    gt_boxes = gt_entry['gt_boxes'].astype(float)
    gt_classes = gt_entry['gt_classes']

    pred_rel_inds = pred_entry['pred_rel_inds']
    rel_scores = pred_entry['rel_scores']

    if mode == 'predcls':
        # GT boxes and classes are given; only predicates are predicted.
        pred_boxes = gt_boxes
        pred_classes = gt_classes
        obj_scores = np.ones(gt_classes.shape[0])
    elif mode == 'sgcls':
        # GT boxes are given; classes and predicates are predicted.
        pred_boxes = gt_boxes
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'sgdet' or mode == 'phrdet':
        # Everything (boxes, classes, predicates) is predicted.
        pred_boxes = pred_entry['pred_boxes'].astype(float)
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'preddet':
        # Only extract the indices that appear in GT
        prc = intersect_2d(pred_rel_inds, gt_rels[:, :2])
        if prc.size == 0:
            for k in result_dict[mode + '_recall']:
                result_dict[mode + '_recall'][k].append(0.0)
            return None, None, None
        pred_inds_per_gt = prc.argmax(0)
        pred_rel_inds = pred_rel_inds[pred_inds_per_gt]
        rel_scores = rel_scores[pred_inds_per_gt]

        # Now sort the matching ones by descending predicate score
        rel_scores_sorted = argsort_desc(rel_scores[:, 1:])
        rel_scores_sorted[:, 1] += 1  # skip background class 0
        rel_scores_sorted = np.column_stack(
            (pred_rel_inds[rel_scores_sorted[:, 0]], rel_scores_sorted[:, 1]))

        matches = intersect_2d(rel_scores_sorted, gt_rels)
        for k in result_dict[mode + '_recall']:
            rec_i = float(matches[:k].any(0).sum()) / float(gt_rels.shape[0])
            result_dict[mode + '_recall'][k].append(rec_i)
        return None, None, None
    else:
        raise ValueError('invalid mode')

    if multiple_preds:
        # Rank (pair, predicate) combinations by obj_score product * rel score
        # and keep the top 100, allowing several predicates per pair.
        obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)
        overall_scores = obj_scores_per_rel[:, None] * rel_scores[:, 1:]
        score_inds = argsort_desc(overall_scores)[:100]
        pred_rels = np.column_stack(
            (pred_rel_inds[score_inds[:, 0]], score_inds[:, 1] + 1))
        predicate_scores = rel_scores[score_inds[:, 0], score_inds[:, 1] + 1]
    else:
        # One predicate per pair: take the argmax over non-background classes
        # (+1 converts back to the absolute predicate label).
        pred_rels = np.column_stack(
            (pred_rel_inds, 1 + rel_scores[:, 1:].argmax(1)))
        predicate_scores = rel_scores[:, 1:].max(1)

    pred_to_gt, pred_5ples, rel_scores = evaluate_recall(
        gt_rels,
        gt_boxes,
        gt_classes,
        pred_rels,
        pred_boxes,
        pred_classes,
        predicate_scores,
        obj_scores,
        phrdet=mode == 'phrdet',
        **kwargs)

    for k in result_dict[mode + '_recall']:
        # Union of all GT indices matched within the top-k predictions.
        # Seed reduce with an empty array: the original raised TypeError
        # when pred_to_gt[:k] was empty (reduce of an empty sequence with
        # no initial value).
        match = reduce(np.union1d, pred_to_gt[:k], np.empty(0, dtype=np.int64))

        rec_i = float(len(match)) / float(gt_rels.shape[0])
        result_dict[mode + '_recall'][k].append(rec_i)
    return pred_to_gt, pred_5ples, rel_scores
Ejemplo n.º 6
0
def evaluate_from_dict(gt_entry, pred_entry, mode, result_dict, multiple_preds=False,
                       viz_dict=None, **kwargs):
    """
    Shortcut to doing evaluate_recall from dict

    :param gt_entry: Dictionary containing gt_relations, gt_boxes, gt_classes
    :param pred_entry: Dictionary containing pred_rel_inds, rel_scores, and
        (depending on mode) pred_boxes, pred_classes, obj_scores
    :param mode: one of 'predcls', 'sgcls', 'sgdet', 'phrdet', 'preddet'
    :param result_dict: accumulator; result_dict[mode + '_recall'][k] is the
        list that recall@k values are appended to
    :param multiple_preds: allow several predicates per (subject, object) pair
    :param viz_dict: unused here; kept for interface compatibility
    :param kwargs: forwarded to evaluate_recall
    :return: (pred_to_gt, pred_5ples, rel_scores), or (None, None, None) for
        'preddet' mode (recalls are recorded in result_dict instead)
    """
    gt_rels = gt_entry['gt_relations']
    gt_boxes = gt_entry['gt_boxes'].astype(float)
    gt_classes = gt_entry['gt_classes']

    pred_rel_inds = pred_entry['pred_rel_inds']
    rel_scores = pred_entry['rel_scores']

    if mode == 'predcls':
        # GT boxes and classes are given; only predicates are predicted.
        pred_boxes = gt_boxes
        pred_classes = gt_classes
        obj_scores = np.ones(gt_classes.shape[0])
    elif mode == 'sgcls':
        # GT boxes are given; classes and predicates are predicted.
        pred_boxes = gt_boxes
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'sgdet' or mode == 'phrdet':
        # Everything (boxes, classes, predicates) is predicted.
        pred_boxes = pred_entry['pred_boxes'].astype(float)
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'preddet':
        # Only extract the indices that appear in GT
        prc = intersect_2d(pred_rel_inds, gt_rels[:, :2])
        if prc.size == 0:
            for k in result_dict[mode + '_recall']:
                result_dict[mode + '_recall'][k].append(0.0)
            return None, None, None
        pred_inds_per_gt = prc.argmax(0)
        pred_rel_inds = pred_rel_inds[pred_inds_per_gt]
        rel_scores = rel_scores[pred_inds_per_gt]

        # Now sort the matching ones by descending predicate score
        rel_scores_sorted = argsort_desc(rel_scores[:,1:])
        rel_scores_sorted[:,1] += 1
        rel_scores_sorted = np.column_stack((pred_rel_inds[rel_scores_sorted[:,0]], rel_scores_sorted[:,1]))

        matches = intersect_2d(rel_scores_sorted, gt_rels)
        for k in result_dict[mode + '_recall']:
            rec_i = float(matches[:k].any(0).sum()) / float(gt_rels.shape[0])
            result_dict[mode + '_recall'][k].append(rec_i)
        return None, None, None
    else:
        raise ValueError('invalid mode')

    # multiple_preds controls whether several predicates are allowed for the
    # same (box0, box1) pair.
    if multiple_preds:
        # overall_scores ranks every (pair, predicate) combination by the
        # product of the two object scores and the predicate score.
        obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)
        overall_scores = obj_scores_per_rel[:,None] * rel_scores[:,1:]
        # sort and keep the 100 highest overall_scores
        score_inds = argsort_desc(overall_scores)[:100]
        pred_rels = np.column_stack((pred_rel_inds[score_inds[:,0]], score_inds[:,1]+1))
        predicate_scores = rel_scores[score_inds[:,0], score_inds[:,1]+1]
    else:
        # One relationship per pair: keep the highest-scoring predicate.
        # pred_rels is [num_rel, 2+1]; predicate_scores is [num_rel]
        # (+1 converts the argmax over non-background columns back to the
        # absolute predicate label).
        pred_rels = np.column_stack((pred_rel_inds, 1+rel_scores[:,1:].argmax(1)))
        predicate_scores = rel_scores[:,1:].max(1)  # best predicate score per pair

    # Compare GT and predictions: boxes, classes, predicate.
    pred_to_gt, pred_5ples, rel_scores = evaluate_recall(
                gt_rels, gt_boxes, gt_classes,
                pred_rels, pred_boxes, pred_classes,
                predicate_scores, obj_scores, phrdet= mode=='phrdet',
                **kwargs)
    
    #######################################################
    # Compute recall at each cutoff k (e.g. @20 @50 @100).
    for k in result_dict[mode + '_recall']:
        # Union of the GT indices matched within the first k predictions;
        # shape: (#correct,)
        match = reduce(np.union1d, pred_to_gt[:k]) 
        # recall = matched GT relations / total GT relations
        rec_i = float(len(match)) / float(gt_rels.shape[0])

        # recall value at this k, stored in result_dict
        result_dict[mode + '_recall'][k].append(rec_i)  
    
    # pred_to_gt: list of length num_rel; each entry lists matched GT indices
    # pred_5ples: (num_rel, 5), (id0, id1, cls0, cls1, rel)
    # rel_scores: (num_rel, 3), (box1, box2, predicate score), sorted by overall score
    return pred_to_gt, pred_5ples, rel_scores
Ejemplo n.º 7
0
def _compute_pred_matches(gt_triplets, pred_triplets,
                 gt_boxes, pred_boxes, iou_thresh, phrdet=False):
    """
    Given a set of predicted triplets, return the list of matching GT's for each of the
    given predictions.

    :param gt_triplets: (num_gt, 3) subject-predicate-object label triplets
    :param pred_triplets: (num_pred, 3) triplets with the same layout
    :param gt_boxes: (num_gt, 8) boxes, subject in cols 0:4, object in 4:8
    :param pred_boxes: (num_pred, 8) boxes with the same layout
    :param iou_thresh: IoU threshold for a box match
    :param phrdet: if True, match on the subject+object union box instead of
        each box independently
    :return: list of length num_pred; entry i is the list of GT indices matched
        by prediction i
    """
    # subject-predicate-object triplet matching
    
    # This performs a matrix multiplication-esque thing between the two arrays
    # Instead of summing, we want the equality, so we reduce in that way

    # The rows correspond to GT triplets, columns to pred triplets
    # Given two arrays [m1, n], [m2,n], returns a [m1, m2] array where each entry is True if those
    # rows match.
    keeps = intersect_2d(gt_triplets, pred_triplets)
    # Boolean vector of length gt_triplets.shape[0];
    # True means at least one pred_triplet matches that gt_triplet.
    gt_has_match = keeps.any(1) 
    # len(pred_to_gt) = pred_triplets.shape[0]
    pred_to_gt = [[] for x in range(pred_boxes.shape[0])]
    
    # Box matching; iterates once per GT triplet that has a label match.
    for gt_ind, gt_box, keep_inds in zip(np.where(gt_has_match)[0],
                                         gt_boxes[gt_has_match],
                                         keeps[gt_has_match],
                                         ):
        
        # keep_inds is a row of keeps; select the 8-dim boxes of the
        # predictions whose triplet labels match this GT.
        boxes = pred_boxes[keep_inds]
        if phrdet:
            # Evaluate where the union box > 0.5
            gt_box_union = gt_box.reshape((2, 4))
            gt_box_union = np.concatenate((gt_box_union.min(0)[:2], gt_box_union.max(0)[2:]), 0)

            box_union = boxes.reshape((-1, 2, 4))
            box_union = np.concatenate((box_union.min(1)[:,:2], box_union.max(1)[:,2:]), 1)

            inds = bbox_overlaps(gt_box_union[None], box_union)[0] >= iou_thresh

        else:
            # Scene graph detection: subject and object boxes must each
            # independently overlap the corresponding GT box.
            sub_iou = bbox_overlaps(gt_box[None,:4], boxes[:, :4])[0]
            obj_iou = bbox_overlaps(gt_box[None,4:], boxes[:, 4:])[0]

            inds = (sub_iou >= iou_thresh) & (obj_iou >= iou_thresh)

        # Append this GT's row number to every prediction that passed the IoU
        # test.  E.g. if GT triplets 3 and 5 both match prediction 10 (entries
        # (3, 10) and (5, 10) in keeps), pred_to_gt[10] ends up as [3, 5]
        # (different boxes but same classes).  Since pred_triplets is assumed
        # score-sorted, matches cluster in the early entries of pred_to_gt.
        for i in np.where(keep_inds)[0][inds]:
            pred_to_gt[i].append(int(gt_ind))

    return pred_to_gt
Ejemplo n.º 8
0
    def forward(self,
                x,
                im_sizes,
                image_offset,
                gt_boxes=None,
                gt_classes=None,
                gt_rels=None,
                proposals=None,
                train_anchor_inds=None,
                return_fmap=False):
        """
        Forward pass for detection
        :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
        :param im_sizes: A numpy array of (h, w, scale) for each image.
        :param image_offset: Offset onto what image we're on for MGPU training (if single GPU this is 0)
        :param gt_boxes: [num_gt, 4] GT boxes over the batch (training only).
        :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
        :param gt_rels: GT relations (training only).
        :param proposals: external proposals, forwarded to the detector.
        :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that will
                                  be used to compute the training loss. Each (img_ind, fpn_idx)
        :param return_fmap: accepted for interface compatibility; the detector
            is always called with return_fmap=True below.
        :raises ValueError: if the detector returns an empty result.
        :return: If train:
            scores, boxdeltas, labels, boxes, boxtargets, rpnscores, rpnboxes, rellabels

            if test:
            prob dists, boxes, img inds, maxscores, classes

        """
        result = self.detector(x,
                               im_sizes,
                               image_offset,
                               gt_boxes,
                               gt_classes,
                               gt_rels,
                               proposals,
                               train_anchor_inds,
                               return_fmap=True)
        # rel_feat = self.relationship_feat.feature_map(x)

        if result.is_none():
            # Fix: the original ``return ValueError("heck")`` handed the
            # exception object back to the caller instead of raising it.
            raise ValueError("heck")

        im_inds = result.im_inds - image_offset
        boxes = result.rm_box_priors

        # In sgdet mode, sample relation labels on the fly when the detector
        # did not provide them.
        if self.training and result.rel_labels is None:
            assert self.mode == 'sgdet'
            result.rel_labels = rel_assignments(im_inds.data,
                                                boxes.data,
                                                result.rm_obj_labels.data,
                                                gt_boxes.data,
                                                gt_classes.data,
                                                gt_rels.data,
                                                image_offset,
                                                filter_non_overlap=True,
                                                num_sample_per_gt=1)

        rel_inds = self.get_rel_inds(result.rel_labels, im_inds, boxes)
        spt_feats = self.get_boxes_encode(boxes, rel_inds)
        pair_inds = self.union_pairs(im_inds)

        if self.hook_for_grad:
            rel_inds = gt_rels[:, :-1].data

        # Keep the feature map attached to the graph only when gradients
        # through it are being inspected; otherwise detach it.
        if self.hook_for_grad:
            fmap = result.fmap
            fmap.register_hook(self.save_grad)
        else:
            fmap = result.fmap.detach()

        rois = torch.cat((im_inds[:, None].float(), boxes), 1)

        result.obj_fmap = self.obj_feature_map(fmap, rois)
        # result.obj_dists_head = self.obj_classify_head(obj_fmap_rel)

        # Soft class-distribution embeddings for object features and the LSTM.
        obj_embed = F.softmax(result.rm_obj_dists,
                              dim=1) @ self.obj_embed.weight
        obj_embed_lstm = F.softmax(result.rm_obj_dists,
                                   dim=1) @ self.embeddings4lstm.weight
        pos_embed = self.pos_embed(Variable(center_size(boxes.data)))
        obj_pre_rep = torch.cat((result.obj_fmap, obj_embed, pos_embed), 1)
        obj_feats = self.merge_obj_feats(obj_pre_rep)
        # obj_feats=self.trans(obj_feats)
        obj_feats_lstm = torch.cat(
            (obj_feats, obj_embed_lstm),
            -1).contiguous().view(1, obj_feats.size(0), -1)

        # obj_feats = F.relu(obj_feats)

        phr_ori = self.visual_rep(fmap, rois, pair_inds[:, 1:])
        # Align each rel_ind pair with its row in pair_inds to gather the
        # corresponding visual representation.
        vr_indices = torch.from_numpy(
            intersect_2d(rel_inds[:, 1:].cpu().numpy(),
                         pair_inds[:, 1:].cpu().numpy()).astype(
                             np.uint8)).cuda().max(-1)[1]
        vr = phr_ori[vr_indices]

        phr_feats_high = self.get_phr_feats(phr_ori)

        # First LSTM pass + message passing round.
        obj_feats_lstm_output, (obj_hidden_states,
                                obj_cell_states) = self.lstm(obj_feats_lstm)

        rm_obj_dists1 = result.rm_obj_dists + self.context.decoder_lin(
            obj_feats_lstm_output.squeeze())
        obj_feats_output = self.obj_mps1(obj_feats_lstm_output.view(-1, obj_feats_lstm_output.size(-1)), \
                            phr_feats_high, im_inds, pair_inds)

        obj_embed_lstm1 = F.softmax(rm_obj_dists1,
                                    dim=1) @ self.embeddings4lstm.weight

        # Second LSTM pass, reusing the hidden state, + another MP round.
        obj_feats_lstm1 = torch.cat((obj_feats_output, obj_embed_lstm1), -1).contiguous().view(1, \
                            obj_feats_output.size(0), -1)
        obj_feats_lstm_output, _ = self.lstm(
            obj_feats_lstm1, (obj_hidden_states, obj_cell_states))

        rm_obj_dists2 = rm_obj_dists1 + self.context.decoder_lin(
            obj_feats_lstm_output.squeeze())
        obj_feats_output = self.obj_mps1(obj_feats_lstm_output.view(-1, obj_feats_lstm_output.size(-1)), \
                            phr_feats_high, im_inds, pair_inds)

        # Prevent gradients from flowing back into score_fc from elsewhere
        result.rm_obj_dists, result.obj_preds = self.context(
            rm_obj_dists2, obj_feats_output, result.rm_obj_labels
            if self.training or self.mode == 'predcls' else None, boxes.data,
            result.boxes_all)

        obj_dtype = result.obj_fmap.data.type()
        obj_preds_embeds = torch.index_select(self.ort_embedding, 0,
                                              result.obj_preds).type(obj_dtype)
        # Normalized box coordinates plus relative area as extra features.
        tranfered_boxes = torch.stack(
            (boxes[:, 0] / IM_SCALE, boxes[:, 3] / IM_SCALE,
             boxes[:, 2] / IM_SCALE, boxes[:, 1] / IM_SCALE,
             ((boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])) /
             (IM_SCALE**2)), -1).type(obj_dtype)
        obj_features = torch.cat(
            (result.obj_fmap, obj_preds_embeds, tranfered_boxes), -1)
        obj_features_merge = self.merge_obj_low(
            obj_features) + self.merge_obj_high(obj_feats_output)

        # Split into subject and object representations
        result.subj_rep = self.post_emb_s(obj_features_merge)[rel_inds[:, 1]]
        result.obj_rep = self.post_emb_o(obj_features_merge)[rel_inds[:, 2]]
        prod_rep = result.subj_rep * result.obj_rep

        # obj_pools = self.visual_obj(result.fmap.detach(), rois, rel_inds[:, 1:])
        # rel_pools = self.relationship_feat.union_rel_pooling(rel_feat, rois, rel_inds[:, 1:])
        # context_pools = torch.cat([obj_pools, rel_pools], 1)
        # merge_pool = self.merge_feat(context_pools)
        # vr = self.roi_fmap(merge_pool)

        # vr = self.rel_refine(vr)

        prod_rep = prod_rep * vr

        if self.use_tanh:
            prod_rep = F.tanh(prod_rep)

        prod_rep = torch.cat((prod_rep, spt_feats), -1)
        freq_gate = self.freq_gate(prod_rep)
        freq_gate = F.sigmoid(freq_gate)
        result.rel_dists = self.rel_compress(prod_rep)
        # result.rank_factor = self.ranking_module(prod_rep).view(-1)

        # Gated frequency-bias correction from label co-occurrence statistics.
        if self.use_bias:
            result.rel_dists = result.rel_dists + freq_gate * self.freq_bias.index_with_labels(
                torch.stack((
                    result.obj_preds[rel_inds[:, 1]],
                    result.obj_preds[rel_inds[:, 2]],
                ), 1))

        if self.training:
            return result

        # Inference: pick each object's score for its predicted class.
        twod_inds = arange(
            result.obj_preds.data) * self.num_classes + result.obj_preds.data
        result.obj_scores = F.softmax(result.rm_obj_dists,
                                      dim=1).view(-1)[twod_inds]

        # Bbox regression
        if self.mode == 'sgdet':
            bboxes = result.boxes_all.view(-1, 4)[twod_inds].view(
                result.boxes_all.size(0), 4)
        else:
            # Boxes will get fixed by filter_dets function.
            bboxes = result.rm_box_priors

        rel_rep = F.softmax(result.rel_dists, dim=1)
        # rel_rep = smooth_one_hot(rel_rep)
        # rank_factor = F.sigmoid(result.rank_factor)

        return filter_dets(bboxes, result.obj_scores, result.obj_preds,
                           rel_inds[:, 1:], rel_rep)
Ejemplo n.º 9
0
def evaluate_from_dict(gt_entry, pred_entry, mode, result_dict, multiple_preds=False,
                       viz_dict=None, **kwargs):
    """
    Shortcut to doing evaluate_recall from dict, additionally recording
    per-predicate hit counts, object mAP, and pair recall.

    :param gt_entry: Dictionary containing gt_relations, gt_boxes, gt_classes
    :param pred_entry: Dictionary containing pred_rel_inds, rel_scores, and
        (depending on mode) pred_boxes, pred_classes, obj_scores
    :param mode: one of 'predcls', 'sgcls', 'sgdet', 'phrdet', 'preddet'
    :param result_dict: accumulator for recall, pair-recall, mAP, and
        per-predicate counters
    :param multiple_preds: allow several predicates per (subject, object) pair
    :param viz_dict: unused here; kept for interface compatibility
    :param kwargs: forwarded to evaluate_recall
    :return: (pred_to_gt, pred_5ples, predicate_scores, pred_rels).
        NOTE(review): the 'preddet' branch returns only 3 values
        (rel_scores_sorted, None, rel_scores) — preserved from the original;
        callers must handle the differing arity.
    """
    gt_rels = gt_entry['gt_relations']
    gt_boxes = gt_entry['gt_boxes'].astype(float)
    gt_classes = gt_entry['gt_classes']

    pred_rel_inds = pred_entry['pred_rel_inds']
    rel_scores = pred_entry['rel_scores']
    # Per-predicate hit / GT counters (predicate labels are 1-based; column
    # count of rel_scores includes a background column, hence the -1).
    pred_predicates = np.zeros((rel_scores.shape[-1]-1,))
    gt_predicates = np.zeros((rel_scores.shape[-1]-1,))

    # Objects that participate in at least one GT relation.
    valid_mask = np.bincount(gt_rels[:, :-1].reshape(-1), minlength=len(gt_classes)) > 0

    for i in gt_rels[:, 2]:
        gt_predicates[i - 1] += 1.0

    if mode == 'predcls':
        # GT boxes and classes are given; only predicates are predicted.
        pred_boxes = gt_boxes
        pred_classes = gt_classes
        obj_scores = np.ones(gt_classes.shape[0])
        pred_rel_inds, rel_scores = filter_out_ind(pred_rel_inds, rel_scores, gt_classes.shape[0])
    elif mode == 'sgcls':
        pred_boxes = gt_boxes
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'sgdet' or mode == 'phrdet':
        pred_boxes = pred_entry['pred_boxes'].astype(float)
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'preddet':
        # Only extract the indices that appear in GT
        prc = intersect_2d(pred_rel_inds, gt_rels[:, :2])
        if prc.size == 0:
            for k in result_dict[mode + '_recall']:
                result_dict[mode + '_recall'][k].append(0.0)
            return None, None, None
        pred_inds_per_gt = np.where(prc)[0]
        pred_rel_inds = pred_rel_inds[pred_inds_per_gt]
        rel_scores = rel_scores[pred_inds_per_gt]

        # Now sort the matching ones by descending predicate score
        rel_scores_sorted = argsort_desc(rel_scores[:,1:])
        rel_scores = np.sort(np.ravel(rel_scores[:,1:]))[::-1]
        rel_scores_sorted[:,1] += 1
        rel_scores_sorted = np.column_stack((pred_rel_inds[rel_scores_sorted[:,0]], rel_scores_sorted[:,1]))

        matches = intersect_2d(rel_scores_sorted, gt_rels)
        for k in result_dict[mode + '_recall']:
            rec_i = float(matches[:k].any(0).sum()) / float(gt_rels.shape[0])
            result_dict[mode + '_recall'][k].append(rec_i)
        return rel_scores_sorted, None, rel_scores
    else:
        raise ValueError('invalid mode')

    if multiple_preds:
        # Rank (pair, predicate) combinations and keep the top 100.
        obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)
        overall_scores = obj_scores_per_rel[:,None] * rel_scores[:,1:]
        score_inds = argsort_desc(overall_scores)[:100]
        pred_rels = np.column_stack((pred_rel_inds[score_inds[:,0]], score_inds[:,1]+1))
        predicate_scores = rel_scores[score_inds[:,0], score_inds[:,1]+1]
    else:
        # One predicate per pair: highest-scoring non-background class.
        pred_rels = np.column_stack((pred_rel_inds, 1+rel_scores[:,1:].argmax(1)))
        predicate_scores = rel_scores[:,1:].max(1)

    pred_to_gt, pred_5ples, rel_scores = evaluate_recall(
                gt_rels, gt_boxes, gt_classes,
                pred_rels, pred_boxes, pred_classes,
                predicate_scores, obj_scores, phrdet= mode=='phrdet',
                **kwargs)
    # Per-predicate hit counts among the top-100 matched GT relations.
    match_indices = reduce(np.union1d, pred_to_gt[:100]).astype(np.int64)
    for i in gt_rels[match_indices, 2]:
        pred_predicates[i - 1] += 1.0
    objs_mAP = (gt_classes[valid_mask]==pred_classes[valid_mask]).mean()
    # Fix: the original appended objs_mAP twice (before and after the
    # predicate counters), double-counting every image in the mAP average.
    result_dict[mode + '_objs_mAP'].append(objs_mAP)
    result_dict[mode + '_pred_predicates'].append(pred_predicates)
    result_dict[mode + '_gt_predicates'].append(gt_predicates)

    for k in result_dict[mode + '_recall']:
        # Triplet recall@k: matched GT relations / total GT relations.
        match = reduce(np.union1d, pred_to_gt[:k])

        rec_i = float(len(match)) / float(gt_rels.shape[0])
        result_dict[mode + '_recall'][k].append(rec_i)
        # Pair recall@k ignores the predicate: a GT pair counts as recalled
        # if its (sub, obj) indices appear among the top-k predictions.
        prc = intersect_2d(gt_rels[:, :-1], pred_rel_inds[:k]).any(1).sum()
        p_i = float(prc) / float(gt_rels.shape[0])
        result_dict[mode + '_pairrecall'][k].append(p_i)

    return pred_to_gt, pred_5ples, predicate_scores, pred_rels
Ejemplo n.º 10
0
def rel_proposal_target(rois, rel_proposal_inds, gt_boxes, gt_classes, gt_rels, image_offset, mode):
    """
    Assign the target relation label for each proposal pair.

    When mode is 'predcls' or 'sgcls', the target is obtained by exact
    (im_ind, ind1, ind2) matching against the ground-truth relations.
    When mode is 'sgdet', the target is sampled by first computing IoU of the
    concatenated subject/object box pairs against the GT box pairs.

    :param rois: [num_rois, 5] tensor; column 0 is the image index
    :param rel_proposal_inds: [num_pairs, 3] tensor of (im_ind, ind1, ind2)
    :param gt_boxes: ground-truth boxes
    :param gt_classes: [num_gt, 2] tensor; column 0 is the image index
    :param gt_rels: [num_gt_rels, 4] tensor of (im_ind, ind1, ind2, predicate)
    :param image_offset: offset subtracted from gt_rels' image indices
    :param mode: one of 'predcls', 'sgcls', 'sgdet'
    :return: (proposal_labels, rels_to_gt) where proposal_labels is
             [num_kept, 4] (im_ind, ind1, ind2, label) sorted by pair index,
             and rels_to_gt maps each kept proposal to the index of its
             matched GT relation (-1 for background)
    """
    im_inds = rois[:, 0].long()

    num_im = im_inds[-1] + 1

    # Offset the object indices in fg_rels to refer to absolute (batch-wide)
    # indices rather than per-image indices.
    fg_rels = gt_rels.clone()
    fg_rels[:, 0] -= image_offset
    offset = {}
    for i, s, e in enumerate_by_image(gt_classes[:, 0]):
        offset[i] = s
    for i, s, e in enumerate_by_image(fg_rels[:, 0]):
        fg_rels[s:e, 1:3] += offset[i]

    rels_to_gt = []
    num_gt_rels_seen = 0

    if mode in ('predcls', 'sgcls'):
        rel_proposal_inds_np = rel_proposal_inds.cpu().numpy()
        fg_rels_np = fg_rels.cpu().numpy()  # Ngtp, 4

        # Locate proposals that exactly match a GT (im, sub, obj) triple.
        locate_inds = np.where(intersect_2d(rel_proposal_inds_np, fg_rels_np[:, :-1]))
        proposal_to_gt = defaultdict(list)
        for ind in zip(*locate_inds):
            proposal_to_gt[ind[0]].append(ind[1])
        # If a proposal matches several GT relations, pick one at random.
        for k, v in proposal_to_gt.items():
            v0 = v[0] if len(v) == 1 else np.random.choice(v)
            proposal_to_gt[k] = v0

        fg_proposal_inds = np.array(list(proposal_to_gt.keys())).astype(np.int32)
        bg_proposal_inds = np.array(list(set(list(range(rel_proposal_inds_np.shape[0]))) - set(list(proposal_to_gt.keys())))).astype(np.int32)

        rels_to_gt = np.ones(fg_proposal_inds.shape[0] + bg_proposal_inds.shape[0], dtype=np.int64) * -1
        if len(fg_proposal_inds) > 0:
            rels_to_gt[fg_proposal_inds] = np.array([proposal_to_gt[ind] for ind in fg_proposal_inds])

        # Subsample fg/bg proposals to the per-batch budget.
        num_fg = min(fg_proposal_inds.size, int(RELS_BATCHSIZE * REL_FG_FRACTION * num_im))
        if num_fg < fg_proposal_inds.size:
            fg_proposal_inds = np.random.choice(fg_proposal_inds, num_fg, replace=False)
        num_bg = min(bg_proposal_inds.size, int(RELS_BATCHSIZE * num_im) - num_fg)
        if num_bg < bg_proposal_inds.size:
            bg_proposal_inds = np.random.choice(bg_proposal_inds, num_bg, replace=False)

        if len(fg_proposal_inds) == 0:
            # No foreground: every kept proposal is background (label 0).
            bg_labels = np.zeros(bg_proposal_inds.size)
            bg_rel_labels = np.hstack((rel_proposal_inds_np[bg_proposal_inds], bg_labels[:, None]))
            proposal_labels = bg_rel_labels
            # BUGFIX: keep rels_to_gt aligned with proposal_labels. It used to
            # remain full-length here, which broke the final sort permutation.
            rels_to_gt = rels_to_gt[bg_proposal_inds]
        else:
            fg_labels = np.array([fg_rels[proposal_to_gt[ind], -1] for ind in fg_proposal_inds])
            fg_rel_labels = np.hstack((rel_proposal_inds_np[fg_proposal_inds], fg_labels[:, None]))

            bg_labels = np.zeros(bg_proposal_inds.size)
            bg_rel_labels = np.hstack((rel_proposal_inds_np[bg_proposal_inds], bg_labels[:, None]))
            proposal_labels = np.vstack((fg_rel_labels, bg_rel_labels))

            # Keep only the sampled proposals, fg first then bg, matching the
            # row order of proposal_labels.
            rels_to_gt = np.hstack((rels_to_gt[fg_proposal_inds], rels_to_gt[bg_proposal_inds]))

        proposal_labels = torch.LongTensor(proposal_labels).cuda(gt_rels.get_device())
        rels_to_gt = torch.LongTensor(rels_to_gt).cuda(gt_rels.get_device())
    else:
        assert mode == 'sgdet'

        # Concatenate subject and object boxes into 8-dim box pairs.
        gt_box_pairs = torch.cat((gt_boxes[fg_rels[:, 1]], gt_boxes[fg_rels[:, 2]]), 1)
        rel_proposal_pairs = torch.cat((rois[:, 1:][rel_proposal_inds[:, 0]], rois[:, 1:][rel_proposal_inds[:, 1]]), 1)

        # num_pairs[i + 1] = number of proposal pairs in image i, so the
        # cumulative sum gives per-image slice boundaries.
        num_pairs = np.zeros(num_im + 1).astype(np.int32)
        for i, s, e in enumerate_by_image(rel_proposal_inds[:, 0]):
            num_pairs[i + 1] = e - s

        cumsum_num_pairs = np.cumsum(num_pairs).astype(np.int32)
        fg_rel_per_image = int(RELS_BATCHSIZE * REL_FG_FRACTION)

        proposal_labels = []
        gt_rel_labels = fg_rels[:, -1].contiguous().view(-1)
        for i in range(1, num_im + 1):
            rel_proposal_inds_i = rel_proposal_inds[cumsum_num_pairs[i - 1]:cumsum_num_pairs[i]]
            rel_proposal_pairs_i = rel_proposal_pairs[cumsum_num_pairs[i - 1]:cumsum_num_pairs[i]]
            gt_box_pairs_i = gt_box_pairs[torch.nonzero(fg_rels[:, 0] == (i - 1)).view(-1)]

            gt_box_pairs_label_i = gt_rel_labels[torch.nonzero(fg_rels[:, 0] == (i - 1)).view(-1)].view(-1).contiguous()

            overlaps = co_bbox_overlaps(rel_proposal_pairs_i, gt_box_pairs_i)  # Np, Ngtp
            max_overlaps, gt_assignment = torch.max(overlaps, 1)  # Np
            fg_inds = torch.nonzero(max_overlaps >= 0.5).view(-1)
            fg_num = fg_inds.numel()

            bg_inds = torch.nonzero((max_overlaps < 0.5) & (max_overlaps >= 0.0)).view(-1)
            bg_num = bg_inds.numel()

            # BUGFIX: the in-place fill is Tensor.fill_; Tensor.fill does not
            # exist and raised AttributeError.
            rels_to_gt_i = torch.LongTensor(rel_proposal_pairs_i.shape[0]).fill_(-1).cuda(gt_rels.get_device())
            rels_to_gt_i[fg_inds] = gt_assignment[fg_inds] + num_gt_rels_seen

            if fg_num > 0 and bg_num > 0:
                # Sample fg up to the per-image budget, fill the rest with bg.
                fg_this_image = min(fg_rel_per_image, fg_num)
                rand_num = torch.from_numpy(np.random.permutation(fg_num)).long().cuda()
                fg_inds = fg_inds[rand_num[:fg_this_image]]

                # sampling bg (with replacement)
                bg_this_image = RELS_BATCHSIZE - fg_this_image
                rand_num = np.floor(np.random.rand(bg_this_image) * bg_num)
                rand_num = torch.from_numpy(rand_num).long().cuda()
                bg_inds = bg_inds[rand_num]

                rels_to_gt_i = torch.cat((rels_to_gt_i[fg_inds], rels_to_gt_i[bg_inds]), 0)

            elif fg_num > 0 and bg_num == 0:
                # Only fg available: fill the whole batch with (repeated) fg.
                rand_num = np.floor(np.random.rand(RELS_BATCHSIZE) * fg_num)
                rand_num = torch.from_numpy(rand_num).long().cuda()
                fg_inds = fg_inds[rand_num]
                fg_this_image = RELS_BATCHSIZE
                bg_this_image = 0
                rels_to_gt_i = rels_to_gt_i[fg_inds]
            elif bg_num > 0 and fg_num == 0:
                # Only bg available: fill the whole batch with (repeated) bg.
                rand_num = np.floor(np.random.rand(RELS_BATCHSIZE) * bg_num)
                rand_num = torch.from_numpy(rand_num).long().cuda()

                bg_inds = bg_inds[rand_num]
                bg_this_image = RELS_BATCHSIZE
                fg_this_image = 0
                rels_to_gt_i = rels_to_gt_i[bg_inds]
            else:
                # Every proposal has IoU >= 0, so at least bg should exist;
                # reaching here indicates empty proposals for this image.
                raise RuntimeError(
                    'rel_proposal_target: no fg or bg relation proposals for image %d' % (i - 1))

            keep_inds = torch.cat([fg_inds, bg_inds], 0)
            rel_proposal_inds_i = rel_proposal_inds_i[keep_inds]
            labels_i = gt_box_pairs_label_i[gt_assignment[keep_inds]]
            # Zero out labels of the bg tail (everything after the fg rows).
            if fg_this_image < labels_i.size(0):
                labels_i[fg_this_image:] = 0
            rels_to_gt.append(rels_to_gt_i)
            num_gt_rels_seen += gt_box_pairs_i.shape[0]
            proposal_labels.append(torch.cat((rel_proposal_inds_i, labels_i[:, None]), 1))
        proposal_labels = torch.cat(proposal_labels, 0)
        rels_to_gt = torch.cat(rels_to_gt, 0)

    # Sort lexicographically by (im_ind, ind1, ind2); apply the same
    # permutation to rels_to_gt so the two stay row-aligned.
    _, perm = torch.sort(
        proposal_labels[:, 0] * (rois.size(0) ** 2) + proposal_labels[:, 1] * rois.size(0) + proposal_labels[:, 2])
    proposal_labels = proposal_labels[perm].contiguous()
    rels_to_gt = rels_to_gt[perm].contiguous()

    return proposal_labels, rels_to_gt
Ejemplo n.º 11
0
def evaluate_from_dict(gt_entry, pred_entry, mode, eval_result_dict, eval_result_dict2,
                       gtrel_dist_dict, gtkeyrel_dist_dict,
                       predrel_dist_dict, predmatchedrel_dist_dict, predmatchedkeyrel_dist_dict,
                       multiple_preds=False, predrel_treedeep_scores_dict=None,
                       viz_dict=None, num_predicates=50, **kwargs):
    """
    Shortcut to doing evaluate_recall from dict, additionally accumulating
    per-predicate hit/count statistics and (when pred_entry carries a 'forest')
    distributions of GT / predicted / matched relations over tree-depth pairs.

    :param gt_entry: dict with gt_relations, gt_boxes, gt_classes and
        optionally gt_key_rels (indices of key relations within gt_relations)
    :param pred_entry: dict with pred_rel_inds, rel_scores, rel_rank_scores,
        'forest', and (depending on mode) pred_boxes / pred_classes / obj_scores
    :param mode: one of 'predcls', 'sgcls', 'sgdet', 'phrdet', 'preddet'
    :param eval_result_dict: accumulator keyed by triplet matches
    :param eval_result_dict2: accumulator keyed by subject/object-pair matches
    :param multiple_preds: if True, allow several predicates per object pair
    :param num_predicates: number of predicate classes (sizes per-class counters)
    :param kwargs: forwarded to evaluate_recall
    :return: (RES_pred_to_gt, RES_subobj_to_gt, pred_5ples, rel_scores,
        tree_depth, tree_width); (None, None, None) in 'preddet' mode
    """
    gt_rels = gt_entry['gt_relations']
    gt_boxes = gt_entry['gt_boxes'].astype(float)
    gt_classes = gt_entry['gt_classes']
    gt_key_rels = gt_entry['gt_key_rels'] if 'gt_key_rels' in gt_entry else None

    pred_rel_inds = pred_entry['pred_rel_inds']
    rel_scores = pred_entry['rel_scores']
    rel_rank_scores = pred_entry['rel_rank_scores']
    tree = pred_entry['forest']

    # get each node's depth in the (single-root) forest, plus overall depth/width
    tree_depth_dict = None
    tree_depth = None
    tree_width = None
    if tree is not None:
        root = tree[0]
        tree_depth_dict = {}
        get_treeNodes_depth(tree_depth_dict, root)
        tree_depth = root.max_depth()
        tree_width = root.max_width()

    # if rel_rank_scores is not None:
    #    rel_scores *= rel_rank_scores[:, None]

    if mode == 'predcls':
        # Predicate classification: GT boxes and GT classes are given.
        pred_boxes = gt_boxes
        pred_classes = gt_classes
        obj_scores = np.ones(gt_classes.shape[0])
    elif mode == 'sgcls':
        # Scene-graph classification: GT boxes, predicted classes.
        pred_boxes = gt_boxes
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'sgdet' or mode == 'phrdet':
        # Detection modes: boxes and classes are both predicted.
        pred_boxes = pred_entry['pred_boxes'].astype(float)
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'preddet':
        # Only extract the indices that appear in GT
        prc = intersect_2d(pred_rel_inds, gt_rels[:, :2])
        if prc.size == 0:
            # No predicted pair matches any GT pair: recall is 0 at every k.
            for k in eval_result_dict[mode + '_recall']:
                eval_result_dict[mode + '_recall'][k].append(0.0)
            return None, None, None
        pred_inds_per_gt = prc.argmax(0)
        pred_rel_inds = pred_rel_inds[pred_inds_per_gt]
        rel_scores = rel_scores[pred_inds_per_gt]

        # Now sort the matching ones
        rel_scores_sorted = argsort_desc(rel_scores[:, 1:])
        rel_scores_sorted[:, 1] += 1  # +1 skips the background predicate column
        rel_scores_sorted = np.column_stack((pred_rel_inds[rel_scores_sorted[:, 0]], rel_scores_sorted[:, 1]))

        matches = intersect_2d(rel_scores_sorted, gt_rels)
        for k in eval_result_dict[mode + '_recall']:
            rec_i = float(matches[:k].any(0).sum()) / float(gt_rels.shape[0])
            eval_result_dict[mode + '_recall'][k].append(rec_i)
        return None, None, None
    else:
        raise ValueError('invalid mode')

    if multiple_preds:
        # Rank (pair, predicate) combinations jointly by obj*predicate score
        # and keep the top 100, so a pair can contribute several predicates.
        obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)
        overall_scores = obj_scores_per_rel[:, None] * rel_scores[:, 1:]
        score_inds = argsort_desc(overall_scores)[:100]
        pred_rels = np.column_stack((pred_rel_inds[score_inds[:, 0]], score_inds[:, 1] + 1))
        predicate_scores = rel_scores[score_inds[:, 0], score_inds[:, 1] + 1]
    else:
        # One predicate per pair: argmax over non-background predicates.
        pred_rels = np.column_stack((pred_rel_inds, 1 + rel_scores[:, 1:].argmax(1)))
        predicate_scores = rel_scores[:, 1:].max(1)
        # note: for relrank
        if rel_rank_scores is not None:
            predicate_scores *= rel_rank_scores

    # NOTE: rel_scores is rebound here to evaluate_recall's per-triplet scores.
    RES_pred_to_gt, RES_subobj_to_gt, pred_5ples, rel_scores = evaluate_recall(
        gt_rels, gt_boxes, gt_classes,
        pred_rels, pred_boxes, pred_classes,
        predicate_scores, obj_scores, phrdet=mode == 'phrdet',
        **kwargs)

    # evaluate the tree: get the distribution over tree hierarchy
    if tree is not None:
        # get gt distribution, only do it in predcls or sgcls mode, use the gt box
        if mode == 'sgcls' or mode == 'predcls':
            for pair in gt_rels[:, :2]:
                gtrel_dist_dict[_gen_key(tree_depth_dict[pair[0]], tree_depth_dict[pair[1]])] += 1
            if gt_key_rels is not None:
                for ind in gt_key_rels:
                    pair = gt_rels[ind, :2]
                    gtkeyrel_dist_dict[_gen_key(tree_depth_dict[pair[0]], tree_depth_dict[pair[1]])] += 1

        # get the predicted rels distribution (top-5 pairs only)
        box_pair_inds = pred_rel_inds[:5]
        for ind in box_pair_inds:
            predrel_dist_dict[mode][_gen_key(tree_depth_dict[ind[0]], tree_depth_dict[ind[1]])] += 1

        # Normalize triplet scores to [0, 1] and bucket them by depth pair.
        # NOTE(review): divides by the max — assumes at least one nonzero score.
        norm_triplet_rel_scores = rel_scores.prod(1)
        norm_triplet_rel_scores = norm_triplet_rel_scores / np.max(norm_triplet_rel_scores)
        for ind_idx, ind in enumerate(pred_rel_inds):
            predrel_treedeep_scores_dict[mode][_gen_key(tree_depth_dict[ind[0]], tree_depth_dict[ind[1]])].append(norm_triplet_rel_scores[ind_idx])

        # get the match ones, only do it in predcls or sgcls mode, use the gt box
        match = reduce(np.union1d, RES_subobj_to_gt[:5])
        for m in match:
            predmatchedrel_dist_dict[mode][
                _gen_key(tree_depth_dict[gt_rels[int(m), 0]], tree_depth_dict[gt_rels[int(m), 1]])] += 1
        if gt_key_rels is not None:
            key_match = np.intersect1d(match, gt_key_rels)
            for m in key_match:
                predmatchedkeyrel_dist_dict[mode][
                    _gen_key(tree_depth_dict[gt_rels[int(m), 0]], tree_depth_dict[gt_rels[int(m), 1]])] += 1

    # Accumulate recall and per-predicate hit/count stats for both the
    # triplet-match view and the subject/object-pair-match view.
    for pred_to_gt, result_dict in [(RES_pred_to_gt, eval_result_dict), (RES_subobj_to_gt, eval_result_dict2)]:

        for k in result_dict[mode + '_recall']:

            # GT relation indices hit by any of the top-k predictions.
            match = reduce(np.union1d, pred_to_gt[:k])

            key_match = np.intersect1d(match, gt_key_rels) if gt_key_rels is not None else None

            if gt_key_rels is not None:
                # Per-predicate hit counts restricted to key relations;
                # index 0 stores the total over all predicates.
                for idx in range(len(key_match)):
                    local_label = gt_rels[int(key_match[idx]), 2]
                    if (mode + '_key_recall_hit') not in result_dict:
                        result_dict[mode + '_key_recall_hit'] = {}
                    if k not in result_dict[mode + '_key_recall_hit']:
                        result_dict[mode + '_key_recall_hit'][k] = [0] * (num_predicates + 1)
                    result_dict[mode + '_key_recall_hit'][k][int(local_label)] += 1
                    result_dict[mode + '_key_recall_hit'][k][0] += 1

                for idx in range(gt_key_rels.shape[0]):
                    local_label = gt_rels[int(gt_key_rels[idx]), 2]
                    if (mode + '_key_recall_count') not in result_dict:
                        result_dict[mode + '_key_recall_count'] = {}
                    if k not in result_dict[mode + '_key_recall_count']:
                        result_dict[mode + '_key_recall_count'][k] = [0] * (num_predicates + 1)
                    result_dict[mode + '_key_recall_count'][k][int(local_label)] += 1
                    result_dict[mode + '_key_recall_count'][k][0] += 1

            # Per-predicate hit counts over all GT relations.
            for idx in range(len(match)):
                local_label = gt_rels[int(match[idx]), 2]
                if (mode + '_recall_hit') not in result_dict:
                    result_dict[mode + '_recall_hit'] = {}
                if k not in result_dict[mode + '_recall_hit']:
                    result_dict[mode + '_recall_hit'][k] = [0] * (num_predicates + 1)
                result_dict[mode + '_recall_hit'][k][int(local_label)] += 1
                result_dict[mode + '_recall_hit'][k][0] += 1

            # Per-predicate GT totals (denominator for mean recall).
            for idx in range(gt_rels.shape[0]):
                local_label = gt_rels[idx, 2]
                if (mode + '_recall_count') not in result_dict:
                    result_dict[mode + '_recall_count'] = {}
                if k not in result_dict[mode + '_recall_count']:
                    result_dict[mode + '_recall_count'][k] = [0] * (num_predicates + 1)
                result_dict[mode + '_recall_count'][k][int(local_label)] += 1
                result_dict[mode + '_recall_count'][k][0] += 1

            rec_i = float(len(match)) / float(gt_rels.shape[0])
            result_dict[mode + '_recall'][k].append(rec_i)

            if gt_key_rels is not None:
                key_rec_i = float(len(key_match)) / float(gt_key_rels.shape[0])
                result_dict[mode + '_key_recall'][k].append(key_rec_i)
    return RES_pred_to_gt, RES_subobj_to_gt, pred_5ples, rel_scores, tree_depth, tree_width
Ejemplo n.º 12
0
def evaluate_from_dict(gt_entry,
                       pred_entry,
                       mode,
                       eval_result_dict,
                       multiple_preds=1,
                       num_predicates=50,
                       **kwargs):
    """
    Shortcut to doing evaluate_recall from dict, accumulating recall and
    per-predicate hit/count statistics into eval_result_dict.

    :param gt_entry: dict containing gt_relations, gt_boxes, gt_classes
    :param pred_entry: dict containing pred_rel_inds, rel_scores and
        (depending on mode) pred_boxes / pred_classes / obj_scores
    :param mode: one of 'predcls', 'sgcls', 'sgdet', 'phrdet', 'preddet'
    :param eval_result_dict: result accumulator
    :param multiple_preds: number of predicates allowed per object pair
        (1 = single argmax predicate; k > 1 = top-k predicates per pair)
    :param num_predicates: number of predicate classes (sizes per-class counters)
    :param kwargs: forwarded to evaluate_recall
    :return: (RES_pred_to_gt, pred_5ples, rel_scores);
        (None, None, None) in 'preddet' mode
    """
    gt_rels = gt_entry['gt_relations']
    gt_boxes = gt_entry['gt_boxes'].astype(float)
    gt_classes = gt_entry['gt_classes']

    pred_rel_inds = pred_entry['pred_rel_inds']
    rel_scores = pred_entry['rel_scores']

    if mode == 'predcls':
        # Predicate classification: GT boxes and GT classes are given.
        pred_boxes = gt_boxes
        pred_classes = gt_classes
        obj_scores = np.ones(gt_classes.shape[0])
    elif mode == 'sgcls':
        # Scene-graph classification: GT boxes, predicted classes.
        pred_boxes = gt_boxes
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'sgdet' or mode == 'phrdet':
        # Detection modes: boxes and classes are both predicted.
        pred_boxes = pred_entry['pred_boxes'].astype(float)
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'preddet':
        # Only extract the indices that appear in GT
        prc = intersect_2d(pred_rel_inds, gt_rels[:, :2])
        if prc.size == 0:
            # No predicted pair matches any GT pair: recall is 0 at every k.
            for k in eval_result_dict[mode + '_recall']:
                eval_result_dict[mode + '_recall'][k].append(0.0)
            return None, None, None
        pred_inds_per_gt = prc.argmax(0)
        pred_rel_inds = pred_rel_inds[pred_inds_per_gt]
        rel_scores = rel_scores[pred_inds_per_gt]

        # Now sort the matching ones
        rel_scores_sorted = argsort_desc(rel_scores[:, 1:])
        rel_scores_sorted[:, 1] += 1  # +1 skips the background predicate column
        rel_scores_sorted = np.column_stack(
            (pred_rel_inds[rel_scores_sorted[:, 0]], rel_scores_sorted[:, 1]))

        matches = intersect_2d(rel_scores_sorted, gt_rels)
        for k in eval_result_dict[mode + '_recall']:
            rec_i = float(matches[:k].any(0).sum()) / float(gt_rels.shape[0])
            eval_result_dict[mode + '_recall'][k].append(rec_i)
        return None, None, None
    else:
        raise ValueError('invalid mode')

    if multiple_preds > 1:
        if multiple_preds == rel_scores.shape[1] - 1:  # all predicates
            # Rank every (pair, predicate) combination jointly and keep top 100.
            obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)
            overall_scores = obj_scores_per_rel[:, None] * rel_scores[:, 1:]
            score_inds = argsort_desc(overall_scores)[:100]
            pred_rels = np.column_stack(
                (pred_rel_inds[score_inds[:, 0]], score_inds[:, 1] + 1))
            predicate_scores = rel_scores[score_inds[:, 0],
                                          score_inds[:, 1] + 1]
        else:
            # between 1 and all predicates: keep only the top
            # `multiple_preds` predicates per pair before global ranking
            obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)  # Nr
            overall_scores = obj_scores_per_rel[:,
                                                None] * rel_scores[:,
                                                                   1:]  # Nr, 70
            # sort predicate scores for each pair
            sorted_predicates_idx = np.argsort(
                -overall_scores,
                axis=1)[:, :multiple_preds]  # Nr, multiple_preds
            sorted_predicates_scores = np.sort(
                overall_scores, axis=1)[:, ::-1][:, :multiple_preds]
            score_inds = argsort_desc(sorted_predicates_scores)[:100]
            # Map the truncated column indices back to real predicate ids (+1
            # to skip the background column).
            pred_rels = np.column_stack(
                (pred_rel_inds[score_inds[:, 0]],
                 sorted_predicates_idx[score_inds[:, 0], score_inds[:, 1]] +
                 1))
            predicate_scores = rel_scores[
                score_inds[:, 0],
                sorted_predicates_idx[score_inds[:, 0], score_inds[:, 1]] + 1]
    else:
        # One predicate per pair: argmax over non-background predicates.
        pred_rels = np.column_stack(
            (pred_rel_inds, 1 + rel_scores[:, 1:].argmax(1)))
        predicate_scores = rel_scores[:, 1:].max(1)

    # NOTE: rel_scores is rebound here to evaluate_recall's per-triplet scores.
    RES_pred_to_gt, pred_5ples, rel_scores = evaluate_recall(
        gt_rels,
        gt_boxes,
        gt_classes,
        pred_rels,
        pred_boxes,
        pred_classes,
        predicate_scores,
        obj_scores,
        phrdet=mode == 'phrdet',
        **kwargs)

    pred_to_gt = RES_pred_to_gt
    result_dict = eval_result_dict

    for k in result_dict[mode + '_recall']:

        # GT relation indices hit by any of the top-k predictions.
        match = reduce(np.union1d, pred_to_gt[:k])

        # Per-predicate hit counts; index 0 stores the total over all predicates.
        for idx in range(len(match)):
            local_label = gt_rels[int(match[idx]), 2]
            if (mode + '_recall_hit') not in result_dict:
                result_dict[mode + '_recall_hit'] = {}
            if k not in result_dict[mode + '_recall_hit']:
                result_dict[mode +
                            '_recall_hit'][k] = [0] * (num_predicates + 1)
            result_dict[mode + '_recall_hit'][k][int(local_label)] += 1
            result_dict[mode + '_recall_hit'][k][0] += 1

        # Per-predicate GT totals (denominator for mean recall).
        for idx in range(gt_rels.shape[0]):
            local_label = gt_rels[idx, 2]
            if (mode + '_recall_count') not in result_dict:
                result_dict[mode + '_recall_count'] = {}
            if k not in result_dict[mode + '_recall_count']:
                result_dict[mode +
                            '_recall_count'][k] = [0] * (num_predicates + 1)
            result_dict[mode + '_recall_count'][k][int(local_label)] += 1
            result_dict[mode + '_recall_count'][k][0] += 1

        rec_i = float(len(match)) / float(gt_rels.shape[0])
        result_dict[mode + '_recall'][k].append(rec_i)

    return RES_pred_to_gt, pred_5ples, rel_scores
Ejemplo n.º 13
0
    def evaluate_from_dict(self,
                           gt_entry,
                           pred_entry,
                           mode,
                           result_dict,
                           multiple_preds=False,
                           viz_dict=None,
                           **kwargs):
        """
        Shortcut to doing evaluate_recall from dict, accumulating recall (and,
        when self.per_triplet is set, frequency-normalized recall and per-triplet
        ranks) into result_dict.

        :param gt_entry: dict containing gt_relations, gt_boxes, gt_classes
        :param pred_entry: dict containing pred_rel_inds, rel_scores and
            (depending on mode) pred_boxes / pred_classes / obj_scores
        :param mode: one of 'predcls', 'sgcls', 'objcls', 'sgdet', 'phrdet',
            'preddet'
        :param result_dict: result accumulator
        :param multiple_preds: if True, allow several predicates per object pair
        :param viz_dict: unused here
        :param kwargs: forwarded to evaluate_recall
        :return: (pred_to_gt, pred_5ples, rel_scores);
            (None, None, None) in 'preddet' mode
        """
        gt_rels = gt_entry['gt_relations']
        gt_boxes = gt_entry['gt_boxes'].astype(float)
        gt_classes = gt_entry['gt_classes']

        pred_rel_inds = pred_entry['pred_rel_inds']
        rel_scores = pred_entry['rel_scores']

        if mode == 'predcls':
            # Predicate classification: GT boxes and GT classes are given.
            pred_boxes = gt_boxes
            pred_classes = gt_classes
            obj_scores = np.ones(gt_classes.shape[0])
        elif mode == 'sgcls':
            # Scene-graph classification: GT boxes, predicted classes.
            pred_boxes = gt_boxes
            pred_classes = pred_entry['pred_classes']
            obj_scores = pred_entry['obj_scores']
        elif mode == 'objcls':
            pred_boxes = gt_boxes
            pred_classes = pred_entry['pred_classes']
            obj_scores = pred_entry['obj_scores']

            # same as sgcls but assume perfect predicate recognition:
            # replace predictions with one-hot scores on the GT predicates
            pred_rel_inds = gt_rels[:, :2]
            rel_scores = np.zeros((len(gt_rels), rel_scores.shape[1]))
            rel_scores[np.arange(len(gt_rels)), gt_rels[:, 2]] = 1

        elif mode == 'sgdet' or mode == 'phrdet':
            # Detection modes: boxes and classes are both predicted.
            pred_boxes = pred_entry['pred_boxes'].astype(float)
            pred_classes = pred_entry['pred_classes']
            obj_scores = pred_entry['obj_scores']
        elif mode == 'preddet':
            # Only extract the indices that appear in GT
            prc = intersect_2d(pred_rel_inds, gt_rels[:, :2])
            if prc.size == 0:
                # No predicted pair matches any GT pair: recall is 0 everywhere.
                for k in result_dict[mode + '_recall']:
                    result_dict[mode + '_recall'][k].append(0.0)
                if self.per_triplet:
                    for k in result_dict[mode + '_recall_norm']:
                        result_dict[mode + '_recall_norm'][k].append(0.0)
                return None, None, None
            pred_inds_per_gt = prc.argmax(0)
            pred_rel_inds = pred_rel_inds[pred_inds_per_gt]
            rel_scores = rel_scores[pred_inds_per_gt]

            # Now sort the matching ones
            rel_scores_sorted = argsort_desc(rel_scores[:, 1:])
            rel_scores_sorted[:, 1] += 1  # +1 skips the background column
            rel_scores_sorted = np.column_stack(
                (pred_rel_inds[rel_scores_sorted[:, 0]], rel_scores_sorted[:,
                                                                           1]))

            matches = intersect_2d(rel_scores_sorted, gt_rels)
            for k in result_dict[mode + '_recall']:
                rec_i = float(matches[:k].any(0).sum()) / float(
                    gt_rels.shape[0])
                result_dict[mode + '_recall'][k].append(rec_i)
            if self.per_triplet:
                for k in result_dict[mode + '_recall_norm']:
                    rec_i = float(matches[:k].any(0).sum()) / float(
                        gt_rels.shape[0])
                    result_dict[mode + '_recall_norm'][k].append(rec_i)
            return None, None, None
        else:
            raise ValueError('invalid mode')

        if multiple_preds:
            # Rank (pair, predicate) combinations jointly by obj*predicate
            # score, keeping the top MAX_RECALL_K.
            obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)
            overall_scores = obj_scores_per_rel[:, None] * rel_scores[:, 1:]
            score_inds = argsort_desc(overall_scores)[:MAX_RECALL_K]
            pred_rels = np.column_stack(
                (pred_rel_inds[score_inds[:, 0]], score_inds[:, 1] + 1))
            predicate_scores = rel_scores[score_inds[:, 0],
                                          score_inds[:, 1] + 1]
        else:
            # One predicate per pair: argmax over non-background predicates.
            pred_rels = np.column_stack(
                (pred_rel_inds, 1 + rel_scores[:, 1:].argmax(1)))
            predicate_scores = rel_scores[:, 1:].max(1)

        # NOTE: rel_scores is rebound to evaluate_recall's per-triplet scores.
        pred_to_gt, pred_5ples, rel_scores = evaluate_recall(
            gt_rels,
            gt_boxes,
            gt_classes,
            pred_rels,
            pred_boxes,
            pred_classes,
            predicate_scores,
            obj_scores,
            phrdet=mode == 'phrdet',
            **kwargs)

        if self.per_triplet:
            # Per-GT-relation training-set frequencies, turned into weights
            # for frequency-normalized recall.
            counts = np.zeros(len(gt_rels))
            for rel_i, gt_rel in enumerate(gt_rels):
                o, s, R = gt_rel
                tri_str = '{}_{}_{}'.format(gt_classes[o], R, gt_classes[s])
                if tri_str in self.triplet_counts:
                    counts[rel_i] = self.triplet_counts[tri_str]

            weights = self.normalize_counts(counts)

        for k in result_dict[mode + '_recall']:

            match = reduce(np.union1d, pred_to_gt[:k])
            # BUGFIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
            # use the builtin int (same behavior: platform default integer).
            match = np.array(match).astype(int)

            rec_i = float(len(match)) / float(gt_rels.shape[0])
            result_dict[mode + '_recall'][k].append(rec_i)

            if self.per_triplet:
                result_dict[mode + '_recall_norm'][k].append(
                    np.sum(weights[match]))

        if self.per_triplet:
            # TODO: this looks similar to preddet, reuse that code
            # NOTE(review): overall_scores is only defined when multiple_preds
            # is truthy — this raises NameError otherwise. rel_scores has been
            # rebound above, so it cannot simply be recomputed here; confirm
            # that per_triplet is only used together with multiple_preds.
            score_inds = argsort_desc(overall_scores)
            pred_rels = np.column_stack(
                (pred_rel_inds[score_inds[:, 0]], score_inds[:, 1] + 1))

            # Naive and slow code to get per triplet ranks
            ranks, counts = np.zeros(len(gt_rels)) - 1, np.zeros(len(gt_rels))
            for rel_i, gt_rel in enumerate(gt_rels):
                o, s, R = gt_rel
                tri_str = '{}_{}_{}'.format(gt_classes[o], R, gt_classes[s])
                if tri_str in self.triplet_counts:
                    counts[rel_i] = self.triplet_counts[tri_str]

                # select only pairs with this bounding boxes (either order)
                ind = np.where((pred_rels[:, 0] == o) & (pred_rels[:, 1] == s)
                               | (pred_rels[:, 0] == s)
                               & (pred_rels[:, 1] == o))[0]
                pred_to_gt_triplet, _, _ = evaluate_recall(
                    gt_rel.reshape(1, -1), gt_boxes, gt_classes,
                    pred_rels[ind], pred_boxes, pred_classes)

                # Rank = position of the first prediction matching this GT.
                for r, p in enumerate(pred_to_gt_triplet):
                    if len(p) > 0:
                        assert p == [0], (p, gt_rel, pred_to_gt_triplet)
                        ranks[rel_i] = r
                        break

                if ranks[rel_i] < 0:
                    ranks[rel_i] = MAX_RECALL_K + 1
                # For sgcls not all combinations are present, so take some max rank as the default value

                if tri_str not in self.triplet_ranks:
                    self.triplet_ranks[tri_str] = []
                self.triplet_ranks[tri_str].append(ranks[rel_i])

            result_dict[mode + '_rank'].extend(ranks)
            result_dict[mode + '_counts'].extend(
                counts)  # save count to normalize later

        return pred_to_gt, pred_5ples, rel_scores
Ejemplo n.º 14
0
def evaluate_from_dict(gt_entry, pred_entry, mode, result_dict, multiple_preds=False,
                       viz_dict=None, **kwargs):
    """
    Shortcut to running evaluate_recall from ground-truth / prediction dicts.

    :param gt_entry: dict with 'gt_relations' (m, 3) rows of [subj, obj, rel],
        'gt_boxes', 'gt_classes'
    :param pred_entry: dict with 'pred_rel_inds', 'rel_scores' and, depending on
        mode, 'pred_boxes', 'pred_classes', 'obj_scores'
    :param mode: one of 'predcls', 'sgcls', 'sgdet', 'phrdet', 'preddet'
    :param result_dict: accumulator; recall@k values are appended under
        mode + '_recall' (and per-relation tallies under mode + '_recall_per_rel')
    :param multiple_preds: if True, rank up to 100 (pair, predicate) combinations
        by object-score product * predicate score instead of one predicate per pair
    :param viz_dict: unused here; kept for interface compatibility
    :param kwargs: forwarded to evaluate_recall (e.g. iou_thresh)
    :return: (pred_to_gt, pred_5ples, rel_scores) from evaluate_recall, or
        (None, None, None) in 'preddet' mode
    """
    gt_rels = gt_entry['gt_relations']
    gt_boxes = gt_entry['gt_boxes'].astype(float)
    gt_classes = gt_entry['gt_classes']
    # gt_filenames = gt_entry['filenames']

    pred_rel_inds = pred_entry['pred_rel_inds']
    rel_scores = pred_entry['rel_scores']

    if mode == 'predcls':
        # GT boxes and labels given; only predicates are predicted.
        pred_boxes = gt_boxes
        pred_classes = gt_classes
        obj_scores = np.ones(gt_classes.shape[0])
    elif mode == 'sgcls':
        # GT boxes given; object labels and predicates are predicted.
        pred_boxes = gt_boxes
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'sgdet' or mode == 'phrdet':
        # Fully detected graph: boxes, labels and predicates all predicted.
        pred_boxes = pred_entry['pred_boxes'].astype(float)
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'preddet':
        # Only keep predicted pairs whose (subj, obj) indices appear in GT.
        prc = intersect_2d(pred_rel_inds, gt_rels[:, :2])
        if prc.size == 0:
            # No overlap at all: recall is 0 at every k.
            for k in result_dict[mode + '_recall']:
                result_dict[mode + '_recall'][k].append(0.0)
            return None, None, None
        pred_inds_per_gt = prc.argmax(0)
        pred_rel_inds = pred_rel_inds[pred_inds_per_gt]
        rel_scores = rel_scores[pred_inds_per_gt]

        # Rank the surviving (pair, predicate) combinations by predicate score.
        # Column 0 of rel_scores is the background class, hence the +1 offset.
        rel_scores_sorted = argsort_desc(rel_scores[:, 1:])
        rel_scores_sorted[:, 1] += 1
        rel_scores_sorted = np.column_stack(
            (pred_rel_inds[rel_scores_sorted[:, 0]], rel_scores_sorted[:, 1]))

        matches = intersect_2d(rel_scores_sorted, gt_rels)
        for k in result_dict[mode + '_recall']:
            rec_i = float(matches[:k].any(0).sum()) / float(gt_rels.shape[0])
            result_dict[mode + '_recall'][k].append(rec_i)
        return None, None, None
    else:
        raise ValueError('invalid mode')

    if multiple_preds:
        # Score every (pair, predicate) combination and keep the top 100,
        # weighting predicate scores by the product of the pair's object scores.
        obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)
        overall_scores = obj_scores_per_rel[:, None] * rel_scores[:, 1:]
        score_inds = argsort_desc(overall_scores)[:100]
        pred_rels = np.column_stack(
            (pred_rel_inds[score_inds[:, 0]], score_inds[:, 1] + 1))
        predicate_scores = rel_scores[score_inds[:, 0], score_inds[:, 1] + 1]
    else:
        # One predicate per pair: argmax over the non-background columns.
        pred_rels = np.column_stack((pred_rel_inds, 1 + rel_scores[:, 1:].argmax(1)))
        predicate_scores = rel_scores[:, 1:].max(1)

    pred_to_gt, pred_5ples, rel_scores = evaluate_recall(
                gt_rels, gt_boxes, gt_classes,
                pred_rels, pred_boxes, pred_classes,
                predicate_scores, obj_scores, phrdet= mode=='phrdet',
                **kwargs)

    for k in result_dict[mode + '_recall']:
        # Union of all GT indices matched within the top-k predictions.
        # BUGFIX: pass an initializer so reduce() does not raise TypeError when
        # there are no predictions (pred_to_gt[:k] empty, e.g. in sgdet).
        match = reduce(np.union1d, pred_to_gt[:k], np.array([], dtype=np.int64))
        # FIXME: I think this part of original code is wrong. We shouldn't do union.
        # Per-predicate tallies: maps rel label -> [hits within top-k, GT count].
        hits_per_rel = dict()
        # gt_rels: shape: (m, 3), (s, o, r)
        for i in range(gt_rels.shape[0]):
            gt_s, gt_o, gt_r = gt_rels[i]
            hits_per_rel.setdefault(gt_r, [0, 0])
            hits_per_rel[gt_r][1] += 1
            hits_per_rel[gt_r][0] += i in match
        rec_per_rel = {r: (hits, cnt) for r, (hits, cnt) in hits_per_rel.items()}

        rec_i = float(len(match)) / float(gt_rels.shape[0])
        result_dict[mode + '_recall'][k].append(rec_i)
        result_dict[mode + '_recall_per_rel'][k].append(rec_per_rel)
    return pred_to_gt, pred_5ples, rel_scores