Example 1
    def get_rel_inds(self, rel_labels, im_inds, box_priors):
        """
        Get the relationship candidates
        :param rel_labels: array of relation labels
        :param im_inds:  image indices
        :param box_priors: RoI bounding boxes
        :return: rel_inds
        """
        if self.training:
            rel_inds = rel_labels[:, :3].data.clone()
        else:
            rel_cands = im_inds.data[:, None] == im_inds.data[None]
            rel_cands.view(-1)[diagonal_inds(rel_cands)] = 0

            # Require overlap for detection
            if self.require_overlap:
                rel_cands = rel_cands & (bbox_overlaps(box_priors.data,
                                                       box_priors.data) > 0)

                # if there are fewer than 100 things then we might as well add some?
                amt_to_add = 100 - rel_cands.long().sum()

            rel_cands = rel_cands.nonzero()
            if rel_cands.dim() == 0:
                rel_cands = im_inds.data.new(1, 2).fill_(0)

            rel_inds = torch.cat((im_inds.data[rel_cands[:, 0]][:, None], rel_cands), 1)
        return rel_inds
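
For reference, the candidate-generation logic in the eval branch (same-image mask, diagonal removed via diagonal_inds, optional overlap requirement) can be written as a small standalone sketch. candidate_rel_pairs and its precomputed iou argument are illustrative stand-ins, not this repo's API:

import torch

def candidate_rel_pairs(im_inds, require_overlap=False, iou=None):
    """Sketch: ordered (subject, object) index pairs of boxes within the same image.

    im_inds: LongTensor [N], image index of each box
    iou:     optional [N, N] IoU matrix (stand-in for bbox_overlaps(box_priors, box_priors))
    Returns a LongTensor [num_pairs, 3] of (img_ind, box0_ind, box1_ind).
    """
    same_im = im_inds[:, None] == im_inds[None]       # [N, N] bool
    same_im.fill_diagonal_(False)                     # drop self-pairs (the diagonal_inds step)
    if require_overlap and iou is not None:
        same_im = same_im & (iou > 0)                 # keep only overlapping pairs
    pairs = same_im.nonzero(as_tuple=False)           # [num_pairs, 2]
    if pairs.numel() == 0:                            # fall back to a single dummy pair
        pairs = torch.zeros(1, 2, dtype=torch.long, device=im_inds.device)
    return torch.cat((im_inds[pairs[:, 0]][:, None], pairs), 1)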
Example 2
    def get_rel_inds(self, rel_labels, im_inds, box_priors):
        """Get relation index
        Args:
            rel_labels: Variable
            im_inds: Variable
            box_priors: Variable
        """
        # Get the relationship candidates
        if self.training:
            rel_inds = rel_labels[:, :3].data.contiguous().clone()
        else:
            rel_cands = im_inds.data[:, None] == im_inds.data[None]
            rel_cands.view(-1)[diagonal_inds(rel_cands)] = 0

            # Require overlap for detection
            if self.require_overlap:
                rel_cands = rel_cands & (bbox_overlaps(box_priors.data,
                                                       box_priors.data) > 0)

                # if there are fewer than 100 things then we might as well add some?
                amt_to_add = 100 - rel_cands.long().sum()

            rel_cands = rel_cands.nonzero()
            if rel_cands.dim() == 0:
                rel_cands = im_inds.data.new(1, 2).fill_(0)

            rel_inds = torch.cat(
                (im_inds.data[rel_cands[:, 0]][:,
                                               None].contiguous(), rel_cands),
                1)
        return rel_inds
Example 3
    def get_msg_rel_inds(self, im_inds, box_priors, box_score):

        rel_cands = im_inds.data[:, None] == im_inds.data[None]
        rel_cands.view(-1)[diagonal_inds(rel_cands)] = 0

        if self.require_overlap:
            rel_cands = rel_cands & (bbox_overlaps(box_priors.data,
                                                   box_priors.data) > conf.overlap_thresh)
        rel_cands = rel_cands.nonzero()
        if rel_cands.dim() == 0:
            rel_cands = im_inds.data.new(1, 2).fill_(0)

        rel_inds = torch.cat((im_inds.data[rel_cands[:, 0]][:, None], rel_cands), 1)
        return rel_inds
Example 4
    def get_rel_inds(self, rel_labels, im_inds, box_priors, box_score):

        if self.training:
            rel_inds = rel_labels[:, :3].data.clone()
        else:
            rel_cands = im_inds.data[:, None] == im_inds.data[None]
            rel_cands.view(-1)[diagonal_inds(rel_cands)] = 0

            # Require overlap for detection (test stage only)
            if self.require_overlap:
                rel_cands = rel_cands & (bbox_overlaps(box_priors.data,
                                                       box_priors.data) > 0)
            rel_cands = rel_cands.nonzero()
            if rel_cands.dim() == 0:
                rel_cands = im_inds.data.new(1, 2).fill_(0)

            rel_inds = torch.cat((im_inds.data[rel_cands[:, 0]][:, None], rel_cands), 1)
        return rel_inds
Example 5
def get_overlap_info(im_inds, box_priors):
    """
    input:
        im_inds: [num_object]
        box_priors: [number_object, 4]
    output: [number_object, 6]
        number of overlapped obj (self not included)
        sum of all intersection area (self not included)
        sum of IoU (Intersection over Union)
        average of all intersection area (self not included)
        average of IoU (Intersection over Union)
        roi area
    """
    # offset each image's boxes by im_ind * 1000 so boxes from different images never overlap
    num_obj = box_priors.shape[0]
    inds_offset = (im_inds * 1000).view(-1, 1).expand(box_priors.shape)
    offset_box = box_priors + inds_offset.float()
    intersection = bbox_intersections(offset_box, offset_box)
    overlap = bbox_overlaps(offset_box, offset_box)
    # [num_obj, num_obj]; diagonal elements (self-overlap) should be removed
    reverse_eye = Variable(1.0 - torch.eye(num_obj).float().cuda())
    intersection = intersection * reverse_eye
    overlap = overlap * reverse_eye
    box_area = bbox_area(offset_box)
    # generate input feat
    boxes_info = Variable(torch.FloatTensor(
        num_obj, 6).zero_().cuda())  # per-object overlap statistics, filled in below

    for obj_idx in range(num_obj):
        boxes_info[obj_idx, 0] = torch.nonzero(intersection[obj_idx]).numel()
        boxes_info[obj_idx, 1] = intersection[obj_idx].view(-1).sum() / float(
            IM_SCALE * IM_SCALE)
        boxes_info[obj_idx, 2] = overlap[obj_idx].view(-1).sum()
        boxes_info[obj_idx,
                   3] = boxes_info[obj_idx,
                                   1] / (boxes_info[obj_idx, 0] + 1e-9)
        boxes_info[obj_idx,
                   4] = boxes_info[obj_idx,
                                   2] / (boxes_info[obj_idx, 0] + 1e-9)
        boxes_info[obj_idx, 5] = box_area[obj_idx] / float(IM_SCALE * IM_SCALE)

    return boxes_info, intersection
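
The per-object loop above can also be computed without a Python loop. A minimal vectorized sketch, assuming intersection and overlap are the [num_obj, num_obj] tensors built above (diagonals already zeroed) and box_area / IM_SCALE as in the original:

import torch

def overlap_info_vectorized(intersection, overlap, box_area, im_scale):
    """Sketch: the same six per-object statistics as the loop above, computed in batch."""
    num_overlaps = (intersection > 0).sum(1).float()               # col 0: #overlapping objects
    inter_sum = intersection.sum(1) / float(im_scale * im_scale)   # col 1: normalized intersection sum
    iou_sum = overlap.sum(1)                                       # col 2: IoU sum
    inter_mean = inter_sum / (num_overlaps + 1e-9)                 # col 3: mean intersection
    iou_mean = iou_sum / (num_overlaps + 1e-9)                     # col 4: mean IoU
    area = box_area / float(im_scale * im_scale)                   # col 5: normalized RoI area
    return torch.stack((num_overlaps, inter_sum, iou_sum, inter_mean, iou_mean, area), dim=1)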
Example 6
    def forward(self, x, im_sizes, image_offset,
                gt_boxes=None, gt_classes=None, gt_rels=None, proposals=None, train_anchor_inds=None,
                return_fmap=False, depth_imgs=None):
        """
        Forward pass for detection
        :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
        :param im_sizes: A numpy array of (h, w, scale) for each image.
        :param image_offset: Offset onto what image we're on for MGPU training (if single GPU this is 0)

        Training parameters:
        :param gt_boxes: [num_gt, 4] GT boxes over the batch.
        :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
        :param proposals: things
        :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that will
                                  be used to compute the training loss. Each (img_ind, fpn_idx)
        :return: If train:
        """
        fmap = self.feature_map(x)
        # What is rel_labels? How can get_boxes give us relation labels?
        #  It is basically an N*N matrix W (N = number of boxes) where W[i,j] = 1 if the bounding
        #  boxes of i and j intersect. If self.require_overlap = False, the matrix is all 1s
        #  (except for the diagonal).
        # Get boxes from RPN
        rois, obj_labels, bbox_targets, rpn_scores, rpn_box_deltas, rel_labels = \
            self.get_boxes(fmap, im_sizes, image_offset, gt_boxes,
                           gt_classes, gt_rels, train_anchor_inds, proposals=proposals)

        # Now classify them
        obj_fmap = self.obj_feature_map(fmap, rois)
        od_obj_dists = self.score_fc(obj_fmap)
        #  What is this? Box deltas for refining the bounding boxes.
        od_box_deltas = self.bbox_fc(obj_fmap).view(
            -1, len(self.classes), 4) if self.mode != 'gtbox' else None
        #  What is this? The bounding box coordinates (rois[:, 0] contains the image indices).
        od_box_priors = rois[:, 1:]

        if (not self.training and not self.mode == 'gtbox') or self.mode in ('proposals', 'refinerels'):
            nms_inds, nms_scores, nms_preds, nms_boxes_assign, nms_boxes, nms_imgs = self.nms_boxes(
                od_obj_dists,
                rois,
                od_box_deltas, im_sizes,
            )
            #  What are im_inds and image_offset? image_offset is always zero here; it isn't really used.
            im_inds = nms_imgs + image_offset
            obj_dists = od_obj_dists[nms_inds]
            obj_fmap = obj_fmap[nms_inds]
            box_deltas = od_box_deltas[nms_inds]
            box_priors = nms_boxes[:, 0]

            if self.training and not self.mode == 'gtbox':
                # NOTE: If we're doing this during training, we need to assign labels here.
                pred_to_gtbox = bbox_overlaps(box_priors, gt_boxes).data
                pred_to_gtbox[im_inds.data[:, None] != gt_classes.data[None, :, 0]] = 0.0

                max_overlaps, argmax_overlaps = pred_to_gtbox.max(1)
                rm_obj_labels = gt_classes[:, 1][argmax_overlaps]
                rm_obj_labels[max_overlaps < 0.5] = 0
            else:
                rm_obj_labels = None
        else:
            im_inds = rois[:, 0].long().contiguous() + image_offset
            nms_scores = None
            nms_preds = None
            nms_boxes_assign = None
            nms_boxes = None
            # What is box_priors, and how does it differ from od_box_priors? They are the same in
            #  PredCls; in SGCls box_priors is the ground truth (while od_box_priors comes from the classifier).
            box_priors = rois[:, 1:]
            # How does this differ from od_obj_dists? It is the same, except that it depends on the ground-truth boxes.
            rm_obj_labels = obj_labels
            box_deltas = od_box_deltas
            obj_dists = od_obj_dists

        return Result(
            od_obj_dists=od_obj_dists,
            rm_obj_dists=obj_dists,
            obj_scores=nms_scores,
            obj_preds=nms_preds,
            obj_fmap=obj_fmap,
            od_box_deltas=od_box_deltas,
            rm_box_deltas=box_deltas,
            od_box_targets=bbox_targets,
            rm_box_targets=bbox_targets,
            od_box_priors=od_box_priors,
            rm_box_priors=box_priors,
            boxes_assigned=nms_boxes_assign,
            boxes_all=nms_boxes,
            od_obj_labels=obj_labels,
            rm_obj_labels=rm_obj_labels,
            rpn_scores=rpn_scores,
            rpn_box_deltas=rpn_box_deltas,
            rel_labels=rel_labels,
            im_inds=im_inds,
            fmap=fmap if return_fmap else None
        )
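
The label-assignment step in the NMS branch above (pred_to_gtbox, max over GT) is a standard IoU matching. A minimal standalone sketch, using torchvision.ops.box_iou as a stand-in for the repo's bbox_overlaps:

import torch
from torchvision.ops import box_iou

def assign_labels_by_iou(pred_boxes, pred_im_inds, gt_boxes, gt_classes, thresh=0.5):
    """Sketch: give each predicted box the class of its best-overlapping GT box in the same image.

    gt_classes: LongTensor [num_gt, 2] of (img_ind, class), as in the forward pass above.
    Boxes whose best IoU is below `thresh` are labelled 0 (background).
    """
    iou = box_iou(pred_boxes, gt_boxes)                          # [num_pred, num_gt]
    iou[pred_im_inds[:, None] != gt_classes[None, :, 0]] = 0.0   # mask out cross-image pairs
    max_iou, argmax_iou = iou.max(1)
    labels = gt_classes[:, 1][argmax_iou]
    labels[max_iou < thresh] = 0
    return labels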
Example 7
    def forward(self,
                x,
                im_sizes,
                image_offset,
                gt_boxes=None,
                gt_classes=None,
                gt_rels=None,
                proposals=None,
                train_anchor_inds=None,
                return_fmap=False):
        """
        Forward pass for detection
        :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
        :param im_sizes: A numpy array of (h, w, scale) for each image.
        :param image_offset: Offset onto what image we're on for MGPU training (if single GPU this is 0)

        Training parameters:
        :param gt_boxes: [num_gt, 4] GT boxes over the batch.
        :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
        :param proposals: things
        :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that will
                                  be used to compute the training loss. Each (img_ind, fpn_idx)
        :return: If train:
        """
        # shape: (batch_size, 512, 37, 37)
        fmap = self.feature_map(x)

        # Get boxes from RPN
        # rois: (NumOfRoIs, 5), [img_ind, x1, y1, x2, y2]
        # obj_labels: (NumOfRoIs,), object class index
        # bbox_targets: NoneType in Rel_Model
        # rpn_scores: NoneType in Rel_Model
        # rpn_box_deltas:
        # rel_labels: (NumOfRels, 4), [img_ind, box0_ind, box1_ind, rel_type]
        #   rel_labels holds the relation labels of every proposal pair in the image,
        #   e.g. on this GPU there are 2 images with 8 and 9 proposals, so the first
        #   dimension of rel_labels is 8*(8-1) + 9*(9-1) = 56 + 72 = 128
        #   (the proposal counts can be inferred from `rois`)
        rois, obj_labels, bbox_targets, rpn_scores, rpn_box_deltas, rel_labels = \
            self.get_boxes(fmap, im_sizes, image_offset, gt_boxes,
                           gt_classes, gt_rels, train_anchor_inds, proposals=proposals)

        # Now classify them
        # obj_fmap: (NumOfRoI, 4096)
        obj_fmap = self.obj_feature_map(fmap, rois)
        # od_obj_dists: (NumOfRoI, NumOfClasses)
        od_obj_dists = self.score_fc(obj_fmap)
        od_box_deltas = self.bbox_fc(obj_fmap).view(-1, len(
            self.classes), 4) if self.mode != 'gtbox' else None

        od_box_priors = rois[:, 1:]

        if (not self.training
                and not self.mode == 'gtbox') or self.mode in ('proposals',
                                                               'refinerels'):
            nms_inds, nms_scores, nms_preds, nms_boxes_assign, nms_boxes, nms_imgs = self.nms_boxes(
                od_obj_dists,
                rois,
                od_box_deltas,
                im_sizes,
            )
            im_inds = nms_imgs + image_offset
            obj_dists = od_obj_dists[nms_inds]
            obj_fmap = obj_fmap[nms_inds]
            box_deltas = od_box_deltas[nms_inds]
            box_priors = nms_boxes[:, 0]

            if self.training and not self.mode == 'gtbox':
                # NOTE: If we're doing this during training, we need to assign labels here.
                pred_to_gtbox = bbox_overlaps(box_priors, gt_boxes).data
                pred_to_gtbox[im_inds.data[:,
                                           None] != gt_classes.data[None, :,
                                                                    0]] = 0.0

                max_overlaps, argmax_overlaps = pred_to_gtbox.max(1)
                rm_obj_labels = gt_classes[:, 1][argmax_overlaps]
                rm_obj_labels[max_overlaps < 0.5] = 0
            else:
                rm_obj_labels = None
        else:
            im_inds = rois[:, 0].long().contiguous() + image_offset
            nms_scores = None
            nms_preds = None
            nms_boxes_assign = None
            nms_boxes = None
            box_priors = rois[:, 1:]
            rm_obj_labels = obj_labels
            box_deltas = od_box_deltas
            obj_dists = od_obj_dists
        #embed(header='object_detector.py before return')

        return Result(
            od_obj_dists=od_obj_dists,
            rm_obj_dists=obj_dists,
            obj_scores=nms_scores,
            obj_preds=nms_preds,
            obj_fmap=obj_fmap,
            od_box_deltas=od_box_deltas,
            rm_box_deltas=box_deltas,
            od_box_targets=bbox_targets,
            rm_box_targets=bbox_targets,
            od_box_priors=od_box_priors,
            rm_box_priors=box_priors,
            boxes_assigned=nms_boxes_assign,
            boxes_all=nms_boxes,
            od_obj_labels=obj_labels,
            rm_obj_labels=rm_obj_labels,
            rpn_scores=rpn_scores,
            rpn_box_deltas=rpn_box_deltas,
            rel_labels=rel_labels,
            im_inds=im_inds,
            fmap=fmap if return_fmap else None,
        )
Example 8
def proposal_assignments_rel(rpn_rois,
                             gt_boxes,
                             gt_classes,
                             gt_rels,
                             image_offset,
                             fg_thresh=0.5):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.
    :param rpn_rois: [img_ind, x1, y1, x2, y2]
    :param gt_boxes:   [num_boxes, 4] array of (x0, y0, x1, y1)
    :param gt_classes: [num_boxes, 2] array of [img_ind, class]
    :param gt_rels:    [num_boxes, 4] array of [img_ind, box_0, box_1, rel type]
    :param fg_thresh: Overlap threshold for a ROI to be considered foreground (if >= fg_thresh)
    :return:
        rois: [num_rois, 5]
        labels: [num_rois] array of labels
        bbox_targets [num_rois, 4] array of targets for the labels.
        rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type)
    """
    fg_rois_per_image = int(np.round(ROIS_PER_IMG_REL * FG_FRACTION_REL))
    fg_rels_per_image = int(np.round(REL_FG_FRACTION * RELS_PER_IMG))

    pred_inds_np = rpn_rois[:, 0].cpu().numpy().astype(np.int64)
    pred_boxes_np = rpn_rois[:, 1:].cpu().numpy()
    gt_boxes_np = gt_boxes.cpu().numpy()
    gt_classes_np = gt_classes.cpu().numpy()
    gt_rels_np = gt_rels.cpu().numpy()

    gt_classes_np[:, 0] -= image_offset
    gt_rels_np[:, 0] -= image_offset

    num_im = gt_classes_np[:, 0].max() + 1

    rois = []
    obj_labels = []
    rel_labels = []
    bbox_targets = []

    num_box_seen = 0

    for im_ind in range(num_im):
        pred_ind = np.where(pred_inds_np == im_ind)[0]

        gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0]
        gt_boxes_i = gt_boxes_np[gt_ind]
        gt_classes_i = gt_classes_np[gt_ind, 1]
        gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:]

        pred_boxes_i = np.concatenate((pred_boxes_np[pred_ind], gt_boxes_i), 0)
        ious = bbox_overlaps(pred_boxes_i, gt_boxes_i)

        obj_inds_i, obj_labels_i, obj_assignments_i = _sel_inds(
            ious, gt_classes_i, fg_thresh, fg_rois_per_image, ROIS_PER_IMG_REL)

        all_rels_i = _sel_rels(ious[obj_inds_i],
                               pred_boxes_i[obj_inds_i],
                               obj_labels_i,
                               gt_classes_i,
                               gt_rels_i,
                               fg_thresh=fg_thresh,
                               fg_rels_per_image=fg_rels_per_image)
        all_rels_i[:, 0:2] += num_box_seen

        rois.append(
            np.column_stack((
                im_ind * np.ones(obj_inds_i.shape[0], dtype=np.float32),
                pred_boxes_i[obj_inds_i],
            )))
        obj_labels.append(obj_labels_i)
        rel_labels.append(
            np.column_stack((
                im_ind * np.ones(all_rels_i.shape[0], dtype=np.int64),
                all_rels_i,
            )))

        # print("Gtboxes i {} obj assignments i {}".format(gt_boxes_i, obj_assignments_i))
        bbox_targets.append(gt_boxes_i[obj_assignments_i])

        num_box_seen += obj_inds_i.size

    rois = torch.FloatTensor(np.concatenate(rois,
                                            0)).cuda(rpn_rois.get_device(),
                                                     non_blocking=True)
    labels = torch.LongTensor(np.concatenate(obj_labels,
                                             0)).cuda(rpn_rois.get_device(),
                                                      non_blocking=True)
    bbox_targets = torch.FloatTensor(np.concatenate(bbox_targets, 0)).cuda(
        rpn_rois.get_device(), non_blocking=True)
    rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(
        rpn_rois.get_device(), non_blocking=True)

    return rois, labels, bbox_targets, rel_labels
Example 9
def val_batch(batch_num, b, evaluator, thrs=(20, 50, 100)):
    det_res = detector[b]
    # if conf.num_gpus == 1:
    #     det_res = [det_res]
    assert conf.num_gpus == 1
    boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i = det_res

    gt_entry = {
        'gt_classes': val.gt_classes[batch_num].copy(),  # (23,)
        'gt_relations': val.relationships[batch_num].copy(),  # (29, 3)
        'gt_boxes': val.gt_boxes[batch_num].copy(),  # (23, 4)
    }
    # gt_entry = {'gt_classes': gtc[i], 'gt_relations': gtr[i], 'gt_boxes': gtb[i]}
    assert np.all(objs_i[rels_i[:, 0]] > 0) and np.all(objs_i[rels_i[:, 1]] > 0)
    # assert np.all(rels_i[:, 2] > 0)

    pred_entry = {
        'pred_boxes': boxes_i * BOX_SCALE / IM_SCALE,  # (64, 4)
        'pred_classes': objs_i,  # (64,)
        'pred_rel_inds': rels_i,  # (1202, 2)
        'obj_scores': obj_scores_i,  # (64,)
        'rel_scores': pred_scores_i,  # (1202, 51)
    }

    # pred_5ples: (num_rel, 5), (id0, id1, cls0, cls1, rel)
    pred_to_gt, pred_5ples, rel_scores = evaluator[conf.mode].evaluate_scene_graph_entry(
        gt_entry,
        pred_entry,
    )

    # SET RECALL THRESHOLD HERE
    pred_to_gt = pred_to_gt[:50]
    pred_5ples = pred_5ples[:50]

    # Get a list of objects that match, and GT objects that dont
    objs_match = (bbox_overlaps(pred_entry['pred_boxes'], gt_entry['gt_boxes']) >= 0.5) & (
            objs_i[:, None] == gt_entry['gt_classes'][None]
    )
    objs_matched = objs_match.any(1)

    has_seen = defaultdict(int)
    has_seen_gt = defaultdict(int)
    pred_ind2name = {}
    gt_ind2name = {}
    edges = {}
    missededges = {}
    badedges = {}

    if val.filenames[batch_num].startswith('625'):
        import ipdb
        ipdb.set_trace()

    # query_pred and query_gt assign names to distinct instances of the same class,
    # generating "man-1", "man-2", ...
    def query_pred(pred_ind):
        if pred_ind not in pred_ind2name:
            # "pred_ind" is the row index of objs_i, objs_i[pred_ind] gets a value representing a class
            # get the name of this class using this value and train.'ind_to_classes'
            has_seen[objs_i[pred_ind]] += 1
            pred_ind2name[pred_ind] = '{}-{}'.format(train.ind_to_classes[objs_i[pred_ind]],
                                                     has_seen[objs_i[pred_ind]])
        return pred_ind2name[pred_ind]

    def query_gt(gt_ind):
        gt_cls = gt_entry['gt_classes'][gt_ind]
        if gt_ind not in gt_ind2name:
            has_seen_gt[gt_cls] += 1
            gt_ind2name[gt_ind] = '{}-GT{}'.format(train.ind_to_classes[gt_cls], has_seen_gt[gt_cls])
        return gt_ind2name[gt_ind]

    ###############################################################################################################
    # divide the top-50 pred_5ples and the gt_5ples into three sets: edges (matched),
    #   missededges (GT but missed), badedges (predicted but not in GT)
    # 5ples: (#gt/pred rel, 5) rows of (id0, id1, cls0, cls1, rel); id0, id1 are row indices into the gt/pred_classes array
    ###############################################################################################################
    # 1. edges
    matching_pred5ples = pred_5ples[np.array([len(x) > 0 for x in pred_to_gt])]  # the matched 5-tuples: rows of pred_5ples that hit at least one GT relation
    for fiveple in matching_pred5ples:  # fiveple: the 5ples that get "matched"
        head_name = query_pred(fiveple[0])  # get "man-2"
        tail_name = query_pred(fiveple[1])  # get "ball-1"
        edges[(head_name, tail_name)] = train.ind_to_predicates[fiveple[4]] #{(man-2,ball-1): playing  ...}
    
    # 2. missededges
    gt_5ples = np.column_stack((gt_entry['gt_relations'][:, :2],
                                gt_entry['gt_classes'][gt_entry['gt_relations'][:, 0]],
                                gt_entry['gt_classes'][gt_entry['gt_relations'][:, 1]],
                                gt_entry['gt_relations'][:, 2],
                                ))  # [ind0, ind1, cls0, cls1, rel]
    has_match = reduce(np.union1d, pred_to_gt)  # GT relation indices (rows of gt_5ples) that were matched, e.g. [5, 10, 11, 12]
    
    # name the GT 5-tuples that were not matched (using "-1", "-2", "-GT..." suffixes)
    for gt in gt_5ples[np.setdiff1d(np.arange(gt_5ples.shape[0]), has_match)]:  # get the row index which doesn't get matched
        # gt is the missed gt_5ples; Head and tail
        namez = []
        for i in range(2):  # i = 0, 1 corresponds to obj1, obj2
            matching_obj = np.where(objs_match[:, gt[i]])[0]
            # matching_obj.size > 0 means some predicted box matches this GT object
            if matching_obj.size > 0:
                name = query_pred(matching_obj[0])
            else:
                name = query_gt(gt[i])
            namez.append(name)
        missededges[tuple(namez)] = train.ind_to_predicates[gt[4]]  #{(woman-2,ball-1): playing  ...}
    
    # 3. badedges
    # fiveple: the predicted 5-tuples that did not match any GT relation
    not_matching_pred5ples = pred_5ples[np.array([len(x) == 0 for x in pred_to_gt])]
    for fiveple in not_matching_pred5ples:
    #for fiveple in pred_5ples[np.setdiff1d(np.arange(pred_5ples.shape[0]), matching_pred5ples)]:
        head_name_bad = query_pred(fiveple[0])
        tail_name_bad = query_pred(fiveple[1])
        badedges[(head_name_bad, tail_name_bad)] = train.ind_to_predicates[fiveple[4]]
        # two "if" branch kill most 5ples 
        #if fiveple[0] in pred_ind2name:
           # if fiveple[1] in pred_ind2name:
               # badedges[(pred_ind2name[fiveple[0]], pred_ind2name[fiveple[1]])] = train.ind_to_predicates[fiveple[4]]

    theimg = load_unscaled(val.filenames[batch_num])
    draw1 = ImageDraw.Draw(theimg)
    theimg2 = theimg.copy()
    draw2 = ImageDraw.Draw(theimg2)
    theimg3 = theimg.copy()
    draw3 = ImageDraw.Draw(theimg3)    

    # use pred_ind2name / gt_ind2name so that instances of the same class keep consistent names
    # the dict keys are the id0 / id1 indices from the 5-tuples
    # draw "man-1", "man-2", ... onto the corresponding object's box
    for pred_ind in pred_ind2name.keys():
        draw1 = draw_box(draw1, pred_entry['pred_boxes'][pred_ind],
                         cls_ind=objs_i[pred_ind],
                         text_str=pred_ind2name[pred_ind])
    for gt_ind in gt_ind2name.keys():
        draw2 = draw_box(draw2, gt_entry['gt_boxes'][gt_ind],
                         cls_ind=gt_entry['gt_classes'][gt_ind],
                         text_str=gt_ind2name[gt_ind])
    #import ipdb
    #ipdb.set_trace()
    for pred_64 in range(pred_entry['pred_boxes'].shape[0]):
        if pred_64 not in pred_ind2name: # pred_ind2name's key is the index of pred_boxes (64)
            class_score_text = train.ind_to_classes[pred_entry['pred_classes'][pred_64]] + \
                            '--' + str(pred_entry['obj_scores'][pred_64])
            draw3 = draw_box(draw3, pred_entry['pred_boxes'][pred_64,:],
                         cls_ind= pred_entry['pred_classes'][pred_64],
                         text_str=class_score_text)
        
    # "-60" means recall is 60
    recall = int(100 * len(reduce(np.union1d, pred_to_gt)) / gt_entry['gt_relations'].shape[0])

    id = '{}-{}'.format(val.filenames[batch_num].split('/')[-1][:-4], recall)
    dirname = '/home/yiwuzhong/motifs/qualitative/' + conf.mode + '/'
    pathname = os.path.join(dirname)
    if not os.path.exists(pathname):
        os.mkdir(pathname)
    theimg.save(os.path.join(pathname, id + '-detected.jpg'), quality=100, subsampling=0)
    theimg2.save(os.path.join(pathname, id + '-missed.jpg'), quality=100, subsampling=0)
    theimg3.save(os.path.join(pathname, id + '-rcnnbox.jpg'), quality=100, subsampling=0)
    
    #import ipdb
    #ipdb.set_trace()
    with open(os.path.join(pathname, id + '.txt'), 'w') as f:
        f.write('Good: gt and detected \n')
        for (o1, o2), p in edges.items():
            f.write('{} - {} - {}\n'.format(o1, p, o2))
        f.write('\nMissed: gt but missed \n')
        for (o1, o2), p in missededges.items():
            f.write('{} - {} - {}\n'.format(o1, p, o2))
        f.write('\nBad: not gt but detected \n')
        for (o1, o2), p in badedges.items():
            f.write('{} - {} - {}\n'.format(o1, p, o2))

    with open(os.path.join(pathname, id + '-box.txt'), 'w') as bb:
        bb.write('Detected Boxes from Faster RCNN\n')
        for bbi in range(pred_entry['pred_classes'].shape[0]):
            bb.write('{}: {}\n'.format(train.ind_to_classes[pred_entry['pred_classes'][bbi]], pred_entry['obj_scores'][bbi]))
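
The recall value encoded in the output file name above is image-level relation recall: the fraction of GT relations hit by at least one of the (score-sorted) predicted triplets. A minimal sketch of recall@K from a pred_to_gt list (one list of matched GT-relation indices per prediction); recall_at_k is an illustrative helper, not part of the evaluator:

import numpy as np
from functools import reduce

def recall_at_k(pred_to_gt, num_gt_rels, k=50):
    """Sketch: fraction of GT relations matched by the top-k predicted triplets."""
    matched = reduce(np.union1d, pred_to_gt[:k], np.array([]))  # unique GT relation indices hit
    return float(len(matched)) / max(num_gt_rels, 1)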
Example 10
def proposal_assignments_det(rpn_rois,
                             gt_boxes,
                             gt_classes,
                             image_offset,
                             fg_thresh=0.5):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.
    :param rpn_rois: [img_ind, x1, y1, x2, y2]
    :param gt_boxes:   [num_boxes, 4] array of x0, y0, x1, y1
    :param gt_classes: [num_boxes, 2] array of [img_ind, class]
    :param fg_thresh: Overlap threshold for a ROI to be considered foreground (if >= fg_thresh)
    :return:
        rois: [num_rois, 5]
        labels: [num_rois] array of labels
        bbox_targets [num_rois, 4] array of targets for the labels.
    """
    fg_rois_per_image = int(np.round(ROIS_PER_IMG * FG_FRACTION))

    gt_img_inds = gt_classes[:, 0] - image_offset

    all_boxes = torch.cat([rpn_rois[:, 1:], gt_boxes], 0)

    ims_per_box = torch.cat([rpn_rois[:, 0].long(), gt_img_inds], 0)

    im_sorted, idx = torch.sort(ims_per_box, 0)
    all_boxes = all_boxes[idx]

    # Assume that the GT boxes are already sorted in terms of image id
    num_images = int(im_sorted[-1]) + 1

    labels = []
    rois = []
    bbox_targets = []
    for im_ind in range(num_images):
        g_inds = (gt_img_inds == im_ind).nonzero()

        if g_inds.dim() == 0:
            continue
        g_inds = g_inds.squeeze(1)
        g_start = g_inds[0]
        g_end = g_inds[-1] + 1

        t_inds = (im_sorted == im_ind).nonzero().squeeze(1)
        t_start = t_inds[0]
        t_end = t_inds[-1] + 1

        # Max overlaps: for each predicted box, get the max ROI
        # Get the indices into the GT boxes too (must offset by the box start)
        ious = bbox_overlaps(all_boxes[t_start:t_end], gt_boxes[g_start:g_end])
        max_overlaps, gt_assignment = ious.max(1)
        max_overlaps = max_overlaps.cpu().numpy()
        # print("Best overlap is {}".format(max_overlaps.max()))
        # print("\ngt assignment is {} while g_start is {} \n ---".format(gt_assignment, g_start))
        gt_assignment += g_start

        keep_inds_np, num_fg = _sel_inds(max_overlaps, fg_thresh,
                                         fg_rois_per_image, ROIS_PER_IMG)

        if keep_inds_np.size == 0:
            continue

        keep_inds = torch.LongTensor(keep_inds_np).cuda(rpn_rois.get_device())

        labels_ = gt_classes[:, 1][gt_assignment[keep_inds]]
        bbox_target_ = gt_boxes[gt_assignment[keep_inds]]

        # Clamp labels_ for the background RoIs to 0
        if num_fg < labels_.size(0):
            labels_[num_fg:] = 0

        rois_ = torch.cat((
            im_sorted[t_start:t_end, None][keep_inds].float(),
            all_boxes[t_start:t_end][keep_inds],
        ), 1)

        labels.append(labels_)
        rois.append(rois_)
        bbox_targets.append(bbox_target_)

    rois = torch.cat(rois, 0)
    labels = torch.cat(labels, 0)
    bbox_targets = torch.cat(bbox_targets, 0)
    return rois, labels, bbox_targets
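
The foreground/background sampling is hidden inside _sel_inds, which is not shown in these excerpts. Below is a hedged guess at what such a sampler typically does for the detection call above (max_overlaps, fg_thresh, fg_rois_per_image, ROIS_PER_IMG): keep up to fg_rois_per_image foreground RoIs (IoU >= fg_thresh), fill the rest with background RoIs, and return the kept indices foreground-first so that labels_[num_fg:] = 0 works. This is an assumption, not the repo's actual implementation:

import numpy as np

def sel_inds_sketch(max_overlaps, fg_thresh, fg_rois_per_image, rois_per_image):
    """Hypothetical stand-in for _sel_inds (detection variant).

    Returns (keep_inds, num_fg): sampled RoI indices, foreground first.
    """
    fg_inds = np.where(max_overlaps >= fg_thresh)[0]
    bg_inds = np.where(max_overlaps < fg_thresh)[0]

    num_fg = min(fg_rois_per_image, fg_inds.size)
    if fg_inds.size > 0:
        fg_inds = np.random.choice(fg_inds, size=num_fg, replace=False)

    num_bg = min(rois_per_image - num_fg, bg_inds.size)
    if bg_inds.size > 0:
        bg_inds = np.random.choice(bg_inds, size=num_bg, replace=False)

    keep_inds = np.concatenate((fg_inds, bg_inds)).astype(np.int64)
    return keep_inds, num_fg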
Example 12
    def forward(self, obj_fmaps, obj_logits, im_inds, obj_labels=None, box_priors=None, boxes_per_cls=None, batch_size=None,
                rois=None, od_box_deltas=None, im_sizes=None, image_offset=None, gt_classes=None, gt_boxes=None, ):
        """
        Forward pass through the object and edge context
        :param obj_fmaps:
        :param obj_logits:
        :param im_inds:
        :param obj_labels:
        :param box_priors:
        :return:
        """
        obj_embed = F.softmax(obj_logits, dim=1) @ self.obj_embed.weight
        pos_embed = self.pos_embed(Variable(center_size(box_priors)))
        obj_pre_rep = torch.cat((obj_fmaps, obj_embed, pos_embed), 1)


        if self.mode == 'predcls':
            obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_classes))
        else:
            if self.mode == 'sgcls':

                obj_dists2 = self.decoder_lin1(obj_pre_rep)
                obj_dists2 = self.decoder_lin2(obj_dists2.view(-1, 1, 1024), 1)

                obj_dists2 = obj_dists2[1]

                obj_dists2 = self.decoder_lin3(obj_dists2.view(-1, 1024))

            else:
                # this is for sgdet

                obj_dists2 = self.decoder_lin1(obj_pre_rep)

                perm, inv_perm, ls_transposed = self.sort_rois(im_inds.data, None, box_priors)
                obj_dists2 = obj_dists2[perm].contiguous()
                obj_dists2 = PackedSequence(obj_dists2, torch.tensor(ls_transposed))
                obj_dists2, lengths1 = pad_packed_sequence(obj_dists2, batch_first=False)


                obj_dists2 = self.decoder_lin2(obj_dists2.view(-1, batch_size, 1024), batch_size)[1]


                obj_dists2, _ = pack_padded_sequence(obj_dists2, lengths1, batch_first=False)
                obj_dists2 = self.decoder_lin3(obj_dists2.view(-1, 1024))
                obj_dists2 = obj_dists2[inv_perm]


                if (not self.training and not self.mode == 'gtbox') or self.mode in ('sgdet', 'refinerels'):
                    # try: don't apply NMS here, but after our own obj_classifier
                    nms_inds, nms_scores, nms_preds, nms_boxes_assign, nms_boxes, nms_imgs = self.nms_boxes(
                        obj_dists2.clone().detach(),
                        rois,
                        od_box_deltas.clone().detach(), im_sizes,
                    )
                    im_inds = nms_imgs + image_offset
                    obj_dists2 = obj_dists2[nms_inds]
                    obj_fmap = obj_fmaps[nms_inds]
                    box_deltas = od_box_deltas[nms_inds]
                    box_priors = nms_boxes[:, 0]
                    rois = rois[nms_inds]

                    if self.training and not self.mode == 'gtbox':
                        # NOTE: If we're doing this during training, we need to assign labels here.
                        pred_to_gtbox = bbox_overlaps(box_priors, gt_boxes).data
                        pred_to_gtbox[im_inds.data[:, None] != gt_classes.data[None, :, 0]] = 0.0

                        max_overlaps, argmax_overlaps = pred_to_gtbox.max(1)
                        rm_obj_labels = gt_classes[:, 1][argmax_overlaps]
                        rm_obj_labels[max_overlaps < 0.5] = 0
                    else:
                        rm_obj_labels = None

        if self.mode == 'sgdet' and not self.training:  # have tried in training
            # NMS here for baseline

            probs = F.softmax(obj_dists2, 1)
            nms_mask = obj_dists2.data.clone()
            nms_mask.zero_()
            for c_i in range(1, obj_dists2.size(1)):
                scores_ci = probs.data[:, c_i]
                boxes_ci = nms_boxes.data[:, c_i]

                keep = apply_nms(scores_ci, boxes_ci,
                                 pre_nms_topn=scores_ci.size(0), post_nms_topn=scores_ci.size(0),
                                 nms_thresh=0.5)#nms_thresh= 0.3 default
                nms_mask[:, c_i][keep] = 1

            obj_preds = Variable(nms_mask * probs.data, volatile=True)[:, 1:].max(1)[1] + 1  # this for sgdet test

            #obj_preds=obj_dists2[:,1:].max(1)[1] + 1
        else:
            if self.mode == 'sgdet':
                # use gt
                obj_preds = rm_obj_labels if rm_obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1
                # use_predicted label
                # obj_preds = obj_dists2[:, 1:].max(1)[1] + 1
            else:
                obj_preds = obj_labels if obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1

        if self.mode == 'sgdet':
            return obj_dists2, obj_preds, im_inds, box_priors, rm_obj_labels, rois, nms_boxes
        else:
            return obj_dists2, obj_preds
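
The sgdet test branch above builds a per-class NMS mask and then takes the argmax over the surviving scores. A minimal sketch of the same idea, using torchvision.ops.nms as a stand-in for the repo's apply_nms:

import torch
import torch.nn.functional as F
from torchvision.ops import nms

def per_class_nms_preds(obj_dists, boxes_per_cls, nms_thresh=0.5):
    """Sketch: zero out detections suppressed by per-class NMS, then pick the best class.

    obj_dists:     [num_box, num_classes] class logits (class 0 = background)
    boxes_per_cls: [num_box, num_classes, 4] per-class regressed boxes
    """
    probs = F.softmax(obj_dists, dim=1)
    nms_mask = torch.zeros_like(probs)
    for c in range(1, probs.size(1)):                  # skip background
        keep = nms(boxes_per_cls[:, c], probs[:, c], nms_thresh)
        nms_mask[keep, c] = 1.0
    # highest surviving non-background score decides the predicted class
    obj_preds = (nms_mask * probs)[:, 1:].max(1)[1] + 1
    return obj_preds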
Example 13
def proposal_assignments_det(rpn_rois, gt_boxes, gt_classes, image_offset, fg_thresh=0.5):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.
    :param rpn_rois: [img_ind, x1, y1, x2, y2]
    :param gt_boxes:   [num_boxes, 4] array of x0, y0, x1, y1
    :param gt_classes: [num_boxes, 2] array of [img_ind, class]
    :param fg_thresh: Overlap threshold for a ROI to be considered foreground (if >= fg_thresh)
    :return:
        rois: [num_rois, 5]
        labels: [num_rois] array of labels
        bbox_targets [num_rois, 4] array of targets for the labels.
    """

    fg_rois_per_image = int(np.round(ROIS_PER_IMG * FG_FRACTION))
    # gt_classes[:, 0] - image_offset lives on the same image-index scale as rois[:, 0] (e.g. 88 GT boxes vs. 12000 RoIs)
    gt_img_inds = gt_classes[:, 0] - image_offset

    all_boxes = torch.cat([rpn_rois[:, 1:], gt_boxes], 0)  # [88+12000, 4]

    ims_per_box = torch.cat([rpn_rois[:, 0].long(), gt_img_inds], 0)  # [88+12000, 1]
    # sort according to the image index from 0 to 5 (6 images)
    im_sorted, idx = torch.sort(ims_per_box, 0)
    all_boxes = all_boxes[idx]

    # Assume that the GT boxes are already sorted in terms of image id
    num_images = int(im_sorted[-1]) + 1

    labels = []
    rois = []
    bbox_targets = []
    for im_ind in range(num_images):  # iterate over each image
        # find the indices of the GT boxes belonging to this image
        g_inds = (gt_img_inds == im_ind).nonzero()

        if g_inds.dim() == 0:
            continue
        g_inds = g_inds.squeeze(1)
        g_start = g_inds[0]  # the start index of certain image in gt_img_inds
        g_end = g_inds[-1] + 1    # the end index of certain image in gt_img_inds

        t_inds = (im_sorted == im_ind).nonzero().squeeze(1)
        t_start = t_inds[0]  # the start index of certain image in im_sorted
        t_end = t_inds[-1] + 1    # the end index of certain image in im_sorted

        # Max overlaps: for each predicted box, get the max ROI
        # Get the indices into the GT boxes too (must offset by the box start)
        # compare rois+gtbox and gtbox; ious [t_inds.shape[0], g_inds.shape[0]]
        ious = bbox_overlaps(all_boxes[t_start:t_end], gt_boxes[g_start:g_end]) 
        max_overlaps, gt_assignment = ious.max(1)  # gt_assignment is a relative index
        max_overlaps = max_overlaps.cpu().numpy()
        # print("Best overlap is {}".format(max_overlaps.max()))
        # print("\ngt assignment is {} while g_start is {} \n ---".format(gt_assignment, g_start))
        gt_assignment += g_start  # the absolute index in gt_classes[:,0]; shape ex: [2011] means 2011 rois+gt boxes of certain image
        
        # keep_inds_np: foreground index + background index; [256,] ex: 39+217
        keep_inds_np, num_fg = _sel_inds(max_overlaps, fg_thresh, fg_rois_per_image,
                                         ROIS_PER_IMG)

        if keep_inds_np.size == 0:
            continue
        # convert the numpy array to a LongTensor
        keep_inds = torch.LongTensor(keep_inds_np).cuda(rpn_rois.get_device())

        labels_ = gt_classes[:, 1][gt_assignment[keep_inds]]  # [256] * 6 = 1536
        bbox_target_ = gt_boxes[gt_assignment[keep_inds]]  #[256] * 6 = 1536

        # Clamp labels_ for the background RoIs to 0
        if num_fg < labels_.size(0):
            labels_[num_fg:] = 0
        # rois: [256, 5]
        rois_ = torch.cat((
            im_sorted[t_start:t_end, None][keep_inds].float(),
            all_boxes[t_start:t_end][keep_inds],
        ), 1)

        labels.append(labels_)
        rois.append(rois_)
        bbox_targets.append(bbox_target_)

    rois = torch.cat(rois, 0)
    labels = torch.cat(labels, 0)
    bbox_targets = torch.cat(bbox_targets, 0)
    # rois, labels, bbox_targets are all Tensor
    return rois, labels, bbox_targets
Example 14
    def forward(self,
                x,
                im_sizes,
                image_offset,
                gt_boxes=None,
                gt_classes=None,
                gt_rels=None,
                proposals=None,
                train_anchor_inds=None,
                gt_boxes_human=None,
                gt_human_classes=None,
                train_anchor_inds_human=None,
                return_fmap=False):
        """
        Forward pass for detection
        :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
        :param im_sizes: A numpy array of (h, w, scale) for each image.
        :param image_offset: Offset onto what image we're on for MGPU training (if single GPU this is 0)

        Training parameters:
        :param gt_boxes: [num_gt, 4] GT boxes over the batch.
        :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
        :param proposals: things
        :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that will
                                  be used to compute the training loss. Each (img_ind, fpn_idx)
        :return: If train:
        """
        fmap = self.feature_map(x)

        if self.training:
            obj_emb = self.obj_embed(gt_classes[:, 1])
            verb_emb = self.verb_embed(gt_human_classes[:, 1])
        else:
            obj_emb = self.obj_embed.weight[1:]
            verb_emb = self.verb_embed.weight[1:]

        # Get boxes from RPN
        rois, obj_labels, bbox_targets, rpn_scores, rpn_box_deltas, rel_labels = \
            self.get_boxes(fmap, im_sizes, image_offset, self.rpn_head, obj_emb, gt_boxes,
                           gt_classes, gt_rels, train_anchor_inds, proposals=proposals, mode="obj")

        # Get boxes from RPN_human
        rois_human, verb_labels, bbox_targets_human, rpn_scores_human, rpn_box_deltas_human, rel_labels = \
            self.get_boxes(fmap, im_sizes, image_offset, self.rpn_head_human, None, gt_boxes_human,
                           gt_human_classes, gt_rels, train_anchor_inds_human, proposals=proposals, mode="human")

        # Now classify them (obj)
        obj_fmap = self.obj_feature_map(
            fmap, rois)  ## fmap: 1024*40*40, rois: 2048*5

        od_obj_dists = self.score_fc(obj_fmap)  ## obj_fmap: 2048*2048
        od_box_deltas = self.bbox_fc(obj_fmap).view(-1, len(
            self.classes), 4) if self.mode != 'gtbox' else None

        od_box_priors = rois[:, 1:]

        ## verb classification

        human_fmap = self.obj_feature_map(
            fmap, rois_human)  ## fmap: 1024*40*40, rois: 2048*5

        od_human_dists = self.score_fc_human(
            human_fmap)  ## obj_fmap: 2048*2048

        od_human_box_deltas = self.bbox_fc_human(human_fmap).view(
            -1, 2, 4) if self.mode != 'gtbox' else None

        od_human_box_priors = rois_human[:, 1:]

        if (not self.training
                and not self.mode == 'gtbox') or self.mode in ('proposals',
                                                               'refinerels'):
            nms_inds, nms_scores, nms_preds, nms_boxes_assign, nms_boxes, nms_imgs = self.nms_boxes(
                od_obj_dists,
                rois,
                od_box_deltas,
                im_sizes,
            )
            im_inds = nms_imgs + image_offset
            obj_dists = od_obj_dists[nms_inds]
            obj_fmap = obj_fmap[nms_inds]
            box_deltas = od_box_deltas[nms_inds]
            box_priors = nms_boxes[:, 0]

            if (not self.training and not self.mode == 'gtbox'):
                # NOTE: If we're doing this during training, we need to assign labels here.
                pred_to_gtbox = bbox_overlaps(box_priors, gt_boxes).data
                pred_to_gtbox[im_inds.data[:,
                                           None] != gt_classes.data[None, :,
                                                                    0]] = 0.0

                max_overlaps, argmax_overlaps = pred_to_gtbox.max(1)
                rm_obj_labels = gt_classes[:, 1][argmax_overlaps]
                rm_obj_labels[max_overlaps < 0.5] = 0
            else:
                rm_obj_labels = None
        else:
            im_inds = rois[:, 0].long().contiguous() + image_offset
            nms_scores = None
            nms_preds = None
            nms_boxes_assign = None
            nms_boxes = None
            box_priors = rois[:, 1:]
            rm_obj_labels = obj_labels
            box_deltas = od_box_deltas
            obj_dists = od_obj_dists

        # rel_labels = rel_assignments(im_inds.data, box_priors.data, rm_obj_labels.data,
        #                                     gt_boxes.data, gt_classes.data, gt_rels.data,
        #                                     image_offset, filter_non_overlap=True,
        #                                     num_sample_per_gt=1)

        return Result(od_obj_dists=od_obj_dists,
                      rm_obj_dists=obj_dists,
                      obj_scores=nms_scores,
                      obj_preds=nms_preds,
                      obj_fmap=obj_fmap,
                      od_box_deltas=od_box_deltas,
                      rm_box_deltas=box_deltas,
                      od_box_targets=bbox_targets,
                      rm_box_targets=bbox_targets,
                      od_box_priors=od_box_priors,
                      rm_box_priors=box_priors,
                      boxes_assigned=nms_boxes_assign,
                      boxes_all=nms_boxes,
                      od_obj_labels=obj_labels,
                      rm_obj_labels=rm_obj_labels,
                      rpn_scores=rpn_scores,
                      rpn_box_deltas=rpn_box_deltas,
                      rel_labels=rel_labels,
                      im_inds=im_inds,
                      rpn_scores_human=rpn_scores_human,
                      rpn_box_deltas_human=rpn_box_deltas_human,
                      od_human_dists=od_human_dists,
                      od_human_box_deltas=od_human_box_deltas,
                      od_human_bbox_targets_human=bbox_targets_human,
                      od_human_box_priors=od_human_box_priors,
                      od_verb_labels=verb_labels,
                      fmap=fmap if return_fmap else None)
Example 15
    def rpn_boxes(self,
                  fmap,
                  im_sizes,
                  image_offset,
                  gt_boxes=None,
                  gt_classes=None,
                  gt_rels=None,
                  train_anchor_inds=None,
                  proposals=None):
        """
        Gets boxes from the RPN
        :param fmap:
        :param im_sizes:
        :param image_offset:
        :param gt_boxes:
        :param gt_classes:
        :param gt_rels:
        :param train_anchor_inds:
        :return:
        """
        # rpn_feats:[6,37,37,20,6], scores+deltas of all anchors; RPN_Head.init & forward
        rpn_feats = self.rpn_head(fmap)
        # roi_proposals: pre_nms_topn=6000, post_nms_topn=1000, nms_thresh=0.7; filters boxes from ~160k down to ~4275 (6 images)
        # take the top min(pre_nms_topn, #boxes) boxes per image, apply NMS, then keep at most post_nms_topn per image
        # rois: [12000, 5] if rpntrain  /  [4000+, 5] if refinerels (sgdet)
        rois = self.rpn_head.roi_proposals(
            rpn_feats,
            im_sizes,
            nms_thresh=0.7,
            pre_nms_topn=12000
            if self.training and self.mode == 'rpntrain' else 6000,
            post_nms_topn=2000
            if self.training and self.mode == 'rpntrain' else 1000,
        )
        #ipdb.set_trace()
        if self.training:
            if gt_boxes is None or gt_classes is None or train_anchor_inds is None:
                raise ValueError(
                    "Must supply GT boxes, GT classes, trainanchors when in train mode"
                )

            rpn_scores, rpn_box_deltas = self.rpn_head.anchor_preds(
                rpn_feats, train_anchor_inds, image_offset)

            if gt_rels is not None and self.mode == 'rpntrain':
                raise ValueError(
                    "Training the object detector and the relationship model with detection "
                    "at the same time isn't supported")

            # sgdet/refinerels
            if self.mode == 'refinerels':
                # NOTE: If we're doing this during training, we need to assign labels here.
                #ipdb.set_trace()
                pred_to_gtbox = bbox_overlaps(
                    rois[:, 1:], gt_boxes.data)  # [4000+, #gtboxes]
                im_inds = (rois[:, 0] + image_offset).long()  # [4000+]
                pred_to_gtbox[im_inds[:, None] != gt_classes.data[
                    None, :,
                    0]] = 0.0  # gt_classes, (im_inds, class); match the image index

                max_overlaps, argmax_overlaps = pred_to_gtbox.max(1)
                # gt labels assignment; gt_classes: [#gtbox,2]
                labels = gt_classes[:, 1][argmax_overlaps]
                labels[max_overlaps < 0.5] = 0  # bad boxes; invalid labels
                # gt boxes assignment
                bbox_targets = gt_boxes[argmax_overlaps, :]
                tem = max_overlaps.view(max_overlaps.size()[0], 1)
                bbox_targets.data[torch.cat(
                    (tem, tem, tem, tem), 1) < 0.5] = 1  # arbitrary value
                # here rois becomes a Variable (not a plain Tensor), unlike in 'rpntrain' and 'gtbox'
                all_rois = Variable(rois)
                # Potentially you could add in GT rois if none match
                # is_match = (bbox_overlaps(rois[:,1:].contiguous(), gt_boxes.data) > 0.5).long()
                # gt_not_matched = (is_match.sum(0) == 0).nonzero()
                #
                # if gt_not_matched.dim() > 0:
                #     gt_to_add = torch.cat((gt_classes[:,0,None][gt_not_matched.squeeze(1)].float(),
                #                            gt_boxes[gt_not_matched.squeeze(1)]), 1)
                #
                #     all_rois = torch.cat((all_rois, gt_to_add),0)
                #     num_gt = gt_to_add.size(0)
                #labels = None
                #bbox_targets = None
                rel_labels = None

            # 'rpntrain' / 'gtbox'(sgcls)
            else:
                # all_rois: [1536, 5]; all_rois, labels and bbox_targets are all plain Tensors
                all_rois, labels, bbox_targets = proposal_assignments_det(
                    rois,
                    gt_boxes.data,
                    gt_classes.data,
                    image_offset,
                    fg_thresh=0.5)
                rel_labels = None

        else:
            all_rois = Variable(rois, volatile=True)
            labels = None
            bbox_targets = None
            rel_labels = None
            rpn_box_deltas = None
            rpn_scores = None

        return all_rois, labels, bbox_targets, rpn_scores, rpn_box_deltas, rel_labels
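
roi_proposals itself is not shown in these excerpts; per image it typically does top-k filtering by objectness, NMS, and a post-NMS cap, matching the pre_nms_topn / post_nms_topn / nms_thresh arguments above. A minimal sketch of that filtering for one image, with torchvision.ops.nms; filter_proposals is an illustrative helper, not the repo's implementation:

import torch
from torchvision.ops import nms

def filter_proposals(boxes, scores, nms_thresh=0.7, pre_nms_topn=6000, post_nms_topn=1000):
    """Sketch: keep the top-scoring boxes, run NMS, then keep the best survivors.

    boxes: [N, 4] decoded proposals for ONE image, scores: [N] objectness scores.
    """
    num_keep = min(pre_nms_topn, scores.numel())
    top_scores, order = scores.topk(num_keep)          # top-k before NMS
    top_boxes = boxes[order]
    keep = nms(top_boxes, top_scores, nms_thresh)      # suppress overlapping proposals
    keep = keep[:post_nms_topn]                        # cap after NMS
    return top_boxes[keep], top_scores[keep]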
Example 16
def proposal_assignments_postnms(
        rois, gt_boxes, gt_classes, gt_rels, nms_inds, image_offset, fg_thresh=0.5,
        max_objs=100, max_rels=100, rand_val=0.01):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.
    :param rpn_rois: [img_ind, x1, y1, x2, y2]
    :param gt_boxes:   [num_boxes, 4] array of (x0, y0, x1, y1)
    :param gt_classes: [num_boxes, 2] array of [img_ind, class]
    :param gt_rels:    [num_boxes, 4] array of [img_ind, box_0, box_1, rel type]
    :param fg_thresh: Overlap threshold for a ROI to be considered foreground (if >= fg_thresh)
    :return:
        rois: [num_rois, 5]
        labels: [num_rois] array of labels
        rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type)
    """
    pred_inds_np = rois[:, 0].cpu().numpy().astype(np.int64)
    pred_boxes_np = rois[:, 1:].cpu().numpy()
    nms_inds_np = nms_inds.cpu().numpy()
    sup_inds_np = np.setdiff1d(np.arange(pred_boxes_np.shape[0]), nms_inds_np)

    # split into chosen and suppressed
    chosen_inds_np = pred_inds_np[nms_inds_np]
    chosen_boxes_np = pred_boxes_np[nms_inds_np]

    suppre_inds_np = pred_inds_np[sup_inds_np]
    suppre_boxes_np = pred_boxes_np[sup_inds_np]

    gt_boxes_np = gt_boxes.cpu().numpy()
    gt_classes_np = gt_classes.cpu().numpy()
    gt_rels_np = gt_rels.cpu().numpy()

    gt_classes_np[:, 0] -= image_offset
    gt_rels_np[:, 0] -= image_offset

    num_im = gt_classes_np[:, 0].max()+1

    rois = []
    obj_labels = []
    rel_labels = []
    num_box_seen = 0

    for im_ind in range(num_im):
        chosen_ind = np.where(chosen_inds_np == im_ind)[0]
        suppre_ind = np.where(suppre_inds_np == im_ind)[0]

        gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0]
        gt_boxes_i = gt_boxes_np[gt_ind]
        gt_classes_i = gt_classes_np[gt_ind, 1]
        gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:]

        # Get IOUs between chosen and GT boxes and if needed we'll add more in

        chosen_boxes_i = chosen_boxes_np[chosen_ind]
        suppre_boxes_i = suppre_boxes_np[suppre_ind]

        n_chosen = chosen_boxes_i.shape[0]
        n_suppre = suppre_boxes_i.shape[0]
        n_gt_box = gt_boxes_i.shape[0]

        # add a teensy bit of random noise because some GT boxes might be duplicated, etc.
        pred_boxes_i = np.concatenate((chosen_boxes_i, suppre_boxes_i, gt_boxes_i), 0)
        ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) + rand_val*(
            np.random.rand(pred_boxes_i.shape[0], gt_boxes_i.shape[0])-0.5)

        # Let's say that a box can only be assigned ONCE for now because we've already done
        # the NMS and stuff.
        is_hit = ious > fg_thresh

        obj_assignments_i = is_hit.argmax(1)
        obj_assignments_i[~is_hit.any(1)] = -1

        vals, first_occurance_ind = np.unique(obj_assignments_i, return_index=True)
        obj_assignments_i[np.setdiff1d(
            np.arange(obj_assignments_i.shape[0]), first_occurance_ind)] = -1

        extra_to_add = np.where(obj_assignments_i[n_chosen:] != -1)[0] + n_chosen

        # Add them in somewhere at random
        num_inds_to_have = min(max_objs, n_chosen + extra_to_add.shape[0])
        boxes_i = np.zeros((num_inds_to_have, 4), dtype=np.float32)
        labels_i = np.zeros(num_inds_to_have, dtype=np.int64)

        inds_from_nms = np.sort(np.random.choice(num_inds_to_have, size=n_chosen, replace=False))
        inds_from_elsewhere = np.setdiff1d(np.arange(num_inds_to_have), inds_from_nms)

        boxes_i[inds_from_nms] = chosen_boxes_i
        labels_i[inds_from_nms] = gt_classes_i[obj_assignments_i[:n_chosen]]

        boxes_i[inds_from_elsewhere] = pred_boxes_i[extra_to_add]
        labels_i[inds_from_elsewhere] = gt_classes_i[obj_assignments_i[extra_to_add]]

        # Now, we do the relationships, same as for the rel case
        all_rels_i = _sel_rels(bbox_overlaps(boxes_i, gt_boxes_i),
                               boxes_i,
                               labels_i,
                               gt_classes_i,
                               gt_rels_i,
                               fg_thresh=fg_thresh,
                               fg_rels_per_image=100)
        all_rels_i[:,0:2] += num_box_seen

        rois.append(np.column_stack((
            im_ind * np.ones(boxes_i.shape[0], dtype=np.float32),
            boxes_i,
        )))
        obj_labels.append(labels_i)
        rel_labels.append(np.column_stack((
            im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64),
            all_rels_i,
        )))
        num_box_seen += boxes_i.shape[0]  # offset by the number of boxes in this image (not the number of elements)

    rois = torch.FloatTensor(np.concatenate(rois, 0)).cuda(gt_boxes.get_device(), non_blocking=True)
    labels = torch.LongTensor(np.concatenate(obj_labels, 0)).cuda(gt_boxes.get_device(), non_blocking=True)
    rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(gt_boxes.get_device(),
                                                                      non_blocking=True)

    return rois, labels, rel_labels
Example 17
    def forward(self,
                x,
                im_sizes,
                image_offset,
                gt_boxes=None,
                gt_classes=None,
                gt_rels=None,
                proposals=None,
                train_anchor_inds=None,
                return_fmap=False):
        """
        Forward pass for detection
        :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
        :param im_sizes: A numpy array of (h, w, scale) for each image.
        :param image_offset: Offset onto what image we're on for MGPU training (if single GPU this is 0)

        Training parameters:
        :param gt_boxes: [num_gt, 4] GT boxes over the batch.
        :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
        :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that will
                                  be used to compute the training loss. Each (img_ind, fpn_idx)
        :return: If train:
            scores, boxdeltas, labels, boxes, boxtargets, rpnscores, rpnboxes, rellabels
            
            if test:
            prob dists, boxes, img inds, maxscores, classes
            
        """

        # Detector
        result = self.detector(x,
                               im_sizes,
                               image_offset,
                               gt_boxes,
                               gt_classes,
                               gt_rels,
                               proposals,
                               train_anchor_inds,
                               return_fmap=True)
        if result.is_none():
            raise ValueError("heck")

        #rcnn_pred = result.rm_obj_dists[:, 1:].max(1)[1] + 1  # +1: because the index is in 150-d but truth is 151-d
        #rcnn_ap = torch.mean((rcnn_pred == result.rm_obj_labels).float().cpu())

        im_inds = result.im_inds - image_offset
        # boxes: [#boxes, 4] box priors without deltas applied; .detach() avoids the narrow() backward error
        boxes = result.rm_box_priors.detach()

        # Box and obj_dists average precision
        obj_scores = F.softmax(result.rm_obj_dists, dim=1)
        result.rm_obj_preds = obj_scores.data[:, 1:].max(1)[1]
        result.rm_obj_preds = result.rm_obj_preds + 1
        twod_inds = arange(
            result.rm_obj_preds) * self.num_classes + result.rm_obj_preds
        bboxes = result.boxes_all.view(-1, 4)[twod_inds].view(
            result.boxes_all.size(0), 4)
        pred_to_gtbox = bbox_overlaps(bboxes.data, gt_boxes.data)
        im_inds = result.im_inds
        pred_to_gtbox[im_inds.data[:, None] != gt_classes.data[None, :,
                                                               0]] = 0.0
        max_overlaps, argmax_overlaps = pred_to_gtbox.max(1)
        labels = gt_classes[:, 1][argmax_overlaps]
        labels[max_overlaps < 0.5] = 0
        labels[result.rm_obj_preds != result.rm_obj_labels.data] = 0
        result.ratio = torch.nonzero(labels).size(0) / labels.size(0)
        return result.ratio
        """
Esempio n. 18
0
def rel_assignments_det(im_inds,
                        rpn_rois,
                        roi_gtlabels,
                        gt_boxes,
                        gt_classes,
                        gt_rels,
                        image_offset,
                        fg_thresh=0.5,
                        num_sample_per_gt=4,
                        filter_non_overlap=True):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.
    :param rpn_rois: [img_ind, x1, y1, x2, y2]
    :param gt_boxes:   [num_boxes, 4] array of x0, y0, x1, y1]
    :param gt_classes: [num_boxes, 2] array of [img_ind, class]
    :param gt_rels     [num_boxes, 4] array of [img_ind, box_0, box_1, rel type]
    :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
    :return:
        rois: [num_rois, 5]
        labels: [num_rois] array of labels
        bbox_targets [num_rois, 4] array of targets for the labels.
        rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type)
    """

    pred_inds_np = im_inds.cpu().numpy()
    pred_boxes_np = rpn_rois.cpu().numpy()
    pred_boxlabels_np = roi_gtlabels.cpu().numpy()
    gt_boxes_np = gt_boxes.cpu().numpy()
    gt_classes_np = gt_classes.cpu().numpy()
    gt_rels_np = gt_rels.cpu().numpy()

    gt_classes_np[:, 0] -= image_offset
    gt_rels_np[:, 0] -= image_offset

    num_im = int(gt_classes_np[:, 0].max() + 1)

    num_fg = int(REL_FG_FRACTION * 2048 * num_im)
    num_rels = int(2048 * num_im)

    # print("Pred inds {} pred boxes {} pred box labels {} gt classes {} gt rels {}".format(
    #     pred_inds_np, pred_boxes_np, pred_boxlabels_np, gt_classes_np, gt_rels_np
    # ))

    fg_rels = []
    num_box_seen = 0
    bg_rels = []
    for im_ind in range(num_im):
        pred_ind = np.where(pred_inds_np == im_ind)[0]
        im_f_i = np.zeros(pred_ind.shape[0], dtype=np.float32)

        gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0]
        gt_boxes_i = gt_boxes_np[gt_ind]
        gt_classes_i = gt_classes_np[gt_ind, 1]
        gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:]

        pred_boxes_i = pred_boxes_np[pred_ind]
        pred_boxlabels_i = pred_boxlabels_np[pred_ind]

        pbi_iou = bbox_overlaps(pred_boxes_i, pred_boxes_i)

        rel_inds_i = np.stack(np.where((pbi_iou < 1) & (pbi_iou > 0)), -1)

        ious_i = bbox_overlaps(pred_boxes_i, gt_boxes_i)
        mask = (pred_boxlabels_i[:, None] == gt_classes_i[None, :]).astype(
            np.float32)

        min_ious_i = np.minimum(ious_i[rel_inds_i[:, 0], :][:, gt_rels_i[:, 0]], \
                                ious_i[rel_inds_i[:, 1], :][:, gt_rels_i[:, 1]]) * \
                                mask[rel_inds_i[:, 0], :][:, gt_rels_i[:, 0]] * \
                                mask[rel_inds_i[:, 1], :][:, gt_rels_i[:, 1]]

        all_rels_i = np.column_stack(
            (im_ind * np.ones(rel_inds_i.shape[0], dtype=np.int64),
             np.pad(rel_inds_i, ((0, 0), (0, 1)), 'constant')))
        all_rels_i[:, 1:3] += num_box_seen
        fg_inds_i = np.where(min_ious_i.max(-1) >= fg_thresh)[0]
        all_rels_i[fg_inds_i,
                   -1] = gt_rels_i[:, -1][min_ious_i.argmax(-1)[fg_inds_i]]
        fg_rels.append(all_rels_i[fg_inds_i])
        bg_rels.append(all_rels_i[np.where(all_rels_i[:, -1] == 0)[0]])

        num_box_seen += pred_boxes_i.shape[0]

    fg_rels = np.concatenate(fg_rels, 0)
    bg_rels = np.concatenate(bg_rels, 0)
    if fg_rels.shape[0] > num_fg:
        choice_ind = npr.choice(fg_rels.shape[0], num_fg, replace=False)
        fg_rels = fg_rels[choice_ind]
    num_bg = num_rels - fg_rels.shape[0]
    if num_bg > 0:
        if bg_rels.shape[0] > num_bg:
            # choice_ind = npr.choice(bg_rels.shape[0], num_bg, replace=False)
            choice_ind = torch.randperm(
                bg_rels.shape[0])[:num_bg].numpy().astype(np.int64)
            bg_rels = bg_rels[choice_ind]

        rel_labels = np.concatenate((fg_rels, bg_rels), 0)
    else:
        rel_labels = fg_rels

    rel_labels = torch.LongTensor(rel_labels).cuda(rpn_rois.get_device(),
                                                   non_blocking=True)

    return rel_labels
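For reference, a tiny worked example (not from the original code) of the sampling budget used above. REL_FG_FRACTION = 0.25 is an assumed value; the real one is defined elsewhere in the project configuration:

REL_FG_FRACTION = 0.25            # assumed value, for illustration only
num_im = 2                        # a 2-image batch
num_rels = 2048 * num_im          # 4096 relation labels kept per batch
num_fg = int(REL_FG_FRACTION * 2048 * num_im)   # at most 1024 foreground relations
num_bg = num_rels - num_fg        # at least 3072 slots left for background relations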
Esempio n. 19
0
def val_batch(batch_num, b, evaluator, thrs=(20, 50, 100)):
    det_res = detector[b]
    # if conf.num_gpus == 1:
    #     det_res = [det_res]
    assert conf.num_gpus == 1
    boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i = det_res

    gt_entry = {
        'gt_classes': val.gt_classes[batch_num].copy(),
        'gt_relations': val.relationships[batch_num].copy(),
        'gt_boxes': val.gt_boxes[batch_num].copy(),
    }
    # gt_entry = {'gt_classes': gtc[i], 'gt_relations': gtr[i], 'gt_boxes': gtb[i]}
    assert np.all(objs_i[rels_i[:, 0]] > 0) and np.all(
        objs_i[rels_i[:, 1]] > 0)
    # assert np.all(rels_i[:, 2] > 0)

    pred_entry = {
        'pred_boxes': boxes_i * BOX_SCALE / IM_SCALE,
        'pred_classes': objs_i,
        'pred_rel_inds': rels_i,
        'obj_scores': obj_scores_i,
        'rel_scores': pred_scores_i,
    }
    pred_to_gt, pred_5ples, rel_scores = evaluator[
        conf.mode].evaluate_scene_graph_entry(
            gt_entry,
            pred_entry,
            iou_thresh=0.5 if 'det' in conf.mode else 0.9)

    # SET RECALL THRESHOLD HERE
    pred_to_gt = pred_to_gt[:20]
    pred_5ples = pred_5ples[:20]

    # Get a list of predicted objects that match a GT object, and GT objects that don't
    objs_match = (bbox_overlaps(pred_entry['pred_boxes'], gt_entry['gt_boxes'])
                  >= 0.5) & (objs_i[:, None] == gt_entry['gt_classes'][None])
    objs_matched = objs_match.any(1)

    has_seen = defaultdict(int)
    has_seen_gt = defaultdict(int)
    pred_ind2name = {}
    gt_ind2name = {}
    edges = {}
    missededges = {}
    badedges = {}

    if val.filenames[batch_num].startswith('2343676'):
        import ipdb
        ipdb.set_trace()

    def query_pred(pred_ind):
        if pred_ind not in pred_ind2name:
            has_seen[objs_i[pred_ind]] += 1
            pred_ind2name[pred_ind] = '{}-{}'.format(
                train.ind_to_classes[objs_i[pred_ind]],
                has_seen[objs_i[pred_ind]])
        return pred_ind2name[pred_ind]

    def query_gt(gt_ind):
        gt_cls = gt_entry['gt_classes'][gt_ind]
        if gt_ind not in gt_ind2name:
            has_seen_gt[gt_cls] += 1
            gt_ind2name[gt_ind] = '{}-GT{}'.format(
                train.ind_to_classes[gt_cls], has_seen_gt[gt_cls])
        return gt_ind2name[gt_ind]

    matching_pred5ples = pred_5ples[np.array([len(x) > 0 for x in pred_to_gt])]
    for fiveple in matching_pred5ples:
        head_name = query_pred(fiveple[0])
        tail_name = query_pred(fiveple[1])

        edges[(head_name, tail_name)] = train.ind_to_predicates[fiveple[4]]

    gt_5ples = np.column_stack((
        gt_entry['gt_relations'][:, :2],
        gt_entry['gt_classes'][gt_entry['gt_relations'][:, 0]],
        gt_entry['gt_classes'][gt_entry['gt_relations'][:, 1]],
        gt_entry['gt_relations'][:, 2],
    ))
    has_match = reduce(np.union1d, pred_to_gt)
    for gt in gt_5ples[np.setdiff1d(np.arange(gt_5ples.shape[0]), has_match)]:
        # Head and tail
        namez = []
        # missed, misclassified, or not in the top-k.
        for i in range(2):
            matching_obj = np.where(objs_match[:, gt[i]])[0]
            if matching_obj.size > 0:
                name = query_pred(matching_obj[0])
            else:
                name = query_gt(gt[i])
            namez.append(name)

        missededges[tuple(namez)] = train.ind_to_predicates[gt[4]]

    for fiveple in pred_5ples[np.array([len(x) == 0 for x in pred_to_gt])]:
        # both objects are in the predicted scene graph but the edge is wrong or missing.
        if fiveple[0] in pred_ind2name:
            if fiveple[1] in pred_ind2name:
                badedges[(pred_ind2name[fiveple[0]], pred_ind2name[fiveple[1]]
                          )] = train.ind_to_predicates[fiveple[4]]

    theimg = load_unscaled(val.filenames[batch_num])
    theimg2 = theimg.copy()
    draw2 = ImageDraw.Draw(theimg2)

    # Fix the names

    for pred_ind in pred_ind2name.keys():
        draw2 = draw_box(draw2,
                         pred_entry['pred_boxes'][pred_ind],
                         cls_ind=objs_i[pred_ind],
                         text_str=pred_ind2name[pred_ind])
    for gt_ind in gt_ind2name.keys():
        draw2 = draw_box(draw2,
                         gt_entry['gt_boxes'][gt_ind],
                         cls_ind=gt_entry['gt_classes'][gt_ind],
                         text_str=gt_ind2name[gt_ind])

    recall = int(100 * len(reduce(np.union1d, pred_to_gt)) /
                 gt_entry['gt_relations'].shape[0])

    id = '{}-{}'.format(val.filenames[batch_num].split('/')[-1][:-4], recall)
    containname = os.path.join(conf.save_dir,
                               conf.mode + '-qualitative-top' + str(20))
    if not os.path.exists(containname):
        os.mkdir(containname)
    pathname = os.path.join(containname, id)
    if not os.path.exists(pathname):
        os.mkdir(pathname)
    # theimg.save(os.path.join(pathname, 'img.jpg'), quality=100, subsampling=0)
    theimg2.save(os.path.join(pathname, 'imgbox.jpg'),
                 quality=100,
                 subsampling=0)

    with open(os.path.join(pathname, 'shit.txt'), 'w') as f:
        pred_objs = ' ,'.join(
            [train.ind_to_classes[i] for i in pred_entry['pred_classes']])
        gt_objs = ' ,'.join(
            [train.ind_to_classes[i] for i in gt_entry['gt_classes']])
        f.write('pred objs:\n' + pred_objs + '\n')
        f.write('gt objs:\n' + gt_objs + '\n')
        f.write('good:\n')
        for (o1, o2), p in edges.items():
            f.write('{} - {} - {}\n'.format(o1, p, o2))
        f.write('fn:\n')
        for (o1, o2), p in missededges.items():
            f.write('{} - {} - {}\n'.format(o1, p, o2))
        f.write('shit:\n')
        for (o1, o2), p in badedges.items():
            f.write('{} - {} - {}\n'.format(o1, p, o2))
    pass
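The recall value embedded in the output directory name above is simply the fraction of GT relations hit by any of the kept top-k predictions; a small standalone illustration with made-up matches:

import numpy as np
from functools import reduce

pred_to_gt = [[0], [], [2, 3], [0]]   # per-prediction lists of matched GT relation indices
num_gt_relations = 5
matched = reduce(np.union1d, pred_to_gt)             # unique GT relations recovered: [0, 2, 3]
recall = int(100 * len(matched) / num_gt_relations)  # 60, i.e. Recall@k as a percentage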
Esempio n. 20
0
    def forward(self, x, im_sizes, image_offset,
                gt_boxes=None, gt_classes=None, gt_rels=None, proposals=None, train_anchor_inds=None,
                return_fmap=False):
        """
        Forward pass for detection
        :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
        :param im_sizes: A numpy array of (h, w, scale) for each image.
        :param image_offset: Offset onto what image we're on for MGPU training (if single GPU this is 0)
        :param gt_boxes:

        Training parameters:
        :param gt_boxes: [num_gt, 4] GT boxes over the batch.
        :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
        :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that will
                                  be used to compute the training loss. Each (img_ind, fpn_idx)
        :return: If train:
            scores, boxdeltas, labels, boxes, boxtargets, rpnscores, rpnboxes, rellabels
            
            if test:
            prob dists, boxes, img inds, maxscores, classes
            
        """

        # Detector
        result = self.detector(x, im_sizes, image_offset, gt_boxes, gt_classes, gt_rels, proposals,
                               train_anchor_inds, return_fmap=True)
        if result.is_none():
            raise ValueError("heck")
        im_inds = result.im_inds - image_offset
        # boxes: [#boxes, 4] box priors without deltas applied; this is where the narrow() backward error can come from, so a .detach() may be needed
        boxes = result.rm_box_priors    # .detach()   

        if self.training and result.rel_labels is None:
            assert self.mode == 'sgdet'  # in sgcls mode, result.rel_labels comes from GT and is never None
            # rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type)
            result.rel_labels = rel_assignments(im_inds.data, boxes.data, result.rm_obj_labels.data,
                                                gt_boxes.data, gt_classes.data, gt_rels.data,
                                                image_offset, filter_non_overlap=True,
                                                num_sample_per_gt=1)

        #torch.cat((result.rel_labels[:,0].contiguous().view(rel_inds.size(0),1),result.rm_obj_labels[result.rel_labels[:,1]].view(rel_inds.size(0),1),result.rm_obj_labels[result.rel_labels[:,2]].view(rel_inds.size(0),1),result.rel_labels[:,3].contiguous().view(rel_inds.size(0),1)),-1)
        #bbox_overlaps(boxes.data[55:57].contiguous().view(-1,1), boxes.data[8].contiguous().view(-1,1))
        rel_inds = self.get_rel_inds(result.rel_labels, im_inds, boxes)  #[275,3], [im_inds, box1_inds, box2_inds]
        
        # rois: [#boxes, 5]
        rois = torch.cat((im_inds[:, None].float(), boxes), 1)
        # result.rm_obj_fmap: [384, 4096]
        result.rm_obj_fmap = self.obj_feature_map(result.fmap.detach(), rois)  # detach: prevent gradients from flowing back into the feature map

        # BiLSTM
        result.rm_obj_dists, result.rm_obj_preds, edge_ctx = self.context(
            result.rm_obj_fmap,   # has been detached above
            # rm_obj_dists: [#boxes, 151]; Prevent gradients from flowing back into score_fc from elsewhere
            result.rm_obj_dists.detach(),  # .detach:Returns a new Variable, detached from the current graph
            im_inds, result.rm_obj_labels if self.training or self.mode == 'predcls' else None,
            boxes.data, result.boxes_all)
        

        # Post Processing
        # nl_edge <= 0
        if edge_ctx is None:
            edge_rep = self.post_emb(result.rm_obj_preds)
        # nl_edge > 0
        else: 
            edge_rep = self.post_lstm(edge_ctx)  # [384, 4096*2]
     
        # Split into subject and object representations
        edge_rep = edge_rep.view(edge_rep.size(0), 2, self.pooling_dim)  #[384,2,4096]
        subj_rep = edge_rep[:, 0]  # [384,4096]
        obj_rep = edge_rep[:, 1]  # [384,4096]
        prod_rep = subj_rep[rel_inds[:, 1]] * obj_rep[rel_inds[:, 2]]  # prod_rep, rel_inds: [275,4096], [275,3]
    

        if self.use_vision: # True when sgdet
            # union rois: fmap.detach--RoIAlignFunction--roifmap--vr [275,4096]
            vr = self.visual_rep(result.fmap.detach(), rois, rel_inds[:, 1:])

            if self.limit_vision:  # False when sgdet
                # exact value TBD
                prod_rep = torch.cat((prod_rep[:,:2048] * vr[:,:2048], prod_rep[:,2048:]), 1) 
            else:
                prod_rep = prod_rep * vr  # [275,4096]


        if self.use_tanh:  # False when sgdet
            prod_rep = F.tanh(prod_rep)

        result.rel_dists = self.rel_compress(prod_rep)  # [275,51]

        if self.use_bias:  # True when sgdet
            result.rel_dists = result.rel_dists + self.freq_bias.index_with_labels(torch.stack((
                result.rm_obj_preds[rel_inds[:, 1]],
                result.rm_obj_preds[rel_inds[:, 2]],
            ), 1))

        # Note: positives should use rm_obj_labels/rel_labels for obj/rel scores; negatives should use rm_obj_preds/max_rel_score
        if self.training: 
            judge = result.rel_labels.data[:,3] != 0
            if judge.sum() != 0:  # at least one gt_rel exists in rel_inds
                # positive overall score
                select_rel_inds = torch.arange(rel_inds.size(0)).view(-1,1).long().cuda()[result.rel_labels.data[:,3] != 0]
                com_rel_inds = rel_inds[select_rel_inds]
                twod_inds = arange(result.rm_obj_labels.data) * self.num_classes + result.rm_obj_labels.data  # dist: [-10,10]
                result.obj_scores = F.softmax(result.rm_obj_dists, dim=1).view(-1)[twod_inds]   # only ~1/4 of the 384 obj_dists get updated, since only ~1/4 of the object labels are non-zero
              
                obj_scores0 = result.obj_scores[com_rel_inds[:,1]]
                obj_scores1 = result.obj_scores[com_rel_inds[:,2]]
                rel_rep = F.softmax(result.rel_dists[select_rel_inds], dim=1)    # result.rel_dists has grad
                rel_score = rel_rep.gather(1, result.rel_labels[select_rel_inds][:,3].contiguous().view(-1,1)).view(-1)  # flatten with view(-1) rather than squeeze()
                prob_score = rel_score * obj_scores0 * obj_scores1

                # negative overall score
                rel_cands = im_inds.data[:, None] == im_inds.data[None]
                rel_cands.view(-1)[diagonal_inds(rel_cands)] = 0   # self relation = 0
                if self.require_overlap:     
                    rel_cands = rel_cands & (bbox_overlaps(boxes.data, boxes.data) > 0)   # Require overlap for detection
                rel_cands = rel_cands.nonzero()  # [#, 2]
                if rel_cands.dim() == 0:
                    print("rel_cands.dim() == 0!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
                    rel_cands = im_inds.data.new(1, 2).fill_(0) # shaped: [1,2], [0, 0]
                rel_cands = torch.cat((im_inds.data[rel_cands[:, 0]][:, None], rel_cands), 1) # rel_cands' value should be [0, 384]
                rel_inds_neg = rel_cands

                vr_neg = self.visual_rep(result.fmap.detach(), rois, rel_inds_neg[:, 1:])
                subj_obj = subj_rep[rel_inds_neg[:, 1]] * obj_rep[rel_inds_neg[:, 2]]
                prod_rep_neg =  subj_obj * vr_neg
                rel_dists_neg = self.rel_compress(prod_rep_neg)
                all_rel_rep_neg = F.softmax(rel_dists_neg, dim=1)
                _, pred_classes_argmax_neg = all_rel_rep_neg.data[:,1:].max(1)
                pred_classes_argmax_neg = pred_classes_argmax_neg + 1
                all_rel_pred_neg = torch.cat((rel_inds_neg, pred_classes_argmax_neg.view(-1,1)), 1)
                ind_old = torch.ones(all_rel_pred_neg.size(0)).byte().cuda()
                for i in range(com_rel_inds.size(0)):    # drop box pairs whose predicted triplet duplicates a positive triplet
                    ind_i = (all_rel_pred_neg[:,0] == com_rel_inds[i, 0]) & (all_rel_pred_neg[:,1] == com_rel_inds[i, 1]) & (result.rm_obj_preds.data[all_rel_pred_neg[:,1]] == result.rm_obj_labels.data[com_rel_inds[i, 1]]) & (all_rel_pred_neg[:,2] == com_rel_inds[i, 2]) & (result.rm_obj_preds.data[all_rel_pred_neg[:,2]] == result.rm_obj_labels.data[com_rel_inds[i, 2]]) & (all_rel_pred_neg[:,3] == result.rel_labels.data[select_rel_inds][i,3]) 
                    ind_i = (1 - ind_i).byte()
                    ind_old = ind_i & ind_old

                rel_inds_neg = rel_inds_neg.masked_select(ind_old.view(-1,1).expand(-1,3) == 1).view(-1,3)
                rel_rep_neg = all_rel_rep_neg.masked_select(Variable(ind_old.view(-1,1).expand(-1,51)) == 1).view(-1,51)
                pred_classes_argmax_neg = pred_classes_argmax_neg.view(-1,1)[ind_old.view(-1,1) == 1]
                rel_labels_pred_neg = all_rel_pred_neg.masked_select(ind_old.view(-1,1).expand(-1,4) == 1).view(-1,4)

                max_rel_score_neg = rel_rep_neg.gather(1, Variable(pred_classes_argmax_neg.view(-1,1))).view(-1)  # not use squeeze()
                twod_inds_neg = arange(result.rm_obj_preds.data) * self.num_classes + result.rm_obj_preds.data
                obj_scores_neg = F.softmax(result.rm_obj_dists, dim=1).view(-1)[twod_inds_neg] 
                obj_scores0_neg = Variable(obj_scores_neg.data[rel_inds_neg[:,1]])
                obj_scores1_neg = Variable(obj_scores_neg.data[rel_inds_neg[:,2]])
                all_score_neg = max_rel_score_neg * obj_scores0_neg * obj_scores1_neg
                # keep only negatives whose score exceeds the lowest positive score
                prob_score_neg = all_score_neg[all_score_neg.data > prob_score.data.min()] if (all_score_neg.data > prob_score.data.min()).sum() != 0 else all_score_neg


                # use all rel_inds; they no longer depend on im_inds, which is only used to extract regions from the image and produce rel_inds
                # 384 boxes --(rel_inds / rel_inds_neg)--> prob_score, prob_score_neg
                flag = torch.cat((torch.ones(prob_score.size(0),1).cuda(),torch.zeros(prob_score_neg.size(0),1).cuda()),0)
                all_prob = torch.cat((prob_score,prob_score_neg), 0)  # Variable, [#pos_inds+#neg_inds, 1]

                _, sort_prob_inds = torch.sort(all_prob.data, dim=0, descending=True)

                sorted_flag = flag[sort_prob_inds].view(-1)  # can be used to check distribution of pos and neg
                sorted_all_prob = all_prob[sort_prob_inds]  # Variable
                
                # positive triplet score
                pos_exp = sorted_all_prob[sorted_flag == 1]  # Variable 
                # negative triplet score
                neg_exp = sorted_all_prob[sorted_flag == 0]  # Variable

                # determine how many rows will be updated in rel_dists_neg
                pos_repeat = torch.zeros(1, 1)
                neg_repeat = torch.zeros(1, 1)
                for i in range(pos_exp.size(0)):
                    if ( neg_exp.data > pos_exp.data[i] ).sum() != 0:
                        int_part = (neg_exp.data > pos_exp.data[i]).sum()
                        temp_pos_inds = torch.ones(int_part) * i
                        pos_repeat =  torch.cat((pos_repeat, temp_pos_inds.view(-1,1)), 0)
                        temp_neg_inds = torch.arange(int_part)
                        neg_repeat = torch.cat((neg_repeat, temp_neg_inds.view(-1,1)), 0)
                    else:
                        temp_pos_inds = torch.ones(1)* i
                        pos_repeat =  torch.cat((pos_repeat, temp_pos_inds.view(-1,1)), 0)
                        temp_neg_inds = torch.arange(1)
                        neg_repeat = torch.cat((neg_repeat, temp_neg_inds.view(-1,1)), 0)

                """
                int_part = neg_exp.size(0) // pos_exp.size(0)
                decimal_part = neg_exp.size(0) % pos_exp.size(0)
                int_inds = torch.arange(pos_exp.size(0))[:,None].expand_as(torch.Tensor(pos_exp.size(0), int_part)).contiguous().view(-1)
                int_part_inds = (int(pos_exp.size(0) -1) - int_inds).long().cuda() # use minimum pos to correspond maximum negative
                if decimal_part == 0:
                    expand_inds = int_part_inds
                else:
                    expand_inds = torch.cat((torch.arange(pos_exp.size(0))[(pos_exp.size(0) - decimal_part):].long().cuda(), int_part_inds), 0)  
                
                result.pos = pos_exp[expand_inds]
                result.neg = neg_exp
                result.anchor = Variable(torch.zeros(result.pos.size(0)).cuda())
                """
                result.pos = pos_exp[pos_repeat.cuda().long().view(-1)]
                result.neg = neg_exp[neg_repeat.cuda().long().view(-1)]
                result.anchor = Variable(torch.zeros(result.pos.size(0)).cuda())
                

                result.ratio = torch.ones(3).cuda()
                result.ratio[0] = result.ratio[0] * (sorted_flag.nonzero().min() / (prob_score.size(0) + all_score_neg.size(0)))
                result.ratio[1] = result.ratio[1] * (sorted_flag.nonzero().max() / (prob_score.size(0) + all_score_neg.size(0)))
                result.ratio[2] = result.ratio[2] * (prob_score.size(0) + all_score_neg.size(0))

                return result

            else:  # no gt_rel in rel_inds
                print("no gt_rel in rel_inds!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
                ipdb.set_trace()
                # testing triplet proposal
                rel_cands = im_inds.data[:, None] == im_inds.data[None]
                # self relation = 0
                rel_cands.view(-1)[diagonal_inds(rel_cands)] = 0
                # Require overlap for detection
                if self.require_overlap:
                    rel_cands = rel_cands & (bbox_overlaps(boxes.data, boxes.data) > 0)
                rel_cands = rel_cands.nonzero()
                if rel_cands.dim() == 0:
                    print("rel_cands.dim() == 0!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
                    rel_cands = im_inds.data.new(1, 2).fill_(0)
                rel_cands = torch.cat((im_inds.data[rel_cands[:, 0]][:, None], rel_cands), 1)
                rel_labels_neg = rel_cands
                rel_inds_neg = rel_cands

                twod_inds_neg = arange(result.rm_obj_preds.data) * self.num_classes + result.rm_obj_preds.data
                obj_scores_neg = F.softmax(result.rm_obj_dists, dim=1).view(-1)[twod_inds_neg]
                vr_neg = self.visual_rep(result.fmap.detach(), rois, rel_inds_neg[:, 1:])
                subj_obj = subj_rep[rel_inds_neg[:, 1]] * obj_rep[rel_inds_neg[:, 2]]
                prod_rep_neg = subj_obj * vr_neg
                rel_dists_neg = self.rel_compress(prod_rep_neg)
                # negative overall score
                obj_scores0_neg = Variable(obj_scores_neg.data[rel_inds_neg[:,1]])
                obj_scores1_neg = Variable(obj_scores_neg.data[rel_inds_neg[:,2]])
                rel_rep_neg = F.softmax(rel_dists_neg, dim=1)
                _, pred_classes_argmax_neg = rel_rep_neg.data[:,1:].max(1)
                pred_classes_argmax_neg = pred_classes_argmax_neg + 1

                max_rel_score_neg = rel_rep_neg.gather(1, Variable(pred_classes_argmax_neg.view(-1,1))).view(-1)  # not use squeeze()
                prob_score_neg = max_rel_score_neg * obj_scores0_neg * obj_scores1_neg

                result.pos = Variable(torch.zeros(prob_score_neg.size(0)).cuda())
                result.neg = prob_score_neg
                result.anchor = Variable(torch.zeros(prob_score_neg.size(0)).cuda())

                result.ratio = torch.ones(3,1).cuda()

                return result
        ###################### Testing ###########################

        # extract corresponding scores according to each box's predicted class
        twod_inds = arange(result.rm_obj_preds.data) * self.num_classes + result.rm_obj_preds.data
        result.obj_scores = F.softmax(result.rm_obj_dists, dim=1).view(-1)[twod_inds]   # [384]

        # Bbox regression
        if self.mode == 'sgdet':
            bboxes = result.boxes_all.view(-1, 4)[twod_inds].view(result.boxes_all.size(0), 4)
        else:
            # Boxes will get fixed by filter_dets function.
            bboxes = result.rm_box_priors

        rel_rep = F.softmax(result.rel_dists, dim=1)    # [275, 51]
        
        # sort product of obj1 * obj2 * rel
        return filter_dets(bboxes, result.obj_scores,
                           result.rm_obj_preds, rel_inds[:, 1:],
                           rel_rep)
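The result.pos / result.neg / result.anchor tensors assembled in the training branch look like the inputs of a margin-based ranking objective. A hedged sketch of one plausible consumer (the actual loss is defined outside this snippet, and margin=0.2 is an assumption):

import torch

ranking_loss = torch.nn.MarginRankingLoss(margin=0.2)   # margin value assumed

def triplet_ranking_loss(pos, neg):
    # target = +1 means the first argument (pos) should be ranked above the second (neg)
    target = torch.ones_like(pos)
    return ranking_loss(pos, neg, target)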
Esempio n. 21
0
def rel_assignments_sgdet(im_inds, rpn_rois, roi_gtlabels, gt_boxes, gt_classes, gt_rels, image_offset,
                    fg_thresh=0.5, num_sample_per_gt=4, filter_non_overlap=True):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.
    :param rpn_rois: [img_ind, x1, y1, x2, y2]
    :param gt_boxes:   [num_boxes, 4] array of x0, y0, x1, y1]
    :param gt_classes: [num_boxes, 2] array of [img_ind, class]
    :param gt_rels     [num_boxes, 4] array of [img_ind, box_0, box_1, rel type]
    :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
    :return:
        rois: [num_rois, 5]
        labels: [num_rois] array of labels
        bbox_targets [num_rois, 4] array of targets for the labels.
        rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type)
    """
    fg_rels_per_image = int(np.round(REL_FG_FRACTION * 64))

    pred_inds_np = im_inds.cpu().numpy()
    pred_boxes_np = rpn_rois.cpu().numpy()
    pred_boxlabels_np = roi_gtlabels.cpu().numpy()
    gt_boxes_np = gt_boxes.cpu().numpy()
    gt_classes_np = gt_classes.cpu().numpy()
    gt_rels_np = gt_rels.cpu().numpy()

    gt_classes_np[:, 0] -= image_offset
    gt_rels_np[:, 0] -= image_offset

    num_im = gt_classes_np[:, 0].max()+1

    # print("Pred inds {} pred boxes {} pred box labels {} gt classes {} gt rels {}".format(
    #     pred_inds_np, pred_boxes_np, pred_boxlabels_np, gt_classes_np, gt_rels_np
    # ))

    rel_labels = []
    num_box_seen = 0
    for im_ind in range(num_im):
        pred_ind = np.where(pred_inds_np == im_ind)[0]

        gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0]
        gt_boxes_i = gt_boxes_np[gt_ind]
        gt_classes_i = gt_classes_np[gt_ind, 1]
        gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:]

        # [num_pred, num_gt]
        pred_boxes_i = pred_boxes_np[pred_ind]
        pred_boxlabels_i = pred_boxlabels_np[pred_ind]

        ious = bbox_overlaps(pred_boxes_i, gt_boxes_i)
        is_match = (pred_boxlabels_i[:,None] == gt_classes_i[None]) & (ious >= fg_thresh)

        # FOR BG. Limit ourselves to only IOUs that overlap, but are not the exact same box
        pbi_iou = bbox_overlaps(pred_boxes_i, pred_boxes_i)
        if filter_non_overlap:
            rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0)
            rels_intersect = rel_possibilities
        else:
            rel_possibilities = np.ones((pred_boxes_i.shape[0], pred_boxes_i.shape[0]),
                                        dtype=np.int64) - np.eye(pred_boxes_i.shape[0],
                                                                 dtype=np.int64)
            rels_intersect = (pbi_iou < 1) & (pbi_iou > 0)

        # ONLY select relations between ground truth because otherwise we get useless data
        rel_possibilities[pred_boxlabels_i == 0] = 0
        rel_possibilities[:, pred_boxlabels_i == 0] = 0

        # Sample the GT relationships.
        fg_rels = []
        p_size = []
        for i, (from_gtind, to_gtind, rel_id) in enumerate(gt_rels_i):
            fg_rels_i = []
            fg_scores_i = []

            for from_ind in np.where(is_match[:, from_gtind])[0]:
                for to_ind in np.where(is_match[:, to_gtind])[0]:
                    if from_ind != to_ind:
                        fg_rels_i.append((from_ind, to_ind, rel_id))
                        fg_scores_i.append((ious[from_ind, from_gtind] * ious[to_ind, to_gtind]))
                        rel_possibilities[from_ind, to_ind] = 0
            if len(fg_rels_i) == 0:
                continue
            p = np.array(fg_scores_i)
            p = p / p.sum()
            p_size.append(p.shape[0])
            num_to_add = min(p.shape[0], num_sample_per_gt)
            for rel_to_add in npr.choice(p.shape[0], p=p, size=num_to_add, replace=False):
                fg_rels.append(fg_rels_i[rel_to_add])

        fg_rels = np.array(fg_rels, dtype=np.int64)
        if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image:
            fg_rels = fg_rels[npr.choice(fg_rels.shape[0], size=fg_rels_per_image, replace=False)]
        elif fg_rels.size == 0:
            fg_rels = np.zeros((0, 3), dtype=np.int64)

        bg_rels = np.column_stack(np.where(rel_possibilities))
        bg_rels = np.column_stack((bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64)))

        num_bg_rel = min(64 - fg_rels.shape[0], bg_rels.shape[0])
        if bg_rels.size > 0:
            # Sample 4x as many intersecting relationships as non-intersecting.
            # bg_rels_intersect = rels_intersect[bg_rels[:, 0], bg_rels[:, 1]]
            # p = bg_rels_intersect.astype(np.float32)
            # p[bg_rels_intersect == 0] = 0.2
            # p[bg_rels_intersect == 1] = 0.8
            # p /= p.sum()
            bg_rels = bg_rels[
                np.random.choice(bg_rels.shape[0],
                                 #p=p,
                                 size=num_bg_rel, replace=False)]
        else:
            bg_rels = np.zeros((0, 3), dtype=np.int64)

        if fg_rels.size == 0 and bg_rels.size == 0:
            # Just put something here
            bg_rels = np.array([[0, 0, 0]], dtype=np.int64)

        # print("GTR {} -> AR {} vs {}".format(gt_rels.shape, fg_rels.shape, bg_rels.shape))
        all_rels_i = np.concatenate((fg_rels, bg_rels), 0)
        all_rels_i[:,0:2] += num_box_seen

        all_rels_i = all_rels_i[np.lexsort((all_rels_i[:,1], all_rels_i[:,0]))]

        rel_labels.append(np.column_stack((
            im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64),
            all_rels_i,
        )))

        num_box_seen += pred_boxes_i.shape[0]
    rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(rpn_rois.get_device(),
                                                                      non_blocking=True)
    return rel_labels
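Each GT relation above is matched to proposal pairs that are sampled with probability proportional to the product of the subject and object IoUs. A tiny standalone illustration with made-up numbers:

import numpy as np
import numpy.random as npr

fg_scores_i = np.array([0.81, 0.36, 0.09])     # hypothetical IoU products for three candidate pairs
p = fg_scores_i / fg_scores_i.sum()            # sampling weights, proportional to the IoU product
num_sample_per_gt = 4                          # same default as the function above
num_to_add = min(p.shape[0], num_sample_per_gt)
picked = npr.choice(p.shape[0], p=p, size=num_to_add, replace=False)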
Esempio n. 22
0
def _sel_rels(ious,
              pred_boxes,
              pred_labels,
              gt_classes,
              gt_rels,
              fg_thresh=0.5,
              fg_rels_per_image=128,
              num_sample_per_gt=1,
              filter_non_overlap=True):
    """
    Selects the relations needed
    :param ious: [num_pred', num_gt]
    :param pred_boxes: [num_pred', num_gt]
    :param pred_labels: [num_pred']
    :param gt_classes: [num_gt]
    :param gt_rels: [num_gtrel, 3]
    :param fg_thresh: 
    :param fg_rels_per_image: 
    :return: new rels, [num_predrel, 3] where each is (pred_ind1, pred_ind2, predicate)
    """
    is_match = (ious >= fg_thresh) & (pred_labels[:, None]
                                      == gt_classes[None, :])

    pbi_iou = bbox_overlaps(pred_boxes, pred_boxes)

    # Limit ourselves to only IOUs that overlap, but are not the exact same box
    # since we duplicated stuff earlier.
    if filter_non_overlap:
        rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0)
        rels_intersect = rel_possibilities
    else:
        rel_possibilities = np.ones(
            (pred_labels.shape[0], pred_labels.shape[0]),
            dtype=np.int64) - np.eye(pred_labels.shape[0], dtype=np.int64)
        rels_intersect = (pbi_iou < 1) & (pbi_iou > 0)

    # ONLY select relations between ground truth because otherwise we get useless data
    rel_possibilities[pred_labels == 0] = 0
    rel_possibilities[:, pred_labels == 0] = 0

    # For each GT relationship, sample exactly 1 relationship.
    fg_rels = []
    p_size = []
    for i, (from_gtind, to_gtind, rel_id) in enumerate(gt_rels):
        fg_rels_i = []
        fg_scores_i = []

        for from_ind in np.where(is_match[:, from_gtind])[0]:
            for to_ind in np.where(is_match[:, to_gtind])[0]:
                if from_ind != to_ind:
                    fg_rels_i.append((from_ind, to_ind, rel_id))
                    fg_scores_i.append(
                        (ious[from_ind, from_gtind] * ious[to_ind, to_gtind]))
                    rel_possibilities[from_ind, to_ind] = 0
        if len(fg_rels_i) == 0:
            continue
        p = np.array(fg_scores_i)
        p = p / p.sum()
        p_size.append(p.shape[0])
        num_to_add = min(p.shape[0], num_sample_per_gt)
        for rel_to_add in npr.choice(p.shape[0],
                                     p=p,
                                     size=num_to_add,
                                     replace=False):
            fg_rels.append(fg_rels_i[rel_to_add])

    bg_rels = np.column_stack(np.where(rel_possibilities))
    bg_rels = np.column_stack(
        (bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64)))

    fg_rels = np.array(fg_rels, dtype=np.int64)
    if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image:
        fg_rels = fg_rels[npr.choice(fg_rels.shape[0],
                                     size=fg_rels_per_image,
                                     replace=False)]
        # print("{} scores for {} GT. max={} min={} BG rels {}".format(
        #     fg_rels_scores.shape[0], gt_rels.shape[0], fg_rels_scores.max(), fg_rels_scores.min(),
        #     bg_rels.shape))
    elif fg_rels.size == 0:
        fg_rels = np.zeros((0, 3), dtype=np.int64)

    num_bg_rel = min(RELS_PER_IMG - fg_rels.shape[0], bg_rels.shape[0])
    if bg_rels.size > 0:

        # Sample 4x as many intersecting relationships as non-intersecting.
        bg_rels_intersect = rels_intersect[bg_rels[:, 0], bg_rels[:, 1]]
        p = bg_rels_intersect.astype(np.float32)
        p[bg_rels_intersect == 0] = 0.2
        p[bg_rels_intersect == 1] = 0.8
        p /= p.sum()
        bg_rels = bg_rels[np.random.choice(bg_rels.shape[0],
                                           p=p,
                                           size=num_bg_rel,
                                           replace=False)]
    else:
        bg_rels = np.zeros((0, 3), dtype=np.int64)

    #print("GTR {} -> AR {} vs {}".format(gt_rels.shape, fg_rels.shape, bg_rels.shape))

    all_rels = np.concatenate((fg_rels, bg_rels), 0)

    # Sort primarily by the first box index, then by the second
    all_rels = all_rels[np.lexsort((all_rels[:, 1], all_rels[:, 0]))]
    return all_rels
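The final np.lexsort above orders the relations primarily by the first box index and then by the second (lexsort uses its last key as the primary one). A tiny standalone illustration:

import numpy as np

rels = np.array([[2, 1, 7],
                 [0, 3, 5],
                 [0, 1, 9]])
order = np.lexsort((rels[:, 1], rels[:, 0]))   # primary key: column 0, secondary: column 1
print(rels[order])
# [[0 1 9]
#  [0 3 5]
#  [2 1 7]]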
Esempio n. 23
0
    def forward(self, x, im_sizes, image_offset,
                gt_boxes=None, gt_classes=None, gt_rels=None, proposals=None, train_anchor_inds=None,
                return_fmap=False):
        """
        Forward pass for detection
        :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
        :param im_sizes: A numpy array of (h, w, scale) for each image.
        :param image_offset: Offset onto what image we're on for MGPU training (if single GPU this is 0)
        :param gt_boxes:

        Training parameters:
        :param gt_boxes: [num_gt, 4] GT boxes over the batch.
        :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
        :param proposals: things
        :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that will
                                  be used to compute the training loss. Each (img_ind, fpn_idx)
        :return: If train:
        """
        fmap = self.feature_map(x)

        # Get boxes from RPN
        rois, obj_labels, bbox_targets, rpn_scores, rpn_box_deltas, rel_labels = \
            self.get_boxes(fmap, im_sizes, image_offset, gt_boxes,
                           gt_classes, gt_rels, train_anchor_inds, proposals=proposals)

        # Now classify them
        obj_fmap = self.obj_feature_map(fmap, rois)
        od_obj_dists = self.score_fc(obj_fmap)
        od_box_deltas = self.bbox_fc(obj_fmap).view(
            -1, len(self.classes), 4) if self.mode != 'gtbox' else None

        od_box_priors = rois[:, 1:]

        if (not self.training and not self.mode == 'gtbox') or self.mode in ('proposals', 'refinerels'):
            nms_inds, nms_scores, nms_preds, nms_boxes_assign, nms_boxes, nms_imgs = self.nms_boxes(
                od_obj_dists,
                rois,
                od_box_deltas, im_sizes,
            )
            im_inds = nms_imgs + image_offset
            obj_dists = od_obj_dists[nms_inds]
            obj_fmap = obj_fmap[nms_inds]
            box_deltas = od_box_deltas[nms_inds]
            box_priors = nms_boxes[:, 0]

            if self.training and not self.mode == 'gtbox':
                # NOTE: If we're doing this during training, we need to assign labels here.
                pred_to_gtbox = bbox_overlaps(box_priors, gt_boxes).data
                pred_to_gtbox[im_inds.data[:, None] != gt_classes.data[None, :, 0]] = 0.0

                max_overlaps, argmax_overlaps = pred_to_gtbox.max(1)
                rm_obj_labels = gt_classes[:, 1][argmax_overlaps]
                rm_obj_labels[max_overlaps < 0.5] = 0
            else:
                rm_obj_labels = None
        else:
            im_inds = rois[:, 0].long().contiguous() + image_offset
            nms_scores = None
            nms_preds = None
            nms_boxes_assign = None
            nms_boxes = None
            box_priors = rois[:, 1:]
            rm_obj_labels = obj_labels
            box_deltas = od_box_deltas
            obj_dists = od_obj_dists

        return Result(
            od_obj_dists=od_obj_dists,
            rm_obj_dists=obj_dists,
            obj_scores=nms_scores,
            obj_preds=nms_preds,
            obj_fmap=obj_fmap,
            od_box_deltas=od_box_deltas,
            rm_box_deltas=box_deltas,
            od_box_targets=bbox_targets,
            rm_box_targets=bbox_targets,
            od_box_priors=od_box_priors,
            rm_box_priors=box_priors,
            boxes_assigned=nms_boxes_assign,
            boxes_all=nms_boxes,
            od_obj_labels=obj_labels,
            rm_obj_labels=rm_obj_labels,
            rpn_scores=rpn_scores,
            rpn_box_deltas=rpn_box_deltas,
            rel_labels=rel_labels,
            im_inds=im_inds,
            fmap=fmap if return_fmap else None,
        )
Esempio n. 24
0
def rel_assignments(im_inds,
                    rpn_rois,
                    roi_gtlabels,
                    roi_predscore,
                    gt_boxes,
                    gt_classes,
                    gt_rels,
                    image_offset,
                    fg_thresh=0.5,
                    num_sample_per_gt=4,
                    filter_non_overlap=True):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.
    :param rpn_rois: [img_ind, x1, y1, x2, y2]
    :param gt_boxes:   [num_boxes, 4] array of x0, y0, x1, y1]
    :param gt_classes: [num_boxes, 2] array of [img_ind, class]
    :param gt_rels     [num_boxes, 4] array of [img_ind, box_0, box_1, rel type]
    :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
    :return:
        rois: [num_rois, 5]
        labels: [num_rois] array of labels
        bbox_targets [num_rois, 4] array of targets for the labels.
        rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type)
    """
    if conf.sl_train:
        rels_per_image = RELS_PER_IMG_SGDET_SL
    elif conf.rl_train:
        rels_per_image = RELS_PER_IMG_SGDET_RL
    else:
        raise ValueError

    fg_rels_per_image = int(np.round(REL_FG_FRACTION * rels_per_image))

    pred_inds_np = im_inds.cpu().numpy()
    pred_boxes_np = rpn_rois.cpu().numpy()
    pred_boxlabels_np = roi_gtlabels.cpu().numpy()
    gt_boxes_np = gt_boxes.cpu().numpy()
    gt_classes_np = gt_classes.cpu().numpy()
    gt_rels_np = gt_rels.cpu().numpy()

    # test1_4: convert roi_predscore into a per-object score for the argmax (non-background) class
    roi_predscore_np = roi_predscore.cpu().numpy()
    norm_roi_predscore_np = np.exp(roi_predscore_np) / (
        np.exp(roi_predscore_np).sum(1)[:, None] + 1e-8)
    each_roi_predlabel = np.argmax(norm_roi_predscore_np[:, 1:], 1) + 1
    each_roi_predscore = norm_roi_predscore_np[
        np.arange(norm_roi_predscore_np.shape[0]), each_roi_predlabel]

    # extra add for new_visual_genome.py
    # gt_rels_np = rel_assign_dist2idx(gt_rels_np)
    gt_classes_np[:, 0] -= image_offset
    gt_rels_np[:, 0] -= image_offset

    num_im = gt_classes_np[:, 0].max() + 1

    # print("Pred inds {} pred boxes {} pred box labels {} gt classes {} gt rels {}".format(
    #     pred_inds_np, pred_boxes_np, pred_boxlabels_np, gt_classes_np, gt_rels_np
    # ))
    rel_labels = []
    num_box_seen = 0
    for im_ind in range(num_im):
        pred_ind = np.where(pred_inds_np == im_ind)[0]

        gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0]
        gt_boxes_i = gt_boxes_np[gt_ind]
        gt_classes_i = gt_classes_np[gt_ind, 1]
        gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:]

        # [num_pred, num_gt]
        pred_boxes_i = pred_boxes_np[pred_ind]
        pred_boxlabels_i = pred_boxlabels_np[pred_ind]

        # test1_4
        pred_score_i = each_roi_predscore[pred_ind]

        ious = bbox_overlaps(pred_boxes_i, gt_boxes_i)
        is_match = (pred_boxlabels_i[:, None]
                    == gt_classes_i[None]) & (ious >= fg_thresh)

        # FOR BG. Limit ourselves to only IOUs that overlap, but are not the exact same box
        pbi_iou = bbox_overlaps(pred_boxes_i, pred_boxes_i)
        if filter_non_overlap:
            rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0)
            rels_intersect = rel_possibilities
        else:
            rel_possibilities = np.ones(
                (pred_boxes_i.shape[0], pred_boxes_i.shape[0]),
                dtype=np.int64) - np.eye(pred_boxes_i.shape[0], dtype=np.int64)
            rels_intersect = (pbi_iou < 1) & (pbi_iou > 0)

        # extra: the two lines below are intentionally commented out
        # ONLY select relations between ground truth because otherwise we get useless data
        # rel_possibilities[pred_boxlabels_i == 0] = 0
        # rel_possibilities[:, pred_boxlabels_i == 0] = 0

        # Sample the GT relationships.
        fg_rels = []
        p_size = []
        for i, each_gt_rels_i in enumerate(gt_rels_i):
            from_gtind, to_gtind, rel_id = each_gt_rels_i[0], each_gt_rels_i[1], each_gt_rels_i[2:]
            fg_rels_i = []
            fg_scores_i = []

            for from_ind in np.where(is_match[:, from_gtind])[0]:
                for to_ind in np.where(is_match[:, to_gtind])[0]:
                    if from_ind != to_ind:
                        fg_rels_i.append(
                            np.concatenate(
                                (np.array([from_ind, to_ind]), rel_id), 0))
                        fg_scores_i.append((ious[from_ind, from_gtind] *
                                            ious[to_ind, to_gtind]))
                        rel_possibilities[from_ind, to_ind] = 0
            if len(fg_rels_i) == 0:
                continue

            p = np.array(fg_scores_i)
            p = p / p.sum()
            p_size.append(p.shape[0])
            num_to_add = min(p.shape[0], num_sample_per_gt)
            for rel_to_add in npr.choice(p.shape[0],
                                         p=p,
                                         size=num_to_add,
                                         replace=False):
                fg_rels.append(fg_rels_i[rel_to_add])

        if len(fg_rels) > 0:
            fg_rels = np.vstack(fg_rels)
            if fg_rels.shape[0] > fg_rels_per_image:
                fg_rels = fg_rels[npr.choice(fg_rels.shape[0],
                                             size=fg_rels_per_image,
                                             replace=False)]
        else:
            fg_rels = np.zeros((0, 53), dtype=np.int64)

        bg_rels = np.column_stack(np.where(rel_possibilities))
        bg_rels = np.column_stack(
            (bg_rels, np.ones(bg_rels.shape[0], dtype=np.int64),
             np.zeros((bg_rels.shape[0], 50), dtype=np.int64)))

        num_bg_rel = min(rels_per_image - fg_rels.shape[0], bg_rels.shape[0])

        if bg_rels.size > 0:
            # test1_2, test1_3
            # origin
            # bg_rels = bg_rels[
            #     np.random.choice(bg_rels.shape[0],
            #                      #p=p,
            #                      size=num_bg_rel, replace=False)]
            # test1_4
            sub_pred_score_i = pred_score_i[bg_rels[:, 0]]
            obj_pred_score_i = pred_score_i[bg_rels[:, 1]]
            bg_rels_idx_i = np.argsort(
                sub_pred_score_i *
                obj_pred_score_i)[-num_bg_rel:]  # largest scores sit at the tail of argsort
            bg_rels = bg_rels[bg_rels_idx_i]
        else:
            bg_rels = np.zeros((0, 53), dtype=np.int64)

        if fg_rels.size == 0 and bg_rels.size == 0:
            # Just put something here
            bg_rels = np.zeros((0, 53), dtype=np.int64)

        # print("GTR {} -> AR {} vs {}".format(gt_rels.shape, fg_rels.shape, bg_rels.shape))
        all_rels_i = np.concatenate((fg_rels, bg_rels), 0)
        all_rels_i[:, 0:2] += num_box_seen

        all_rels_i = all_rels_i[np.lexsort((all_rels_i[:, 1], all_rels_i[:, 0]))]

        rel_labels.append(
            np.column_stack((
                im_ind * np.ones(all_rels_i.shape[0], dtype=np.int64),
                all_rels_i,
            )))

        num_box_seen += pred_boxes_i.shape[0]

    rel_labels_np = np.concatenate(rel_labels, 0)

    # extra add for new_visual_genome.py
    # num_rel_labels = rel_labels_np.shape[0]
    # rel_labels_tail = np.zeros((num_rel_labels, 51))
    # rel_labels_tail[range(num_rel_labels), rel_labels_np[:, -1]] = 1
    # rel_labels_head = rel_labels_np[:, :3]
    # rel_labels_np = np.concatenate((rel_labels_head, rel_labels_tail), 1)

    rel_labels = torch.LongTensor(rel_labels_np).cuda(rpn_rois.get_device(),
                                                      non_blocking=True)
    return rel_labels
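As a side note, a numerically safer way to write the per-ROI softmax used near the top of this function (a sketch, not the project's utility) is to shift by the row maximum before exponentiating:

import numpy as np

def stable_softmax(scores):
    # subtract the per-row max so np.exp never overflows
    shifted = scores - scores.max(axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / exp.sum(axis=1, keepdims=True)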